upm/tests/check_file_encoding.py
Noel Eck edd8df4c50 encodings: Added check for non-8bit encodings in src tree.
This commit sanitizes source files for unicode encodings which
cause failures in downstream flows (docgen, python2 module loading,
etc...).

    * Removed explicit encodings from src files
    * Replaced 2-byte character encodings with ASCII equivalents:
        ± -> +/-
        ° -> deg
        “ -> "
        etc...
    * Added ctest to check src tree files for non-8bit encodings

Signed-off-by: Noel Eck <noel.eck@intel.com>
2016-10-05 14:39:30 -07:00

55 lines
1.9 KiB
Python
Executable File

#!/usr/bin/python
import unittest
import os
import chardet
# File-name suffixes of the files to inspect (compared against the
# lower-cased path, so matching is case-insensitive)
target_exts = [
    '.h',
    '.hpp',
    '.hxx',
    '.txt',
]
# Encoding names, as reported by chardet, that are considered acceptable
valid_encodings = [
    'ascii',
    'utf-8',
]
class EncodingTests(unittest.TestCase):
    '''Find source files whose lines are not ascii/utf-8 encoded.

    Non-ascii/utf-8 encodings can cause failures on downstream tools
    such as documentation-generation and python2 module loading.  This
    class helps find those files which could cause an encoding problem.
    '''

    def test_headers_ascii(self):
        '''Assert that no matching file contains a line in another encoding.

        Recursively walks the current working directory, runs chardet on
        every line of each file whose path ends with one of target_exts,
        and collects the lines whose detected encoding is not listed in
        valid_encodings.  On failure the assertion message lists one

            file:linenumber offending line

        entry per offending line, sorted by file name.  Line numbers are
        1-based.
        '''
        # Map of file path -> list of (line number, raw line bytes)
        invalid_files = {}
        # str.endswith() accepts a tuple of suffixes; build it once
        suffixes = tuple(target_exts)

        # Recursively search cwd for files with target_exts
        for root, _dirs, files in os.walk(os.curdir):
            for name in files:
                # Work on full paths
                path = os.path.join(root, name)
                # Skip any files not ending with target_exts
                if not path.lower().endswith(suffixes):
                    continue
                # Read raw bytes: chardet inspects undecoded data, and
                # text mode could fail to decode the very lines this
                # test is meant to report.
                with open(path, 'rb') as f:
                    # Start at 1 so the report matches editor line numbers
                    for lineno, raw in enumerate(f, 1):
                        result = chardet.detect(raw)
                        if result['encoding'] not in valid_encodings:
                            invalid_files.setdefault(path, []).append(
                                (lineno, raw))

        # Build the report, sorted by filename
        report = []
        for path in sorted(invalid_files):
            for lineno, raw in invalid_files[path]:
                # Undecodable bytes are replaced so the message can always
                # be rendered; the line ending is normalized so entries
                # never run together.
                text = raw.decode('utf-8', 'replace').rstrip('\r\n')
                report.append('%s:%d %s\n' % (path, lineno, text))

        self.assertEqual(
            len(invalid_files), 0,
            "\nThe following modules have alternate encodings:\n" +
            ''.join(report))
# Run the tests in this file when it is executed as a script
if __name__ == "__main__":
    unittest.main()