mirror of
https://github.com/eclipse/upm.git
synced 2025-07-02 01:41:12 +03:00
encodings: Added check for non-8bit encodings in src tree.
This commit sanitizes source files for unicode encodings which cause failures in downstream flows (docgen, python2 module loading, etc...). * Removed explicit encodings from src files * Replaced 2 byte character encodings with ascii encodings: ± -> +/- ° -> deg “ -> " etc... * Added ctest to check src tree files for non-8bit encodings Signed-off-by: Noel Eck <noel.eck@intel.com>
This commit is contained in:
54
tests/check_file_encoding.py
Executable file
54
tests/check_file_encoding.py
Executable file
@ -0,0 +1,54 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import unittest
|
||||
import os
|
||||
import chardet
|
||||
|
||||
# File extensions scanned for encoding problems.
target_exts = [
    '.h',
    '.hpp',
    '.hxx',
    '.txt',
]

# Encodings considered safe for files in the source tree.
valid_encodings = ['ascii', 'utf-8']
|
||||
|
||||
class EncodingTests(unittest.TestCase):
    '''Non-ascii/utf-8 encodings can cause failures on downstream tools
    such as documentation-generation and python2 module loading.  This
    class helps find those files which could cause an encoding problem'''

    def test_headers_ascii(self):
        '''Assert/print list of:
        file:linenumber offending line
        for all lines of matching files which are not in valid_encodings'''
        # Keep a map of files with alternate encodings to report
        invalid_files = {}

        # Recursively search cwd for files with target_exts
        for root, dirs, files in os.walk(os.curdir):
            # Work on full paths
            for file in files:
                file = os.path.join(root, file)

                # Skip any files not ending with target_exts
                if not any(file.lower().endswith(x) for x in target_exts):
                    continue

                # Open in binary mode: chardet.detect() expects raw bytes,
                # and a text-mode read could itself raise a decode error on
                # the very non-8bit files this test is meant to report.
                with open(file, 'rb') as f:
                    for ndx, line in enumerate(f):
                        result = chardet.detect(line)
                        if result['encoding'] not in valid_encodings:
                            # dict.has_key() was removed in python3;
                            # setdefault works on both python2 and python3.
                            invalid_files.setdefault(file, []).append([ndx, line])

        # Sort the failures by filename
        skeys = list(invalid_files.keys())
        skeys.sort()
        invalid_lines = ''
        for fn in skeys:
            for line in invalid_files[fn]:
                invalid_lines += '%s:%d %s' % (fn, line[0], line[1])

        self.assertEqual(len(invalid_files), 0,
                         "\nThe following modules have alternate encodings:\n" +
                         invalid_lines)
|
||||
|
||||
# Allow running the encoding checks directly as a script (e.g. from ctest).
if __name__ == '__main__':
    unittest.main()
|
Reference in New Issue
Block a user