mirror of
https://github.com/eclipse/upm.git
synced 2025-03-15 13:07:40 +03:00

This commit sanitizes source files for unicode encodings which cause failures in downstream flows (docgen, python2 module loading, etc.). * Removed explicit encodings from src files * Replaced 2-byte character encodings with ASCII equivalents: ± -> +/-, ° -> deg, “ -> ", etc. * Added ctest to check src tree files for non-8bit encodings Signed-off-by: Noel Eck <noel.eck@intel.com>
55 lines
1.9 KiB
Python
Executable File
55 lines
1.9 KiB
Python
Executable File
#!/usr/bin/python
|
|
|
|
import unittest
|
|
import os
|
|
import chardet
|
|
|
|
# File-name suffixes of the files to check (names are lower-cased before
# being compared against these, so keep the entries lower-case)
target_exts = ['.h', '.hpp', '.hxx', '.txt']
# Encoding names, as reported by chardet, that are considered acceptable
valid_encodings = ['ascii', 'utf-8']
|
|
|
|
class EncodingTests(unittest.TestCase):
    '''Find source-tree files whose encoding could break downstream tools.

    Non-ascii/utf-8 encodings can cause failures on downstream tools
    such as documentation-generation and python2 module loading.  This
    class helps find those files which could cause an encoding problem.
    '''

    def test_headers_ascii(self):
        '''Fail if any line of a matching file is not in valid_encodings.

        Recursively walks the current working directory, runs chardet on
        every line of each file whose name ends with one of target_exts,
        and asserts that no line was detected as an encoding outside
        valid_encodings.  On failure the assertion message lists every
        offending line (line numbers start at 1) as:

            file:linenumber offending line
        '''
        # Map of file path -> [(line number, line text), ...] to report
        invalid_files = {}

        # str.endswith accepts a tuple, so a single call tests every suffix
        suffixes = tuple(target_exts)

        # Recursively search cwd for files with target_exts
        for root, _dirs, files in os.walk(os.curdir):
            for name in files:
                # Work on full paths
                path = os.path.join(root, name)

                # Skip any files not ending with target_exts
                if not path.lower().endswith(suffixes):
                    continue

                # Check each line with chardet.  The file is read as raw
                # bytes: chardet.detect operates on byte strings, and text
                # mode would decode (or fail to decode) the very bytes
                # whose encoding is being tested.
                with open(path, 'rb') as f:
                    for lineno, line in enumerate(f, 1):
                        result = chardet.detect(line)
                        if result['encoding'] in valid_encodings:
                            continue
                        # 'replace' keeps undecodable bytes from aborting
                        # the report that is meant to point them out
                        text = line.decode('utf-8', 'replace')
                        invalid_files.setdefault(path, []).append(
                            (lineno, text))

        # Build the report sorted by filename
        report = []
        for path in sorted(invalid_files):
            for lineno, text in invalid_files[path]:
                report.append('%s:%d %s' % (path, lineno, text))
        invalid_lines = ''.join(report)

        self.assertEqual(
            len(invalid_files), 0,
            "\nThe following modules have alternate encodings:\n" +
            invalid_lines)
|
|
|
|
# Run the encoding checks when this file is executed as a script
if __name__ == '__main__':
    unittest.main()
|