upm/tests/check_file_encoding.py

#!/usr/bin/python

import unittest
import os
import chardet

# File-name suffixes (lower-case, including the dot) that select which files
# get checked; compared against the lower-cased path with str.endswith().
target_exts = ['.h', '.hpp', '.hxx', '.txt']
# Encoding names accepted by the test; compared against the 'encoding' entry
# of the result returned by chardet.detect().
valid_encodings = ['ascii', 'utf-8']

class EncodingTests(unittest.TestCase):
    '''Non-ascii/utf-8 encodings can cause failures on downstream tools
    such as documentation-generation and python2 module loading.  This
    class helps find those files which could cause an encoding problem'''

    def test_headers_ascii(self):
        '''Assert/print list of:
            file:linenumber offending line
        for all lines of matching files which are not in valid_encodings.

        Files are searched recursively from the current working directory.
        Line numbers are 1-based and the offending line is shown as an
        escaped byte string so that the failure message itself stays ASCII.
        '''
        # Map of file path -> list of (line number, raw line) offenders
        invalid_files = {}

        # Recursively search cwd for files with target_exts
        for root, _dirs, files in os.walk(os.curdir):
            for name in files:
                # Skip any files not ending with target_exts
                if not any(name.lower().endswith(x) for x in target_exts):
                    continue

                # Work on full paths
                path = os.path.join(root, name)

                # Read raw bytes: chardet inspects bytes, and decoding the
                # file as text could fail on exactly the lines being sought
                with open(path, 'rb') as f:
                    # Count from 1 so the report matches editor line numbers
                    for lineno, line in enumerate(f, 1):
                        result = chardet.detect(line)
                        if result['encoding'] not in valid_encodings:
                            invalid_files.setdefault(path, []).append(
                                (lineno, line))

        # Build the report sorted by filename; %r escapes the offending
        # bytes so they are visible and safe to print on any console
        invalid_lines = ''.join(
            '%s:%d %r\n' % (path, lineno, line.rstrip(b'\r\n'))
            for path in sorted(invalid_files)
            for lineno, line in invalid_files[path])

        self.assertEqual(len(invalid_files), 0,
                         "\nThe following modules have alternate encodings:\n"
                         + invalid_lines)

# Run the test case(s) in this module when the file is executed as a script
if __name__ == '__main__':
    unittest.main()
encodings: Added check for non-8bit encodings in src tree. This commit sanitizes source files for unicode encodings which cause failures in downstream flows (docgen, python2 module loading, etc...). * Removed explicit encodings from src files * Replaced 2 byte character encodings with ascii encodies: ± -> +/- ° -> deg “ -> " etc... * Added ctest to check src tree files for non-8bit encodings Signed-off-by: Noel Eck <noel.eck@intel.com> 2016-10-05 13:36:56 -07:00			`#!/usr/bin/python`

			`import unittest`
			`import os`
			`import chardet`

			`target_exts = ['.h', '.hpp', '.hxx', '.txt']`
			`valid_encodings = ['ascii', 'utf-8']`

			`class EncodingTests(unittest.TestCase):`
			`'''Non-ascii/utf-8 encodings can cause failures on downstream tools`
			`such as documentation-generation and python2 module loading. This`
			`class helps find those files which could cause an encoding problem'''`

			`def test_headers_ascii(self):`
			`'''Assert/print list of:`
			`file:linenumber offending line`
			`for all lines of matching files which are not in valid_encodings'''`
			`# Keep a map of files with alternate encodes to report`
			`invalid_files = {}`

			`# Recusively search cwd for files with target_exts`
			`for root, dirs, files in os.walk(os.curdir):`
			`# Work on full paths`
			`for file in files:`
			`file = os.path.join(root, file)`

			`# Skip any files not ending with target_exts`
			`if not any(file.lower().endswith(x) for x in target_exts):`
			`continue`

			`# Check each with chardet`
			`with open(file, 'r') as f:`
			`for ndx, line in enumerate(f):`
			`result = chardet.detect(line)`
			`if not result['encoding'] in valid_encodings:`
			`if not invalid_files.has_key(file):`
			`invalid_files[file] = []`
			`invalid_files[file].append([ndx,line])`

			`# Sort the failures by filename`
			`skeys = list(invalid_files.keys())`
			`skeys.sort()`
			`invalid_lines = ''`
			`for fn in skeys:`
			`for line in invalid_files[fn]:`
			`invalid_lines += '%s:%d %s' % (fn, line[0], line[1])`

			`self.assertEqual( len(invalid_files), 0,`
			`"\nThe following modules have alternate encodings:\n" + \`
			`invalid_lines)`

			`if __name__ == '__main__':`
			`unittest.main()`