From 0c465a075a1fa68ec517f588a454dca98f686b07 Mon Sep 17 00:00:00 2001 From: LucianaMarques Date: Fri, 3 Jan 2020 22:32:03 -0300 Subject: [PATCH 01/10] Add encoding detecting to modulefinder --- Lib/modulefinder.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py index e0d29984f862cf..1bb380a5cce56a 100644 --- a/Lib/modulefinder.py +++ b/Lib/modulefinder.py @@ -8,6 +8,7 @@ import sys import types import warnings +import tokenize LOAD_CONST = dis.opmap['LOAD_CONST'] @@ -93,7 +94,11 @@ def _find_module(name, path=None): else: # Should never happen. return None, None, ("", "", _SEARCH_ERROR) - file = open(file_path, mode) + if mode == 'r': + file = open(file_path) + else: + file = open(file_path, mode) + suffix = os.path.splitext(file_path)[-1] return file, file_path, (suffix, mode, kind) From 5681b7c47f45259e58448c3c94ce77eebdf3d2ec Mon Sep 17 00:00:00 2001 From: Luciana Marques Date: Tue, 7 Jan 2020 20:32:25 -0300 Subject: [PATCH 02/10] Add encoding test for modulefinder --- Lib/modulefinder.py | 2 +- Lib/test/test_modulefinder.py | 28 ++++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py index 1bb380a5cce56a..5fd97a598f324a 100644 --- a/Lib/modulefinder.py +++ b/Lib/modulefinder.py @@ -95,7 +95,7 @@ def _find_module(name, path=None): return None, None, ("", "", _SEARCH_ERROR) if mode == 'r': - file = open(file_path) + file = tokenize.open(file_path) else: file = open(file_path, mode) diff --git a/Lib/test/test_modulefinder.py b/Lib/test/test_modulefinder.py index ebd96e1c8a2dd0..080170a0c85f46 100644 --- a/Lib/test/test_modulefinder.py +++ b/Lib/test/test_modulefinder.py @@ -5,6 +5,7 @@ import shutil import unittest import tempfile +import tokenize from test import support @@ -272,7 +273,7 @@ def create_package(source): class ModuleFinderTest(unittest.TestCase): - def _do_test(self, info, report=False, debug=0, 
replace_paths=[]): + def _do_test(self, info, report=False, debug=0, replace_paths=[], encoding=False): import_this, modules, missing, maybe_missing, source = info create_package(source) try: @@ -299,6 +300,16 @@ def _do_test(self, info, report=False, debug=0, replace_paths=[]): bad, maybe = mf.any_missing_maybe() self.assertEqual(bad, missing) self.assertEqual(maybe, maybe_missing) + + # check for modules encoding + if encoding: + for module in mf.modules.items(): + module_name, module_object = module + file = open(module_object.__file__, 'rb') + encoding = tokenize.detect_encoding(file.readline)[0] + self.assertEqual(encoding, 'iso-8859-1') + file.close() + finally: shutil.rmtree(TEST_DIR) @@ -352,6 +363,20 @@ def test_replace_paths(self): expected = "co_filename %r changed to %r" % (old_path, new_path) self.assertIn(expected, output) + def test_encoding(self): + encoding_test = [ + "a", + ["a", "b"], + [], [], + """\ +a.py + # coding=latin-1 + import b +b.py + # coding=latin-1 +"""] + self._do_test(encoding_test, encoding=True) + def test_extended_opargs(self): extended_opargs_test = [ "a", @@ -365,6 +390,5 @@ def test_extended_opargs(self): """ % list(range(2**16))] # 2**16 constants self._do_test(extended_opargs_test) - if __name__ == "__main__": unittest.main() From 1ddca5675e69b15a10f0aed1f7f1eea92f0c965a Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2020 22:39:24 +0000 Subject: [PATCH 03/10] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../NEWS.d/next/Library/2020-01-06-22-39-23.bpo-39206.zKSTwb.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2020-01-06-22-39-23.bpo-39206.zKSTwb.rst diff --git a/Misc/NEWS.d/next/Library/2020-01-06-22-39-23.bpo-39206.zKSTwb.rst b/Misc/NEWS.d/next/Library/2020-01-06-22-39-23.bpo-39206.zKSTwb.rst new 
file mode 100644 index 00000000000000..79a33eb8d3ce4b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-01-06-22-39-23.bpo-39206.zKSTwb.rst @@ -0,0 +1 @@ +Add encoding detecting to _find_module() in modulefinder \ No newline at end of file From ee1571732a621f25b8e9478fbe4d3c89f17a6a23 Mon Sep 17 00:00:00 2001 From: Luciana Marques Date: Tue, 14 Jan 2020 12:42:39 -0300 Subject: [PATCH 04/10] Standardize file mode to rb --- Lib/modulefinder.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py index 5fd97a598f324a..06aed7aaf935c2 100644 --- a/Lib/modulefinder.py +++ b/Lib/modulefinder.py @@ -8,7 +8,6 @@ import sys import types import warnings -import tokenize LOAD_CONST = dis.opmap['LOAD_CONST'] @@ -81,27 +80,21 @@ def _find_module(name, path=None): if isinstance(spec.loader, importlib.machinery.SourceFileLoader): kind = _PY_SOURCE - mode = "r" elif isinstance(spec.loader, importlib.machinery.ExtensionFileLoader): kind = _C_EXTENSION - mode = "rb" elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader): kind = _PY_COMPILED - mode = "rb" else: # Should never happen. 
return None, None, ("", "", _SEARCH_ERROR) - if mode == 'r': - file = tokenize.open(file_path) - else: - file = open(file_path, mode) + file = open(file_path, 'rb') suffix = os.path.splitext(file_path)[-1] - return file, file_path, (suffix, mode, kind) + return file, file_path, (suffix, 'rb', kind) class Module: From df362ca22eabf84f26ab4aa76e71d004def8fead Mon Sep 17 00:00:00 2001 From: Luciana Marques Date: Tue, 14 Jan 2020 12:43:07 -0300 Subject: [PATCH 05/10] Remove final empty line to compile() --- Lib/modulefinder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py index 06aed7aaf935c2..57cf5ac1e0f293 100644 --- a/Lib/modulefinder.py +++ b/Lib/modulefinder.py @@ -338,7 +338,7 @@ def load_module(self, fqname, fp, pathname, file_info): self.msgout(2, "load_module ->", m) return m if type == _PY_SOURCE: - co = compile(fp.read()+'\n', pathname, 'exec') + co = compile(fp.read(), pathname, 'exec') elif type == _PY_COMPILED: try: data = fp.read() From 713d1a35361cf66a7b102612fa986a1431247d4c Mon Sep 17 00:00:00 2001 From: Luciana Marques Date: Tue, 14 Jan 2020 13:28:02 -0300 Subject: [PATCH 06/10] Update news file --- .../next/Library/2020-01-06-22-39-23.bpo-39206.zKSTwb.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2020-01-06-22-39-23.bpo-39206.zKSTwb.rst b/Misc/NEWS.d/next/Library/2020-01-06-22-39-23.bpo-39206.zKSTwb.rst index 79a33eb8d3ce4b..d3e7801e4e6a20 100644 --- a/Misc/NEWS.d/next/Library/2020-01-06-22-39-23.bpo-39206.zKSTwb.rst +++ b/Misc/NEWS.d/next/Library/2020-01-06-22-39-23.bpo-39206.zKSTwb.rst @@ -1 +1 @@ -Add encoding detecting to _find_module() in modulefinder \ No newline at end of file +Fix :mod:`modulefinder` to handle :pep:`263` encoding markers - by Luciana Marques From 2b594a97ee25a208bd5e84dbc5bdea269fc53428 Mon Sep 17 00:00:00 2001 From: Luciana Marques Date: Tue, 14 Jan 2020 14:02:59 -0300 Subject: [PATCH 07/10] Update encoding_test 
--- Lib/test/test_modulefinder.py | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/Lib/test/test_modulefinder.py b/Lib/test/test_modulefinder.py index 080170a0c85f46..239af7f55e31f2 100644 --- a/Lib/test/test_modulefinder.py +++ b/Lib/test/test_modulefinder.py @@ -273,7 +273,7 @@ def create_package(source): class ModuleFinderTest(unittest.TestCase): - def _do_test(self, info, report=False, debug=0, replace_paths=[], encoding=False): + def _do_test(self, info, report=False, debug=0, replace_paths=[]): import_this, modules, missing, maybe_missing, source = info create_package(source) try: @@ -301,15 +301,6 @@ def _do_test(self, info, report=False, debug=0, replace_paths=[], encoding=False self.assertEqual(bad, missing) self.assertEqual(maybe, maybe_missing) - # check for modules encoding - if encoding: - for module in mf.modules.items(): - module_name, module_object = module - file = open(module_object.__file__, 'rb') - encoding = tokenize.detect_encoding(file.readline)[0] - self.assertEqual(encoding, 'iso-8859-1') - file.close() - finally: shutil.rmtree(TEST_DIR) @@ -364,18 +355,15 @@ def test_replace_paths(self): self.assertIn(expected, output) def test_encoding(self): - encoding_test = [ - "a", - ["a", "b"], - [], [], - """\ -a.py - # coding=latin-1 - import b -b.py - # coding=latin-1 -"""] - self._do_test(encoding_test, encoding=True) + finder = modulefinder.ModuleFinder() + with open('f.py', 'w', encoding='cp1252') as f: + f.write('import b\nx = "€"\b') + finder.run_script(f.name) + for name, mod in finder.modules.items(): + file = open(mod.__file__, 'rb') + encoding = tokenize.detect_encoding(file.readline)[0] + self.assertEqual(encoding, 'cp1252') + file.close() def test_extended_opargs(self): extended_opargs_test = [ From 294b14670c8b1041f5092d3f24ce0b601b122790 Mon Sep 17 00:00:00 2001 From: Luciana Marques Date: Tue, 14 Jan 2020 15:47:27 -0300 Subject: [PATCH 08/10] Exclude import of tokenize --- 
Lib/test/test_modulefinder.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/test/test_modulefinder.py b/Lib/test/test_modulefinder.py index 239af7f55e31f2..06ebca2bc8a99a 100644 --- a/Lib/test/test_modulefinder.py +++ b/Lib/test/test_modulefinder.py @@ -5,7 +5,6 @@ import shutil import unittest import tempfile -import tokenize from test import support From 4b12ad8ae0eecdf9f1de79b620d0433396e47d7f Mon Sep 17 00:00:00 2001 From: Luciana Marques Date: Tue, 14 Jan 2020 15:53:41 -0300 Subject: [PATCH 09/10] Rewrite test_encoding --- Lib/test/test_modulefinder.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_modulefinder.py b/Lib/test/test_modulefinder.py index 06ebca2bc8a99a..f24d58604c6560 100644 --- a/Lib/test/test_modulefinder.py +++ b/Lib/test/test_modulefinder.py @@ -354,15 +354,10 @@ def test_replace_paths(self): self.assertIn(expected, output) def test_encoding(self): - finder = modulefinder.ModuleFinder() - with open('f.py', 'w', encoding='cp1252') as f: - f.write('import b\nx = "€"\b') - finder.run_script(f.name) - for name, mod in finder.modules.items(): - file = open(mod.__file__, 'rb') - encoding = tokenize.detect_encoding(file.readline)[0] - self.assertEqual(encoding, 'cp1252') - file.close() + os.makedirs(TEST_DIR, exist_ok=True) + with open(os.path.join(TEST_DIR,'a.py'), 'w', encoding='cp1252') as f: + f.write('# -*- coding: cp1252 -*-\nx = "€"\n') + self._do_test(["a", ["a"], [], [], ""]) def test_extended_opargs(self): extended_opargs_test = [ From 0adc639a06291cb52844014db162d3293a5977d3 Mon Sep 17 00:00:00 2001 From: Luciana Marques Date: Tue, 14 Jan 2020 15:59:20 -0300 Subject: [PATCH 10/10] Fix added and excluded blank lines --- Lib/test/test_modulefinder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_modulefinder.py b/Lib/test/test_modulefinder.py index f24d58604c6560..27f619b2b2c941 100644 --- a/Lib/test/test_modulefinder.py +++ 
b/Lib/test/test_modulefinder.py @@ -299,7 +299,6 @@ def _do_test(self, info, report=False, debug=0, replace_paths=[]): bad, maybe = mf.any_missing_maybe() self.assertEqual(bad, missing) self.assertEqual(maybe, maybe_missing) - finally: shutil.rmtree(TEST_DIR) @@ -372,5 +371,6 @@ def test_extended_opargs(self): """ % list(range(2**16))] # 2**16 constants self._do_test(extended_opargs_test) + if __name__ == "__main__": unittest.main()