python · miss-islington · Apr 14, 2020 · Apr 14, 2020
diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py
@@ -5,6 +5,7 @@
 import importlib.machinery
 import marshal
 import os
+import io
 import sys
 import types
 import warnings
@@ -68,35 +69,32 @@ def _find_module(name, path=None):
     # Some special cases:
 
     if spec.loader is importlib.machinery.BuiltinImporter:
-        return None, None, ("", "", _C_BUILTIN)
+        return None, None, ("", _C_BUILTIN)
 
     if spec.loader is importlib.machinery.FrozenImporter:
-        return None, None, ("", "", _PY_FROZEN)
+        return None, None, ("", _PY_FROZEN)
 
     file_path = spec.origin
 
     if spec.loader.is_package(name):
-        return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY)
+        return None, os.path.dirname(file_path), ("", _PKG_DIRECTORY)
 
     if isinstance(spec.loader, importlib.machinery.SourceFileLoader):
         kind = _PY_SOURCE
-        mode = "r"
 
     elif isinstance(spec.loader, importlib.machinery.ExtensionFileLoader):
         kind = _C_EXTENSION
-        mode = "rb"
 
     elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader):
         kind = _PY_COMPILED
-        mode = "rb"
 
     else:  # Should never happen.
-        return None, None, ("", "", _SEARCH_ERROR)
+        return None, None, ("", _SEARCH_ERROR)
 
-    file = open(file_path, mode)
+    file = io.open_code(file_path)
     suffix = os.path.splitext(file_path)[-1]
 
-    return file, file_path, (suffix, mode, kind)
+    return file, file_path, (suffix, kind)
 
 
 class Module:
@@ -160,15 +158,15 @@ def msgout(self, *args):
 
     def run_script(self, pathname):
         self.msg(2, "run_script", pathname)
-        with open(pathname) as fp:
-            stuff = ("", "r", _PY_SOURCE)
+        with io.open_code(pathname) as fp:
+            stuff = ("", _PY_SOURCE)
             self.load_module('__main__', fp, pathname, stuff)
 
     def load_file(self, pathname):
         dir, name = os.path.split(pathname)
         name, ext = os.path.splitext(name)
-        with open(pathname) as fp:
-            stuff = (ext, "r", _PY_SOURCE)
+        with io.open_code(pathname) as fp:
+            stuff = (ext, _PY_SOURCE)
             self.load_module(name, fp, pathname, stuff)
 
     def import_hook(self, name, caller=None, fromlist=None, level=-1):
@@ -333,14 +331,14 @@ def import_module(self, partname, fqname, parent):
         return m
 
     def load_module(self, fqname, fp, pathname, file_info):
-        suffix, mode, type = file_info
+        suffix, type = file_info
         self.msgin(2, "load_module", fqname, fp and "fp", pathname)
         if type == _PKG_DIRECTORY:
             m = self.load_package(fqname, pathname)
             self.msgout(2, "load_module ->", m)
             return m
         if type == _PY_SOURCE:
-            co = compile(fp.read()+'\n', pathname, 'exec')
+            co = compile(fp.read()+b'\n', pathname, 'exec')
         elif type == _PY_COMPILED:
             try:
                 data = fp.read()
@@ -504,7 +502,7 @@ def find_module(self, name, path, parent=None):
 
         if path is None:
             if name in sys.builtin_module_names:
-                return (None, None, ("", "", _C_BUILTIN))
+                return (None, None, ("", _C_BUILTIN))
 
             path = self.path
 

diff --git a/Lib/test/test_modulefinder.py b/Lib/test/test_modulefinder.py
@@ -40,7 +40,8 @@
                                 from c import something
 b/__init__.py
                                 from sys import *
-"""]
+""",
+]
 
 maybe_test_new = [
     "a.module",
@@ -245,6 +246,48 @@ def foo(): pass
 b/c.py
 """]
 
+coding_default_utf8_test = [
+    "a_utf8",
+    ["a_utf8", "b_utf8"],
+    [], [],
+    """\
+a_utf8.py
+                                # use the default of utf8
+                                print('Unicode test A code point 2090 \u2090 that is not valid in cp1252')
+                                import b_utf8
+b_utf8.py
+                                # use the default of utf8
+                                print('Unicode test B code point 2090 \u2090 that is not valid in cp1252')
+"""]
+
+coding_explicit_utf8_test = [
+    "a_utf8",
+    ["a_utf8", "b_utf8"],
+    [], [],
+    """\
+a_utf8.py
+                                # coding=utf8
+                                print('Unicode test A code point 2090 \u2090 that is not valid in cp1252')
+                                import b_utf8
+b_utf8.py
+                                # use the default of utf8
+                                print('Unicode test B code point 2090 \u2090 that is not valid in cp1252')
+"""]
+
+coding_explicit_cp1252_test = [  # bytes source: \xe2\x82\x90 is U+2090 in UTF-8 (\u escapes are invalid in bytes literals)
+    "a_cp1252",
+    ["a_cp1252", "b_utf8"],
+    [], [],
+    b"""\
+a_cp1252.py
+                                # coding=cp1252
+                                # 0xe2 is not allowed in utf8
+                                print('CP1252 test P\xe2t\xe9')
+                                import b_utf8
+b_utf8.py
+                                # use the default of utf8
+                                print('Unicode test A code point 2090 \xe2\x82\x90 that is not valid in cp1252')
+"""]
 
 def open_file(path):
     dirname = os.path.dirname(path)
@@ -253,18 +296,22 @@ def open_file(path):
     except OSError as e:
         if e.errno != errno.EEXIST:
             raise
-    return open(path, "w")
+    return open(path, 'wb')
 
 
 def create_package(source):
     ofi = None
     try:
         for line in source.splitlines():
-            if line.startswith(" ") or line.startswith("\t"):
-                ofi.write(line.strip() + "\n")
+            if type(line) != bytes:
+                line = line.encode('utf-8')
+            if line.startswith(b' ') or line.startswith(b'\t'):
+                ofi.write(line.strip() + b'\n')
             else:
                 if ofi:
                     ofi.close()
+                if type(line) == bytes:
+                    line = line.decode('utf-8')
                 ofi = open_file(os.path.join(TEST_DIR, line.strip()))
     finally:
         if ofi:
@@ -337,7 +384,7 @@ def test_bytecode(self):
         source_path = base_path + importlib.machinery.SOURCE_SUFFIXES[0]
         bytecode_path = base_path + importlib.machinery.BYTECODE_SUFFIXES[0]
         with open_file(source_path) as file:
-            file.write('testing_modulefinder = True\n')
+            file.write('testing_modulefinder = True\n'.encode('utf-8'))
         py_compile.compile(source_path, cfile=bytecode_path)
         os.remove(source_path)
         self._do_test(bytecode_test)
@@ -365,6 +412,14 @@ def test_extended_opargs(self):
 """ % list(range(2**16))]  # 2**16 constants
         self._do_test(extended_opargs_test)
 
+    def test_coding_default_utf8(self):
+        self._do_test(coding_default_utf8_test)
+
+    def test_coding_explicit_utf8(self):
+        self._do_test(coding_explicit_utf8_test)
+
+    def test_coding_explicit_cp1252(self):
+        self._do_test(coding_explicit_cp1252_test)
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2020-04-12-21-18-56.bpo-40260.F6VWaE.rst b/Misc/NEWS.d/next/Library/2020-04-12-21-18-56.bpo-40260.F6VWaE.rst
@@ -0,0 +1 @@
+Ensure :mod:`modulefinder` uses :func:`io.open_code` and respects coding comments.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Ensure :mod:`modulefinder` uses :func:`io.open_code` and respects coding comments.