From c16fead8678e080c076ccd57fcb7437698a6636f Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Thu, 5 Jul 2018 22:27:48 -0700 Subject: [PATCH 1/3] Always return bytes from _HackedGetData.get_data(). Ensure the imp.load_source shim always returns bytes by reopening the file in binary mode if needed. Hash-based pycs have to receive the source code in bytes. It's tempting to change imp.get_suffixes() to always return 'rb' as a mode, but that breaks some stdlib tests and likely 3rdparty code, too. Closes bpo-34056. --- Lib/imp.py | 11 ++++------- .../Library/2018-07-05-22-45-46.bpo-34056.86isrU.rst | 3 +++ 2 files changed, 7 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2018-07-05-22-45-46.bpo-34056.86isrU.rst diff --git a/Lib/imp.py b/Lib/imp.py index 866464b245b24c..fb83e91810f70a 100644 --- a/Lib/imp.py +++ b/Lib/imp.py @@ -144,15 +144,12 @@ def get_data(self, path): if self.file and path == self.path: if not self.file.closed: file = self.file - else: - self.file = file = open(self.path, 'r') + if 'b' not in file.mode: + file.close() + if self.file.closed: + self.file = file = open(self.path, 'rb') with file: - # Technically should be returning bytes, but - # SourceLoader.get_code() just passed what is returned to - # compile() which can handle str. And converting to bytes would - # require figuring out the encoding to decode to and - # tokenize.detect_encoding() only accepts bytes. return file.read() else: return super().get_data(path) diff --git a/Misc/NEWS.d/next/Library/2018-07-05-22-45-46.bpo-34056.86isrU.rst b/Misc/NEWS.d/next/Library/2018-07-05-22-45-46.bpo-34056.86isrU.rst new file mode 100644 index 00000000000000..edc0135efc6057 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-07-05-22-45-46.bpo-34056.86isrU.rst @@ -0,0 +1,3 @@ +Ensure the loader shim created by ``imp.load_module`` always returns bytes +from its ``get_data()`` function. This fixes using ``imp.load_module`` with +:pep:`552` hash-based pycs. From 226d4c6bc6158cc40fecf5b9dd6777c316f75e23 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Thu, 5 Jul 2018 23:13:11 -0700 Subject: [PATCH 2/3] add test --- Lib/test/test_imp.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Lib/test/test_imp.py b/Lib/test/test_imp.py index a115e60d4e4f08..bb0144b12d4107 100644 --- a/Lib/test/test_imp.py +++ b/Lib/test/test_imp.py @@ -2,6 +2,7 @@ import importlib.util import os import os.path +import py_compile import sys from test import support from test.support import script_helper @@ -350,6 +351,20 @@ def test_pyc_invalidation_mode_from_cmdline(self): res = script_helper.assert_python_ok(*args) self.assertEqual(res.out.strip().decode('utf-8'), expected) + def test_find_and_load_checked_pyc(self): + # issue 34056 + with support.temp_cwd(): + with open('mymod.py', 'wb') as fp: + fp.write(b'x = 42\n') + py_compile.compile( + 'mymod.py', + doraise=True, + invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH, + ) + file, path, description = imp.find_module('mymod', path=['.']) + mod = imp.load_module('mymod', file, path, description) + self.assertEqual(mod.x, 42) + class ReloadTests(unittest.TestCase): From 329e7e6d3d2c51342b7659a6fd7758a4be9308e6 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Thu, 5 Jul 2018 23:14:18 -0700 Subject: [PATCH 3/3] add comment --- Lib/imp.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/imp.py b/Lib/imp.py index fb83e91810f70a..31f8c766381adc 100644 --- a/Lib/imp.py +++ b/Lib/imp.py @@ -142,6 +142,8 @@ def __init__(self, fullname, path, file=None): def get_data(self, path): """Gross hack to contort loader to deal w/ load_*()'s bad API.""" if self.file and path == self.path: + # The contract of get_data() requires us to return bytes. Reopen the + # file in binary mode if needed. if not self.file.closed: file = self.file if 'b' not in file.mode: