Merged

Commits (39)
3895a79  Do no allocate large temp buffers in _Pickler.dump (ogrisel, Nov 9, 2017)
9fc8d67  Add comment to justify iteration over chunks (ogrisel, Nov 9, 2017)
964a67f  Concatenate short bytes (ogrisel, Nov 9, 2017)
d7216c1  Add entry to NEWS (ogrisel, Nov 9, 2017)
410d73a  Reduce overhead for small bytes objects (ogrisel, Nov 10, 2017)
37b4e5b  Even more overhead reduction for small bytes buffers (ogrisel, Nov 10, 2017)
ba73a71  Fix no-copy for protocol < 4 (ogrisel, Nov 10, 2017)
8eac6a2  More overhead reduction for small bytes (ogrisel, Nov 10, 2017)
6adc17b  Direct write for large bytes in the C-pickler (ogrisel, Nov 10, 2017)
e795e66  Update NEWS to include the C pickler (ogrisel, Nov 11, 2017)
bb4d3eb  No-copy dump for large unicode in C pickler (ogrisel, Nov 12, 2017)
5d52bd7  Update NEWS to mention str [ci skip] (ogrisel, Nov 12, 2017)
054c94b  Simpler NEWS entry [ci skip] (ogrisel, Nov 12, 2017)
31c3afa  C-code style fixes (ogrisel, Nov 12, 2017)
dcce63a  Fixed leak (ogrisel, Nov 12, 2017)
d233208  Do not wrap large objects in frames (ogrisel, Nov 12, 2017)
dfe6314  Fix bug in function call result handling (ogrisel, Nov 12, 2017)
61611b2  Fix frameless blobs test for pickletools.optimize (ogrisel, Nov 12, 2017)
e67df81  Flush to file after each frame commit (ogrisel, Nov 12, 2017)
d1e9a7d  Extend NEWS entry to mention write-on-frame-commit (ogrisel, Nov 12, 2017)
7f08831  Make proto 4 Python pickler issue (2 * n_frames + 1) calls to write (ogrisel, Nov 17, 2017)
750ae86  Cleanup _Pickler_CommitFrame (ogrisel, Nov 17, 2017)
c0c8973  Get rid of the frameless_blobs=False exception: actually it works out… (ogrisel, Nov 17, 2017)
304571a  Implement frameless blobs for pickletools.optimize (ogrisel, Nov 17, 2017)
4ee2ee9  Do not skip test silently if self.pickler is renamed (ogrisel, Nov 17, 2017)
b4c978b  Remove on attribute lookup (ogrisel, Nov 17, 2017)
e60a3d5  Typo in comment (ogrisel, Nov 17, 2017)
8181630  Add comments for tests that require self.pickler (ogrisel, Nov 17, 2017)
25e4e04  revert variable renaming (ogrisel, Nov 24, 2017)
b57ef9d  Disable test_framed_write_sizes in OptimizedPickleTests (ogrisel, Nov 24, 2017)
727b5c2  Small code style fix (ogrisel, Nov 24, 2017)
8af9321  Add comments to explain tradeoffs in Python pickle frame commit (ogrisel, Nov 27, 2017)
c7bbffa  Implement no-copy support for delayed writers in Python pickler (ogrisel, Dec 3, 2017)
796c469  Fix renamed test_framed_write_sizes_with_delayed_writer in test_pickl… (ogrisel, Dec 3, 2017)
a2e87e9  Add comment to explain flush after commit in _Pickler_OpcodeBoundary (ogrisel, Jan 4, 2018)
8edaa64  Removed unused argument payload_size (ogrisel, Jan 4, 2018)
609ebfe  Update the changelog to reflect the new behavior of the Python pickler (ogrisel, Jan 4, 2018)
28f6297  Fix phrasing (ogrisel, Jan 4, 2018)
7c91f06  Fixed typos in comments (ogrisel, Jan 5, 2018)
Lib/pickle.py (40 additions & 10 deletions)
@@ -201,21 +201,46 @@ def commit_frame(self, force=False):
         if self.current_frame:
             f = self.current_frame
             if f.tell() >= self._FRAME_SIZE_TARGET or force:
-                with f.getbuffer() as data:
-                    n = len(data)
-                    write = self.file_write
-                    write(FRAME)
-                    write(pack("<Q", n))
-                    write(data)
-                f.seek(0)
-                f.truncate()
+                data = f.getbuffer()
+                write = self.file_write
+                # Issue a single call to the write method of the underlying
+                # file object for the frame opcode with the size of the
+                # frame. The concatenation is expected to be less expensive
+                # than issuing an additional call to write.
+                write(FRAME + pack("<Q", len(data)))
+
+                # Issue a separate call to write to append the frame
+                # contents without concatenation to the above to avoid a
+                # memory copy.
+                write(data)
+
+                # Start the new frame with a new io.BytesIO instance so that
+                # the file object can have delayed access to the previous frame
+                # contents via an unreleased memoryview of the previous
+                # io.BytesIO instance.
+                self.current_frame = io.BytesIO()

     def write(self, data):
         if self.current_frame:
             return self.current_frame.write(data)
         else:
             return self.file_write(data)

+    def write_large_bytes(self, header, payload):
+        write = self.file_write
+        if self.current_frame:
+            # Terminate the current frame and flush it to the file.
+            self.commit_frame(force=True)
+
+        # Perform direct write of the header and payload of the large binary
+        # object. Be careful not to concatenate the header and the payload
+        # prior to calling 'write' as we do not want to allocate a large
+        # temporary bytes object.
+        # We intentionally do not insert a protocol 4 frame opcode to make
+        # it possible to optimize file.read calls in the loader.
+        write(header)
+        write(payload)


 class _Unframer:
@@ -379,6 +404,7 @@ def __init__(self, file, protocol=None, *, fix_imports=True):
             raise TypeError("file must have a 'write' attribute")
         self.framer = _Framer(self._file_write)
         self.write = self.framer.write
+        self._write_large_bytes = self.framer.write_large_bytes
         self.memo = {}
         self.proto = int(protocol)
         self.bin = protocol >= 1
@@ -696,7 +722,9 @@ def save_bytes(self, obj):
         if n <= 0xff:
             self.write(SHORT_BINBYTES + pack("<B", n) + obj)
         elif n > 0xffffffff and self.proto >= 4:
-            self.write(BINBYTES8 + pack("<Q", n) + obj)
+            self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj)
+        elif n >= self.framer._FRAME_SIZE_TARGET:
+            self._write_large_bytes(BINBYTES + pack("<I", n), obj)
         else:
             self.write(BINBYTES + pack("<I", n) + obj)
         self.memoize(obj)
@@ -709,7 +737,9 @@ def save_str(self, obj):
             if n <= 0xff and self.proto >= 4:
                 self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
             elif n > 0xffffffff and self.proto >= 4:
-                self.write(BINUNICODE8 + pack("<Q", n) + encoded)
+                self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
+            elif n >= self.framer._FRAME_SIZE_TARGET:
+                self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
             else:
                 self.write(BINUNICODE + pack("<I", n) + encoded)
         else:
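As a sketch of the intended behavior (not part of the diff): dumping a bytes object larger than the frame size target should hand the payload to the file object's write() as a chunk of its own, without first copying it into a frame buffer. CountingWriter is a hypothetical helper, and the chunk pattern asserted below assumes an interpreter that includes this change.

import pickle

class CountingWriter:
    """File-like object that records every chunk passed to write()."""
    def __init__(self):
        self.chunks = []
    def write(self, chunk):
        # Copy defensively so the chunks can be inspected later.
        self.chunks.append(bytes(chunk))
        return len(chunk)

payload = b'x' * (4 * 1024 * 1024)  # well above the 64 KiB frame target
writer = CountingWriter()
pickle.Pickler(writer, protocol=4).dump(payload)

# On a patched build the 4 MiB payload reaches write() as its own chunk
# instead of being buffered into a temporary frame first.
assert any(len(c) >= len(payload) for c in writer.chunks)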
Lib/pickletools.py (8 additions & 3 deletions)
@@ -2279,7 +2279,7 @@ def optimize(p):
             if arg > proto:
                 proto = arg
             if pos == 0:
-                protoheader = p[pos: end_pos]
+                protoheader = p[pos:end_pos]
             else:
                 opcodes.append((pos, end_pos))
         else:
@@ -2295,6 +2295,7 @@
     pickler.framer.start_framing()
     idx = 0
     for op, arg in opcodes:
+        frameless = False
         if op is put:
             if arg not in newids:
                 continue
@@ -2305,8 +2306,12 @@
             data = pickler.get(newids[arg])
         else:
             data = p[op:arg]
-        pickler.framer.commit_frame()
-        pickler.write(data)
+            frameless = len(data) > pickler.framer._FRAME_SIZE_TARGET
+        pickler.framer.commit_frame(force=frameless)
+        if frameless:
+            pickler.framer.file_write(data)
+        else:
+            pickler.write(data)
     pickler.framer.end_framing()
     return out.getvalue()
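A quick way to exercise this path (a sketch, assuming a build with this change): optimize() now force-commits the current frame before a large blob and writes the blob directly, so a large payload round-trips through optimize() without being re-wrapped in a FRAME.

import pickle
import pickletools

data = pickle.dumps([b's' * 10, b'y' * (1024 * 1024)], protocol=4)
optimized = pickletools.optimize(data)

# The optimized pickle still loads to an equal object...
assert pickle.loads(optimized) == pickle.loads(data)

# ...and the opcode stream can be inspected to check that the large
# BINBYTES payload sits outside of any FRAME.
names = [op.name for op, arg, pos in pickletools.genops(optimized)]
print(names.count('FRAME'), names.count('BINBYTES'))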
Lib/test/pickletester.py (118 additions & 13 deletions)
@@ -2042,21 +2042,40 @@ def test_setitems_on_non_dicts(self):
     def check_frame_opcodes(self, pickled):
         """
         Check the arguments of FRAME opcodes in a protocol 4+ pickle.
+
+        Note that binary objects that are larger than FRAME_SIZE_TARGET are not
+        framed by default and are therefore considered a frame by themselves in
+        the following consistency check.
         """
-        frame_opcode_size = 9
-        last_arg = last_pos = None
+        last_arg = last_pos = last_frame_opcode_size = None
+        frameless_opcode_sizes = {
+            'BINBYTES': 5,
+            'BINUNICODE': 5,
+            'BINBYTES8': 9,
+            'BINUNICODE8': 9,
+        }
         for op, arg, pos in pickletools.genops(pickled):
-            if op.name != 'FRAME':
+            if op.name in frameless_opcode_sizes:
+                if len(arg) > self.FRAME_SIZE_TARGET:
+                    frame_opcode_size = frameless_opcode_sizes[op.name]
+                    arg = len(arg)
+                else:
+                    continue
+            elif op.name == 'FRAME':
+                frame_opcode_size = 9
+            else:
                 continue
+
             if last_pos is not None:
                 # The previous frame's size should be equal to the number
                 # of bytes up to the current frame.
-                frame_size = pos - last_pos - frame_opcode_size
+                frame_size = pos - last_pos - last_frame_opcode_size
                 self.assertEqual(frame_size, last_arg)
             last_arg, last_pos = arg, pos
+            last_frame_opcode_size = frame_opcode_size
         # The last frame's size should be equal to the number of bytes up
         # to the pickle's end.
-        frame_size = len(pickled) - last_pos - frame_opcode_size
+        frame_size = len(pickled) - last_pos - last_frame_opcode_size
         self.assertEqual(frame_size, last_arg)

     def test_framing_many_objects(self):
@@ -2076,15 +2095,36 @@ def test_framing_many_objects(self):

     def test_framing_large_objects(self):
         N = 1024 * 1024
-        obj = [b'x' * N, b'y' * N, b'z' * N]
+        obj = [b'x' * N, b'y' * N, 'z' * N]
         for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
-            with self.subTest(proto=proto):
-                pickled = self.dumps(obj, proto)
-                unpickled = self.loads(pickled)
-                self.assertEqual(obj, unpickled)
-                n_frames = count_opcode(pickle.FRAME, pickled)
-                self.assertGreaterEqual(n_frames, len(obj))
-                self.check_frame_opcodes(pickled)
+            for fast in [True, False]:
+                with self.subTest(proto=proto, fast=fast):
+                    if hasattr(self, 'pickler'):
+                        buf = io.BytesIO()
+                        pickler = self.pickler(buf, protocol=proto)
+                        pickler.fast = fast
+                        pickler.dump(obj)
+                        pickled = buf.getvalue()
+                    elif fast:
+                        continue
+                    else:
+                        # Fallback to self.dumps when fast=False and
+                        # self.pickler is not available.
+                        pickled = self.dumps(obj, proto)
+                    unpickled = self.loads(pickled)
+                    # More informative error message in case of failure.
+                    self.assertEqual([len(x) for x in obj],
+                                     [len(x) for x in unpickled])
+                    # Perform full equality check if the lengths match.
+                    self.assertEqual(obj, unpickled)
+                    n_frames = count_opcode(pickle.FRAME, pickled)
+                    if not fast:
+                        # One frame per memoize for each large object.
+                        self.assertGreaterEqual(n_frames, len(obj))
+                    else:
+                        # One frame at the beginning and one at the end.
+                        self.assertGreaterEqual(n_frames, 2)
+                    self.check_frame_opcodes(pickled)

     def test_optional_frames(self):
         if pickle.HIGHEST_PROTOCOL < 4:
@@ -2125,6 +2165,71 @@ def remove_frames(pickled, keep_frame=None):
                              count_opcode(pickle.FRAME, pickled))
             self.assertEqual(obj, self.loads(some_frames_pickle))

+    def test_framed_write_sizes_with_delayed_writer(self):
+        class ChunkAccumulator:
+            """Accumulate pickler output in a list of raw chunks."""
+
+            def __init__(self):
+                self.chunks = []
+
+            def write(self, chunk):
+                self.chunks.append(chunk)
+
+            def concatenate_chunks(self):
+                # Some chunks can be memoryview instances; we need to
+                # convert them to bytes to be able to call join.
+                return b"".join([c.tobytes() if hasattr(c, 'tobytes') else c
+                                 for c in self.chunks])
+
+        small_objects = [(str(i).encode('ascii'), i % 42, {'i': str(i)})
+                         for i in range(int(1e4))]
+
+        for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
+            # Protocol 4 packs groups of small objects into frames and issues
+            # calls to write only once or twice per frame:
+            # The C pickler issues one call to write per frame (header and
+            # contents) while the Python pickler issues two calls to write:
+            # one for the frame header and one for the frame binary contents.
+            writer = ChunkAccumulator()
+            self.pickler(writer, proto).dump(small_objects)
+
+            # Actually read the binary content of the chunks after the end
+            # of the call to dump: any memoryview passed to write should not
+            # be released otherwise this delayed access would not be possible.
+            pickled = writer.concatenate_chunks()
+            reconstructed = self.loads(pickled)
+            self.assertEqual(reconstructed, small_objects)
+            self.assertGreater(len(writer.chunks), 1)
+
+            n_frames, remainder = divmod(len(pickled), self.FRAME_SIZE_TARGET)
+            if remainder > 0:
+                n_frames += 1
+
+            # There should be at least one call to write per frame,
+            self.assertGreaterEqual(len(writer.chunks), n_frames)
+
+            # but not too many either: there can be one for the proto,
+            # one per-frame header and one per frame for the actual contents.
+            self.assertGreaterEqual(2 * n_frames + 1, len(writer.chunks))
+
+            chunk_sizes = [len(c) for c in writer.chunks[:-1]]
+            large_sizes = [s for s in chunk_sizes
+                           if s >= self.FRAME_SIZE_TARGET]
+            small_sizes = [s for s in chunk_sizes
+                           if s < self.FRAME_SIZE_TARGET]
+
+            # Large chunks should not be too large:
+            for chunk_size in large_sizes:
+                self.assertGreater(2 * self.FRAME_SIZE_TARGET, chunk_size)
+
+            last_chunk_size = len(writer.chunks[-1])
+            self.assertGreater(2 * self.FRAME_SIZE_TARGET, last_chunk_size)
+
+            # Small chunks (if any) should be very small
+            # (only proto and frame headers).
+            for chunk_size in small_sizes:
+                self.assertGreaterEqual(9, chunk_size)

     def test_nested_names(self):
         global Nested
         class Nested:
Lib/test/test_pickletools.py (3 additions)
@@ -16,6 +16,9 @@ def loads(self, buf, **kwds):
     # Test relies on precise output of dumps()
     test_pickle_to_2x = None

+    # Test relies on writing by chunks into a file object.
+    test_framed_write_sizes_with_delayed_writer = None
+
     def test_optimize_long_binget(self):
         data = [str(i) for i in range(257)]
         data.append(data[-1])
Misc/NEWS.d/next/Library/2017-11-10-00-05-08.bpo-31993.-OMNg8.rst (14 additions)

@@ -0,0 +1,14 @@
+The picklers no longer allocate temporary memory when dumping large
+``bytes`` and ``str`` objects into a file object. Instead the data is
+directly streamed into the underlying file object.
+
+Previously the C implementation would buffer all content and issue a
+single call to ``file.write`` at the end of the dump. With protocol 4
+this behavior has changed to issue one call to ``file.write`` per frame.
+
+The Python pickler with protocol 4 now dumps each frame's content as a
+memoryview of an ``io.BytesIO`` instance that is never reused, and the
+memoryview is no longer released after the call to write. This makes it
+possible for the file object to delay access to the memoryviews of
+previous frames without forcing any additional memory copy, as was
+already possible with the C pickler.
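A minimal sketch of the delayed-access pattern this entry describes (DelayedWriter is a hypothetical helper, and pickle._Pickler is the pure-Python pickler the entry refers to): the writer keeps references to the chunks passed to write() and only concatenates them after dump() returns, which assumes the frame memoryviews are never released.

import pickle

class DelayedWriter:
    """Store chunks passed to write() and read them after the dump."""
    def __init__(self):
        self.chunks = []
    def write(self, chunk):
        # chunk may be a memoryview of a frame; since the patched pickler
        # no longer releases it, holding a reference is safe and avoids
        # an immediate copy.
        self.chunks.append(chunk)

obj = list(range(10000))
writer = DelayedWriter()
pickle._Pickler(writer, protocol=4).dump(obj)

# Delayed concatenation: the frame memoryviews are still valid here.
payload = b"".join(bytes(c) for c in writer.chunks)
assert pickle.loads(payload) == obj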