From 571d01b25a6f6f84e5dfe0551f2b08fcfeee132e Mon Sep 17 00:00:00 2001 From: singularperturbation Date: Fri, 28 Oct 2016 03:11:30 -0500 Subject: [PATCH 1/3] Use protocol 4 if available for PickleSerializer --- python/pyspark/serializers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index 2a1326947f4f5..d2a65351433bf 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -64,7 +64,7 @@ from itertools import izip as zip else: import pickle - protocol = 3 + protocol = min(pickle.HIGHEST_PROTOCOL, 4) xrange = range from pyspark import cloudpickle From cd5f8c76bdd89091c180ad54dd1d26d9a0a2cc3f Mon Sep 17 00:00:00 2001 From: singularperturbation Date: Fri, 28 Oct 2016 03:11:54 -0500 Subject: [PATCH 2/3] Use protocol from serializers in Broadcast as well --- python/pyspark/broadcast.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyspark/broadcast.py b/python/pyspark/broadcast.py index 74dee1420754a..d3c3f5534c3f1 100644 --- a/python/pyspark/broadcast.py +++ b/python/pyspark/broadcast.py @@ -21,6 +21,7 @@ from tempfile import NamedTemporaryFile from pyspark.cloudpickle import print_exec +from pyspark.broadcast import protocol if sys.version < '3': import cPickle as pickle @@ -78,7 +79,7 @@ def __init__(self, sc=None, value=None, pickle_registry=None, path=None): def dump(self, value, f): try: - pickle.dump(value, f, 2) + pickle.dump(value, f, protocol) except pickle.PickleError: raise except Exception as e: From ce7893adfe5367c0a3cd6da8a6a82239a73049b1 Mon Sep 17 00:00:00 2001 From: Sloane Simmons Date: Mon, 31 Oct 2016 16:42:04 -0500 Subject: [PATCH 3/3] [SPARK-18161] Fix import (broadcast -> serializers).protocol --- python/pyspark/broadcast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/broadcast.py b/python/pyspark/broadcast.py index d3c3f5534c3f1..e04f1c3fb63d5 100644 --- a/python/pyspark/broadcast.py +++ b/python/pyspark/broadcast.py @@ -21,7 +21,7 @@ from tempfile import NamedTemporaryFile from pyspark.cloudpickle import print_exec -from pyspark.broadcast import protocol +from pyspark.serializers import protocol if sys.version < '3': import cPickle as pickle