diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index 6058e94d471e9..531108738f6c9 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -62,11 +62,12 @@ if sys.version < '3': import cPickle as pickle from itertools import izip as zip, imap as map + pickle_protocol = 2 else: import pickle basestring = unicode = str xrange = range -pickle_protocol = pickle.HIGHEST_PROTOCOL + pickle_protocol = 3 from pyspark import cloudpickle from pyspark.util import _exception_message diff --git a/python/pyspark/sql/tests/test_serde.py b/python/pyspark/sql/tests/test_serde.py index 8707f46b6a25a..1c18e930eb91d 100644 --- a/python/pyspark/sql/tests/test_serde.py +++ b/python/pyspark/sql/tests/test_serde.py @@ -126,6 +126,12 @@ def test_BinaryType_serialization(self): df = self.spark.createDataFrame(data, schema=schema) df.collect() + def test_int_array_serialization(self): + # Note that this test seems dependent on parallelism. + data = self.spark.sparkContext.parallelize([[1, 2, 3, 4]] * 100, numSlices=12) + df = self.spark.createDataFrame(data, "array") + self.assertEqual(len(list(filter(lambda r: None in r.value, df.collect()))), 0) + if __name__ == "__main__": import unittest