From 58f025515eb81413457d72a23d8a6ca0e72e7648 Mon Sep 17 00:00:00 2001 From: Charles Chen Date: Mon, 9 Jul 2018 17:48:19 -0700 Subject: [PATCH] [BEAM-1251] Upgrade snappy and use a memoryview --- sdks/python/apache_beam/io/avroio.py | 4 ++-- sdks/python/apache_beam/io/tfrecordio.py | 2 +- sdks/python/apache_beam/runners/dataflow/internal/names.py | 2 +- sdks/python/container/Dockerfile | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sdks/python/apache_beam/io/avroio.py b/sdks/python/apache_beam/io/avroio.py index 9b86b58982b5..f90dc3c68330 100644 --- a/sdks/python/apache_beam/io/avroio.py +++ b/sdks/python/apache_beam/io/avroio.py @@ -341,8 +341,8 @@ def _decompress_bytes(data, codec): # Compressed data includes a 4-byte CRC32 checksum which we verify. # We take care to avoid extra copies of data while slicing large objects - # by use of a buffer. - result = snappy.decompress(buffer(data)[:-4]) + # by use of a memoryview. + result = snappy.decompress(memoryview(data)[:-4]) avroio.BinaryDecoder(io.BytesIO(data[-4:])).check_crc32(result) return result else: diff --git a/sdks/python/apache_beam/io/tfrecordio.py b/sdks/python/apache_beam/io/tfrecordio.py index 989247a96ee0..2ef7c5b4c729 100644 --- a/sdks/python/apache_beam/io/tfrecordio.py +++ b/sdks/python/apache_beam/io/tfrecordio.py @@ -43,7 +43,7 @@ def _default_crc32c_fn(value): if not _default_crc32c_fn.fn: try: import snappy # pylint: disable=import-error - _default_crc32c_fn.fn = snappy._crc32c # pylint: disable=protected-access + _default_crc32c_fn.fn = snappy._snappy._crc32c # pylint: disable=protected-access except ImportError: logging.warning('Couldn\'t find python-snappy so the implementation of ' '_TFRecordUtil._masked_crc32c is not as fast as it could ' diff --git a/sdks/python/apache_beam/runners/dataflow/internal/names.py b/sdks/python/apache_beam/runners/dataflow/internal/names.py index c31e43f78a56..fb4643fe0a1a 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/names.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/names.py @@ -42,7 +42,7 @@ # Update this version to the next version whenever there is a change that will # require changes to legacy Dataflow worker execution environment. -BEAM_CONTAINER_VERSION = 'beam-master-20180619' +BEAM_CONTAINER_VERSION = 'beam-master-20180709' # Update this version to the next version whenever there is a change that # requires changes to SDK harness container or SDK harness launcher. BEAM_FNAPI_CONTAINER_VERSION = 'beam-master-20180619' diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile index 90348c6e231a..afb6b43f9382 100644 --- a/sdks/python/container/Dockerfile +++ b/sdks/python/container/Dockerfile @@ -70,7 +70,7 @@ RUN \ # Optional packages pip install "cython == 0.28.1" && \ pip install "guppy == 0.1.10" && \ - pip install "python-snappy == 0.5.1" && \ + pip install "python-snappy == 0.5.3" && \ # These are additional packages likely to be used by customers. pip install "numpy == 1.13.3" --no-binary=:all: && \ pip install "pandas == 0.18.1" && \