From cc8bc3fd88d7989cf28ab062117fede102e35bed Mon Sep 17 00:00:00 2001 From: Robbe Sneyders Date: Mon, 30 Apr 2018 17:27:32 +0200 Subject: [PATCH] Futurize coders subpackage --- sdks/python/apache_beam/coders/__init__.py | 1 + sdks/python/apache_beam/coders/coder_impl.pxd | 4 +- sdks/python/apache_beam/coders/coder_impl.py | 63 +++++++------ sdks/python/apache_beam/coders/coders.py | 18 +++- sdks/python/apache_beam/coders/coders_test.py | 16 ++-- .../apache_beam/coders/coders_test_common.py | 39 ++++---- .../apache_beam/coders/fast_coders_test.py | 1 + sdks/python/apache_beam/coders/observable.py | 3 + .../apache_beam/coders/observable_test.py | 1 + .../apache_beam/coders/slow_coders_test.py | 1 + sdks/python/apache_beam/coders/slow_stream.py | 7 +- .../coders/standard_coders_test.py | 4 +- sdks/python/apache_beam/coders/stream_test.py | 3 + sdks/python/apache_beam/coders/typecoders.py | 11 ++- .../apache_beam/coders/typecoders_test.py | 21 +++-- sdks/python/generate_pydoc.sh | 3 + sdks/python/run_pylint.sh | 19 ---- sdks/python/run_pylint_2to3.sh | 90 +++++++++++++++++++ sdks/python/setup.py | 2 +- sdks/python/tox.ini | 21 ++++- 20 files changed, 230 insertions(+), 98 deletions(-) create mode 100755 sdks/python/run_pylint_2to3.sh diff --git a/sdks/python/apache_beam/coders/__init__.py b/sdks/python/apache_beam/coders/__init__.py index acca89f70f48..3192494ebbf1 100644 --- a/sdks/python/apache_beam/coders/__init__.py +++ b/sdks/python/apache_beam/coders/__init__.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from __future__ import absolute_import from apache_beam.coders.coders import * from apache_beam.coders.typecoders import registry diff --git a/sdks/python/apache_beam/coders/coder_impl.pxd b/sdks/python/apache_beam/coders/coder_impl.pxd index 98dd508556a0..8a85d0854488 100644 --- a/sdks/python/apache_beam/coders/coder_impl.pxd +++ b/sdks/python/apache_beam/coders/coder_impl.pxd @@ -70,11 +70,11 @@ cdef class DeterministicFastPrimitivesCoderImpl(CoderImpl): cdef object NoneType cdef char UNKNOWN_TYPE, NONE_TYPE, INT_TYPE, FLOAT_TYPE, BOOL_TYPE -cdef char STR_TYPE, UNICODE_TYPE, LIST_TYPE, TUPLE_TYPE, DICT_TYPE, SET_TYPE +cdef char BYTES_TYPE, UNICODE_TYPE, LIST_TYPE, TUPLE_TYPE, DICT_TYPE, SET_TYPE cdef class FastPrimitivesCoderImpl(StreamCoderImpl): cdef CoderImpl fallback_coder_impl - @cython.locals(unicode_value=unicode, dict_value=dict) + @cython.locals(dict_value=dict) cpdef encode_to_stream(self, value, OutputStream stream, bint nested) diff --git a/sdks/python/apache_beam/coders/coder_impl.py b/sdks/python/apache_beam/coders/coder_impl.py index cc7ed87c3ad1..2fcd0f22a31b 100644 --- a/sdks/python/apache_beam/coders/coder_impl.py +++ b/sdks/python/apache_beam/coders/coder_impl.py @@ -24,13 +24,17 @@ This module may be optionally compiled with Cython, using the corresponding coder_impl.pxd file for type hints. +Py2/3 porting: Native range is used on both python versions instead of +future.builtins.range to avoid performance regression in Cython compiled code. + For internal use only; no backwards-compatibility guarantees. """ from __future__ import absolute_import +from __future__ import division -from types import NoneType - -import six +import sys +from builtins import chr +from builtins import object from apache_beam.coders import observable from apache_beam.utils import windowed_value @@ -54,10 +58,12 @@ from .slow_stream import get_varint_size # pylint: enable=wrong-import-order, wrong-import-position, ungrouped-imports -try: - long # Python 2 -except NameError: - long = int # Python 3 +try: # Python 2 + long # pylint: disable=long-builtin + unicode # pylint: disable=unicode-builtin +except NameError: # Python 3 + long = int + unicode = str class CoderImpl(object): @@ -199,7 +205,7 @@ def __init__(self, coder, step_label): self._step_label = step_label def _check_safe(self, value): - if isinstance(value, (str, six.text_type, long, int, float)): + if isinstance(value, (bytes, unicode, long, int, float)): pass elif value is None: pass @@ -253,7 +259,7 @@ def decode(self, encoded): NONE_TYPE = 0 INT_TYPE = 1 FLOAT_TYPE = 2 -STR_TYPE = 3 +BYTES_TYPE = 3 UNICODE_TYPE = 4 BOOL_TYPE = 9 LIST_TYPE = 5 @@ -279,7 +285,7 @@ def get_estimated_size_and_observables(self, value, nested=False): def encode_to_stream(self, value, stream, nested): t = type(value) - if t is NoneType: + if value is None: stream.write_byte(NONE_TYPE) elif t is int: stream.write_byte(INT_TYPE) @@ -287,13 +293,13 @@ def encode_to_stream(self, value, stream, nested): elif t is float: stream.write_byte(FLOAT_TYPE) stream.write_bigendian_double(value) - elif t is str: - stream.write_byte(STR_TYPE) + elif t is bytes: + stream.write_byte(BYTES_TYPE) stream.write(value, nested) - elif t is six.text_type: - unicode_value = value # for typing + elif t is unicode: + text_value = value # for typing stream.write_byte(UNICODE_TYPE) - stream.write(unicode_value.encode('utf-8'), nested) + stream.write(text_value.encode('utf-8'), nested) elif t is list or t is tuple or t is set: stream.write_byte( LIST_TYPE if t is list else TUPLE_TYPE if t is tuple else SET_TYPE) @@ -304,7 +310,13 @@ def encode_to_stream(self, value, stream, nested): dict_value = value # for typing stream.write_byte(DICT_TYPE) stream.write_var_int64(len(dict_value)) - for k, v in dict_value.iteritems(): + # Use iteritems() on Python 2 instead of future.builtins.iteritems to + # avoid performance regression in Cython compiled code. + if sys.version_info[0] == 2: + items = dict_value.iteritems() # pylint: disable=dict-iter-method + else: + items = dict_value.items() + for k, v in items: self.encode_to_stream(k, stream, True) self.encode_to_stream(v, stream, True) elif t is bool: @@ -322,7 +334,7 @@ def decode_from_stream(self, stream, nested): return stream.read_var_int64() elif t == FLOAT_TYPE: return stream.read_bigendian_double() - elif t == STR_TYPE: + elif t == BYTES_TYPE: return stream.read_all(nested) elif t == UNICODE_TYPE: return stream.read_all(nested).decode('utf-8') @@ -394,8 +406,9 @@ def _from_normal_time(self, value): def encode_to_stream(self, value, out, nested): span_micros = value.end.micros - value.start.micros - out.write_bigendian_uint64(self._from_normal_time(value.end.micros / 1000)) - out.write_var_int64(span_micros / 1000) + out.write_bigendian_uint64( + self._from_normal_time(value.end.micros // 1000)) + out.write_var_int64(span_micros // 1000) def decode_from_stream(self, in_, nested): end_millis = self._to_normal_time(in_.read_bigendian_uint64()) @@ -409,7 +422,7 @@ def estimate_size(self, value, nested=False): # An IntervalWindow is context-insensitive, with a timestamp (8 bytes) # and a varint timespam. span = value.end.micros - value.start.micros - return 8 + get_varint_size(span / 1000) + return 8 + get_varint_size(span // 1000) class TimestampCoderImpl(StreamCoderImpl): @@ -427,7 +440,7 @@ def estimate_size(self, unused_value, nested=False): return 8 -small_ints = [chr(_) for _ in range(128)] +small_ints = [chr(_).encode('latin-1') for _ in range(128)] class VarIntCoderImpl(StreamCoderImpl): @@ -474,7 +487,7 @@ def decode_from_stream(self, stream, nested): return self._value def encode(self, value): - b = '' # avoid byte vs str vs unicode error + b = b'' # avoid byte vs str vs unicode error return b def decode(self, encoded): @@ -783,7 +796,7 @@ def encode_to_stream(self, value, out, nested): # TODO(BEAM-1524): Clean this up once we have a BEAM wide consensus on # precision of timestamps. self._from_normal_time( - restore_sign * (abs(wv.timestamp_micros) / 1000))) + restore_sign * (abs(wv.timestamp_micros) // 1000))) self._windows_coder.encode_to_stream(wv.windows, out, True) # Default PaneInfo encoded byte representing NO_FIRING. self._pane_info_coder.encode_to_stream(wv.pane_info, out, True) @@ -797,9 +810,9 @@ def decode_from_stream(self, in_stream, nested): # were indeed MIN/MAX timestamps. # TODO(BEAM-1524): Clean this up once we have a BEAM wide consensus on # precision of timestamps. - if timestamp == -(abs(MIN_TIMESTAMP.micros) / 1000): + if timestamp == -(abs(MIN_TIMESTAMP.micros) // 1000): timestamp = MIN_TIMESTAMP.micros - elif timestamp == (MAX_TIMESTAMP.micros / 1000): + elif timestamp == (MAX_TIMESTAMP.micros // 1000): timestamp = MAX_TIMESTAMP.micros else: timestamp *= 1000 diff --git a/sdks/python/apache_beam/coders/coders.py b/sdks/python/apache_beam/coders/coders.py index d1ebe6b15da2..b6aa40d6c82f 100644 --- a/sdks/python/apache_beam/coders/coders.py +++ b/sdks/python/apache_beam/coders/coders.py @@ -22,7 +22,7 @@ from __future__ import absolute_import import base64 -import cPickle as pickle +from builtins import object import google.protobuf from google.protobuf import wrappers_pb2 @@ -33,6 +33,12 @@ from apache_beam.portability.api import beam_runner_api_pb2 from apache_beam.utils import proto_utils +# This is for py2/3 compatibility. cPickle was renamed pickle in python 3. +try: + import cPickle as pickle # Python 2 +except ImportError: + import pickle # Python 3 + # pylint: disable=wrong-import-order, wrong-import-position, ungrouped-imports try: from .stream import get_varint_size @@ -210,11 +216,15 @@ def as_cloud_object(self): def __repr__(self): return self.__class__.__name__ + # pylint: disable=protected-access def __eq__(self, other): - # pylint: disable=protected-access return (self.__class__ == other.__class__ and self._dict_without_impl() == other._dict_without_impl()) - # pylint: enable=protected-access + + def __hash__(self): + return hash((self.__class__,) + + tuple(sorted(self._dict_without_impl().items()))) + # pylint: enable=protected-access _known_urns = {} @@ -312,7 +322,7 @@ class ToStringCoder(Coder): def encode(self, value): try: # Python 2 - if isinstance(value, unicode): + if isinstance(value, unicode): # pylint: disable=unicode-builtin return value.encode('utf-8') except NameError: # Python 3 pass diff --git a/sdks/python/apache_beam/coders/coders_test.py b/sdks/python/apache_beam/coders/coders_test.py index 705de8920d52..a6c456f619f3 100644 --- a/sdks/python/apache_beam/coders/coders_test.py +++ b/sdks/python/apache_beam/coders/coders_test.py @@ -14,11 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # - +from __future__ import absolute_import import base64 import logging import unittest +from builtins import object from apache_beam.coders import proto2_coder_test_messages_pb2 as test_message from apache_beam.coders import coders @@ -47,17 +48,11 @@ def test_equality(self): class CodersTest(unittest.TestCase): def test_str_utf8_coder(self): - real_coder = coders_registry.get_coder(str) - expected_coder = coders.BytesCoder() - self.assertEqual( - real_coder.encode('abc'), expected_coder.encode('abc')) - self.assertEqual('abc', real_coder.decode(real_coder.encode('abc'))) - real_coder = coders_registry.get_coder(bytes) expected_coder = coders.BytesCoder() self.assertEqual( - real_coder.encode('abc'), expected_coder.encode('abc')) - self.assertEqual('abc', real_coder.decode(real_coder.encode('abc'))) + real_coder.encode(b'abc'), expected_coder.encode(b'abc')) + self.assertEqual(b'abc', real_coder.decode(real_coder.encode(b'abc'))) # The test proto message file was generated by running the following: @@ -99,6 +94,9 @@ def __eq__(self, other): return True return False + def __hash__(self): + return hash(type(self)) + class FallbackCoderTest(unittest.TestCase): diff --git a/sdks/python/apache_beam/coders/coders_test_common.py b/sdks/python/apache_beam/coders/coders_test_common.py index 0ea7da2b6ad5..0b8b4c20fde2 100644 --- a/sdks/python/apache_beam/coders/coders_test_common.py +++ b/sdks/python/apache_beam/coders/coders_test_common.py @@ -21,6 +21,7 @@ import logging import math import unittest +from builtins import range import dill @@ -103,7 +104,7 @@ def test_custom_coder(self): self.check_coder(CustomCoder(), 1, -10, 5) self.check_coder(coders.TupleCoder((CustomCoder(), coders.BytesCoder())), - (1, 'a'), (-10, 'b'), (5, 'c')) + (1, b'a'), (-10, b'b'), (5, b'c')) def test_pickle_coder(self): self.check_coder(coders.PickleCoder(), 'a', 1, 1.5, (1, 2, 3)) @@ -129,7 +130,7 @@ def test_dill_coder(self): def test_fast_primitives_coder(self): coder = coders.FastPrimitivesCoder(coders.SingletonCoder(len)) - self.check_coder(coder, None, 1, -1, 1.5, 'str\0str', u'unicode\0\u0101') + self.check_coder(coder, None, 1, -1, 1.5, b'str\0str', u'unicode\0\u0101') self.check_coder(coder, (), (1, 2, 3)) self.check_coder(coder, [], [1, 2, 3]) self.check_coder(coder, dict(), {'a': 'b'}, {0: dict(), 1: len}) @@ -139,7 +140,7 @@ def test_fast_primitives_coder(self): self.check_coder(coders.TupleCoder((coder,)), ('a',), (1,)) def test_bytes_coder(self): - self.check_coder(coders.BytesCoder(), 'a', '\0', 'z' * 1000) + self.check_coder(coders.BytesCoder(), b'a', b'\0', b'z' * 1000) def test_varint_coder(self): # Small ints. @@ -190,7 +191,7 @@ def test_timestamp_coder(self): timestamp.Timestamp(micros=1234567890123456789)) self.check_coder( coders.TupleCoder((coders.TimestampCoder(), coders.BytesCoder())), - (timestamp.Timestamp.of(27), 'abc')) + (timestamp.Timestamp.of(27), b'abc')) def test_tuple_coder(self): kv_coder = coders.TupleCoder((coders.VarIntCoder(), coders.BytesCoder())) @@ -206,14 +207,14 @@ def test_tuple_coder(self): kv_coder.as_cloud_object()) # Test binary representation self.assertEqual( - '\x04abc', - kv_coder.encode((4, 'abc'))) + b'\x04abc', + kv_coder.encode((4, b'abc'))) # Test unnested self.check_coder( kv_coder, - (1, 'a'), - (-2, 'a' * 100), - (300, 'abc\0' * 5)) + (1, b'a'), + (-2, b'a' * 100), + (300, b'abc\0' * 5)) # Test nested self.check_coder( coders.TupleCoder( @@ -322,12 +323,12 @@ def test_windowed_value_coder(self): }, coder.as_cloud_object()) # Test binary representation - self.assertEqual('\x7f\xdf;dZ\x1c\xac\t\x00\x00\x00\x01\x0f\x01', + self.assertEqual(b'\x7f\xdf;dZ\x1c\xac\t\x00\x00\x00\x01\x0f\x01', coder.encode(window.GlobalWindows.windowed_value(1))) # Test decoding large timestamp self.assertEqual( - coder.decode('\x7f\xdf;dZ\x1c\xac\x08\x00\x00\x00\x01\x0f\x00'), + coder.decode(b'\x7f\xdf;dZ\x1c\xac\x08\x00\x00\x00\x01\x0f\x00'), windowed_value.create(0, MIN_TIMESTAMP.micros, (GlobalWindow(),))) # Test unnested @@ -364,7 +365,7 @@ def test_proto_coder(self): proto_coder = coders.ProtoCoder(ma.__class__) self.check_coder(proto_coder, ma) self.check_coder(coders.TupleCoder((proto_coder, coders.BytesCoder())), - (ma, 'a'), (mb, 'b')) + (ma, b'a'), (mb, b'b')) def test_global_window_coder(self): coder = coders.GlobalWindowCoder() @@ -391,16 +392,16 @@ def test_length_prefix_coder(self): }, coder.as_cloud_object()) # Test binary representation - self.assertEqual('\x00', coder.encode('')) - self.assertEqual('\x01a', coder.encode('a')) - self.assertEqual('\x02bc', coder.encode('bc')) - self.assertEqual('\xff\x7f' + 'z' * 16383, coder.encode('z' * 16383)) + self.assertEqual(b'\x00', coder.encode(b'')) + self.assertEqual(b'\x01a', coder.encode(b'a')) + self.assertEqual(b'\x02bc', coder.encode(b'bc')) + self.assertEqual(b'\xff\x7f' + b'z' * 16383, coder.encode(b'z' * 16383)) # Test unnested - self.check_coder(coder, '', 'a', 'bc', 'def') + self.check_coder(coder, b'', b'a', b'bc', b'def') # Test nested self.check_coder(coders.TupleCoder((coder, coder)), - ('', 'a'), - ('bc', 'def')) + (b'', b'a'), + (b'bc', b'def')) def test_nested_observables(self): class FakeObservableIterator(observable.ObservableMixin): diff --git a/sdks/python/apache_beam/coders/fast_coders_test.py b/sdks/python/apache_beam/coders/fast_coders_test.py index a13334a2c26f..eb4077344f65 100644 --- a/sdks/python/apache_beam/coders/fast_coders_test.py +++ b/sdks/python/apache_beam/coders/fast_coders_test.py @@ -16,6 +16,7 @@ # """Unit tests for compiled implementation of coder impls.""" +from __future__ import absolute_import import logging import unittest diff --git a/sdks/python/apache_beam/coders/observable.py b/sdks/python/apache_beam/coders/observable.py index fc952cf4e559..3d0a7fc10bf9 100644 --- a/sdks/python/apache_beam/coders/observable.py +++ b/sdks/python/apache_beam/coders/observable.py @@ -20,6 +20,9 @@ For internal use only; no backwards-compatibility guarantees. """ +from __future__ import absolute_import + +from builtins import object class ObservableMixin(object): diff --git a/sdks/python/apache_beam/coders/observable_test.py b/sdks/python/apache_beam/coders/observable_test.py index 09ca3041c298..ce32bf05ef23 100644 --- a/sdks/python/apache_beam/coders/observable_test.py +++ b/sdks/python/apache_beam/coders/observable_test.py @@ -16,6 +16,7 @@ # """Tests for the Observable mixin class.""" +from __future__ import absolute_import import logging import unittest diff --git a/sdks/python/apache_beam/coders/slow_coders_test.py b/sdks/python/apache_beam/coders/slow_coders_test.py index 97aa39ca094f..b543b56dd264 100644 --- a/sdks/python/apache_beam/coders/slow_coders_test.py +++ b/sdks/python/apache_beam/coders/slow_coders_test.py @@ -16,6 +16,7 @@ # """Unit tests for uncompiled implementation of coder impls.""" +from __future__ import absolute_import import logging import unittest diff --git a/sdks/python/apache_beam/coders/slow_stream.py b/sdks/python/apache_beam/coders/slow_stream.py index 1ab55d90f98d..da27a49883a6 100644 --- a/sdks/python/apache_beam/coders/slow_stream.py +++ b/sdks/python/apache_beam/coders/slow_stream.py @@ -19,8 +19,11 @@ For internal use only; no backwards-compatibility guarantees. """ +from __future__ import absolute_import import struct +from builtins import chr +from builtins import object class OutputStream(object): @@ -32,13 +35,13 @@ def __init__(self): self.data = [] def write(self, b, nested=False): - assert isinstance(b, str) + assert isinstance(b, bytes) if nested: self.write_var_int64(len(b)) self.data.append(b) def write_byte(self, val): - self.data.append(chr(val)) + self.data.append(chr(val).encode('latin-1')) def write_var_int64(self, v): if v < 0: diff --git a/sdks/python/apache_beam/coders/standard_coders_test.py b/sdks/python/apache_beam/coders/standard_coders_test.py index b595cfd59225..f704c490c975 100644 --- a/sdks/python/apache_beam/coders/standard_coders_test.py +++ b/sdks/python/apache_beam/coders/standard_coders_test.py @@ -17,6 +17,7 @@ """Unit tests for coders that must be consistent across all Beam SDKs. """ +from __future__ import absolute_import from __future__ import print_function import json @@ -24,6 +25,7 @@ import os.path import sys import unittest +from builtins import map import yaml @@ -74,7 +76,7 @@ class StandardCodersTest(unittest.TestCase): lambda x: IntervalWindow( start=Timestamp(micros=(x['end'] - x['span']) * 1000), end=Timestamp(micros=x['end'] * 1000)), - 'beam:coder:iterable:v1': lambda x, parser: map(parser, x), + 'beam:coder:iterable:v1': lambda x, parser: list(map(parser, x)), 'beam:coder:global_window:v1': lambda x: window.GlobalWindow(), 'beam:coder:windowed_value:v1': lambda x, value_parser, window_parser: windowed_value.create( diff --git a/sdks/python/apache_beam/coders/stream_test.py b/sdks/python/apache_beam/coders/stream_test.py index 15bc5eb9ba93..641fefad45d6 100644 --- a/sdks/python/apache_beam/coders/stream_test.py +++ b/sdks/python/apache_beam/coders/stream_test.py @@ -16,10 +16,13 @@ # """Tests for the stream implementations.""" +from __future__ import absolute_import +from __future__ import division import logging import math import unittest +from builtins import range from apache_beam.coders import slow_stream diff --git a/sdks/python/apache_beam/coders/typecoders.py b/sdks/python/apache_beam/coders/typecoders.py index 355c6230f923..e4efa2c7ffdf 100644 --- a/sdks/python/apache_beam/coders/typecoders.py +++ b/sdks/python/apache_beam/coders/typecoders.py @@ -63,12 +63,18 @@ def MakeXyzs(v): See apache_beam.typehints.decorators module for more details. """ +from __future__ import absolute_import -import six +from builtins import object from apache_beam.coders import coders from apache_beam.typehints import typehints +try: + unicode # pylint: disable=unicode-builtin +except NameError: + unicode = str + __all__ = ['registry'] @@ -84,9 +90,8 @@ def register_standard_coders(self, fallback_coder): """Register coders for all basic and composite types.""" self._register_coder_internal(int, coders.VarIntCoder) self._register_coder_internal(float, coders.FloatCoder) - self._register_coder_internal(str, coders.BytesCoder) self._register_coder_internal(bytes, coders.BytesCoder) - self._register_coder_internal(six.text_type, coders.StrUtf8Coder) + self._register_coder_internal(unicode, coders.StrUtf8Coder) self._register_coder_internal(typehints.TupleConstraint, coders.TupleCoder) # Default fallback coders applied in that order until the first matching # coder found. diff --git a/sdks/python/apache_beam/coders/typecoders_test.py b/sdks/python/apache_beam/coders/typecoders_test.py index 2b6aa7a51298..b64cb653e564 100644 --- a/sdks/python/apache_beam/coders/typecoders_test.py +++ b/sdks/python/apache_beam/coders/typecoders_test.py @@ -16,8 +16,10 @@ # """Unit tests for the typecoders module.""" +from __future__ import absolute_import import unittest +from builtins import object from apache_beam.coders import coders from apache_beam.coders import typecoders @@ -33,6 +35,9 @@ def __init__(self, n): def __eq__(self, other): return self.number == other.number + def __hash__(self): + return self.number + class CustomCoder(coders.Coder): @@ -75,7 +80,7 @@ def test_get_coder_with_composite_custom_coder(self): def test_get_coder_with_standard_coder(self): self.assertEqual(coders.BytesCoder, - typecoders.registry.get_coder(str).__class__) + typecoders.registry.get_coder(bytes).__class__) def test_fallbackcoder(self): coder = typecoders.registry.get_coder(typehints.Any) @@ -100,22 +105,16 @@ def test_standard_int_coder(self): real_coder.decode(real_coder.encode(0x040404040404))) def test_standard_str_coder(self): - real_coder = typecoders.registry.get_coder(str) - expected_coder = coders.BytesCoder() - self.assertEqual( - real_coder.encode('abc'), expected_coder.encode('abc')) - self.assertEqual('abc', real_coder.decode(real_coder.encode('abc'))) - real_coder = typecoders.registry.get_coder(bytes) expected_coder = coders.BytesCoder() self.assertEqual( - real_coder.encode('abc'), expected_coder.encode('abc')) - self.assertEqual('abc', real_coder.decode(real_coder.encode('abc'))) + real_coder.encode(b'abc'), expected_coder.encode(b'abc')) + self.assertEqual(b'abc', real_coder.decode(real_coder.encode(b'abc'))) def test_iterable_coder(self): - real_coder = typecoders.registry.get_coder(typehints.Iterable[str]) + real_coder = typecoders.registry.get_coder(typehints.Iterable[bytes]) expected_coder = coders.IterableCoder(coders.BytesCoder()) - values = ['abc', 'xyz'] + values = [b'abc', b'xyz'] self.assertEqual(expected_coder, real_coder) self.assertEqual(real_coder.encode(values), expected_coder.encode(values)) diff --git a/sdks/python/generate_pydoc.sh b/sdks/python/generate_pydoc.sh index ae8d043d376e..68b7006a1b60 100755 --- a/sdks/python/generate_pydoc.sh +++ b/sdks/python/generate_pydoc.sh @@ -121,6 +121,9 @@ ignore_identifiers = [ # Ignore broken built-in type references 'tuple', + # Ignore future.builtin type references + 'future.types.newobject.newobject', + # Ignore private classes 'apache_beam.coders.coders._PickleCoderBase', 'apache_beam.coders.coders.FastCoder', diff --git a/sdks/python/run_pylint.sh b/sdks/python/run_pylint.sh index 7150fa6f50e3..4e7dc9d8218f 100755 --- a/sdks/python/run_pylint.sh +++ b/sdks/python/run_pylint.sh @@ -89,25 +89,6 @@ done isort ${MODULE} -p apache_beam --line-width 120 --check-only --order-by-type \ --combine-star --force-single-line-imports --diff --recursive ${SKIP_PARAM} -FUTURIZE_EXCLUDED=( - "typehints.py" - "pb2" - "trivial_infernce.py" -) -FUTURIZE_GREP_PARAM=$( IFS='|'; echo "${ids[*]}" ) -echo "Checking for files requiring stage 1 refactoring from futurize" -futurize_results=$(futurize -j 8 --stage1 apache_beam 2>&1 |grep Refactored) -futurize_filtered=$(echo "$futurize_results" |grep -v "$FUTURIZE_GREP_PARAM" || echo "") -count=${#futurize_filtered} -if [ "$count" != "0" ]; then - echo "Some of the changes require futurize stage 1 changes." - echo "The files with required changes:" - echo "$futurize_filtered" - echo "You can run futurize apache_beam to see the proposed changes." - exit 1 -fi -echo "No future changes needed" - echo "Checking unittest.main for module ${MODULE}:" TESTS_MISSING_MAIN=$(find ${MODULE} | grep '\.py$' | xargs grep -l '^import unittest$' | xargs grep -L unittest.main) if [ -n "${TESTS_MISSING_MAIN}" ]; then diff --git a/sdks/python/run_pylint_2to3.sh b/sdks/python/run_pylint_2to3.sh new file mode 100755 index 000000000000..9a074368dba0 --- /dev/null +++ b/sdks/python/run_pylint_2to3.sh @@ -0,0 +1,90 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This script will run pylint with the --py3k parameter to check for python +# 3 compatibility. This script can run on a list of modules provided as +# command line arguments. +# +# The exit-code of the script indicates success or a failure. + +set -o errexit +set -o pipefail + +DEFAULT_MODULE=apache_beam + +usage(){ echo "Usage: $0 [MODULE|--help] +# The default MODULE is $DEFAULT_MODULE"; } + +MODULES=${DEFAULT_MODULE} +while [[ $# -gt 0 ]] ; do + key="$1" + case ${key} in + --help) usage; exit 1;; + *) + if [ ${MODULES} = ${DEFAULT_MODULE} ] ; then + MODULES=() + fi + MODULES+=("$1") + shift;; + esac +done + +FUTURIZE_EXCLUDED=( + "typehints.py" + "pb2" + "trivial_infernce.py" +) +FUTURIZE_GREP_PARAM=$( IFS='|'; echo "${ids[*]}" ) +echo "Checking for files requiring stage 1 refactoring from futurize" +futurize_results=$(futurize -j 8 --stage1 apache_beam 2>&1 |grep Refactored) +futurize_filtered=$(echo "$futurize_results" |grep -v "$FUTURIZE_GREP_PARAM" \ + || echo "") +count=${#futurize_filtered} +if [ "$count" != "0" ]; then + echo "Some of the changes require futurize stage 1 changes." + echo "The files with required changes:" + echo "$futurize_filtered" + echo "You can run futurize apache_beam to see the proposed changes." + exit 1 +fi +echo "No future changes needed" + +# Following generated files are excluded from lint checks. +EXCLUDED_GENERATED_FILES=( +"apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py" +"apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_messages.py" +"apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_client.py" +"apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_messages.py" +"apache_beam/io/gcp/internal/clients/storage/storage_v1_client.py" +"apache_beam/io/gcp/internal/clients/storage/storage_v1_messages.py" +"apache_beam/coders/proto2_coder_test_messages_pb2.py" +apache_beam/portability/api/*pb2*.py +) + +FILES_TO_IGNORE="" +for file in "${EXCLUDED_GENERATED_FILES[@]}"; do + if test -z "$FILES_TO_IGNORE" + then FILES_TO_IGNORE="$(basename $file)" + else FILES_TO_IGNORE="$FILES_TO_IGNORE, $(basename $file)" + fi +done +echo "Skipping lint for generated files: $FILES_TO_IGNORE" + +echo "Running pylint --py3k for modules $( printf "%s " "${MODULES[@]}" ):" +pylint -j8 $( printf "%s " "${MODULES[@]}" ) \ + --ignore-patterns="$FILES_TO_IGNORE" --py3k diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 364148355c2f..35e705583f6f 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -66,7 +66,7 @@ def get_version(): ) -REQUIRED_CYTHON_VERSION = '0.26.1' +REQUIRED_CYTHON_VERSION = '0.28.1' try: _CYTHON_VERSION = get_distribution('cython').version if StrictVersion(_CYTHON_VERSION) < StrictVersion(REQUIRED_CYTHON_VERSION): diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 0d49cbc958a6..caa899de2dbe 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -17,7 +17,7 @@ [tox] # new environments will be excluded by default unless explicitly added to envlist. -envlist = py27,py27-{gcp,cython,lint},py3-lint,docs +envlist = py27,py27-{gcp,cython,lint,lint3},py3-lint,docs toxworkdir = {toxinidir}/target/.tox [pycodestyle] @@ -34,7 +34,8 @@ whitelist_externals = find time deps = - cython: cython==0.26.1 + cython: cython==0.28.1 + future==0.16.0 # These 2 magic command overrides are required for Jenkins builds. # Otherwise we get "OSError: [Errno 2] No such file or directory" errors. @@ -88,6 +89,22 @@ commands = pip --version time {toxinidir}/run_pylint.sh +[testenv:py27-lint3] +# Checks for py2/3 compatibility issues +deps = + pycodestyle==2.3.1 + pylint==1.7.2 + future==0.16.0 + isort==4.2.15 + flake8==3.5.0 +modules = + apache_beam/coders +commands = + python --version + pip --version + time {toxinidir}/run_pylint_2to3.sh {[testenv:py27-lint3]modules} + + [testenv:py3-lint] deps = pycodestyle==2.3.1