From 80fb5ffb9e18127a926f36fc766a98e3fbc4ab79 Mon Sep 17 00:00:00 2001 From: cclauss Date: Thu, 15 Feb 2018 10:26:29 +0100 Subject: [PATCH] Change unicode --> six.text_type for Python 3 --- sdks/python/apache_beam/coders/coder_impl.py | 6 ++++-- sdks/python/apache_beam/coders/coders.py | 9 +++++---- sdks/python/apache_beam/coders/typecoders.py | 4 +++- .../apache_beam/examples/cookbook/datastore_wordcount.py | 5 +++-- sdks/python/apache_beam/examples/snippets/snippets.py | 5 ++++- sdks/python/apache_beam/examples/streaming_wordcount.py | 4 +++- sdks/python/apache_beam/examples/windowed_wordcount.py | 4 +++- sdks/python/apache_beam/examples/wordcount.py | 4 +++- sdks/python/apache_beam/examples/wordcount_debugging.py | 4 +++- sdks/python/apache_beam/examples/wordcount_fnapi.py | 4 +++- sdks/python/apache_beam/examples/wordcount_minimal.py | 4 +++- sdks/python/apache_beam/io/gcp/datastore/v1/helper.py | 5 ++++- .../apache_beam/runners/dataflow/internal/apiclient.py | 3 ++- sdks/python/apache_beam/transforms/display.py | 4 +++- sdks/python/apache_beam/transforms/display_test.py | 6 +++++- 15 files changed, 51 insertions(+), 20 deletions(-) diff --git a/sdks/python/apache_beam/coders/coder_impl.py b/sdks/python/apache_beam/coders/coder_impl.py index b5b17899601f..d47357df8c91 100644 --- a/sdks/python/apache_beam/coders/coder_impl.py +++ b/sdks/python/apache_beam/coders/coder_impl.py @@ -30,6 +30,8 @@ from types import NoneType +import six + from apache_beam.coders import observable from apache_beam.utils import windowed_value from apache_beam.utils.timestamp import MAX_TIMESTAMP @@ -197,7 +199,7 @@ def __init__(self, coder, step_label): self._step_label = step_label def _check_safe(self, value): - if isinstance(value, (str, unicode, long, int, float)): + if isinstance(value, (str, six.text_type, long, int, float)): pass elif value is None: pass @@ -288,7 +290,7 @@ def encode_to_stream(self, value, stream, nested): elif t is str: stream.write_byte(STR_TYPE) stream.write(value, nested) - elif t is unicode: + elif t is six.text_type: unicode_value = value # for typing stream.write_byte(UNICODE_TYPE) stream.write(unicode_value.encode('utf-8'), nested) diff --git a/sdks/python/apache_beam/coders/coders.py b/sdks/python/apache_beam/coders/coders.py index f76625869879..ecbdd538d38b 100644 --- a/sdks/python/apache_beam/coders/coders.py +++ b/sdks/python/apache_beam/coders/coders.py @@ -309,10 +309,11 @@ class ToStringCoder(Coder): """A default string coder used if no sink coder is specified.""" def encode(self, value): - if isinstance(value, unicode): - return value.encode('utf-8') - elif isinstance(value, str): - return value + try: # Python 2 + if isinstance(value, unicode): + return value.encode('utf-8') + except NameError: # Python 3 + pass return str(value) def decode(self, _): diff --git a/sdks/python/apache_beam/coders/typecoders.py b/sdks/python/apache_beam/coders/typecoders.py index dd071d7a9331..355c6230f923 100644 --- a/sdks/python/apache_beam/coders/typecoders.py +++ b/sdks/python/apache_beam/coders/typecoders.py @@ -64,6 +64,8 @@ def MakeXyzs(v): See apache_beam.typehints.decorators module for more details. """ +import six + from apache_beam.coders import coders from apache_beam.typehints import typehints @@ -84,7 +86,7 @@ def register_standard_coders(self, fallback_coder): self._register_coder_internal(float, coders.FloatCoder) self._register_coder_internal(str, coders.BytesCoder) self._register_coder_internal(bytes, coders.BytesCoder) - self._register_coder_internal(unicode, coders.StrUtf8Coder) + self._register_coder_internal(six.text_type, coders.StrUtf8Coder) self._register_coder_internal(typehints.TupleConstraint, coders.TupleCoder) # Default fallback coders applied in that order until the first matching # coder found. diff --git a/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py b/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py index 7204e3b2077a..8040ab142fd4 100644 --- a/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py +++ b/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py @@ -72,6 +72,7 @@ from google.cloud.proto.datastore.v1 import query_pb2 from googledatastore import helper as datastore_helper from googledatastore import PropertyFilter +import six import apache_beam as beam from apache_beam.io import ReadFromText @@ -131,7 +132,7 @@ def make_entity(self, content): datastore_helper.add_key_path(entity.key, self._kind, self._ancestor, self._kind, str(uuid.uuid4())) - datastore_helper.add_properties(entity, {"content": unicode(content)}) + datastore_helper.add_properties(entity, {"content": six.text_type(content)}) return entity @@ -186,7 +187,7 @@ def count_ones(word_ones): counts = (lines | 'split' >> (beam.ParDo(WordExtractingDoFn()) - .with_output_types(unicode)) + .with_output_types(six.text_type)) | 'pair_with_one' >> beam.Map(lambda x: (x, 1)) | 'group' >> beam.GroupByKey() | 'count' >> beam.Map(count_ones)) diff --git a/sdks/python/apache_beam/examples/snippets/snippets.py b/sdks/python/apache_beam/examples/snippets/snippets.py index b2c5bb926b34..8a09214ed0f1 100644 --- a/sdks/python/apache_beam/examples/snippets/snippets.py +++ b/sdks/python/apache_beam/examples/snippets/snippets.py @@ -30,6 +30,8 @@ string. The tags can contain only letters, digits and _. """ +import six + import apache_beam as beam from apache_beam.io import iobase from apache_beam.io.range_trackers import OffsetRangeTracker @@ -983,7 +985,8 @@ def model_datastoreio(): def to_entity(content): entity = entity_pb2.Entity() googledatastore.helper.add_key_path(entity.key, kind, str(uuid.uuid4())) - googledatastore.helper.add_properties(entity, {'content': unicode(content)}) + googledatastore.helper.add_properties(entity, + {'content': six.text_type(content)}) return entity entities = musicians | 'To Entity' >> beam.Map(to_entity) diff --git a/sdks/python/apache_beam/examples/streaming_wordcount.py b/sdks/python/apache_beam/examples/streaming_wordcount.py index df8a99bcf35f..43d19f3acc95 100644 --- a/sdks/python/apache_beam/examples/streaming_wordcount.py +++ b/sdks/python/apache_beam/examples/streaming_wordcount.py @@ -26,6 +26,8 @@ import argparse import logging +import six + import apache_beam as beam import apache_beam.transforms.window as window from apache_beam.options.pipeline_options import PipelineOptions @@ -65,7 +67,7 @@ def count_ones(word_ones): transformed = (lines # Use a pre-defined function that imports the re package. | 'Split' >> ( - beam.FlatMap(split_fn).with_output_types(unicode)) + beam.FlatMap(split_fn).with_output_types(six.text_type)) | 'PairWithOne' >> beam.Map(lambda x: (x, 1)) | beam.WindowInto(window.FixedWindows(15, 0)) | 'Group' >> beam.GroupByKey() diff --git a/sdks/python/apache_beam/examples/windowed_wordcount.py b/sdks/python/apache_beam/examples/windowed_wordcount.py index 4c7eee18fe31..3838408fd2b3 100644 --- a/sdks/python/apache_beam/examples/windowed_wordcount.py +++ b/sdks/python/apache_beam/examples/windowed_wordcount.py @@ -26,6 +26,8 @@ import argparse import logging +import six + import apache_beam as beam import apache_beam.transforms.window as window @@ -75,7 +77,7 @@ def count_ones(word_ones): transformed = (lines | 'Split' >> (beam.FlatMap(find_words) - .with_output_types(unicode)) + .with_output_types(six.text_type)) | 'PairWithOne' >> beam.Map(lambda x: (x, 1)) | beam.WindowInto(window.FixedWindows(2*60, 0)) | 'Group' >> beam.GroupByKey() diff --git a/sdks/python/apache_beam/examples/wordcount.py b/sdks/python/apache_beam/examples/wordcount.py index b1c4a5e9c159..357f08df48bc 100644 --- a/sdks/python/apache_beam/examples/wordcount.py +++ b/sdks/python/apache_beam/examples/wordcount.py @@ -23,6 +23,8 @@ import logging import re +import six + import apache_beam as beam from apache_beam.io import ReadFromText from apache_beam.io import WriteToText @@ -94,7 +96,7 @@ def count_ones(word_ones): counts = (lines | 'split' >> (beam.ParDo(WordExtractingDoFn()) - .with_output_types(unicode)) + .with_output_types(six.text_type)) | 'pair_with_one' >> beam.Map(lambda x: (x, 1)) | 'group' >> beam.GroupByKey() | 'count' >> beam.Map(count_ones)) diff --git a/sdks/python/apache_beam/examples/wordcount_debugging.py b/sdks/python/apache_beam/examples/wordcount_debugging.py index 6ff8f2653ffd..9fdcade7a263 100644 --- a/sdks/python/apache_beam/examples/wordcount_debugging.py +++ b/sdks/python/apache_beam/examples/wordcount_debugging.py @@ -45,6 +45,8 @@ import logging import re +import six + import apache_beam as beam from apache_beam.io import ReadFromText from apache_beam.io import WriteToText @@ -99,7 +101,7 @@ def count_ones(word_ones): return (pcoll | 'split' >> (beam.FlatMap(lambda x: re.findall(r'[A-Za-z\']+', x)) - .with_output_types(unicode)) + .with_output_types(six.text_type)) | 'pair_with_one' >> beam.Map(lambda x: (x, 1)) | 'group' >> beam.GroupByKey() | 'count' >> beam.Map(count_ones)) diff --git a/sdks/python/apache_beam/examples/wordcount_fnapi.py b/sdks/python/apache_beam/examples/wordcount_fnapi.py index 113968820d14..dce24517b115 100644 --- a/sdks/python/apache_beam/examples/wordcount_fnapi.py +++ b/sdks/python/apache_beam/examples/wordcount_fnapi.py @@ -28,6 +28,8 @@ import logging import re +import six + import apache_beam as beam from apache_beam.io import ReadFromText # TODO(BEAM-2887): Enable after the issue is fixed. @@ -102,7 +104,7 @@ def run(argv=None): counts = (lines | 'split' >> (beam.ParDo(WordExtractingDoFn()) - .with_output_types(unicode)) + .with_output_types(six.text_type)) | 'pair_with_one' >> beam.Map(lambda x: (x, 1)) | 'group_and_sum' >> beam.CombinePerKey(sum)) diff --git a/sdks/python/apache_beam/examples/wordcount_minimal.py b/sdks/python/apache_beam/examples/wordcount_minimal.py index 390c8c04af88..34abdb78a0dd 100644 --- a/sdks/python/apache_beam/examples/wordcount_minimal.py +++ b/sdks/python/apache_beam/examples/wordcount_minimal.py @@ -50,6 +50,8 @@ import logging import re +import six + import apache_beam as beam from apache_beam.io import ReadFromText from apache_beam.io import WriteToText @@ -101,7 +103,7 @@ def run(argv=None): counts = ( lines | 'Split' >> (beam.FlatMap(lambda x: re.findall(r'[A-Za-z\']+', x)) - .with_output_types(unicode)) + .with_output_types(six.text_type)) | 'PairWithOne' >> beam.Map(lambda x: (x, 1)) | 'GroupAndSum' >> beam.CombinePerKey(sum)) diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py b/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py index b86a2fa01455..87d798bebe3f 100644 --- a/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py +++ b/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py @@ -26,6 +26,8 @@ import time from socket import error as SocketError +import six + # pylint: disable=ungrouped-imports from apache_beam.internal.gcp import auth from apache_beam.utils import retry @@ -252,7 +254,8 @@ def make_kind_stats_query(namespace, kind, latest_timestamp): kind_stat_query.kind.add().name = '__Stat_Ns_Kind__' kind_filter = datastore_helper.set_property_filter( - query_pb2.Filter(), 'kind_name', PropertyFilter.EQUAL, unicode(kind)) + query_pb2.Filter(), 'kind_name', PropertyFilter.EQUAL, + six.text_type(kind)) timestamp_filter = datastore_helper.set_property_filter( query_pb2.Filter(), 'timestamp', PropertyFilter.EQUAL, latest_timestamp) diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py index 1cf80b799021..e503da9eb5ba 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py @@ -31,6 +31,7 @@ from apitools.base.py import encoding from apitools.base.py import exceptions +import six from apache_beam.internal.gcp.auth import get_service_credentials from apache_beam.internal.gcp.json_value import to_json_value @@ -287,7 +288,7 @@ def encode_shortstrings(input_buffer, errors='strict'): def decode_shortstrings(input_buffer, errors='strict'): """Decoder (to Unicode) that suppresses long base64 strings.""" shortened, length = encode_shortstrings(input_buffer, errors) - return unicode(shortened), length + return six.text_type(shortened), length def shortstrings_registerer(encoding_name): if encoding_name == 'shortstrings': diff --git a/sdks/python/apache_beam/transforms/display.py b/sdks/python/apache_beam/transforms/display.py index cb7b53eb29aa..4206f2110b7d 100644 --- a/sdks/python/apache_beam/transforms/display.py +++ b/sdks/python/apache_beam/transforms/display.py @@ -44,6 +44,8 @@ from datetime import datetime from datetime import timedelta +import six + __all__ = ['HasDisplayData', 'DisplayDataItem', 'DisplayData'] @@ -167,7 +169,7 @@ class DisplayDataItem(object): display item belongs to. """ typeDict = {str:'STRING', - unicode:'STRING', + six.text_type:'STRING', int:'INTEGER', float:'FLOAT', bool: 'BOOLEAN', diff --git a/sdks/python/apache_beam/transforms/display_test.py b/sdks/python/apache_beam/transforms/display_test.py index 5c73cf39a92f..90bde8caa8c4 100644 --- a/sdks/python/apache_beam/transforms/display_test.py +++ b/sdks/python/apache_beam/transforms/display_test.py @@ -22,7 +22,9 @@ import unittest from datetime import datetime +# pylint: disable=ungrouped-imports import hamcrest as hc +import six from hamcrest.core.base_matcher import BaseMatcher import apache_beam as beam @@ -31,6 +33,8 @@ from apache_beam.transforms.display import DisplayDataItem from apache_beam.transforms.display import HasDisplayData +# pylint: enable=ungrouped-imports + class DisplayDataItemMatcher(BaseMatcher): """ Matcher class for DisplayDataItems in unit tests. @@ -161,7 +165,7 @@ def test_create_list_display_data(self): def test_unicode_type_display_data(self): class MyDoFn(beam.DoFn): def display_data(self): - return {'unicode_string': unicode('my string'), + return {'unicode_string': six.text_type('my string'), 'unicode_literal_string': u'my literal string'} fn = MyDoFn()