From 2030e89eeb3161c42fc430398f3b54bc23b6b42c Mon Sep 17 00:00:00 2001
From: Matthias Feys
Date: Tue, 10 Jul 2018 20:59:00 +0200
Subject: [PATCH 1/3] fix missing pylint3 check for io subpackage

---
 sdks/python/apache_beam/io/concat_source_test.py | 3 +++
 sdks/python/apache_beam/io/filebasedsink.py | 3 +++
 sdks/python/apache_beam/io/filebasedsource_test.py | 1 +
 .../apache_beam/io/gcp/datastore/v1/datastoreio.py | 1 +
 sdks/python/apache_beam/io/gcp/datastore/v1/helper.py | 2 ++
 .../apache_beam/io/gcp/datastore/v1/query_splitter.py | 1 +
 sdks/python/apache_beam/io/gcp/datastore/v1/util_test.py | 2 ++
 sdks/python/apache_beam/io/gcp/gcsfilesystem_test.py | 2 ++
 sdks/python/apache_beam/io/gcp/gcsio_test.py | 1 +
 sdks/python/apache_beam/io/gcp/pubsub.py | 3 +++
 .../apache_beam/io/gcp/tests/pubsub_matcher_test.py | 2 ++
 sdks/python/apache_beam/io/hadoopfilesystem_test.py | 3 +++
 sdks/python/apache_beam/io/range_trackers.py | 8 ++++++--
 sdks/python/apache_beam/io/restriction_trackers.py | 3 +++
 sdks/python/apache_beam/io/tfrecordio.py | 9 +++++----
 sdks/python/apache_beam/io/vcfio.py | 9 ++++++++-
 sdks/python/tox.ini | 1 +
 17 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/sdks/python/apache_beam/io/concat_source_test.py b/sdks/python/apache_beam/io/concat_source_test.py
index 31e4392f6454..47df91be5663 100644
--- a/sdks/python/apache_beam/io/concat_source_test.py
+++ b/sdks/python/apache_beam/io/concat_source_test.py
@@ -78,6 +78,9 @@ def __eq__(self, other):
     return (type(self) == type(other)
             and self._start == other._start and self._end == other._end)
 
+  def __hash__(self):
+    return hash((type(self), self._start, self._end))
+
   def __ne__(self, other):
     return not self == other
 
diff --git a/sdks/python/apache_beam/io/filebasedsink.py b/sdks/python/apache_beam/io/filebasedsink.py
index dc6726e6442a..8970fac7e37f 100644
--- a/sdks/python/apache_beam/io/filebasedsink.py
+++ b/sdks/python/apache_beam/io/filebasedsink.py
@@ -373,6 +373,9 @@ def __eq__(self, other):
     # pylint: disable=unidiomatic-typecheck
     return type(self) == type(other) and self.__dict__ == other.__dict__
 
+  def __hash__(self):
+    return hash((type(self), self.__dict__))
+
 
 class FileBasedSinkWriter(iobase.Writer):
   """The writer for FileBasedSink.
diff --git a/sdks/python/apache_beam/io/filebasedsource_test.py b/sdks/python/apache_beam/io/filebasedsource_test.py
index e9312238f5b5..71498d062a0f 100644
--- a/sdks/python/apache_beam/io/filebasedsource_test.py
+++ b/sdks/python/apache_beam/io/filebasedsource_test.py
@@ -15,6 +15,7 @@
 #
 
 from __future__ import absolute_import
+from __future__ import division
 
 import bz2
 import gzip
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio.py b/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio.py
index 437f38850bac..66cca24f1936 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio.py
@@ -22,6 +22,7 @@
 import logging
 import time
 from builtins import object
+from builtins import round
 
 from apache_beam.io.gcp.datastore.v1 import helper
 from apache_beam.io.gcp.datastore.v1 import query_splitter
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py b/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py
index a27df09ac0e0..5fdc3a7aeff1 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py
@@ -20,6 +20,8 @@
 For internal use only; no backwards-compatibility guarantees.
 """
+from __future__ import absolute_import
+
 import errno
 import logging
 import sys
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/query_splitter.py b/sdks/python/apache_beam/io/gcp/datastore/v1/query_splitter.py
index 7723fb7b6465..ef3c1e4a18f5 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/query_splitter.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/query_splitter.py
@@ -20,6 +20,7 @@
 from __future__ import division
 
 from builtins import range
+from builtins import round
 
 from apache_beam.io.gcp.datastore.v1 import helper
 
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/util_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1/util_test.py
index 8f17c21533c8..8b05ebef0998 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/util_test.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/util_test.py
@@ -16,6 +16,8 @@
 #
 """Tests for util.py."""
+from __future__ import absolute_import
+
 import unittest
 
 from apache_beam.io.gcp.datastore.v1 import util
diff --git a/sdks/python/apache_beam/io/gcp/gcsfilesystem_test.py b/sdks/python/apache_beam/io/gcp/gcsfilesystem_test.py
index 88f7ce93f46a..757475d2db61 100644
--- a/sdks/python/apache_beam/io/gcp/gcsfilesystem_test.py
+++ b/sdks/python/apache_beam/io/gcp/gcsfilesystem_test.py
@@ -18,6 +18,8 @@
 
 """Unit tests for GCS File System."""
+from __future__ import absolute_import
+
 import logging
 import unittest
 from builtins import zip
diff --git a/sdks/python/apache_beam/io/gcp/gcsio_test.py b/sdks/python/apache_beam/io/gcp/gcsio_test.py
index b10926c41906..0a0b16d63be1 100644
--- a/sdks/python/apache_beam/io/gcp/gcsio_test.py
+++ b/sdks/python/apache_beam/io/gcp/gcsio_test.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 #
 """Tests for Google Cloud Storage client."""
+from __future__ import absolute_import
 from __future__ import division
 
 import errno
diff --git a/sdks/python/apache_beam/io/gcp/pubsub.py b/sdks/python/apache_beam/io/gcp/pubsub.py
index a8e8d4de81a3..e85d949bb1e1 100644
--- a/sdks/python/apache_beam/io/gcp/pubsub.py
+++ b/sdks/python/apache_beam/io/gcp/pubsub.py
@@ -78,6 +78,9 @@ def __eq__(self, other):
             self.data == other.data and
             self.attributes == other.attributes)
 
+  def __hash__(self):
+    return hash((type(self), self.payload, self.attributes))
+
   def __repr__(self):
     return 'PubsubMessage(%s, %s)' % (self.data, self.attributes)
 
diff --git a/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher_test.py b/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher_test.py
index 83f21a2a589b..0e5948163f9d 100644
--- a/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher_test.py
+++ b/sdks/python/apache_beam/io/gcp/tests/pubsub_matcher_test.py
@@ -17,6 +17,8 @@
 
 """Unit test for PubSub verifier."""
+from __future__ import absolute_import
+
 import logging
 import unittest
 
diff --git a/sdks/python/apache_beam/io/hadoopfilesystem_test.py b/sdks/python/apache_beam/io/hadoopfilesystem_test.py
index cce0ac72a33b..c6c85ffc2629 100644
--- a/sdks/python/apache_beam/io/hadoopfilesystem_test.py
+++ b/sdks/python/apache_beam/io/hadoopfilesystem_test.py
@@ -49,6 +49,9 @@ def __init__(self, path, mode='', type='FILE'):
   def __eq__(self, other):
     return self.stat == other.stat and self.getvalue() == self.getvalue()
 
+  def __hash__(self):
+    return hash((self.stat, self.getvalue()))
+
   def close(self):
     self.saved_data = self.getvalue()
     io.BytesIO.close(self)
diff --git a/sdks/python/apache_beam/io/range_trackers.py b/sdks/python/apache_beam/io/range_trackers.py
index 50439e198177..f843529b4f3e 100644
--- a/sdks/python/apache_beam/io/range_trackers.py
+++ b/sdks/python/apache_beam/io/range_trackers.py
@@ -20,9 +20,13 @@
 from __future__ import absolute_import
 from __future__ import division
 
+import codecs
 import logging
 import math
 import threading
+from builtins import zip
+
+from past.builtins import long
 
 from past.builtins import long
 
@@ -402,7 +406,7 @@ def _string_to_int(s, prec):
       s += '\0' * (prec - len(s))
     else:
       s = s[:prec]
-    return int(s.encode('hex'), 16)
+    return int(codecs.encode(s, 'hex'), 16)
 
   @staticmethod
   def _string_from_int(i, prec):
@@ -410,4 +414,4 @@ def _string_from_int(i, prec):
     Inverse of _string_to_int.
     """
     h = '%x' % i
-    return ('0' * (2 * prec - len(h)) + h).decode('hex')
+    return codecs.decode('0' * (2 * prec - len(h)) + h, 'hex')
diff --git a/sdks/python/apache_beam/io/restriction_trackers.py b/sdks/python/apache_beam/io/restriction_trackers.py
index 014125f29006..e7224204fd54 100644
--- a/sdks/python/apache_beam/io/restriction_trackers.py
+++ b/sdks/python/apache_beam/io/restriction_trackers.py
@@ -42,6 +42,9 @@ def __eq__(self, other):
     return self.start == other.start and self.stop == other.stop
 
+  def __hash__(self):
+    return hash((type(self), self.start, self.stop))
+
   def split(self, desired_num_offsets_per_split, min_num_offsets_per_split=1):
     current_split_start = self.start
     max_split_size = max(desired_num_offsets_per_split,
diff --git a/sdks/python/apache_beam/io/tfrecordio.py b/sdks/python/apache_beam/io/tfrecordio.py
index 2ef7c5b4c729..5561d2c68b03 100644
--- a/sdks/python/apache_beam/io/tfrecordio.py
+++ b/sdks/python/apache_beam/io/tfrecordio.py
@@ -18,6 +18,7 @@
 
 from __future__ import absolute_import
 
+import codecs
 import logging
 import struct
 from builtins import object
@@ -120,24 +121,24 @@ def read_record(cls, file_handle):
     # Validate all length related payloads.
     if len(buf) != buf_length_expected:
       raise ValueError('Not a valid TFRecord. Fewer than %d bytes: %s' %
-                       (buf_length_expected, buf.encode('hex')))
+                       (buf_length_expected, codecs.encode(buf, 'hex')))
     length, length_mask_expected = struct.unpack('
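Note on the hex conversions above: str.encode('hex') and str.decode('hex') exist only on Python 2, so the patch switches to the codecs module, whose hex codec works on both Python 2 and 3. A small, standalone illustration (plain Python, not Beam code):

import codecs

buf = b'\x00\x12\xab'
hex_repr = codecs.encode(buf, 'hex')           # b'0012ab' on Python 2 and 3
assert int(hex_repr, 16) == 0x0012ab           # int() accepts the hex digits directly
assert codecs.decode(hex_repr, 'hex') == buf   # round-trips back to the raw bytes
# On Python 3, buf.encode('hex') fails outright: bytes objects have no .encode() method.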
Date: Wed, 11 Jul 2018 11:09:17 +0200
Subject: [PATCH 2/3] resolved hash of dict issues

---
 sdks/python/apache_beam/io/concat_source_test.py | 6 ++++--
 sdks/python/apache_beam/io/filebasedsink.py | 2 +-
 sdks/python/apache_beam/io/hadoopfilesystem_test.py | 2 +-
 sdks/python/apache_beam/io/range_trackers.py | 2 --
 sdks/python/apache_beam/io/vcfio.py | 3 ++-
 5 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/sdks/python/apache_beam/io/concat_source_test.py b/sdks/python/apache_beam/io/concat_source_test.py
index 47df91be5663..c1a14ad85472 100644
--- a/sdks/python/apache_beam/io/concat_source_test.py
+++ b/sdks/python/apache_beam/io/concat_source_test.py
@@ -76,10 +76,12 @@ def read(self, range_tracker):
   # For testing
   def __eq__(self, other):
     return (type(self) == type(other)
-            and self._start == other._start and self._end == other._end)
+            and self._start == other._start
+            and self._end == other._end
+            and self._split_freq == other._split_freq)
 
   def __hash__(self):
-    return hash((type(self), self._start, self._end))
+    return hash((type(self), self._start, self._end, self._split_freq))
 
   def __ne__(self, other):
     return not self == other
diff --git a/sdks/python/apache_beam/io/filebasedsink.py b/sdks/python/apache_beam/io/filebasedsink.py
index 8970fac7e37f..f152d2023b82 100644
--- a/sdks/python/apache_beam/io/filebasedsink.py
+++ b/sdks/python/apache_beam/io/filebasedsink.py
@@ -374,7 +374,7 @@ def __eq__(self, other):
     return type(self) == type(other) and self.__dict__ == other.__dict__
 
   def __hash__(self):
-    return hash((type(self), self.__dict__))
+    return hash((type(self), frozenset(self.__dict__.items())))
 
 
 class FileBasedSinkWriter(iobase.Writer):
diff --git a/sdks/python/apache_beam/io/hadoopfilesystem_test.py b/sdks/python/apache_beam/io/hadoopfilesystem_test.py
index c6c85ffc2629..5ff24c44d29e 100644
--- a/sdks/python/apache_beam/io/hadoopfilesystem_test.py
+++ b/sdks/python/apache_beam/io/hadoopfilesystem_test.py
@@ -50,7 +50,7 @@ def __eq__(self, other):
     return self.stat == other.stat and self.getvalue() == self.getvalue()
 
   def __hash__(self):
-    return hash((self.stat, self.getvalue()))
+    return hash((frozenset(self.stat.items()), self.getvalue()))
 
   def close(self):
     self.saved_data = self.getvalue()
diff --git a/sdks/python/apache_beam/io/range_trackers.py b/sdks/python/apache_beam/io/range_trackers.py
index f843529b4f3e..0514b74ef911 100644
--- a/sdks/python/apache_beam/io/range_trackers.py
+++ b/sdks/python/apache_beam/io/range_trackers.py
@@ -28,8 +28,6 @@
 
 from past.builtins import long
 
-from past.builtins import long
-
 from apache_beam.io import iobase
 
 __all__ = ['OffsetRangeTracker', 'LexicographicKeyRangeTracker',
diff --git a/sdks/python/apache_beam/io/vcfio.py b/sdks/python/apache_beam/io/vcfio.py
index 5654821d9b2c..c86bb8e373df 100644
--- a/sdks/python/apache_beam/io/vcfio.py
+++ b/sdks/python/apache_beam/io/vcfio.py
@@ -220,7 +220,8 @@ def __eq__(self, other):
             (other.name, other.genotype, other.phaseset, other.info))
 
   def __hash__(self):
-    return hash((self.name, self.genotype, self.phaseset, self.info))
+    return hash((self.name, self.genotype,
+                 self.phaseset, frozenset(self.info.items())))
 
   def __repr__(self):
     return ', '.join(
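The second patch is needed because several of the new __hash__ implementations handed a dict (self.__dict__, self.stat, self.info) straight to hash(), and dicts are unhashable. Wrapping the items in a frozenset gives a hashable snapshot, as long as every value is itself hashable. Roughly (standalone sketch, not Beam code):

info = {'GQ': 20, 'phased': True}
try:
  hash(info)                     # dicts are mutable, hence unhashable
except TypeError:
  pass
assert hash(frozenset(info.items())) == hash(frozenset(info.items()))
# This still breaks if any value is unhashable (e.g. a list); the final patch
# in the series avoids the issue by marking the classes unhashable instead.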
From cd7d58fc52a9713c06720c74d099a7b26e4ae4df Mon Sep 17 00:00:00 2001
From: Matthias Feys
Date: Fri, 27 Jul 2018 00:10:27 +0200
Subject: [PATCH 3/3] remove and simplify __hash__

---
 sdks/python/apache_beam/io/concat_source_test.py | 5 ++---
 sdks/python/apache_beam/io/filebasedsink.py | 4 +---
 sdks/python/apache_beam/io/gcp/pubsub.py | 3 ---
 .../apache_beam/io/gcp/pubsub_integration_test.py | 1 +
 sdks/python/apache_beam/io/gcp/pubsub_it_pipeline.py | 2 ++
 sdks/python/apache_beam/io/hadoopfilesystem_test.py | 4 +---
 sdks/python/apache_beam/io/vcfio.py | 10 +++-------
 7 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/sdks/python/apache_beam/io/concat_source_test.py b/sdks/python/apache_beam/io/concat_source_test.py
index c1a14ad85472..1e8603ab3318 100644
--- a/sdks/python/apache_beam/io/concat_source_test.py
+++ b/sdks/python/apache_beam/io/concat_source_test.py
@@ -35,6 +35,8 @@
 
 
 class RangeSource(iobase.BoundedSource):
+  __hash__ = None
+
   def __init__(self, start, end, split_freq=1):
     assert start <= end
     self._start = start
@@ -80,9 +82,6 @@ def __eq__(self, other):
             and self._end == other._end
             and self._split_freq == other._split_freq)
 
-  def __hash__(self):
-    return hash((type(self), self._start, self._end, self._split_freq))
-
   def __ne__(self, other):
     return not self == other
 
diff --git a/sdks/python/apache_beam/io/filebasedsink.py b/sdks/python/apache_beam/io/filebasedsink.py
index f152d2023b82..5a09582a1c33 100644
--- a/sdks/python/apache_beam/io/filebasedsink.py
+++ b/sdks/python/apache_beam/io/filebasedsink.py
@@ -60,6 +60,7 @@ class FileBasedSink(iobase.Sink):
 
   # Max number of threads to be used for renaming.
   _MAX_RENAME_THREADS = 64
+  __hash__ = None
 
   def __init__(self,
                file_path_prefix,
@@ -373,9 +374,6 @@ def __eq__(self, other):
     # pylint: disable=unidiomatic-typecheck
     return type(self) == type(other) and self.__dict__ == other.__dict__
 
-  def __hash__(self):
-    return hash((type(self), frozenset(self.__dict__.items())))
-
 
 class FileBasedSinkWriter(iobase.Writer):
   """The writer for FileBasedSink.
diff --git a/sdks/python/apache_beam/io/gcp/pubsub.py b/sdks/python/apache_beam/io/gcp/pubsub.py
index e85d949bb1e1..a8e8d4de81a3 100644
--- a/sdks/python/apache_beam/io/gcp/pubsub.py
+++ b/sdks/python/apache_beam/io/gcp/pubsub.py
@@ -78,9 +78,6 @@ def __eq__(self, other):
             self.data == other.data and
             self.attributes == other.attributes)
 
-  def __hash__(self):
-    return hash((type(self), self.payload, self.attributes))
-
   def __repr__(self):
     return 'PubsubMessage(%s, %s)' % (self.data, self.attributes)
 
diff --git a/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py b/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py
index d287ef283735..c3921bb2e55b 100644
--- a/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py
+++ b/sdks/python/apache_beam/io/gcp/pubsub_integration_test.py
@@ -17,6 +17,7 @@
 """
 Integration test for Google Cloud Pub/Sub.
 """
+from __future__ import absolute_import
 
 import logging
 import unittest
diff --git a/sdks/python/apache_beam/io/gcp/pubsub_it_pipeline.py b/sdks/python/apache_beam/io/gcp/pubsub_it_pipeline.py
index 407ed8830fa2..0c4535f1a294 100644
--- a/sdks/python/apache_beam/io/gcp/pubsub_it_pipeline.py
+++ b/sdks/python/apache_beam/io/gcp/pubsub_it_pipeline.py
@@ -18,6 +18,8 @@
 Test pipeline for use by pubsub_integration_test.
 """
+from __future__ import absolute_import
+
 import argparse
 
 import apache_beam as beam
diff --git a/sdks/python/apache_beam/io/hadoopfilesystem_test.py b/sdks/python/apache_beam/io/hadoopfilesystem_test.py
index 5ff24c44d29e..a943a12bb4d7 100644
--- a/sdks/python/apache_beam/io/hadoopfilesystem_test.py
+++ b/sdks/python/apache_beam/io/hadoopfilesystem_test.py
@@ -35,6 +35,7 @@
 
 class FakeFile(io.BytesIO):
   """File object for FakeHdfs"""
+  __hash__ = None
 
   def __init__(self, path, mode='', type='FILE'):
     io.BytesIO.__init__(self)
@@ -49,9 +50,6 @@ def __init__(self, path, mode='', type='FILE'):
   def __eq__(self, other):
     return self.stat == other.stat and self.getvalue() == self.getvalue()
 
-  def __hash__(self):
-    return hash((frozenset(self.stat.items()), self.getvalue()))
-
   def close(self):
     self.saved_data = self.getvalue()
     io.BytesIO.close(self)
diff --git a/sdks/python/apache_beam/io/vcfio.py b/sdks/python/apache_beam/io/vcfio.py
index c86bb8e373df..d96c8f79d655 100644
--- a/sdks/python/apache_beam/io/vcfio.py
+++ b/sdks/python/apache_beam/io/vcfio.py
@@ -73,6 +73,7 @@ class Variant(object):
 
   Each object corresponds to a single record in a VCF file.
   """
+  __hash__ = None
 
   def __init__(self,
                reference_name=None,
@@ -124,9 +125,6 @@ def __eq__(self, other):
     return (isinstance(other, Variant) and
             vars(self) == vars(other))
 
-  def __hash__(self):
-    return hash((type(self), vars(self)))
-
   def __repr__(self):
     return ', '.join(
         [str(s) for s in [self.reference_name,
@@ -189,6 +189,8 @@ class VariantCall(object):
   variant. It may include associated information such as quality and phasing.
   """
 
+  __hash__ = None
+
   def __init__(self, name=None, genotype=None, phaseset=None, info=None):
     """Initialize the :class:`VariantCall` object.
 
@@ -219,10 +219,6 @@ def __eq__(self, other):
     return ((self.name, self.genotype, self.phaseset, self.info) ==
             (other.name, other.genotype, other.phaseset, other.info))
 
-  def __hash__(self):
-    return hash((self.name, self.genotype,
-                 self.phaseset, frozenset(self.info.items())))
-
   def __repr__(self):
     return ', '.join(
         [str(s) for s in [self.name, self.genotype, self.phaseset, self.info]])