-
Notifications
You must be signed in to change notification settings - Fork 4.5k
[BEAM-3981] Futurize coders subpackage #5053
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,7 @@ | |
| import logging | ||
| import math | ||
| import unittest | ||
| from builtins import range | ||
|
|
||
| import dill | ||
|
|
||
|
|
@@ -103,7 +104,7 @@ def test_custom_coder(self): | |
|
|
||
| self.check_coder(CustomCoder(), 1, -10, 5) | ||
| self.check_coder(coders.TupleCoder((CustomCoder(), coders.BytesCoder())), | ||
| (1, 'a'), (-10, 'b'), (5, 'c')) | ||
| (1, b'a'), (-10, b'b'), (5, b'c')) | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably not critical, but looks like 'a' is not replaced with b'a' here - are these changes done by some tool or manually?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess this is aimed at the 'a' in line 109? |
||
| def test_pickle_coder(self): | ||
| self.check_coder(coders.PickleCoder(), 'a', 1, 1.5, (1, 2, 3)) | ||
|
|
@@ -129,7 +130,7 @@ def test_dill_coder(self): | |
|
|
||
| def test_fast_primitives_coder(self): | ||
| coder = coders.FastPrimitivesCoder(coders.SingletonCoder(len)) | ||
| self.check_coder(coder, None, 1, -1, 1.5, 'str\0str', u'unicode\0\u0101') | ||
| self.check_coder(coder, None, 1, -1, 1.5, b'str\0str', u'unicode\0\u0101') | ||
| self.check_coder(coder, (), (1, 2, 3)) | ||
| self.check_coder(coder, [], [1, 2, 3]) | ||
| self.check_coder(coder, dict(), {'a': 'b'}, {0: dict(), 1: len}) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also here 'a' and 'b' are not bytestrings.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See comment above. |
||
|
|
@@ -139,7 +140,7 @@ def test_fast_primitives_coder(self): | |
| self.check_coder(coders.TupleCoder((coder,)), ('a',), (1,)) | ||
|
|
||
| def test_bytes_coder(self): | ||
| self.check_coder(coders.BytesCoder(), 'a', '\0', 'z' * 1000) | ||
| self.check_coder(coders.BytesCoder(), b'a', b'\0', b'z' * 1000) | ||
|
|
||
| def test_varint_coder(self): | ||
| # Small ints. | ||
|
|
@@ -190,7 +191,7 @@ def test_timestamp_coder(self): | |
| timestamp.Timestamp(micros=1234567890123456789)) | ||
| self.check_coder( | ||
| coders.TupleCoder((coders.TimestampCoder(), coders.BytesCoder())), | ||
| (timestamp.Timestamp.of(27), 'abc')) | ||
| (timestamp.Timestamp.of(27), b'abc')) | ||
|
|
||
| def test_tuple_coder(self): | ||
| kv_coder = coders.TupleCoder((coders.VarIntCoder(), coders.BytesCoder())) | ||
|
|
@@ -206,14 +207,14 @@ def test_tuple_coder(self): | |
| kv_coder.as_cloud_object()) | ||
| # Test binary representation | ||
| self.assertEqual( | ||
| '\x04abc', | ||
| kv_coder.encode((4, 'abc'))) | ||
| b'\x04abc', | ||
| kv_coder.encode((4, b'abc'))) | ||
| # Test unnested | ||
| self.check_coder( | ||
| kv_coder, | ||
| (1, 'a'), | ||
| (-2, 'a' * 100), | ||
| (300, 'abc\0' * 5)) | ||
| (1, b'a'), | ||
| (-2, b'a' * 100), | ||
| (300, b'abc\0' * 5)) | ||
| # Test nested | ||
| self.check_coder( | ||
| coders.TupleCoder( | ||
|
|
@@ -322,12 +323,12 @@ def test_windowed_value_coder(self): | |
| }, | ||
| coder.as_cloud_object()) | ||
| # Test binary representation | ||
| self.assertEqual('\x7f\xdf;dZ\x1c\xac\t\x00\x00\x00\x01\x0f\x01', | ||
| self.assertEqual(b'\x7f\xdf;dZ\x1c\xac\t\x00\x00\x00\x01\x0f\x01', | ||
| coder.encode(window.GlobalWindows.windowed_value(1))) | ||
|
|
||
| # Test decoding large timestamp | ||
| self.assertEqual( | ||
| coder.decode('\x7f\xdf;dZ\x1c\xac\x08\x00\x00\x00\x01\x0f\x00'), | ||
| coder.decode(b'\x7f\xdf;dZ\x1c\xac\x08\x00\x00\x00\x01\x0f\x00'), | ||
| windowed_value.create(0, MIN_TIMESTAMP.micros, (GlobalWindow(),))) | ||
|
|
||
| # Test unnested | ||
|
|
@@ -364,7 +365,7 @@ def test_proto_coder(self): | |
| proto_coder = coders.ProtoCoder(ma.__class__) | ||
| self.check_coder(proto_coder, ma) | ||
| self.check_coder(coders.TupleCoder((proto_coder, coders.BytesCoder())), | ||
| (ma, 'a'), (mb, 'b')) | ||
| (ma, b'a'), (mb, b'b')) | ||
|
|
||
| def test_global_window_coder(self): | ||
| coder = coders.GlobalWindowCoder() | ||
|
|
@@ -391,16 +392,16 @@ def test_length_prefix_coder(self): | |
| }, | ||
| coder.as_cloud_object()) | ||
| # Test binary representation | ||
| self.assertEqual('\x00', coder.encode('')) | ||
| self.assertEqual('\x01a', coder.encode('a')) | ||
| self.assertEqual('\x02bc', coder.encode('bc')) | ||
| self.assertEqual('\xff\x7f' + 'z' * 16383, coder.encode('z' * 16383)) | ||
| self.assertEqual(b'\x00', coder.encode(b'')) | ||
| self.assertEqual(b'\x01a', coder.encode(b'a')) | ||
| self.assertEqual(b'\x02bc', coder.encode(b'bc')) | ||
| self.assertEqual(b'\xff\x7f' + b'z' * 16383, coder.encode(b'z' * 16383)) | ||
| # Test unnested | ||
| self.check_coder(coder, '', 'a', 'bc', 'def') | ||
| self.check_coder(coder, b'', b'a', b'bc', b'def') | ||
| # Test nested | ||
| self.check_coder(coders.TupleCoder((coder, coder)), | ||
| ('', 'a'), | ||
| ('bc', 'def')) | ||
| (b'', b'a'), | ||
| (b'bc', b'def')) | ||
|
|
||
| def test_nested_observables(self): | ||
| class FakeObservableIterator(observable.ObservableMixin): | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this change required by Python3 migration or we are just fixing an omission that hash was not previously defined, while eq was?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On Python 2,
__hash__defaults to give a different value for each instance. On Python 3,__hash__defaults toNoneif__eq__is implemented. By implementing__hash__, we get consistent behavior on both versions.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Revisiting this in light of other PRs. I think, it would be safer to guarantee the contract that hash does not change for the same object if we compute it here based on object type, sent #5390.
Another possibility to guarantee consistent behavior between Python 2 and 3 would be to set
__hash__ = Noneif we can infer that a class is obviously non-hashable.Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can also use the id which is guaranteed to stay the same for an object:
hash(id(self))The default Python 2 hash also relies on id.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's true. Although that wouldn't honor the contract between eq and hash.