From 42df688fa936fa91b96a0f84c9343d1d2a2ac2fb Mon Sep 17 00:00:00 2001 From: Igor Sapego Date: Fri, 20 Nov 2020 11:46:25 +0300 Subject: [PATCH 1/3] IGNITE-14059: Fix hashing of complex objects --- pyignite/utils.py | 30 +++++++++++++++++++------- tests/test_binary.py | 51 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/pyignite/utils.py b/pyignite/utils.py index ebe5501..57eedda 100644 --- a/pyignite/utils.py +++ b/pyignite/utils.py @@ -106,20 +106,34 @@ def unwrap_binary(client: 'Client', wrapped: tuple) -> object: return result -def hashcode(string: Union[str, bytes]) -> int: +def hashcode(data: Union[str, bytes]) -> int: """ Calculate hash code used for identifying objects in Ignite binary API. :param string: UTF-8-encoded string identifier of binary buffer, :return: hash code. """ - result = 1 if isinstance(string, (bytes, bytearray)) else 0 - for char in string: - try: - char = ord(char) - except TypeError: - pass - result = int_overflow(31 * result + char) + if isinstance(data, str): + """ + For strings we iterate over code point which are of the int type + and can take up to 4 bytes and can only be positive. + """ + result = 1 if isinstance(string, (bytes, bytearray)) else 0 + for char in data: + try: + char_val = ord(char) + result = int_overflow(31 * result + char_val) + except TypeError: + pass + else: + """ + For byte array we iterate over bytes which only take 1 byte and can + be negative. 
For this reason we use ctypes.c_byte() to + """ + result = 1 + for byte in data: + byte = ctypes.c_byte(byte).value + result = int_overflow(31 * result + byte) return result diff --git a/tests/test_binary.py b/tests/test_binary.py index 5190a6a..4c45afb 100644 --- a/tests/test_binary.py +++ b/tests/test_binary.py @@ -304,3 +304,54 @@ class NonPythonicallyNamedType( obj = cache.get(key) assert obj.type_name == type_name, 'Complex type name mismatch' assert obj.field == data, 'Complex object data failure' + + +def test_complex_object_hash(client): + """ + Test that Python client correctly calculates hash of the binary + object that contains negative bytes. + """ + class Internal( + metaclass=GenericObjectMeta, + type_name='Internal', + schema=OrderedDict([ + ('id', IntObject), + ('str', String), + ]) + ): + pass + + class TestObject( + metaclass=GenericObjectMeta, + type_name='TestObject', + schema=OrderedDict([ + ('id', IntObject), + ('str', String), + ('internal', BinaryObject), + ]) + ): + pass + + obj_ascii = TestObject() + obj_ascii.id = 1 + obj_ascii.str = 'test_string' + + obj_ascii.internal = Internal() + obj_ascii.internal.id = 2 + obj_ascii.internal.str = 'lorem ipsum' + + hash_ascii = BinaryObject.hashcode(obj_ascii, client=client) + + assert hash_ascii == -1314567146, 'Invalid hashcode value for object with ASCII strings' + + obj_utf8 = TestObject() + obj_utf8.id = 1 + obj_utf8.str = 'юникод' + + obj_utf8.internal = Internal() + obj_utf8.internal.id = 2 + obj_utf8.internal.str = 'ユニコード' + + hash_utf8 = BinaryObject.hashcode(obj_utf8, client=client) + + assert hash_utf8 == -1945378474, 'Invalid hashcode value for object with UTF-8 strings' From 7e536811408277903eaf7720541f975ef253cbca Mon Sep 17 00:00:00 2001 From: Igor Sapego Date: Tue, 26 Jan 2021 13:22:06 +0300 Subject: [PATCH 2/3] IGNITE-14059: Fix comments --- pyignite/utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pyignite/utils.py b/pyignite/utils.py index 
57eedda..a58f357 100644 --- a/pyignite/utils.py +++ b/pyignite/utils.py @@ -110,7 +110,7 @@ def hashcode(data: Union[str, bytes]) -> int: """ Calculate hash code used for identifying objects in Ignite binary API. - :param string: UTF-8-encoded string identifier of binary buffer, + :param data: UTF-8-encoded string identifier of binary buffer or byte array :return: hash code. """ if isinstance(data, str): @@ -127,8 +127,11 @@ def hashcode(data: Union[str, bytes]) -> int: pass else: """ - For byte array we iterate over bytes which only take 1 byte and can - be negative. For this reason we use ctypes.c_byte() to + For byte array we iterate over bytes which only take 1 byte. But + according to the protocol, bytes during hashing should be treated as signed + integer numbers 8 bits long. On the other hand, elements in Python's `bytes` + are unsigned. For this reason we use ctypes.c_byte() to make them + signed. """ result = 1 for byte in data: From 1e561c143bd845898f72eaa01eb84ced73dcc005 Mon Sep 17 00:00:00 2001 From: Igor Sapego Date: Mon, 1 Feb 2021 15:09:38 -0800 Subject: [PATCH 3/3] IGNITE-14059: Fix --- pyignite/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyignite/utils.py b/pyignite/utils.py index a58f357..ce00d53 100644 --- a/pyignite/utils.py +++ b/pyignite/utils.py @@ -118,7 +118,7 @@ def hashcode(data: Union[str, bytes]) -> int: For strings we iterate over code point which are of the int type and can take up to 4 bytes and can only be positive. """ - result = 1 if isinstance(string, (bytes, bytearray)) else 0 + result = 0 for char in data: try: char_val = ord(char)