Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 26 additions & 9 deletions pyignite/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,20 +106,37 @@ def unwrap_binary(client: 'Client', wrapped: tuple) -> object:
return result


def hashcode(data: Union[str, bytes]) -> int:
    """
    Calculate hash code used for identifying objects in Ignite binary API.

    Strings and byte buffers are hashed with the same ``31 * h + x``
    recurrence but differ in the seed (0 for strings, 1 for byte buffers)
    and in how each element is interpreted.

    :param data: string identifier, or a binary buffer (``bytes`` or
     ``bytearray``),
    :return: hash code.
    """
    if isinstance(data, str):
        # Strings are hashed per code point. `ord()` always yields
        # a non-negative int here, so no sign adjustment is needed.
        result = 0
        for char in data:
            result = int_overflow(31 * result + ord(char))
    else:
        # Byte buffers are hashed per byte. According to the protocol,
        # bytes must be treated as signed 8-bit integers during hashing,
        # whereas the elements of Python's `bytes` are unsigned (0..255).
        # `ctypes.c_byte` reinterprets each element as a signed value.
        result = 1
        for byte in data:
            result = int_overflow(31 * result + ctypes.c_byte(byte).value)
    return result


Expand Down
51 changes: 51 additions & 0 deletions tests/test_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,3 +304,54 @@ class NonPythonicallyNamedType(
obj = cache.get(key)
assert obj.type_name == type_name, 'Complex type name mismatch'
assert obj.field == data, 'Complex object data failure'


def test_complex_object_hash(client):
    """
    Check the hash code the Python client computes for a nested binary
    object, including the case where the serialized object contains
    negative bytes (non-ASCII strings).
    """
    class Internal(
        metaclass=GenericObjectMeta,
        type_name='Internal',
        schema=OrderedDict([
            ('id', IntObject),
            ('str', String),
        ])
    ):
        pass

    class TestObject(
        metaclass=GenericObjectMeta,
        type_name='TestObject',
        schema=OrderedDict([
            ('id', IntObject),
            ('str', String),
            ('internal', BinaryObject),
        ])
    ):
        pass

    def make_object(outer_text, inner_text):
        # Build a TestObject with a nested Internal; only the two string
        # fields vary between the scenarios below.
        outer = TestObject()
        outer.id = 1
        outer.str = outer_text

        outer.internal = Internal()
        outer.internal.id = 2
        outer.internal.str = inner_text
        return outer

    # Scenario 1: ASCII-only strings.
    ascii_hash = BinaryObject.hashcode(
        make_object('test_string', 'lorem ipsum'),
        client=client,
    )
    assert ascii_hash == -1314567146, 'Invalid hashcode value for object with ASCII strings'

    # Scenario 2: non-ASCII strings.
    utf8_hash = BinaryObject.hashcode(
        make_object('юникод', 'ユニコード'),
        client=client,
    )
    assert utf8_hash == -1945378474, 'Invalid hashcode value for object with UTF-8 strings'