From a4199fd40de2a2655034e05b70109eaeb8a941a5 Mon Sep 17 00:00:00 2001 From: Maciej Urbanski Date: Fri, 26 Apr 2024 18:07:17 +0200 Subject: [PATCH 1/3] fix body_hash check circumvention --- bittensor/axon.py | 2 -- bittensor/synapse.py | 23 +++++++++++------------ tests/unit_tests/test_synapse.py | 20 ++++++++++++++++++++ 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/bittensor/axon.py b/bittensor/axon.py index 34ce9e51f1..f72c024759 100644 --- a/bittensor/axon.py +++ b/bittensor/axon.py @@ -696,9 +696,7 @@ async def verify_body_integrity(self, request: Request): body = await request.body() request_body = body.decode() if isinstance(body, bytes) else body - # Gather the required field names from the axon's required_hash_fields dict request_name = request.url.path.split("/")[1] - required_hash_fields = self.required_hash_fields[request_name] # Load the body dict and check if all required field hashes match body_dict = json.loads(request_body) diff --git a/bittensor/synapse.py b/bittensor/synapse.py index 6b7af3d765..518b9431c6 100644 --- a/bittensor/synapse.py +++ b/bittensor/synapse.py @@ -683,21 +683,20 @@ def body_hash(self) -> str: Returns: str: The SHA3-256 hash as a hexadecimal string, providing a fingerprint of the Synapse instance's data for integrity checks. """ - # Hash the body for verification hashes = [] - # Getting the fields of the instance - instance_fields = self.dict() + required_hash_fields = self.__class__.__fields__["required_hash_fields"].default + + if required_hash_fields: + instance_fields = self.dict() + # Preserve backward compatibility in which fields will added in .dict() order + # instead of the order one from `self.required_hash_fields` + required_hash_fields = [ + field for field in instance_fields if field in required_hash_fields + ] + for field in required_hash_fields: + hashes.append(bittensor.utils.hash(str(instance_fields[field]))) - for field, value in instance_fields.items(): - # If the field is required in the subclass schema, hash and add it. - if ( - self.required_hash_fields is not None - and field in self.required_hash_fields - ): - hashes.append(bittensor.utils.hash(str(value))) - - # Hash and return the hashes that have been concatenated return bittensor.utils.hash("".join(hashes)) @classmethod diff --git a/tests/unit_tests/test_synapse.py b/tests/unit_tests/test_synapse.py index 70dd88db76..82502fd317 100644 --- a/tests/unit_tests/test_synapse.py +++ b/tests/unit_tests/test_synapse.py @@ -224,3 +224,23 @@ def test_default_instance_fields_dict_consistency(): "computed_body_hash": "", "required_hash_fields": [], } + + +def test_synapse_body_hash(): + class HashedSynapse(bittensor.Synapse): + a: int + b: int + c: typing.Optional[int] + d: typing.Optional[typing.List[str]] + required_hash_fields: typing.Optional[typing.List[str]] = ["b", "a", "d"] + + synapse_instance = HashedSynapse(a=1, b=2, d=["foobar"]) + synapse_instance_2 = HashedSynapse(d=["foobar"], c=3, a=1, b=2) + synapse_different = HashedSynapse(a=1, b=2) + + assert synapse_instance.body_hash == synapse_instance_2.body_hash + assert synapse_instance.body_hash != synapse_different.body_hash + assert ( + synapse_instance.body_hash + == "ae06397d08f30f75c91395c59f05c62ac3b62b88250eb78b109213258e6ced0c" + ) From df79322efc337e3a0eb95c73a60e3e4d6ebb1911 Mon Sep 17 00:00:00 2001 From: Maciej Urbanski Date: Sat, 27 Apr 2024 22:47:32 +0200 Subject: [PATCH 2/3] change required_hash_fields to ClassVar --- bittensor/axon.py | 7 ---- bittensor/synapse.py | 46 +++++++++++++++--------- tests/unit_tests/test_synapse.py | 61 +++++++++++++++++++------------- 3 files changed, 66 insertions(+), 48 deletions(-) diff --git a/bittensor/axon.py b/bittensor/axon.py index f72c024759..cd1b8f224d 100644 --- a/bittensor/axon.py +++ b/bittensor/axon.py @@ -364,7 +364,6 @@ def __init__( self.priority_fns: Dict[str, Optional[Callable]] = {} self.forward_fns: Dict[str, Optional[Callable]] = {} self.verify_fns: Dict[str, Optional[Callable]] = {} - self.required_hash_fields: Dict[str, str] = {} # Instantiate FastAPI self.app = FastAPI() @@ -566,12 +565,6 @@ def verify_custom(synapse: MyCustomSynapse): ) # Use 'default_verify' if 'verify_fn' is None self.forward_fns[request_name] = forward_fn - # Parse required hash fields from the forward function protocol defaults - required_hash_fields = request_class.__dict__["__fields__"][ - "required_hash_fields" - ].default - self.required_hash_fields[request_name] = required_hash_fields - return self @classmethod diff --git a/bittensor/synapse.py b/bittensor/synapse.py index 518b9431c6..5c0975a557 100644 --- a/bittensor/synapse.py +++ b/bittensor/synapse.py @@ -20,11 +20,13 @@ import base64 import json import sys +import typing +import warnings import pydantic from pydantic.schema import schema import bittensor -from typing import Optional, List, Any, Dict +from typing import Optional, Any, Dict def get_size(obj, seen=None) -> int: @@ -293,6 +295,8 @@ class Synapse(pydantic.BaseModel): 5. Body Hash Computation (``computed_body_hash``, ``required_hash_fields``): Ensures data integrity and security by computing hashes of transmitted data. Provides users with a mechanism to verify data integrity and detect any tampering during transmission. + It is recommended that names of fields in `required_hash_fields` are listed in the order they are + defined in the class. 6. Serialization and Deserialization Methods: Facilitates the conversion of Synapse objects to and from a format suitable for network transmission. @@ -480,14 +484,7 @@ def set_name_type(cls, values) -> dict: repr=False, ) - required_hash_fields: Optional[List[str]] = pydantic.Field( - title="required_hash_fields", - description="The list of required fields to compute the body hash.", - examples=["roles", "messages"], - default=[], - allow_mutation=False, - repr=False, - ) + required_hash_fields: typing.ClassVar[typing.Tuple[str, ...]] = () def __setattr__(self, name: str, value: Any): """ @@ -685,15 +682,32 @@ def body_hash(self) -> str: """ hashes = [] - required_hash_fields = self.__class__.__fields__["required_hash_fields"].default + hash_fields_field = self.__class__.__fields__.get("required_hash_fields") + instance_fields = None + if hash_fields_field: + warnings.warn( + "The 'required_hash_fields' field handling deprecated and will be removed. " + "Please update Synapse class definition to use 'required_hash_fields' class variable instead.", + DeprecationWarning, + ) + required_hash_fields = hash_fields_field.default + + if required_hash_fields: + instance_fields = self.dict() + # Preserve backward compatibility in which fields will added in .dict() order + # instead of the order one from `self.required_hash_fields` + required_hash_fields = [ + field for field in instance_fields if field in required_hash_fields + ] + + # Hack to cache the required hash fields names + if len(required_hash_fields) == len(required_hash_fields): + self.__class__.required_hash_fields = tuple(required_hash_fields) + else: + required_hash_fields = self.__class__.required_hash_fields if required_hash_fields: - instance_fields = self.dict() - # Preserve backward compatibility in which fields will added in .dict() order - # instead of the order one from `self.required_hash_fields` - required_hash_fields = [ - field for field in instance_fields if field in required_hash_fields - ] + instance_fields = instance_fields or self.dict() for field in required_hash_fields: hashes.append(bittensor.utils.hash(str(instance_fields[field]))) diff --git a/tests/unit_tests/test_synapse.py b/tests/unit_tests/test_synapse.py index 82502fd317..61e9868878 100644 --- a/tests/unit_tests/test_synapse.py +++ b/tests/unit_tests/test_synapse.py @@ -178,18 +178,6 @@ def test_body_hash_override(): synapse_instance.body_hash = [] -def test_required_fields_override(): - # Create a Synapse instance - synapse_instance = bittensor.Synapse() - - # Try to set the required_hash_fields property and expect a TypeError - with pytest.raises( - TypeError, - match='"required_hash_fields" has allow_mutation set to False and cannot be assigned', - ): - synapse_instance.required_hash_fields = [] - - def test_default_instance_fields_dict_consistency(): synapse_instance = bittensor.Synapse() assert synapse_instance.dict() == { @@ -222,25 +210,48 @@ def test_default_instance_fields_dict_consistency(): "signature": None, }, "computed_body_hash": "", - "required_hash_fields": [], } -def test_synapse_body_hash(): - class HashedSynapse(bittensor.Synapse): - a: int - b: int - c: typing.Optional[int] - d: typing.Optional[typing.List[str]] - required_hash_fields: typing.Optional[typing.List[str]] = ["b", "a", "d"] +class LegacyHashedSynapse(bittensor.Synapse): + """Legacy Synapse subclass that serialized `required_hash_fields`.""" - synapse_instance = HashedSynapse(a=1, b=2, d=["foobar"]) - synapse_instance_2 = HashedSynapse(d=["foobar"], c=3, a=1, b=2) - synapse_different = HashedSynapse(a=1, b=2) + a: int + b: int + c: typing.Optional[int] + d: typing.Optional[typing.List[str]] + required_hash_fields: typing.Optional[typing.List[str]] = ["b", "a", "d"] - assert synapse_instance.body_hash == synapse_instance_2.body_hash - assert synapse_instance.body_hash != synapse_different.body_hash + +class HashedSynapse(bittensor.Synapse): + a: int + b: int + c: typing.Optional[int] + d: typing.Optional[typing.List[str]] + required_hash_fields: typing.ClassVar[typing.Tuple[str]] = ("a", "b", "d") + + +@pytest.mark.parametrize("synapse_cls", [LegacyHashedSynapse, HashedSynapse]) +def test_synapse_body_hash(synapse_cls): + synapse_instance = synapse_cls(a=1, b=2, d=["foobar"]) assert ( synapse_instance.body_hash == "ae06397d08f30f75c91395c59f05c62ac3b62b88250eb78b109213258e6ced0c" ) + + # Extra non-hashed values should not influence the body hash + synapse_instance_slightly_different = synapse_cls(d=["foobar"], c=3, a=1, b=2) + assert synapse_instance.body_hash == synapse_instance_slightly_different.body_hash + + # Even if someone tries to override the required_hash_fields, it should still be the same + synapse_instance_try_override_hash_fields = synapse_cls( + a=1, b=2, d=["foobar"], required_hash_fields=["a"] + ) + assert ( + synapse_instance.body_hash + == synapse_instance_try_override_hash_fields.body_hash + ) + + # Different hashed values should result in different body hashes + synapse_different = synapse_cls(a=1, b=2) + assert synapse_instance.body_hash != synapse_different.body_hash From cb4e3a6b1f90724f70cb256be9a0160d5214491f Mon Sep 17 00:00:00 2001 From: Maciej Urbanski Date: Wed, 22 May 2024 12:33:54 +0200 Subject: [PATCH 3/3] revert to Optional[] syntax as `| None` is not supported on Python 3.9 --- tests/unit_tests/test_synapse.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tests/unit_tests/test_synapse.py b/tests/unit_tests/test_synapse.py index ee00da2261..6be99520c1 100644 --- a/tests/unit_tests/test_synapse.py +++ b/tests/unit_tests/test_synapse.py @@ -17,7 +17,7 @@ import json import base64 import typing -from typing import List, Optional +from typing import Optional import pytest import bittensor @@ -130,13 +130,15 @@ def test_custom_synapse(): class Test(bittensor.Synapse): a: int # Carried through because required. b: int = None # Not carried through headers - c: int | None # Required, carried through headers, cannot be None - d: list[int] | None # Required, carried though headers, cannot be None + c: Optional[int] # Required, carried through headers, cannot be None + d: Optional[list[int]] # Required, carried though headers, cannot be None e: list[int] # Carried through headers - f: int | None = None # Not Required, Not carried through headers, can be None - g: list[ + f: Optional[ int - ] | None = None # Not Required, Not carried though headers, can be None + ] = None # Not Required, Not carried through headers, can be None + g: Optional[ + list[int] + ] = None # Not Required, Not carried though headers, can be None # Create an instance of the custom Synapse subclass synapse = Test( @@ -227,16 +229,16 @@ class LegacyHashedSynapse(bittensor.Synapse): a: int b: int - c: int | None = None - d: list[str] | None = None - required_hash_fields: list[str] | None = ["b", "a", "d"] + c: Optional[int] = None + d: Optional[list[str]] = None + required_hash_fields: Optional[list[str]] = ["b", "a", "d"] class HashedSynapse(bittensor.Synapse): a: int b: int - c: int | None = None - d: list[str] | None = None + c: Optional[int] = None + d: Optional[list[str]] = None required_hash_fields: typing.ClassVar[tuple[str, ...]] = ("a", "b", "d")