From 3b56272c3929924db2b4e63595e9a6814e0fb8dc Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Fri, 25 Aug 2023 01:08:06 +0000 Subject: [PATCH 1/8] replace pickle with jsons to avoid pickle security issues --- bittensor/synapse.py | 20 ++++++++++++++------ requirements/prod.txt | 1 + 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/bittensor/synapse.py b/bittensor/synapse.py index a75324cdd1..982700d0f7 100644 --- a/bittensor/synapse.py +++ b/bittensor/synapse.py @@ -19,7 +19,7 @@ import ast import sys import torch -import pickle +import jsons import base64 import typing import pydantic @@ -437,9 +437,12 @@ def to_headers(self) -> dict: headers[f"bt_header_dict_tensor_{field}"] = str(serialized_dict_tensor) elif required and field in required: - serialized_value = pickle.dumps(value) - encoded_value = base64.b64encode(serialized_value).decode("utf-8") - headers[f"bt_header_input_obj_{field}"] = encoded_value + try: + serialized_value = jsons.dumps(value) + encoded_value = base64.b64encode(serialized_value.encode()).decode("utf-8") + headers[f"bt_header_input_obj_{field}"] = encoded_value + except jsons.exceptions.SerializationError as e: + raise ValueError(f"Error serializing {field} with value {value}. Objects must be jsons serializable.") from e # Adding the size of the headers and the total size to the headers headers["header_size"] = str(sys.getsizeof(headers)) @@ -539,9 +542,14 @@ def parse_headers_to_inputs(cls, headers: dict) -> dict: if new_key in inputs_dict: continue # Decode and load the serialized object - inputs_dict[new_key] = pickle.loads( - base64.b64decode(value.encode("utf-8")) + inputs_dict[new_key] = jsons.loads( + base64.b64decode(value.encode()).decode("utf-8") + ) + except jsons.exceptions.JSONDecodeError as e: + bittensor.logging.error( + f"Error while jsons decoding 'input_obj' header {key}: {e}" ) + continue except Exception as e: bittensor.logging.error( f"Error while parsing 'input_obj' header {key}: {e}" diff --git a/requirements/prod.txt b/requirements/prod.txt index e0797f34a6..61aaae9d54 100644 --- a/requirements/prod.txt +++ b/requirements/prod.txt @@ -6,6 +6,7 @@ ddt==1.6.0 fuzzywuzzy>=0.18.0 fastapi==0.99.1 httpx==0.24.1 +jsons==1.6.3 loguru==0.7.0 miniupnpc==2.0.2 munch==2.5.0 From 22719beade2475d3c93b8fca3c90be0c224ac8c8 Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Fri, 25 Aug 2023 17:06:46 +0000 Subject: [PATCH 2/8] fix test_syapse.py --- bittensor/synapse.py | 8 ++++++-- tests/unit_tests/test_synapse.py | 14 +++++++------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/bittensor/synapse.py b/bittensor/synapse.py index 982700d0f7..81b8dedd45 100644 --- a/bittensor/synapse.py +++ b/bittensor/synapse.py @@ -439,10 +439,14 @@ def to_headers(self) -> dict: elif required and field in required: try: serialized_value = jsons.dumps(value) - encoded_value = base64.b64encode(serialized_value.encode()).decode("utf-8") + encoded_value = base64.b64encode(serialized_value.encode()).decode( + "utf-8" + ) headers[f"bt_header_input_obj_{field}"] = encoded_value except jsons.exceptions.SerializationError as e: - raise ValueError(f"Error serializing {field} with value {value}. Objects must be jsons serializable.") from e + raise ValueError( + f"Error serializing {field} with value {value}. Objects must be jsons serializable." + ) from e # Adding the size of the headers and the total size to the headers headers["header_size"] = str(sys.getsizeof(headers)) diff --git a/tests/unit_tests/test_synapse.py b/tests/unit_tests/test_synapse.py index 764736fe49..473200f814 100644 --- a/tests/unit_tests/test_synapse.py +++ b/tests/unit_tests/test_synapse.py @@ -14,8 +14,8 @@ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. +import jsons import torch -import pickle import base64 import typing import pytest @@ -34,9 +34,9 @@ class Test(bittensor.Synapse): headers = { "bt_header_axon_nonce": "111", "bt_header_dendrite_ip": "12.1.1.2", - "bt_header_input_obj_key1": base64.b64encode(pickle.dumps([1, 2, 3, 4])).decode( - "utf-8" - ), + "bt_header_input_obj_key1": base64.b64encode( + jsons.dumps([1, 2, 3, 4]).encode("utf-8") + ).decode("utf-8"), "bt_header_tensor_key2": "[3]-torch.float32", "timeout": "12", "name": "Test", @@ -69,9 +69,9 @@ class Test(bittensor.Synapse): headers = { "bt_header_axon_nonce": "111", "bt_header_dendrite_ip": "12.1.1.2", - "bt_header_input_obj_key1": base64.b64encode(pickle.dumps([1, 2, 3, 4])).decode( - "utf-8" - ), + "bt_header_input_obj_key1": base64.b64encode( + jsons.dumps([1, 2, 3, 4]).encode("utf-8") + ).decode("utf-8"), "bt_header_tensor_key2": "[3]-torch.float32", "timeout": "12", "name": "Test", From 9cba6c97f640d4671119b4d3f34c6d7ec84e0c14 Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Wed, 30 Aug 2023 18:22:51 +0000 Subject: [PATCH 3/8] use our internal msgpack-numpy that disables pickle --- requirements/prod.txt | 2 +- setup.py | 18 +++++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/requirements/prod.txt b/requirements/prod.txt index a3df878e7f..43a19aea8e 100644 --- a/requirements/prod.txt +++ b/requirements/prod.txt @@ -12,7 +12,7 @@ munch==2.5.0 netaddr numpy msgpack -msgpack_numpy +git+https://github.com/opentensor/msgpack-numpy.git#egg=msgpack-numpy nest_asyncio pycryptodome>=3.18.0,<4.0.0 pyyaml diff --git a/setup.py b/setup.py index 9ef3f5fff1..b0125e7ec8 100644 --- a/setup.py +++ b/setup.py @@ -23,13 +23,25 @@ import re import os import pathlib +import subprocess def read_requirements(path): + requirements = [] + git_requirements = [] + with pathlib.Path(path).open() as requirements_txt: - return [ - str(requirement) for requirement in parse_requirements(requirements_txt) - ] + for line in requirements_txt: + if line.startswith("git+"): + git_requirements.append(line.strip()) + else: + requirements.append(line.strip()) + + # Install git dependencies + for git_req in git_requirements: + subprocess.check_call(["pip", "install", git_req]) + + return requirements requirements = read_requirements("requirements/prod.txt") From 6b197fe7ff23b721c4b8963f6f516d7b180d518d Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Wed, 30 Aug 2023 18:27:25 +0000 Subject: [PATCH 4/8] skip git+ reqs in check_compat --- scripts/check_compatibility.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/check_compatibility.sh b/scripts/check_compatibility.sh index 3a6cf47e4c..5f48f4cbb0 100755 --- a/scripts/check_compatibility.sh +++ b/scripts/check_compatibility.sh @@ -17,6 +17,11 @@ check_compatibility() { all_supported=0 while read -r requirement; do + # Skip lines starting with git+ + if [[ "$requirement" == git+* ]]; then + continue + fi + package_name=$(echo "$requirement" | awk -F'[!=<>]' '{print $1}' | awk -F'[' '{print $1}') # Strip off brackets echo -n "Checking $package_name... " From 9a895dc839ee3aa57e541be81db82c3d3f41815a Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Thu, 31 Aug 2023 15:11:17 +0000 Subject: [PATCH 5/8] graceful error for submodules --- setup.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 9c921183a7..79cb6b9728 100644 --- a/setup.py +++ b/setup.py @@ -26,6 +26,9 @@ import subprocess +class SubmoduleSyncError(Exception): + pass + def sync_and_update_submodules(): try: print("Synchronizing and updating submodules...") @@ -33,9 +36,12 @@ def sync_and_update_submodules(): subprocess.check_call(['git', 'submodule', 'update', '--init']) except subprocess.CalledProcessError: print("Error synchronizing or updating submodules. Please ensure you have git installed and are in the root directory of the repository.") - raise + raise SubmoduleSyncError("An error occurred while synchronizing or updating submodules.") -sync_and_update_submodules() +try: + sync_and_update_submodules() +except SubmoduleSyncError as e: + print(f"Submodule synchronization error: {e}") def read_requirements(path): requirements = [] From a851fc7ae6b9058905113b4feb97d48891f7b9cc Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Thu, 31 Aug 2023 16:53:13 +0000 Subject: [PATCH 6/8] remove jsons in favor of json, no benefit as async client (httpx) attempts to internally json deserialize anyway. jsons would be of no benefit --- bittensor/dendrite.py | 3 +++ bittensor/synapse.py | 16 +++++++++------- setup.py | 15 +++++++++++---- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/bittensor/dendrite.py b/bittensor/dendrite.py index 5b72f2f6ae..10fe2a91ab 100644 --- a/bittensor/dendrite.py +++ b/bittensor/dendrite.py @@ -289,6 +289,7 @@ def preprocess_synapse_for_request( Returns: bt.Synapse: The preprocessed synapse. """ + bt.logging.trace("Pre-process synapse for request") # Set the timeout for the synapse synapse.timeout = str(timeout) @@ -331,6 +332,8 @@ def process_server_response(self, server_response, local_synapse: bt.Synapse): Raises: None, but errors in attribute setting are silently ignored. """ + bt.logging.trace("Postprocess server response") + # Check if the server responded with a successful status code if server_response.status_code == 200: # If the response is successful, overwrite local synapse state with diff --git a/bittensor/synapse.py b/bittensor/synapse.py index 56b5e38129..392b24f948 100644 --- a/bittensor/synapse.py +++ b/bittensor/synapse.py @@ -19,7 +19,7 @@ import ast import sys import torch -import jsons +import json import base64 import typing import hashlib @@ -552,15 +552,16 @@ def to_headers(self) -> dict: headers[f"bt_header_dict_tensor_{field}"] = str(serialized_dict_tensor) elif required and field in required: + bittensor.logging.trace(f"Serializing {field} with json...") try: - serialized_value = jsons.dumps(value) + serialized_value = json.dumps(value) encoded_value = base64.b64encode(serialized_value.encode()).decode( "utf-8" ) headers[f"bt_header_input_obj_{field}"] = encoded_value - except jsons.exceptions.SerializationError as e: + except TypeError as e: raise ValueError( - f"Error serializing {field} with value {value}. Objects must be jsons serializable." + f"Error serializing {field} with value {value}. Objects must be json serializable." ) from e # Adding the size of the headers and the total size to the headers @@ -655,18 +656,19 @@ def parse_headers_to_inputs(cls, headers: dict) -> dict: continue # Handle 'input_obj' headers elif "bt_header_input_obj" in key: + bittensor.logging.trace(f"Deserializing {key} with json...") try: new_key = key.split("bt_header_input_obj_")[1] # Skip if the key already exists in the dictionary if new_key in inputs_dict: continue # Decode and load the serialized object - inputs_dict[new_key] = jsons.loads( + inputs_dict[new_key] = json.loads( base64.b64decode(value.encode()).decode("utf-8") ) - except jsons.exceptions.JSONDecodeError as e: + except json.JSONDecodeError as e: bittensor.logging.error( - f"Error while jsons decoding 'input_obj' header {key}: {e}" + f"Error while json decoding 'input_obj' header {key}: {e}" ) continue except Exception as e: diff --git a/setup.py b/setup.py index 79cb6b9728..654acee9d4 100644 --- a/setup.py +++ b/setup.py @@ -29,20 +29,27 @@ class SubmoduleSyncError(Exception): pass + def sync_and_update_submodules(): try: print("Synchronizing and updating submodules...") - subprocess.check_call(['git', 'submodule', 'sync']) - subprocess.check_call(['git', 'submodule', 'update', '--init']) + subprocess.check_call(["git", "submodule", "sync"]) + subprocess.check_call(["git", "submodule", "update", "--init"]) except subprocess.CalledProcessError: - print("Error synchronizing or updating submodules. Please ensure you have git installed and are in the root directory of the repository.") - raise SubmoduleSyncError("An error occurred while synchronizing or updating submodules.") + print( + "Error synchronizing or updating submodules. Please ensure you have git installed and are in the root directory of the repository." + ) + raise SubmoduleSyncError( + "An error occurred while synchronizing or updating submodules." + ) + try: sync_and_update_submodules() except SubmoduleSyncError as e: print(f"Submodule synchronization error: {e}") + def read_requirements(path): requirements = [] git_requirements = [] From b739e4af6879602fc0c2276119ec8ae296844d9c Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Thu, 31 Aug 2023 20:08:21 +0000 Subject: [PATCH 7/8] fix tests --- tests/unit_tests/test_synapse.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit_tests/test_synapse.py b/tests/unit_tests/test_synapse.py index db942f2af4..68f2d9541b 100644 --- a/tests/unit_tests/test_synapse.py +++ b/tests/unit_tests/test_synapse.py @@ -14,7 +14,7 @@ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. -import jsons +import json import torch import base64 import typing @@ -32,7 +32,7 @@ class Test(bittensor.Synapse): "bt_header_axon_nonce": "111", "bt_header_dendrite_ip": "12.1.1.2", "bt_header_input_obj_key1": base64.b64encode( - jsons.dumps([1, 2, 3, 4]).encode("utf-8") + json.dumps([1, 2, 3, 4]).encode("utf-8") ).decode("utf-8"), "bt_header_tensor_key2": "[3]-torch.float32", "timeout": "12", @@ -67,7 +67,7 @@ class Test(bittensor.Synapse): "bt_header_axon_nonce": "111", "bt_header_dendrite_ip": "12.1.1.2", "bt_header_input_obj_key1": base64.b64encode( - jsons.dumps([1, 2, 3, 4]).encode("utf-8") + json.dumps([1, 2, 3, 4]).encode("utf-8") ).decode("utf-8"), "bt_header_tensor_key2": "[3]-torch.float32", "timeout": "12", From a67bee61f14eb698d485fe56637d4bce3dc92435 Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Thu, 31 Aug 2023 20:11:52 +0000 Subject: [PATCH 8/8] remove jsons req --- requirements/prod.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements/prod.txt b/requirements/prod.txt index 43a19aea8e..82f28437af 100644 --- a/requirements/prod.txt +++ b/requirements/prod.txt @@ -6,7 +6,6 @@ ddt==1.6.0 fuzzywuzzy>=0.18.0 fastapi==0.99.1 httpx==0.24.1 -jsons==1.6.3 loguru==0.7.0 munch==2.5.0 netaddr