From 364f9b4cc45578588fa0fe2c6199ed661e55487e Mon Sep 17 00:00:00 2001 From: Roman Date: Tue, 19 Nov 2024 13:50:30 -0800 Subject: [PATCH 1/4] fix reconnection --- bittensor/utils/networking.py | 56 ++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/bittensor/utils/networking.py b/bittensor/utils/networking.py index 76686d5fa4..7505b92306 100644 --- a/bittensor/utils/networking.py +++ b/bittensor/utils/networking.py @@ -20,12 +20,14 @@ import json import os import socket +import time import urllib from functools import wraps from typing import Optional import netaddr import requests +from websocket import WebSocketConnectionClosedException from bittensor.utils.btlogging import logging @@ -178,22 +180,48 @@ def get_formatted_ws_endpoint_url(endpoint_url: Optional[str]) -> Optional[str]: def ensure_connected(func): """Decorator ensuring the function executes with an active substrate connection.""" + def is_connected(substrate) -> bool: + """Check if the substrate connection is active.""" + sock = substrate.websocket.sock + return ( + sock is not None + and sock.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR) == 0 + ) + + def reconnect_with_retries(self, retries: int = 5, delay: int = 5) -> bool: + """Attempt to reconnect with a specified number of retries.""" + while retries > 0: + logging.error( + f"Attempting to reconnect to substrate ({retries} attempts left)..." + ) + try: + self._get_substrate() + old_level = logging.get_level() + logging.set_info() + logging.success("Connection successfully restored!") + logging.setLevel(old_level) + return True + except ConnectionRefusedError: + retries -= 1 + time.sleep(delay) + logging.error("Failed to reconnect to substrate after multiple attempts.") + return False + @wraps(func) def wrapper(self, *args, **kwargs): - """Wrapper function where `self` argument is Subtensor instance with the substrate connection.""" - # Check the socket state before method execution - if ( - # connection was closed correctly - self.substrate.websocket.sock is None - # connection has a broken pipe - or self.substrate.websocket.sock.getsockopt( - socket.SOL_SOCKET, socket.SO_ERROR - ) - != 0 - ): - logging.debug("Reconnecting to substrate...") + """Wrapper function where `self` is expected to be a Subtensor instance.""" + if not is_connected(self.substrate): + logging.debug("Substrate connection inactive. Attempting to reconnect...") self._get_substrate() - # Execute the method if the connection is active or after reconnecting - return func(self, *args, **kwargs) + + try: + return func(self, *args, **kwargs) + except WebSocketConnectionClosedException: + logging.warning("WebSocket connection closed. Attempting to reconnect...") + if reconnect_with_retries(self): + return func(self, *args, **kwargs) + else: + logging.error("Unable to restore connection. Raising exception.") + raise ConnectionRefusedError("Failed to reconnect to substrate.") return wrapper From a73e45694f3b693c1551190f8567b48352370f42 Mon Sep 17 00:00:00 2001 From: Roman Date: Tue, 19 Nov 2024 15:41:26 -0800 Subject: [PATCH 2/4] use retry + improve logging messages --- bittensor/core/subtensor.py | 6 +---- bittensor/utils/networking.py | 46 +++++++++++++++++------------------ 2 files changed, 24 insertions(+), 28 deletions(-) diff --git a/bittensor/core/subtensor.py b/bittensor/core/subtensor.py index 6416d9971d..f6b2c86636 100644 --- a/bittensor/core/subtensor.py +++ b/bittensor/core/subtensor.py @@ -239,11 +239,7 @@ def _get_substrate(self): except (ConnectionRefusedError, ssl.SSLError) as error: logging.error( - f"Could not connect to {self.network} network with {self.chain_endpoint} chain endpoint.", - ) - logging.info( - "You can check if you have connectivity by running this command: nc -vz localhost " - f"{self.chain_endpoint}" + f"Could not connect to {self.network} network with {self.chain_endpoint} chain endpoint.", ) raise ConnectionRefusedError(error.args) diff --git a/bittensor/utils/networking.py b/bittensor/utils/networking.py index 7505b92306..edd3b2884e 100644 --- a/bittensor/utils/networking.py +++ b/bittensor/utils/networking.py @@ -20,11 +20,10 @@ import json import os import socket -import time import urllib from functools import wraps from typing import Optional - +from retry import retry import netaddr import requests from websocket import WebSocketConnectionClosedException @@ -188,24 +187,22 @@ def is_connected(substrate) -> bool: and sock.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR) == 0 ) - def reconnect_with_retries(self, retries: int = 5, delay: int = 5) -> bool: - """Attempt to reconnect with a specified number of retries.""" - while retries > 0: - logging.error( - f"Attempting to reconnect to substrate ({retries} attempts left)..." - ) - try: - self._get_substrate() - old_level = logging.get_level() - logging.set_info() - logging.success("Connection successfully restored!") - logging.setLevel(old_level) - return True - except ConnectionRefusedError: - retries -= 1 - time.sleep(delay) - logging.error("Failed to reconnect to substrate after multiple attempts.") - return False + @retry( + exceptions=ConnectionRefusedError, + tries=5, + delay=5, + backoff=1, + logger=logging, + ) + def reconnect_with_retries(self): + """Attempt to reconnect with retries using retry library.""" + logging.info("Attempting to reconnect to substrate...") + self._get_substrate() + + old_level = logging.get_level() + logging.set_info() + logging.success("Connection successfully restored!") + logging.setLevel(old_level) @wraps(func) def wrapper(self, *args, **kwargs): @@ -217,10 +214,13 @@ def wrapper(self, *args, **kwargs): try: return func(self, *args, **kwargs) except WebSocketConnectionClosedException: - logging.warning("WebSocket connection closed. Attempting to reconnect...") - if reconnect_with_retries(self): + logging.warning( + "WebSocket connection closed. Attempting to reconnect 5 times..." + ) + try: + reconnect_with_retries(self) return func(self, *args, **kwargs) - else: + except ConnectionRefusedError: logging.error("Unable to restore connection. Raising exception.") raise ConnectionRefusedError("Failed to reconnect to substrate.") From 09dcb93fd3582f6d4ffaf605668ae75c6c742da5 Mon Sep 17 00:00:00 2001 From: Roman Date: Tue, 19 Nov 2024 15:44:01 -0800 Subject: [PATCH 3/4] remove `backoff` from requirements --- bittensor/core/extrinsics/registration.py | 3 +-- requirements/prod.txt | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/bittensor/core/extrinsics/registration.py b/bittensor/core/extrinsics/registration.py index 28a3e70a8a..3bed32d362 100644 --- a/bittensor/core/extrinsics/registration.py +++ b/bittensor/core/extrinsics/registration.py @@ -273,8 +273,7 @@ def _do_burned_register( """ Performs a burned register extrinsic call to the Subtensor chain. - This method sends a registration transaction to the Subtensor blockchain using the burned register mechanism. It - retries the call up to three times with exponential backoff in case of failures. + This method sends a registration transaction to the Subtensor blockchain using the burned register mechanism. Args: self (bittensor.core.subtensor.Subtensor): Subtensor instance. diff --git a/requirements/prod.txt b/requirements/prod.txt index 1f3628e1a0..d084b5e37a 100644 --- a/requirements/prod.txt +++ b/requirements/prod.txt @@ -2,7 +2,6 @@ wheel setuptools~=70.0.0 aiohttp~=3.9 async-property==0.2.2 -backoff bittensor-cli bt-decode==0.2.0a0 colorama~=0.4.6 From c3468ab5f3f611423f5a838eb7e4e7a60dd7210d Mon Sep 17 00:00:00 2001 From: Roman Date: Tue, 19 Nov 2024 15:47:56 -0800 Subject: [PATCH 4/4] optimise imports --- bittensor/core/subtensor.py | 17 ----------------- bittensor/utils/networking.py | 20 ++------------------ 2 files changed, 2 insertions(+), 35 deletions(-) diff --git a/bittensor/core/subtensor.py b/bittensor/core/subtensor.py index f6b2c86636..0198e4818f 100644 --- a/bittensor/core/subtensor.py +++ b/bittensor/core/subtensor.py @@ -1,20 +1,3 @@ -# The MIT License (MIT) -# Copyright © 2024 Opentensor Foundation -# -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. -# -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - """ The ``bittensor.core.subtensor.Subtensor`` module in Bittensor serves as a crucial interface for interacting with the Bittensor blockchain, facilitating a range of operations essential for the decentralized machine learning network. diff --git a/bittensor/utils/networking.py b/bittensor/utils/networking.py index edd3b2884e..7524b353f5 100644 --- a/bittensor/utils/networking.py +++ b/bittensor/utils/networking.py @@ -1,20 +1,3 @@ -# The MIT License (MIT) -# Copyright © 2024 Opentensor Foundation -# -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. -# -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - """Utils for handling local network with ip and ports.""" import json @@ -23,9 +6,10 @@ import urllib from functools import wraps from typing import Optional -from retry import retry + import netaddr import requests +from retry import retry from websocket import WebSocketConnectionClosedException from bittensor.utils.btlogging import logging