From 59c238dc4e9fbf799918ab1e1b1fb970e4ec384b Mon Sep 17 00:00:00 2001 From: Benjamin Himes Date: Wed, 12 Mar 2025 21:57:00 +0200 Subject: [PATCH 1/4] Improves the logic of the disk cache so that it doesn't spill over --- async_substrate_interface/utils/cache.py | 53 +++++++++++++----------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/async_substrate_interface/utils/cache.py b/async_substrate_interface/utils/cache.py index ab4f457..df69fed 100644 --- a/async_substrate_interface/utils/cache.py +++ b/async_substrate_interface/utils/cache.py @@ -2,6 +2,7 @@ import os import pickle import sqlite3 +from pathlib import Path import asyncstdlib as a USE_CACHE = True if os.getenv("NO_CACHE") != "1" else False @@ -14,6 +15,12 @@ ) +def _ensure_dir(): + path = Path(CACHE_LOCATION).parent + if not path.exists(): + path.mkdir(parents=True, exist_ok=True) + + def _get_table_name(func): """Convert "ClassName.method_name" to "ClassName_method_name""" return func.__qualname__.replace(".", "_") @@ -74,22 +81,28 @@ def _insert_into_cache(c, conn, table_name, key, result, chain): pass +def _shared_inner_fn_logic(func, self, args, kwargs): + _ensure_dir() + conn = sqlite3.connect(CACHE_LOCATION) + c = conn.cursor() + table_name = _get_table_name(func) + _create_table(c, conn, table_name) + key = pickle.dumps((args, kwargs)) + chain = self.url + if not (local_chain := _check_if_local(chain)) or not USE_CACHE: + result = _retrieve_from_cache(c, table_name, key, chain) + else: + result = None + return c, conn, table_name, key, result, chain, local_chain + + def sql_lru_cache(maxsize=None): def decorator(func): - conn = sqlite3.connect(CACHE_LOCATION) - c = conn.cursor() - table_name = _get_table_name(func) - _create_table(c, conn, table_name) - @functools.lru_cache(maxsize=maxsize) def inner(self, *args, **kwargs): - c = conn.cursor() - key = pickle.dumps((args, kwargs)) - chain = self.url - if not (local_chain := _check_if_local(chain)) or not USE_CACHE: - result = _retrieve_from_cache(c, table_name, key, chain) - if result is not None: - return result + c, conn, table_name, key, result, chain, local_chain = ( + _shared_inner_fn_logic(func, self, args, kwargs) + ) # If not in DB, call func and store in DB result = func(self, *args, **kwargs) @@ -106,21 +119,11 @@ def inner(self, *args, **kwargs): def async_sql_lru_cache(maxsize=None): def decorator(func): - conn = sqlite3.connect(CACHE_LOCATION) - c = conn.cursor() - table_name = _get_table_name(func) - _create_table(c, conn, table_name) - @a.lru_cache(maxsize=maxsize) async def inner(self, *args, **kwargs): - c = conn.cursor() - key = pickle.dumps((args, kwargs)) - chain = self.url - - if not (local_chain := _check_if_local(chain)) or not USE_CACHE: - result = _retrieve_from_cache(c, table_name, key, chain) - if result is not None: - return result + c, conn, table_name, key, result, chain, local_chain = ( + _shared_inner_fn_logic(func, self, args, kwargs) + ) # If not in DB, call func and store in DB result = await func(self, *args, **kwargs) From 77e4519fafb51e99618a13776c0b6bb8accbfbf7 Mon Sep 17 00:00:00 2001 From: Benjamin Himes Date: Wed, 12 Mar 2025 22:15:17 +0200 Subject: [PATCH 2/4] Don't create disk cache file/table if it's not needed. --- async_substrate_interface/utils/cache.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/async_substrate_interface/utils/cache.py b/async_substrate_interface/utils/cache.py index df69fed..9d16411 100644 --- a/async_substrate_interface/utils/cache.py +++ b/async_substrate_interface/utils/cache.py @@ -82,17 +82,21 @@ def _insert_into_cache(c, conn, table_name, key, result, chain): def _shared_inner_fn_logic(func, self, args, kwargs): - _ensure_dir() - conn = sqlite3.connect(CACHE_LOCATION) - c = conn.cursor() - table_name = _get_table_name(func) - _create_table(c, conn, table_name) - key = pickle.dumps((args, kwargs)) chain = self.url if not (local_chain := _check_if_local(chain)) or not USE_CACHE: + _ensure_dir() + conn = sqlite3.connect(CACHE_LOCATION) + c = conn.cursor() + table_name = _get_table_name(func) + _create_table(c, conn, table_name) + key = pickle.dumps((args, kwargs)) result = _retrieve_from_cache(c, table_name, key, chain) else: result = None + c = None + conn = None + table_name = None + key = None return c, conn, table_name, key, result, chain, local_chain From ce1202a84a2cf89a70264de0e8b7fa7f29f4c3fc Mon Sep 17 00:00:00 2001 From: Benjamin Himes Date: Wed, 12 Mar 2025 22:23:59 +0200 Subject: [PATCH 3/4] Added test --- tests/integration_tests/test_disk_cache.py | 74 ++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 tests/integration_tests/test_disk_cache.py diff --git a/tests/integration_tests/test_disk_cache.py b/tests/integration_tests/test_disk_cache.py new file mode 100644 index 0000000..3e379ab --- /dev/null +++ b/tests/integration_tests/test_disk_cache.py @@ -0,0 +1,74 @@ +import pytest + +from async_substrate_interface.async_substrate import ( + DiskCachedAsyncSubstrateInterface, + AsyncSubstrateInterface, +) +from async_substrate_interface.sync_substrate import SubstrateInterface + + +@pytest.mark.asyncio +async def test_disk_cache(): + entrypoint = "wss://entrypoint-finney.opentensor.ai:443" + async with DiskCachedAsyncSubstrateInterface(entrypoint) as disk_cached_substrate: + current_block = await disk_cached_substrate.get_block_number(None) + block_hash = await disk_cached_substrate.get_block_hash(current_block) + parent_block_hash = await disk_cached_substrate.get_parent_block_hash( + block_hash + ) + block_runtime_info = await disk_cached_substrate.get_block_runtime_info( + block_hash + ) + block_runtime_version_for = ( + await disk_cached_substrate.get_block_runtime_version_for(block_hash) + ) + block_hash_from_cache = await disk_cached_substrate.get_block_hash( + current_block + ) + parent_block_hash_from_cache = ( + await disk_cached_substrate.get_parent_block_hash(block_hash_from_cache) + ) + block_runtime_info_from_cache = ( + await disk_cached_substrate.get_block_runtime_info(block_hash_from_cache) + ) + block_runtime_version_from_cache = ( + await disk_cached_substrate.get_block_runtime_version_for( + block_hash_from_cache + ) + ) + assert block_hash == block_hash_from_cache + assert parent_block_hash == parent_block_hash_from_cache + assert block_runtime_info == block_runtime_info_from_cache + assert block_runtime_version_for == block_runtime_version_from_cache + async with AsyncSubstrateInterface(entrypoint) as non_cache_substrate: + block_hash_non_cache = await non_cache_substrate.get_block_hash(current_block) + parent_block_hash_non_cache = await non_cache_substrate.get_parent_block_hash( + block_hash_non_cache + ) + block_runtime_info_non_cache = await non_cache_substrate.get_block_runtime_info( + block_hash_non_cache + ) + block_runtime_version_for_non_cache = ( + await non_cache_substrate.get_block_runtime_version_for( + block_hash_non_cache + ) + ) + assert block_hash == block_hash_non_cache + assert parent_block_hash == parent_block_hash_non_cache + assert block_runtime_info == block_runtime_info_non_cache + assert block_runtime_version_for == block_runtime_version_for_non_cache + with SubstrateInterface(entrypoint) as sync_substrate: + block_hash_sync = sync_substrate.get_block_hash(current_block) + parent_block_hash_sync = sync_substrate.get_parent_block_hash( + block_hash_non_cache + ) + block_runtime_info_sync = sync_substrate.get_block_runtime_info( + block_hash_non_cache + ) + block_runtime_version_for_sync = sync_substrate.get_block_runtime_version_for( + block_hash_non_cache + ) + assert block_hash == block_hash_sync + assert parent_block_hash == parent_block_hash_sync + assert block_runtime_info == block_runtime_info_sync + assert block_runtime_version_for == block_runtime_version_for_sync From 6bcf9957e913e2ab4491028c3621c2228ea8d82e Mon Sep 17 00:00:00 2001 From: ibraheem-opentensor Date: Wed, 12 Mar 2025 13:34:55 -0700 Subject: [PATCH 4/4] Updates changelog and version --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 650c5bf..319c008 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## 1.0.7 /2025-03-12 + +## What's Changed +* Improves the logic of the disk cache so that it doesn't spill over by @thewhaleking in https://github.com/opentensor/async-substrate-interface/pull/76 + +**Full Changelog**: https://github.com/opentensor/async-substrate-interface/compare/v1.0.6...v1.0.7 + ## 1.0.6 /2025-03-12 ## What's Changed diff --git a/pyproject.toml b/pyproject.toml index 0722fcd..c5584d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "async-substrate-interface" -version = "1.0.6" +version = "1.0.7" description = "Asyncio library for interacting with substrate. Mostly API-compatible with py-substrate-interface" readme = "README.md" license = { file = "LICENSE" }