Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions tools/state-dump/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,26 @@ pip install -r requirements.txt
python3 world_state.py \
--rpc http://<ARCHIVE_NODE_IP>:8545 \
--input-genesis initial_genesis.json \
--output out_genesis.json
--output out_genesis.json \
--include-code \
--include-storage \
--page-size 2048 \
--min-page-size 256 \
--rpc-timeout 300
```

**Example:**

```bash
python3 world_state.py \
python3 world_state.py \
--rpc http://34.75.194.46:8545 \
--input-genesis initial_genesis.json \
--output out_genesis.json
--output out_genesis.json \
--include-code \
--include-storage \
--page-size 2048 \
--min-page-size 256 \
--rpc-timeout 300
```

- `--rpc`
Expand Down
336 changes: 234 additions & 102 deletions tools/state-dump/world_state.py
Original file line number Diff line number Diff line change
@@ -1,131 +1,263 @@
#!/usr/bin/env python3
import json
"""
World-state → genesis.alloc via Erigon-style debug_accountRange (6-arg, 'next'),
pinned to a single block number to avoid 'missing trie node ... loc: diff' errors.

- Prints and pins the exact block number (hex) for ALL calls.
- Lightweight account paging (no inline code/storage).
- Optional per-contract code and full storage paging.
- Adaptive page sizes for both accounts and storage.
"""

import argparse
import requests
import json
import time
from pathlib import Path
from typing import Dict, Any, Optional

# Default filename for the migrated genesis output
DEFAULT_OUT_NAME = "out_genesis.json"
import requests

def rpc_call(rpc_url: str, method: str, params: list) -> dict:
payload = {
"jsonrpc": "2.0",
"id": 1,
"method": method,
"params": params
}
resp = requests.post(rpc_url, json=payload)
resp.raise_for_status()
data = resp.json()
if "error" in data:
raise RuntimeError(f"RPC error ({method}): {data['error']}")
return data["result"]
DEFAULT_OUT = "out_genesis.json"


def rpc_get_block_number(rpc_url: str) -> int:
"""Fetch the latest block number (as an int)."""
hex_bn = rpc_call(rpc_url, "eth_blockNumber", [])
return int(hex_bn, 16)
class RpcError(RuntimeError):
    """Error raised when a JSON-RPC response contains an ``error`` member."""


def rpc_debug_dump_block(rpc_url: str, block_param: str) -> dict:
"""Call debug_dumpBlock at the given block tag or hex number."""
return rpc_call(rpc_url, "debug_dumpBlock", [block_param])
def rpc_call(url: str, method: str, params: list, *, timeout: float, retries: int = 0, backoff: float = 1.6) -> Any:
    """POST a JSON-RPC 2.0 request and return the ``result`` member.

    Args:
        url: HTTP endpoint of the node.
        method: JSON-RPC method name.
        params: Positional parameters for the call.
        timeout: Per-request timeout in seconds, passed to ``requests.post``.
        retries: Number of extra attempts after the first one. Negative
            values are treated as 0 so at least one attempt is always made.
        backoff: Base of the exponential sleep between attempts
            (``backoff ** attempt`` seconds).

    Returns:
        The value stored under ``"result"`` in the response body.

    Raises:
        RpcError: The response body contains an ``"error"`` member.
        requests.Timeout, requests.ConnectionError: Transport failure on the
            final attempt (re-raised unchanged).
        requests.HTTPError: Non-2xx HTTP status (never retried).
    """
    # Clamp so that a negative ``retries`` cannot skip the loop entirely.
    attempts = max(retries, 0) + 1
    payload = {"jsonrpc": "2.0", "id": 1, "method": method, "params": params}

    for attempt in range(attempts):
        try:
            resp = requests.post(url, json=payload, timeout=timeout)
            resp.raise_for_status()
            data = resp.json()
            if "error" in data:
                raise RpcError(f"{method}: {data['error']}")
            return data["result"]
        except (requests.Timeout, requests.ConnectionError, RpcError):
            # Last attempt: surface the original exception to the caller.
            if attempt == attempts - 1:
                raise
            time.sleep(backoff ** attempt)


def to_hex(x: str) -> str:
"""Convert a decimal string to a hex string (0x-prefixed)."""
def to_hex_any(x) -> str:
    """Normalise a numeric quantity to a ``0x``-prefixed hex string.

    Accepted inputs:
      * ``None``: ``"0x0"``.
      * ``int``: ``hex(x)``.
      * ``str``: surrounding whitespace is stripped. A string that already
        carries a ``0x``/``0X`` prefix keeps its digits untouched but gets a
        lower-case prefix. Anything else is parsed as a decimal integer.
        An empty string or a bare prefix (``"0x"``) is treated as zero
        instead of crashing or yielding an invalid quantity.
      * anything else: ``hex(int(x))``.

    Raises:
        ValueError: A non-prefixed string is not a valid decimal integer.
    """
    if x is None:
        return "0x0"
    if isinstance(x, int):
        return hex(x)
    if isinstance(x, str):
        s = x.strip()
        if not s:
            return "0x0"
        if s[:2].lower() == "0x":
            digits = s[2:]
            # A bare "0x" is not a valid quantity; emit an explicit zero.
            return "0x" + digits if digits else "0x0"
        return hex(int(s))
    return hex(int(x))


def build_alloc(accounts: dict, include_nonces: bool) -> dict:
"""
Given the 'accounts' map from debug_dumpBlock, return an 'alloc'
dictionary suitable for a genesis file: address → { balance, code?, storage?, nonce? }.
"""
alloc = {}
for addr, acct in accounts.items():
entry = {"balance": to_hex(acct["balance"])}
if include_nonces and acct.get("nonce") is not None:
entry["nonce"] = to_hex(acct["nonce"])
if acct.get("code"):
entry["code"] = acct["code"]
if acct.get("storage"):
entry["storage"] = acct["storage"]
alloc[addr] = entry
def get_latest_block(url: str, timeout: float) -> int:
    """Return the block number reported by ``eth_blockNumber`` as an int."""
    return int(rpc_call(url, "eth_blockNumber", [], timeout=timeout), 16)


def account_range(url: str, block_hex: str, start_token: str, n: int, *, timeout: float) -> Dict[str, Any]:
    """Fetch one page of accounts via the 6-argument ``debug_accountRange``.

    The three trailing flags are all ``True`` (nocode, nostorage,
    incompletes) so the page stays lightweight.
    """
    nocode = nostorage = incompletes = True
    params = [block_hex, start_token, n, nocode, nostorage, incompletes]
    return rpc_call(url, "debug_accountRange", params, timeout=timeout)


def storage_range_at(url: str, block_hex: str, addr: str, start_key: str, n: int, *, timeout: float) -> Dict[str, Any]:
    """Fetch one page of ``addr``'s storage via ``debug_storageRangeAt``.

    The call is pinned to ``block_hex``; the transaction index is sent as
    ``None`` (intended to select the post-state of the block).
    """
    tx_index = None
    params = [block_hex, tx_index, addr, start_key, n]
    return rpc_call(url, "debug_storageRangeAt", params, timeout=timeout)


def eth_get_code(url: str, addr: str, block_hex: str, *, timeout: float) -> str:
    """Return the code of ``addr`` via ``eth_getCode`` at the pinned block ``block_hex``."""
    params = [addr, block_hex]
    return rpc_call(url, "eth_getCode", params, timeout=timeout)


def page_full_storage(
    url: str,
    block_hex: str,
    addr: str,
    *,
    timeout: float,
    initial_page_size: int = 2048,
    min_page_size: int = 128,
) -> Dict[str, str]:
    """Page through the complete storage of ``addr`` at the pinned block.

    Calls ``storage_range_at`` repeatedly, following ``nextKey`` until the
    node reports no further page. When a request times out the page size is
    halved (never below ``min_page_size``) and the same page is retried.

    Args:
        url: RPC endpoint.
        block_hex: Pinned block identifier forwarded to every call.
        addr: Contract address whose storage is dumped.
        timeout: Per-request timeout in seconds.
        initial_page_size: Number of slots requested per page at the start.
        min_page_size: Lower bound for the adaptive page size.

    Returns:
        Mapping of storage slot key to value, excluding empty/zero values.

    Raises:
        RpcError: Any RPC error that is not a recoverable timeout (including
            "missing trie node", which callers may want to handle).
        requests.Timeout: A client-side timeout at the minimum page size.
    """
    storage: Dict[str, str] = {}
    start_key = "0x"
    page = initial_page_size

    while True:
        try:
            sres = storage_range_at(url, block_hex, addr, start_key, page, timeout=timeout)
        except (RpcError, requests.Timeout) as e:
            # rpc_call re-raises client-side timeouts as requests.Timeout, so
            # both that and a server-reported "timed out" must shrink the page.
            timed_out = isinstance(e, requests.Timeout) or "timed out" in str(e).lower()
            if timed_out and page > min_page_size:
                page = max(min_page_size, page // 2)
                print(f"⚠️ storageRangeAt timed out for {addr}; reducing storage page-size to {page} and retrying…")
                continue
            raise

        # NOTE(review): assumes the geth/Erigon response shape, where the map
        # key is the *hashed* slot key and each entry carries the real slot in
        # "key" — confirm against the target node. Fall back to the map key
        # when "key" is absent or null.
        for hashed_key, slot in (sres.get("storage") or {}).items():
            val = slot.get("value", "0x0")
            if val in (None, "0x", "0x0", "0", 0):
                continue
            storage[slot.get("key") or hashed_key] = val

        nxt = sres.get("nextKey")
        if not nxt:
            break
        start_key = nxt

    return storage


def build_alloc(
    url: str,
    block_hex: str,
    *,
    initial_page_size: int,
    min_page_size: int,
    include_nonces: bool,
    include_code: bool,
    include_storage: bool,
    timeout: float,
    storage_page_initial: int,
    storage_page_min: int,
) -> dict:
    """Build a genesis ``alloc`` mapping by paging accounts at a pinned block.

    Accounts are fetched with ``account_range``, following the ``next`` token
    until it is empty. On a timeout the account page size is halved (never
    below ``min_page_size``) and the same page is retried. For each account
    the balance is always recorded; nonce, code and storage are optional.

    Args:
        url: RPC endpoint.
        block_hex: Pinned block identifier used for every call.
        initial_page_size: Accounts requested per page at the start.
        min_page_size: Lower bound for the adaptive account page size.
        include_nonces: Record ``nonce`` when the node reports one.
        include_code: Record non-empty contract code.
        include_storage: Record full storage for accounts that have code.
        timeout: Per-request timeout in seconds.
        storage_page_initial: Initial storage page size.
        storage_page_min: Minimum storage page size.

    Returns:
        Mapping of address to ``{"balance", "nonce"?, "code"?, "storage"?}``.

    Raises:
        RpcError: Unrecoverable RPC error. A "missing trie node" error during
            storage paging is reported and skipped instead.
        requests.Timeout: Client-side timeout at the minimum page size.
    """
    alloc: dict = {}
    start = "0x"
    total = 0
    page_size = initial_page_size

    while True:
        try:
            res = account_range(url, block_hex, start, page_size, timeout=timeout)
        except (RpcError, requests.Timeout) as e:
            # rpc_call re-raises client-side timeouts as requests.Timeout, so
            # both that and a server-reported "timed out" must shrink the page.
            timed_out = isinstance(e, requests.Timeout) or "timed out" in str(e).lower()
            if timed_out and page_size > min_page_size:
                page_size = max(min_page_size, page_size // 2)
                print(f"⚠️ accountRange timed out; reducing account page-size to {page_size} and retrying…")
                continue  # same start token, smaller page
            raise

        for addr, meta in (res.get("accounts") or {}).items():
            entry = {"balance": to_hex_any(meta.get("balance", "0"))}
            if include_nonces and meta.get("nonce") is not None:
                entry["nonce"] = to_hex_any(meta["nonce"])

            # The account page is fetched without code, so code is looked up
            # per account; storage is only paged for accounts that have code.
            if include_code or include_storage:
                code = eth_get_code(url, addr, block_hex, timeout=timeout)
                if code and code != "0x":
                    if include_code:
                        entry["code"] = code
                    if include_storage:
                        try:
                            storage = page_full_storage(
                                url,
                                block_hex,
                                addr,
                                timeout=timeout,
                                initial_page_size=storage_page_initial,
                                min_page_size=storage_page_min,
                            )
                            if storage:
                                entry["storage"] = storage
                        except RpcError as e:
                            if "missing trie node" not in str(e).lower():
                                raise
                            # Best effort: keep balance/nonce/code for this
                            # account and carry on with the remaining ones.
                            print(f"❌ storageRangeAt failed for {addr} due to missing trie node at pinned block. "
                                  f"Your node likely lacks full state for storage paging (needs archive-like state).")
                            print(" → Options: (1) re-run without --include-storage, "
                                  "or (2) use an archive/fully-synced node with state available at this block.")

            alloc[addr] = entry
            total += 1

        nxt = res.get("next")
        if not nxt:
            break
        start = nxt

    print(f" ↳ paged: {total} accounts")
    return alloc


def main():
p = argparse.ArgumentParser(
description="Snapshot world-state via debug_dumpBlock and merge into a genesis template"
)
p.add_argument(
"--rpc",
default="http://127.0.0.1:8545",
help="Geth RPC endpoint"
)
p.add_argument(
"--input-genesis",
required=True,
help="Path to your source chain genesis.json"
)
p.add_argument(
"--block", "-b",
type=int,
help="Block number to snapshot (defaults to latest)"
)
p.add_argument(
"--output", "-o",
help=(
"Path or directory for output genesis JSON "
f"(default: ./{DEFAULT_OUT_NAME})"
)
)
p.add_argument(
"--include-nonces",
action="store_true",
help="Include account nonces in the alloc entries"
)
args = p.parse_args()
ap = argparse.ArgumentParser(description="World-state → genesis.alloc (Erigon accountRange) pinned to a block, with adaptive paging.")
ap.add_argument("--rpc", required=True)
ap.add_argument("--input-genesis", required=True)
ap.add_argument("--output", "-o", help=f"Output path or directory (default ./{DEFAULT_OUT})")
ap.add_argument("--block", "-b", type=int, help="Block number (default: latest)")
ap.add_argument("--exclude-nonces", action="store_true")
ap.add_argument("--page-size", type=int, default=2048)
ap.add_argument("--min-page-size", type=int, default=256)
ap.add_argument("--include-code", action="store_true")
ap.add_argument("--include-storage", action="store_true") # implies include-code if contract present
ap.add_argument("--rpc-timeout", type=float, default=300.0)
ap.add_argument("--storage-page-size", type=int, default=2048, help="Initial storage page size")
ap.add_argument("--storage-min-page-size", type=int, default=128, help="Minimum storage page size")
args = ap.parse_args()

include_nonces = not args.exclude_nonces
include_code = bool(args.include_code or args.include_storage)
include_storage = bool(args.include_storage)

# load template
tmpl_path = Path(args.input_genesis)
if not tmpl_path.is_file():
raise SystemExit(f"❌ Input genesis not found: {tmpl_path}")
genesis_tpl = json.loads(tmpl_path.read_text())
# Load template
tpl_path = Path(args.input_genesis)
if not tpl_path.is_file():
raise SystemExit(f"❌ Input genesis not found: {tpl_path}")
genesis_tpl = json.loads(tpl_path.read_text())

# decide which block to dump
# Resolve and PIN the block to a constant hex number
if args.block is None:
block_no = rpc_get_block_number(args.rpc)
block_param = "latest"
print(f"⛓ Dumping world-state at latest (block {block_no})…")
bn = get_latest_block(args.rpc, args.rpc_timeout)
else:
block_no = args.block
block_param = hex(block_no)
print(f"⛓ Dumping world-state at block {block_no}…")

# fetch the dump
dump = rpc_debug_dump_block(args.rpc, block_param)
print(f" ↳ {len(dump['accounts'])} accounts loaded")

# build alloc → merge → write out
latest_alloc = build_alloc(dump["accounts"], args.include_nonces)
new_gen = genesis_tpl.copy()
orig_alloc = new_gen.get("alloc", {})
new_gen["alloc"] = {**orig_alloc, **latest_alloc}

# determine output path
if args.output:
out_path = Path(args.output)
if out_path.is_dir():
out_path = out_path / DEFAULT_OUT_NAME
else:
out_path = Path.cwd() / DEFAULT_OUT_NAME
bn = args.block
block_hex = hex(bn)

print(f"⛓ Scanning state pinned at block {bn} (tag {block_hex}, account page {args.page_size})…")
if include_storage:
print(" • Including contract storage (per-account paging). This may take a while.")

# Build alloc
alloc = build_alloc(
args.rpc,
block_hex,
initial_page_size=args.page_size,
min_page_size=args.min_page_size,
include_nonces=include_nonces,
include_code=include_code,
include_storage=include_storage,
timeout=args.rpc_timeout,
storage_page_initial=args.storage_page_size,
storage_page_min=args.storage_min_page_size,
)

# Merge & write
new_gen = dict(genesis_tpl)
base_alloc = dict(new_gen.get("alloc") or {})
base_alloc.update(alloc)
new_gen["alloc"] = base_alloc

out_path = Path(args.output) if args.output else Path.cwd() / DEFAULT_OUT
if out_path.is_dir():
out_path = out_path / DEFAULT_OUT
out_path.parent.mkdir(parents=True, exist_ok=True)
out_path.write_text(json.dumps(new_gen, indent=2))
print(f"✅ Wrote merged genesis → {out_path}")


if __name__ == "__main__":
main()
Loading