Skip to content

Commit 4f1d730

Browse files
fix+test: property tests + delete_from_count_indexed_tree storage cleanup
Property test (A-grade item 3) found a real bug: delete_from_count_indexed_tree didn't clean up the deleted entry's child storage when the entry was a tree (CountTree, etc.). Same class as the storage-orphan bugs in db.delete (6b7ec21) and batch DeleteTree (0688731): - Remove cidx entry from primary's merk tree ✓ - Remove secondary mirror entry ✓ - Clean up the entry's child subtree storage ✗ ← THE BUG Caught in iteration <100 of the property test. FIX: after Element::delete on the cidx primary entry, when the entry was a tree, run find_subtrees on its path and clear each subtree's storage. For nested cidx entries also clear its secondary storage. primary_merk stays live (no drop+reopen) because dropping would lose the staged Element::delete write. PROPERTY TESTS: 300 iterations against single-level cidx + 200 iterations against nested two-level cidx. Each op type (insert / delete / re-create / batch insert) followed by verify_grovedb + top-k diff against an in-memory model. Hand-rolled SplitMix64 PRNG with fixed seeds for reproducibility, no new deps. All 1578 grovedb tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent fb71003 commit 4f1d730

2 files changed

Lines changed: 369 additions & 0 deletions

File tree

grovedb/src/operations/count_indexed_tree.rs

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1190,6 +1190,94 @@ impl GroveDb {
11901190
.map_err(Error::MerkError)
11911191
);
11921192

1193+
// Storage cleanup for tree entries.
1194+
//
1195+
// If the deleted cidx entry is a tree (CountTree, SumTree, etc.),
1196+
// its child storage at `cidx_primary_path + item_key` is now
1197+
// orphaned — the entry is gone from the primary Merk but its
1198+
// children still occupy the storage namespace. Without cleanup,
1199+
// a future insert at the same item_key would observe stale data
1200+
// (orphaned entries surface to `verify_grovedb`'s raw_iter pass
1201+
// even though they're invisible to the Merk tree at the new
1202+
// root). Same class of bug as the direct `db.delete` cidx
1203+
// primary cleanup (commit 6b7ec21d).
1204+
//
1205+
// For nested cidx entries (the deleted item is itself a cidx
1206+
// primary), we also need to clear the secondary's storage at
1207+
// the derived prefix.
1208+
//
1209+
// NOTE: we do NOT drop+reopen primary_merk around this block —
1210+
// dropping the merk without explicit apply would lose the
1211+
// staged Element::delete. The cleanup calls below only use
1212+
// `&self` (via find_subtrees and get_transactional_storage_*),
1213+
// which coexist with the owned primary_merk.
1214+
if is_layered_target {
1215+
let entry_path = path.derive_owned_with_child(item_key.to_vec());
1216+
let entry_path_ref = SubtreePath::from(&entry_path);
1217+
1218+
// Detect whether the deleted entry was itself a cidx
1219+
// primary (nested cidx). Use the existing element snapshot.
1220+
let deleted_was_cidx_primary = matches!(
1221+
existing.underlying(),
1222+
Element::CountIndexedTree(..) | Element::ProvableCountIndexedTree(..)
1223+
);
1224+
1225+
// Recursively clear all primary subtree storage under
1226+
// `entry_path` via the same find_subtrees walk used by
1227+
// db.delete.
1228+
let subtrees_paths = cost_return_on_error!(
1229+
&mut cost,
1230+
self.find_subtrees(&entry_path_ref, Some(transaction), grove_version)
1231+
);
1232+
for subtree_path in subtrees_paths {
1233+
let p: SubtreePath<_> = subtree_path.as_slice().into();
1234+
let mut storage = self
1235+
.db
1236+
.get_transactional_storage_context(p, Some(batch), transaction)
1237+
.unwrap_add_cost(&mut cost);
1238+
cost_return_on_error!(
1239+
&mut cost,
1240+
storage.clear().map_err(|e| {
1241+
Error::CorruptedData(format!(
1242+
"unable to cleanup subtree storage during cidx delete: {e}",
1243+
))
1244+
})
1245+
);
1246+
}
1247+
1248+
// Nested cidx: also clear the deleted entry's secondary
1249+
// storage namespace at Blake3(primary_prefix ‖ 0x01).
1250+
if deleted_was_cidx_primary {
1251+
let primary_prefix =
1252+
grovedb_storage::rocksdb_storage::RocksDbStorage::build_prefix(
1253+
entry_path_ref.clone(),
1254+
)
1255+
.unwrap_add_cost(&mut cost);
1256+
let secondary_prefix =
1257+
grovedb_storage::rocksdb_storage::RocksDbStorage::secondary_prefix_for(
1258+
&primary_prefix,
1259+
)
1260+
.unwrap_add_cost(&mut cost);
1261+
let mut secondary_storage = self
1262+
.db
1263+
.get_transactional_storage_context_by_subtree_prefix(
1264+
secondary_prefix,
1265+
Some(batch),
1266+
transaction,
1267+
)
1268+
.unwrap_add_cost(&mut cost);
1269+
cost_return_on_error!(
1270+
&mut cost,
1271+
secondary_storage.clear().map_err(|e| {
1272+
Error::CorruptedData(format!(
1273+
"unable to cleanup nested cidx secondary during \
1274+
delete_from_count_indexed_tree: {e}",
1275+
))
1276+
})
1277+
);
1278+
}
1279+
}
1280+
11931281
// Open and update secondary.
11941282
let mut secondary_merk = cost_return_on_error!(
11951283
&mut cost,

grovedb/src/tests/count_indexed_tree_tests.rs

Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4193,4 +4193,285 @@ mod tests {
41934193
assert_eq!(&entry.1[24..32], &1u64.to_be_bytes());
41944194
assert_eq!(&entry.2[24..32], &99u64.to_be_bytes());
41954195
}
4196+
4197+
// =====================================================================
4198+
// Property-based stress tests for cidx invariants.
4199+
//
4200+
// Generates long random sequences of operations against a 2-level
4201+
// cidx layout (outer cidx contains CountTrees as values, each
4202+
// CountTree can grow and shrink) and asserts after every op:
4203+
//
4204+
// 1. verify_grovedb() reports no issues — H1-A chain integrity
4205+
// AND content consistency between primary and secondary.
4206+
// 2. count_indexed_top_k() returns the same set the property
4207+
// model says should be there, in the right count order.
4208+
//
4209+
// Random op generation uses a hand-rolled SplitMix64 PRNG so
4210+
// failing seeds are reproducible — no external dep, no test
4211+
// flakiness. Each test has a hard-coded seed; if you discover a
4212+
// failure on a different seed, hard-code it as a regression case.
4213+
//
4214+
// These tests are the structural answer to the audit-found
4215+
// nested-cidx bug class: any future code change that drifts the
4216+
// secondary fails CI here, not in production.
4217+
// =====================================================================
4218+
4219+
/// SplitMix64 — small, deterministic, no allocation. Good enough
4220+
/// for property generation.
4221+
#[derive(Clone)]
4222+
struct Prng(u64);
4223+
impl Prng {
4224+
fn new(seed: u64) -> Self {
4225+
Self(seed)
4226+
}
4227+
fn next_u64(&mut self) -> u64 {
4228+
self.0 = self.0.wrapping_add(0x9E3779B97F4A7C15);
4229+
let mut z = self.0;
4230+
z = (z ^ (z >> 30)).wrapping_mul(0xBF58476D1CE4E5B9);
4231+
z = (z ^ (z >> 27)).wrapping_mul(0x94D049BB133111EB);
4232+
z ^ (z >> 31)
4233+
}
4234+
fn next_usize(&mut self, exclusive_max: usize) -> usize {
4235+
(self.next_u64() as usize) % exclusive_max.max(1)
4236+
}
4237+
}
4238+
4239+
/// Property model: the in-memory ground truth the test maintains in
4240+
/// parallel with the database. After each op, we run a verify pass
4241+
/// AND assert the database's top-k matches the model's view.
4242+
#[derive(Clone, Default)]
4243+
struct CidxModel {
4244+
/// cidx_entry_key → number of items inside its CountTree
4245+
entries: std::collections::BTreeMap<Vec<u8>, u64>,
4246+
}
4247+
impl CidxModel {
4248+
fn top_k_ascending(&self) -> Vec<(u64, Vec<u8>)> {
4249+
// Same ordering as the secondary: (count_be, key) ascending.
4250+
let mut v: Vec<(u64, Vec<u8>)> =
4251+
self.entries.iter().map(|(k, c)| (*c, k.clone())).collect();
4252+
v.sort();
4253+
v
4254+
}
4255+
}
4256+
4257+
/// Apply one random op to both the live database and the in-memory
4258+
/// model, then assert invariants. Returns `true` if the op was
4259+
/// applied (some random selections become no-ops, e.g. delete
4260+
/// against an absent key — we count those but don't fail).
4261+
fn apply_random_op_and_check(
4262+
rng: &mut Prng,
4263+
db: &crate::GroveDb,
4264+
cidx_path: &[&[u8]],
4265+
model: &mut CidxModel,
4266+
grove_version: &GroveVersion,
4267+
iteration: usize,
4268+
) {
4269+
// Key space is small so updates dominate over inserts —
4270+
// exercises count transitions rather than only fresh creates.
4271+
const KEY_SPACE: usize = 8;
4272+
let key_idx = rng.next_usize(KEY_SPACE);
4273+
let key = format!("k{:02}", key_idx).into_bytes();
4274+
4275+
// 5 op kinds in roughly equal proportion.
4276+
let op_kind = rng.next_usize(5);
4277+
4278+
match op_kind {
4279+
0 => {
4280+
// Ensure CountTree exists at this key, then add 1 item
4281+
// inside it (raises its count by 1).
4282+
if !model.entries.contains_key(&key) {
4283+
db.insert_into_count_indexed_tree(
4284+
cidx_path,
4285+
&key,
4286+
Element::empty_count_tree(),
4287+
None,
4288+
grove_version,
4289+
)
4290+
.unwrap()
4291+
.expect("create CountTree");
4292+
model.entries.insert(key.clone(), 0);
4293+
}
4294+
let inner_key = format!("i{:08}", iteration).into_bytes();
4295+
let mut path_vec: Vec<&[u8]> = cidx_path.to_vec();
4296+
path_vec.push(&key);
4297+
db.insert(
4298+
path_vec.as_slice(),
4299+
&inner_key,
4300+
Element::new_item(b"v".to_vec()),
4301+
None,
4302+
None,
4303+
grove_version,
4304+
)
4305+
.unwrap()
4306+
.expect("insert into CountTree");
4307+
*model.entries.get_mut(&key).unwrap() += 1;
4308+
}
4309+
1 => {
4310+
// Delete the cidx entry entirely (if it exists).
4311+
if model.entries.contains_key(&key) {
4312+
db.delete_from_count_indexed_tree(cidx_path, &key, None, grove_version)
4313+
.unwrap()
4314+
.expect("delete cidx entry");
4315+
model.entries.remove(&key);
4316+
}
4317+
}
4318+
2 => {
4319+
// Re-insert the same cidx entry as a fresh empty
4320+
// CountTree. Allowed because
4321+
// insert_into_count_indexed_tree handles the existing-
4322+
// entry case (via the dedicated API; not the direct
4323+
// db.insert path which is rejected for cidx primaries).
4324+
// If the key exists, we delete then re-create to keep
4325+
// the model simple.
4326+
if model.entries.contains_key(&key) {
4327+
db.delete_from_count_indexed_tree(cidx_path, &key, None, grove_version)
4328+
.unwrap()
4329+
.expect("delete before re-create");
4330+
model.entries.remove(&key);
4331+
}
4332+
db.insert_into_count_indexed_tree(
4333+
cidx_path,
4334+
&key,
4335+
Element::empty_count_tree(),
4336+
None,
4337+
grove_version,
4338+
)
4339+
.unwrap()
4340+
.expect("create empty CountTree");
4341+
model.entries.insert(key.clone(), 0);
4342+
}
4343+
3 => {
4344+
// Batch: insert one item into an existing CountTree (if
4345+
// any exist), via the batch path — exercises the
4346+
// bubble-up + nested cidx mirror.
4347+
use crate::batch::QualifiedGroveDbOp;
4348+
if let Some((existing_key, count)) =
4349+
model.entries.iter().next().map(|(k, c)| (k.clone(), *c))
4350+
{
4351+
let inner_key = format!("b{:08}", iteration).into_bytes();
4352+
let mut inner_path: Vec<Vec<u8>> =
4353+
cidx_path.iter().map(|s| s.to_vec()).collect();
4354+
inner_path.push(existing_key.clone());
4355+
let ops = vec![QualifiedGroveDbOp::insert_or_replace_op(
4356+
inner_path,
4357+
inner_key,
4358+
Element::new_item(b"v".to_vec()),
4359+
)];
4360+
db.apply_batch(ops, None, None, grove_version)
4361+
.unwrap()
4362+
.expect("batch insert into existing CountTree");
4363+
*model.entries.get_mut(&existing_key).unwrap() = count + 1;
4364+
}
4365+
}
4366+
_ => {
4367+
// Idle iteration — random selection might land on a
4368+
// model state where this kind has nothing to do (e.g.
4369+
// delete with no entries). That's fine; the iteration
4370+
// count is preserved.
4371+
}
4372+
}
4373+
4374+
// INVARIANT 1: verify_grovedb finds no issues (chain + content).
4375+
let issues = db
4376+
.verify_grovedb(None, false, true, grove_version)
4377+
.expect("verify_grovedb");
4378+
assert!(
4379+
issues.is_empty(),
4380+
"iteration {iteration}: verify_grovedb issues: {:?}",
4381+
issues.keys().collect::<Vec<_>>()
4382+
);
4383+
4384+
// INVARIANT 2: top-k (ascending) matches the model's view.
4385+
let top = db
4386+
.count_indexed_top_k(cidx_path, 100, false, None, grove_version)
4387+
.unwrap()
4388+
.expect("top-k");
4389+
let model_top = model.top_k_ascending();
4390+
assert_eq!(
4391+
top, model_top,
4392+
"iteration {iteration}: top-k drift\n db: {:?}\n model: {:?}",
4393+
top, model_top
4394+
);
4395+
}
4396+
4397+
#[test]
4398+
fn property_random_ops_preserve_cidx_invariant_single_level() {
4399+
// 300 random ops against a single-level cidx. Each op is
4400+
// followed by a full verify_grovedb scan and a top-k diff
4401+
// against the in-memory model. Fixed seed; if you find a
4402+
// failing seed in CI, hard-code it as a regression test.
4403+
let grove_version = GroveVersion::latest();
4404+
let db = make_test_grovedb(grove_version);
4405+
db.insert(
4406+
[TEST_LEAF].as_ref(),
4407+
b"cidx",
4408+
Element::empty_count_indexed_tree(),
4409+
None,
4410+
None,
4411+
grove_version,
4412+
)
4413+
.unwrap()
4414+
.expect("create cidx");
4415+
4416+
let cidx_path: &[&[u8]] = &[TEST_LEAF, b"cidx"];
4417+
let mut model = CidxModel::default();
4418+
let mut rng = Prng::new(0xC1DC_5EED_C0FFEE);
4419+
4420+
for iteration in 0..300 {
4421+
apply_random_op_and_check(
4422+
&mut rng,
4423+
&db,
4424+
cidx_path,
4425+
&mut model,
4426+
grove_version,
4427+
iteration,
4428+
);
4429+
}
4430+
}
4431+
4432+
#[test]
4433+
fn property_random_ops_preserve_cidx_invariant_nested_two_levels() {
4434+
// Same shape but against a NESTED cidx layout
4435+
// TEST_LEAF / outer_cidx / inner_cidx
4436+
// with random ops applied to inner_cidx. Exercises the
4437+
// nested-bubble-up path — the bug class found in commit
4438+
// a8bb34fb. 200 iterations because each verify_grovedb walks
4439+
// both cidx primaries' content.
4440+
let grove_version = GroveVersion::latest();
4441+
let db = make_test_grovedb(grove_version);
4442+
db.insert(
4443+
[TEST_LEAF].as_ref(),
4444+
b"outer_cidx",
4445+
Element::empty_count_indexed_tree(),
4446+
None,
4447+
None,
4448+
grove_version,
4449+
)
4450+
.unwrap()
4451+
.expect("outer");
4452+
db.insert_into_count_indexed_tree(
4453+
[TEST_LEAF, b"outer_cidx"].as_ref(),
4454+
b"inner_cidx",
4455+
Element::empty_count_indexed_tree(),
4456+
None,
4457+
grove_version,
4458+
)
4459+
.unwrap()
4460+
.expect("inner");
4461+
4462+
let cidx_path: &[&[u8]] = &[TEST_LEAF, b"outer_cidx", b"inner_cidx"];
4463+
let mut model = CidxModel::default();
4464+
let mut rng = Prng::new(0xDEADBEEF_CAFEBABE);
4465+
4466+
for iteration in 0..200 {
4467+
apply_random_op_and_check(
4468+
&mut rng,
4469+
&db,
4470+
cidx_path,
4471+
&mut model,
4472+
grove_version,
4473+
iteration,
4474+
);
4475+
}
4476+
}
41964477
}

0 commit comments

Comments
 (0)