diff --git a/docs/book/book.toml b/docs/book/book.toml
index 3b6f2cf38..1f8d7090b 100644
--- a/docs/book/book.toml
+++ b/docs/book/book.toml
@@ -12,4 +12,4 @@ command = "mdbook-mermaid"
 [output.html]
 additional-css = ["lang-selector.css"]
-additional-js = ["mermaid.min.js", "mermaid-init.js", "lang-selector.js"]
+additional-js = ["mermaid.min.js", "mermaid-fixup.js", "mermaid-init.js", "lang-selector.js"]
diff --git a/docs/book/mermaid-fixup.js b/docs/book/mermaid-fixup.js
new file mode 100644
index 000000000..f23681ebe
--- /dev/null
+++ b/docs/book/mermaid-fixup.js
@@ -0,0 +1,26 @@
+// Client-side fallback: converts `<pre><code class="language-mermaid">...</code></pre>`
+// blocks (raw mdbook output when the mdbook-mermaid preprocessor isn't run)
+// into `<pre class="mermaid">...</pre>` blocks that mermaid.js will render.
+//
+// Safe to leave enabled even when the preprocessor IS run — preprocessor
+// output already uses `<pre class="mermaid">`, so the selector below finds
+// nothing and the script is a no-op.
+(() => {
+ function fixup() {
+ const blocks = document.querySelectorAll('pre > code.language-mermaid');
+ blocks.forEach((code) => {
+ const pre = code.parentElement;
+ const replacement = document.createElement('pre');
+ replacement.className = 'mermaid';
+ // textContent decodes HTML entities (&lt; → <, &amp; → &, etc.)
+ replacement.textContent = code.textContent;
+ pre.replaceWith(replacement);
+ });
+ }
+
+ if (document.readyState === 'loading') {
+ document.addEventListener('DOMContentLoaded', fixup);
+ } else {
+ fixup();
+ }
+})();
diff --git a/docs/book/src/SUMMARY.md b/docs/book/src/SUMMARY.md
index 8d4d6e522..6b091779d 100644
--- a/docs/book/src/SUMMARY.md
+++ b/docs/book/src/SUMMARY.md
@@ -11,6 +11,7 @@
- [The Proof System](proof-system.md)
- [The Query System](query-system.md)
- [Aggregate Sum Queries](aggregate-sum-queries.md)
+- [Aggregate Count Queries](aggregate-count-queries.md)
- [Batch Operations](batch-operations.md)
- [Cost Tracking](cost-tracking.md)
- [The MMR Tree](mmr-tree.md)
diff --git a/docs/book/src/aggregate-count-queries.md b/docs/book/src/aggregate-count-queries.md
new file mode 100644
index 000000000..97a8aa0d4
--- /dev/null
+++ b/docs/book/src/aggregate-count-queries.md
@@ -0,0 +1,671 @@
+# Aggregate Count Queries
+
+## Overview
+
+An **Aggregate Count Query** lets a caller ask a single, very specific question:
+
+> "How many elements in this subtree fall inside this key range?"
+
+The answer comes back as a `u64`, and on a **ProvableCountTree** or
+**ProvableCountSumTree** it can be returned together with a cryptographic proof
+that anyone holding the tree's root hash can verify — without ever materializing
+the elements themselves.
+
+Where regular queries return key/value pairs and aggregate-sum queries return
+running totals of `SumItem` values, an aggregate-count query returns only a
+**count** and a proof of that count.
+
+It is implemented as a new `QueryItem` variant:
+
+```rust
+pub enum QueryItem {
+ Key(Vec<u8>),
+ Range(Range<Vec<u8>>),
+ // ... existing variants ...
+ RangeAfterToInclusive(RangeInclusive<Vec<u8>>),
+
+ /// Count the elements matched by the inner range, without returning them.
+ /// Only valid on ProvableCountTree / ProvableCountSumTree (and their
+ /// `NonCounted` wrapper variants).
+ AggregateCountOnRange(Box<QueryItem>),
+}
+```
+
+The wrapped `QueryItem` is the **range to count over** — it must be one of the
+true range variants: `Range`, `RangeInclusive`, `RangeFrom`, `RangeTo`,
+`RangeToInclusive`, `RangeAfter`, `RangeAfterTo`, `RangeAfterToInclusive`.
+The single-key (`Key`), full-range (`RangeFull`), and self-nested
+(`AggregateCountOnRange`) variants are all **rejected**.
+
+> **Why are `Key` and `RangeFull` rejected?**
+>
+> - **`Key(k)`** would always return `0` or `1` — an existence test. Callers
+> should use the existing `GroveDb::has_raw` / `GroveDb::get_raw` (or their
+> provable variants) instead. Routing existence checks through this API
+> would force a count-shaped result type and proof shape on a question that
+> already has a much cheaper, narrower answer.
+> - **`RangeFull`** has its answer already exposed by the parent's
+> `Element::ProvableCountTree(_, count, _)` /
+> `Element::ProvableCountSumTree(_, count, _, _)` bytes, which are
+> hash-verified by the parent Merk's proof. Going through
+> `AggregateCountOnRange(RangeFull)` would always produce a strictly heavier
+> proof for an answer the caller can read directly.
+>
+> In short, `AggregateCountOnRange` exists for the case the rest of the API
+> can't already answer cheaply: counting a **bounded sub-range** of keys.
+
+## Why this works only on Provable Count Trees
+
+GroveDB has six tree types that track a count:
+
+| Tree type | Count tracked? | Count in node hash? | AggregateCountOnRange allowed? |
+|--------------------------|:--------------:|:-------------------:|:-----------------------:|
+| `CountTree` | yes | no | **no** |
+| `CountSumTree` | yes | no | **no** |
+| `ProvableCountTree` | yes | **yes** | **yes** |
+| `ProvableCountSumTree` | yes | **yes** (count only)| **yes** |
+| `NonCountedProvableCountTree` | yes (via wrapper) | yes (inner) | **yes** |
+| `NonCountedProvableCountSumTree` | yes (via wrapper) | yes (inner) | **yes** |
+
+Only the **provable** variants bake the count into the node hash via
+`node_hash_with_count(kv_hash, left, right, count)`. Because every node's count
+participates in the Merkle root, a verifier holding only the root hash can
+reconstruct enough of the tree from a proof to **trust** the counts that appear
+inside.
+
+Plain `CountTree` and `CountSumTree` track counts in storage as a convenience
+for the executing node, but those counts are not in the hash. A "proof" of
+their count would be unverifiable, so we reject `AggregateCountOnRange` against them
+at query-construction time.
+
+The two `NonCounted*` wrapper variants are accepted because the wrapper only
+tells the **parent** tree to skip this element when aggregating its own count;
+the inner tree is still a fully-fledged provable count tree.
+
+## Query-Level Constraints
+
+`AggregateCountOnRange` is a **terminal** query item. When it appears, the surrounding
+`Query` is reduced to a single, well-defined operation: "count, then return."
+
+```rust
+pub struct Query {
+ pub items: Vec<QueryItem>,
+ pub default_subquery_branch: SubqueryBranch,
+ pub conditional_subquery_branches: Option<IndexMap<QueryItem, SubqueryBranch>>,
+ pub left_to_right: bool,
+ pub add_parent_tree_on_subquery: bool,
+}
+```
+
+If any `QueryItem::AggregateCountOnRange(_)` appears in `items`, the query is only
+well-formed when **all** of the following hold:
+
+1. `items.len() == 1` — no other range items, no other counts, no mixing.
+2. The inner `QueryItem` is **not** `Key` (use `has_raw` / `get_raw` for
+ existence tests — see the note above).
+3. The inner `QueryItem` is **not** `RangeFull` (use the parent element to read
+ the unconditional total — see the note above).
+4. The inner `QueryItem` is not itself another `AggregateCountOnRange`.
+5. `default_subquery_branch.subquery.is_none()` and `subquery_path.is_none()`.
+6. `conditional_subquery_branches.is_none()` (or empty).
+7. The targeted subtree's `TreeType` is one of the four allowed variants above.
+8. The enclosing `SizedQuery` does not set a `limit` or `offset`. Counting is an
+ aggregate over the matched range — pagination would silently change the
+ answer and is therefore rejected.
+9. `left_to_right` is **ignored** (counting is direction-agnostic). It is not
+ an error to set it, but it has no effect on the returned count or proof.
+
+Violating constraints 1–8 returns `Error::InvalidQuery(...)` with a message
+that names the offending field, before any I/O is performed.
+
+## API surface
+
+`AggregateCountOnRange` queries go through the **same** `prove_query` entry
+point as every other `PathQuery` — only the verifier is dedicated:
+
+```rust
+// Prove side — unchanged from regular queries:
+GroveDb::prove_query(&path_query, prove_options, grove_version)
+ -> CostResult<Vec<u8>, Error>
+
+// Verify side — dedicated, returns (root_hash, count):
+GroveDb::verify_aggregate_count_query(proof, &path_query, grove_version)
+ -> Result<(CryptoHash, u64), Error>
+```
+
+A bare tuple is used for the result rather than a wrapper struct because
+the count is already a `u64` and the `path_query` itself echoes the inner
+range — there is nothing else to return.
+
+> **Note on `NonCounted` children.** `Element::NonCounted` wrappers tell
+> the parent tree to skip the wrapped element when aggregating its own
+> count. `AggregateCountOnRange` honors this: every node in a
+> `ProvableCountTree` carries an own-count of 1 (normal) or 0
+> (`NonCounted`-wrapped), and the verifier credits only the **own-count**
+> to the in-range total when the boundary key falls in range. So
+> `NonCounted` children are excluded from the result, matching the
+> tree's own aggregate.
+>
+> Mechanically the verifier derives each boundary node's own-count from
+> its committed aggregate as
+> `aggregate − left_struct − right_struct` (see the "Verifier shape
+> walk" section). For a `NonCounted` leaf, `aggregate = 0` and there are
+> no children, so own-count = 0 and the key contributes nothing.
+
+## How the Proof is Built
+
+For a `ProvableCountTree`, every node hash already commits to the count of its
+own subtree via `node_hash_with_count(kv_hash, left, right, count)`. The proof
+generator's job is to produce just enough structure that the verifier can:
+
+1. Reconstruct the **root hash** of the queried Merk and check it against the
+ expected hash.
+2. Compute the answer **count** from the count fields embedded along the way.
+
+To do that, every proof node has a role; we use a small vocabulary of
+proof-node types — three from the existing proof system, plus one new
+self-verifying node added specifically for this proof shape:
+
+| Role in proof | Proof node type | What it carries | Why we picked it |
+|----------------------------|------------------------------------------------------------------------------|----------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------|
+| **On-path / boundary** | `KVDigestCount(key, value_hash, count)` | key + value digest + subtree count | the verifier needs the **key** to test "is it in the range?", and the count is hash-bound via `node_hash_with_count` so it can also be used as the structural count of this subtree by ancestor own-count derivation |
+| **Fully-inside root** | `HashWithCount(kv_hash, left_child_hash, right_child_hash, count)` | the four fields needed to recompute `node_hash_with_count` | one op per collapsed subtree, **and self-verifying** — see security note below |
+| **Fully-outside** | `HashWithCount(kv_hash, left_child_hash, right_child_hash, count)` (same) | same shape as the inside variant | the structural count of an outside subtree is needed by the boundary parent's `own_count = aggregate − left − right` derivation; only `HashWithCount` carries a *hash-bound* count, so we use it for outside subtrees too. Plain `Hash(_)` would not bind a count and is therefore not used in count proofs. |
+| **Empty side** | (the empty-tree sentinel, no `Push` needed) | — | a missing child contributes hash = 0 and count = 0 to the parent |
+
+> **Why `HashWithCount` is self-verifying.** The `count` value carried by a
+> `HashWithCount` op is *bound* to the parent merk's hash chain, not trusted
+> on faith. The verifier computes
+> `node_hash_with_count(kv_hash, left_child_hash, right_child_hash, count)`
+> from the four committed fields and uses the result as the subtree's
+> committed `node_hash` for the parent's hash recomputation. If the prover
+> lied about `count`, the recomputed `node_hash` diverges from what the
+> parent committed, and the parent's Merkle-root check fails. (An earlier
+> draft of this design used `HashWithCount(node_hash, count)` only — that
+> form was rejected during review because the count would have been
+> trustlessly attached metadata, with no cryptographic binding. See the
+> "Verifier shape walk" section below for the second half of the
+> security story.)
+
+### Walking running example
+
+We'll use this 7-key `ProvableCountTree` as the running example through every
+diagram below. Counts shown next to each node are "size of the subtree rooted
+here":
+
+```mermaid
+graph TD
+ d["d<br/>count = 7"]
+ b["b<br/>count = 3"]
+ f["f<br/>count = 3"]
+ a["a<br/>count = 1"]
+ c["c<br/>count = 1"]
+ e["e<br/>count = 1"]
+ g["g<br/>count = 1"]
+ d --> b
+ d --> f
+ b --> a
+ b --> c
+ f --> e
+ f --> g
+
+ style d fill:#fef9e7,stroke:#f39c12,stroke-width:2px
+```
+
+Below, each per-case diagram colours nodes by the role table above:
+
+- 🟢 **green** = `HashWithCount` (fully-inside, contributes count, not descended)
+- 🟡 **yellow** = `KVDigestCount` (on-path / boundary, key tested for in-range)
+- ⚪ **gray** = `HashWithCount` used as a fully-outside subtree (carries the
+ structural count needed by the boundary parent's `own_count` derivation,
+ but its key is not in range so it contributes 0 to the in-range total)
+
+---
+
+### Case 1 — Open ranges (one bound)
+
+These are the variants with a single bound: `RangeFrom(a..)`, `RangeTo(..b)`,
+`RangeToInclusive(..=b)`, `RangeAfter((a, ..))`. Conceptually we walk down to
+that one bound, partitioning each subtree along the way into "fully on the
+included side" or "fully on the excluded side".
+
+#### Example — `RangeFrom("c"..)` → keys ≥ "c"
+
+Expected: `{c, d, e, f, g}`, count = 5.
+
+```mermaid
+graph TD
+ d["d<br/>KVDigestCount<br/>key = d, vh, count = 7"]
+ b["b<br/>KVDigestCount<br/>key = b, vh, count = 3"]
+ f["f<br/>HashWithCount<br/>kv_hash, l, r, count = 3"]
+ aH["a<br/>HashWithCount<br/>kv_hash, l, r, count = 1"]
+ c["c<br/>KVDigestCount<br/>key = c, vh, count = 1"]
+ d --> b
+ d --> f
+ b --> aH
+ b --> c
+
+ style d fill:#fef9e7,stroke:#f39c12,stroke-width:2px
+ style b fill:#fef9e7,stroke:#f39c12,stroke-width:2px
+ style c fill:#fef9e7,stroke:#f39c12,stroke-width:2px
+ style f fill:#d5f5e3,stroke:#27ae60,stroke-width:2px
+ style aH fill:#e8e8e8,stroke:#999,stroke-dasharray:5 5
+```
+
+Why each role:
+
+- **d, b, c** — boundary nodes on the walk to the lower bound `"c"`. Each is
+ `KVDigestCount` because the verifier must test its key against `>= "c"`.
+- **a** — left child of `b`; "a" < "c", so its entire subtree is excluded
+ from the in-range total. Sent as a `HashWithCount` (no key) — the verifier
+ needs the structural count = 1 to derive `b`'s `own_count`, and this is
+ the only proof-node type that binds the count to `b`'s hash chain. The
+ `a` subtree contributes 0 to the in-range total (its key is not tested).
+- **f** — right child of `d`; "d" < "f" and we're including everything ≥ "c",
+ so the entire `f` subtree (including its descendants) is in-range.
+ We don't need to descend — `f` is sent as a single `HashWithCount` op
+ whose `(kv_hash, left_child_hash, right_child_hash, count)` lets the
+ verifier recompute `f.node_hash` self-contained, and contributes the full
+ subtree count of 3 directly. **The original tree's `e` and `g` children
+ do not appear as separate proof ops** — their hashes live inside the
+ `HashWithCount`'s `left_child_hash` / `right_child_hash` fields.
+
+Verifier total:
+
+| Node | In range? | Contribution |
+|------|-----------|--------------|
+| d (KVDigestCount, key="d") | "d" ≥ "c" | **+1** |
+| b (KVDigestCount, key="b") | "b" < "c" | +0 |
+| a (HashWithCount, count=1) | (outside, key not tested) | +0 |
+| c (KVDigestCount, key="c") | "c" ≥ "c" | **+1** |
+| f (HashWithCount, count=3) | (whole subtree in range) | **+3** |
+
+→ **count = 5** ✓
+
+#### Example — `RangeAfter(("b", ..))` → keys > "b"
+
+Same expected match set `{c, d, e, f, g}`, count = 5 — but the boundary
+walk stops one level higher (at `b` instead of `c`), and the in-range test
+flips from `>=` to `>`.
+
+```mermaid
+graph TD
+ d["d<br/>KVDigestCount<br/>key = d, vh, count = 7"]
+ b["b<br/>KVDigestCount<br/>key = b, vh, count = 3"]
+ f["f<br/>HashWithCount<br/>kv_hash, l, r, count = 3"]
+ aH["a<br/>HashWithCount<br/>kv_hash, l, r, count = 1"]
+ c["c<br/>HashWithCount<br/>kv_hash, l, r, count = 1"]
+ d --> b
+ d --> f
+ b --> aH
+ b --> c
+
+ style d fill:#fef9e7,stroke:#f39c12,stroke-width:2px
+ style b fill:#fef9e7,stroke:#f39c12,stroke-width:2px
+ style c fill:#d5f5e3,stroke:#27ae60,stroke-width:2px
+ style f fill:#d5f5e3,stroke:#27ae60,stroke-width:2px
+ style aH fill:#e8e8e8,stroke:#999,stroke-dasharray:5 5
+```
+
+Why each role differs from the previous example:
+
+- **b** is now the boundary's terminus, not `c`. It is still `KVDigestCount`
+ because the verifier needs the key to apply the in-range test — but the
+ test is now `> "b"`, so `b` itself **fails** and contributes 0.
+- **c** is the right child of `b`. Every key in `c`'s subtree is `> "b"`
+ (here, just the leaf `c` itself), so the whole subtree is in-range. We
+ don't descend; `c` becomes `HashWithCount` (no key needed — its
+ `(kv_hash, l, r, count)` self-contains everything the verifier needs)
+ and contributes its count of 1 directly. Compare to the previous example
+ where `c` was a boundary node tested against `>= "c"`.
+- **a** plays the same role as before — fully outside, sent as
+ `HashWithCount` so its structural count of 1 is hash-bound to `b`.
+ Contributes 0 to the in-range total (key not tested). **f's
+ original-tree children (`e`, `g`) do not appear as separate proof ops**
+ — they live inside `f`'s `HashWithCount` fields.
+
+Verifier total:
+
+| Node | In range? | Contribution |
+|------|-----------|--------------|
+| d (KVDigestCount, key="d") | "d" > "b" | **+1** |
+| b (KVDigestCount, key="b") | "b" > "b" → no | +0 |
+| a (HashWithCount, count=1) | (outside, key not tested) | +0 |
+| c (HashWithCount, count=1) | (whole subtree in range) | **+1** |
+| f (HashWithCount, count=3) | (whole subtree in range) | **+3** |
+
+→ **count = 5** ✓
+
+> **Take-away:** the *match set* is the same as `RangeFrom("c"..)`, but the
+> *proof shape* is slightly cheaper — one fewer `KVDigestCount` and one extra
+> `HashWithCount` — because the bound aligns with an internal node rather than
+> a leaf. The generator picks the shape based on where the bound key lives
+> in the tree, not on what the user wrote.
+
+The same pattern, mirrored, applies to `RangeTo(..b)` and
+`RangeToInclusive(..=b)` (upper-bound variants — boundary walk goes right,
+fully-inside subtrees hang off the left of each step). The only differences
+across all four open-range variants are which side of each split is
+"fully-included" and whether the boundary key itself counts (`>=` vs `>`
+for the lower side, `<` vs `<=` for the upper side).
+
+---
+
+### Case 2 — Closed ranges (both bounds)
+
+These are the variants with both a lower and upper bound: `Range(a..b)`,
+`RangeInclusive(a..=b)`, `RangeAfterTo((a, b))`, `RangeAfterToInclusive((a, ..=b))`.
+
+The proof has **two** boundary walks meeting at the lowest common ancestor of
+the two bounds. Subtrees fully between the two bounds appear as
+`HashWithCount`; subtrees fully outside both bounds **also** appear as
+`HashWithCount` (the structural count is needed by the boundary parent's
+`own_count` derivation, and only `HashWithCount` binds that count to the
+parent's hash chain).
+
+To make the structure interesting we'll use a slightly bigger example tree
+than for Case 1 — 15 keys (`a` through `o`), 4 levels deep, balanced as a
+perfect binary tree. Counts shown are subtree sizes:
+
+```mermaid
+graph TD
+ h["h<br/>count = 15"]
+ d["d<br/>count = 7"]
+ l["l<br/>count = 7"]
+ b["b<br/>count = 3"]
+ f["f<br/>count = 3"]
+ j["j<br/>count = 3"]
+ n["n<br/>count = 3"]
+ a["a<br/>count = 1"]
+ c["c<br/>count = 1"]
+ e["e<br/>count = 1"]
+ g["g<br/>count = 1"]
+ i["i<br/>count = 1"]
+ k["k<br/>count = 1"]
+ m["m<br/>count = 1"]
+ o["o<br/>count = 1"]
+ h --> d
+ h --> l
+ d --> b
+ d --> f
+ l --> j
+ l --> n
+ b --> a
+ b --> c
+ f --> e
+ f --> g
+ j --> i
+ j --> k
+ n --> m
+ n --> o
+
+ style h fill:#fef9e7,stroke:#f39c12,stroke-width:2px
+```
+
+#### Example — `RangeInclusive("c"..="l")` → keys ∈ [c, l]
+
+Expected: `{c, d, e, f, g, h, i, j, k, l}`, count = 10.
+
+```mermaid
+graph TD
+ h["h<br/>KVDigestCount<br/>key = h, vh, count = 15"]
+ d["d<br/>KVDigestCount<br/>key = d, vh, count = 7"]
+ l["l<br/>KVDigestCount<br/>key = l, vh, count = 7"]
+ b["b<br/>KVDigestCount<br/>key = b, vh, count = 3"]
+ f["f<br/>HashWithCount<br/>kv_hash, l, r, count = 3"]
+ j["j<br/>HashWithCount<br/>kv_hash, l, r, count = 3"]
+ nH["n subtree<br/>HashWithCount<br/>kv_hash, l, r, count = 3"]
+ aH["a<br/>HashWithCount<br/>kv_hash, l, r, count = 1"]
+ c["c<br/>KVDigestCount<br/>key = c, vh, count = 1"]
+ h --> d
+ h --> l
+ d --> b
+ d --> f
+ l --> j
+ l --> nH
+ b --> aH
+ b --> c
+
+ style h fill:#fef9e7,stroke:#f39c12,stroke-width:2px
+ style d fill:#fef9e7,stroke:#f39c12,stroke-width:2px
+ style l fill:#fef9e7,stroke:#f39c12,stroke-width:2px
+ style b fill:#fef9e7,stroke:#f39c12,stroke-width:2px
+ style c fill:#fef9e7,stroke:#f39c12,stroke-width:2px
+ style f fill:#d5f5e3,stroke:#27ae60,stroke-width:2px
+ style j fill:#d5f5e3,stroke:#27ae60,stroke-width:2px
+ style aH fill:#e8e8e8,stroke:#999,stroke-dasharray:5 5
+ style nH fill:#e8e8e8,stroke:#999,stroke-dasharray:5 5
+```
+
+Why each role:
+
+- **h** — LCA of `"c"` and `"l"`. Sits above both walks, so it's a
+ `KVDigestCount` and the verifier tests its key against `[c, l]`.
+- **d** — on the left walk (down to lower bound `c`). `KVDigestCount`,
+ key tested.
+- **l** — on the right walk (down to upper bound `l`); also the upper bound
+ itself. `KVDigestCount`, key tested (it passes — `l ≤ l`).
+- **b** — on the left walk (`b < c`, so we have to descend further to find
+ the lower bound). `KVDigestCount`, key tested (it fails — `b < c`).
+- **c** — the lower bound itself. `KVDigestCount`, key tested (it passes —
+ `c ≥ c`).
+- **a** — left of `b`; "a" < "c", entire subtree outside. Sent as
+ `HashWithCount` carrying `(kv_hash, l, r, count = 1)` so its structural
+ count is hash-bound to `b`. Contributes 0 to the in-range total.
+- **n** — right of `l`; entire subtree has keys > "l". The whole `n`
+ subtree (n, m, o) collapses to a single `HashWithCount` carrying
+ `(kv_hash, l, r, count = 3)` so its structural count is hash-bound to
+ `l`. Contributes 0 to the in-range total.
+- **f** — right child of `d`. Every key under `f` is `> "d"` and `≤ "g" < "l"`,
+ so the entire subtree is in-range. We do not descend; `f` becomes a single
+ `HashWithCount` op carrying `(kv_hash, left_child_hash, right_child_hash,
+ count=3)` and contributes 3 directly. **Its original-tree children `e`
+ and `g` do not appear as separate proof ops** — their hashes are inside
+ `f`'s `HashWithCount` fields.
+- **j** — left child of `l`. Same shape as `f`: every key under `j` is
+ `≥ "i" > "c"` and `≤ "k" < "l"`, so the entire subtree is in-range.
+ `HashWithCount`, contributes count = 3. `i` and `k` likewise live inside
+ `j`'s embedded child hashes.
+
+> **Each collapsed subtree is one Push op.** Because `HashWithCount`
+> embeds its `(kv_hash, left_child_hash, right_child_hash, count)`
+> directly, every fully-inside subtree contributes exactly **one** proof
+> op regardless of its depth in the original tree. The proof for this
+> 15-key range scan in a 4-level tree is just **9 push ops** (h, d, b, c,
+> a, f, l, j, n) plus the structural Parent/Child ops — barely more than
+> the 7-key example in Case 1. This is what "O(log n) regardless of
+> count" looks like in practice: deeper trees do not blow up the proof.
+
+Verifier total:
+
+| Node | In range? | Contribution |
+|------|-----------|--------------|
+| h (KVDigestCount, key="h") | "c" ≤ "h" ≤ "l" | **+1** |
+| d (KVDigestCount, key="d") | "c" ≤ "d" ≤ "l" | **+1** |
+| b (KVDigestCount, key="b") | "b" < "c" → no | +0 |
+| a (HashWithCount, count=1) | (outside, key not tested) | +0 |
+| c (KVDigestCount, key="c") | "c" ≤ "c" ≤ "l" | **+1** |
+| f (HashWithCount, count=3) | (whole subtree in range) | **+3** |
+| l (KVDigestCount, key="l") | "c" ≤ "l" ≤ "l" | **+1** |
+| j (HashWithCount, count=3) | (whole subtree in range) | **+3** |
+| n (HashWithCount, count=3) | (outside, key not tested) | +0 |
+
+→ **count = 10** ✓
+
+#### Variant differences
+
+The four closed-range variants differ only in **whether each boundary key
+itself counts**, not in the proof shape:
+
+| Variant | Lower test | Upper test |
+|----------------------------------|------------|------------|
+| `Range(a..b)` | key ≥ a | key < b |
+| `RangeInclusive(a..=b)` | key ≥ a | key ≤ b |
+| `RangeAfterTo((a, b))` | key > a | key < b |
+| `RangeAfterToInclusive((a, ..=b))` | key > a | key ≤ b |
+
+The verifier applies the relevant test at each boundary `KVDigestCount`. The
+generator does not need to know which variant is in play — it always emits the
+same shape, and the inclusivity flags travel with the query for the verifier.
+
+---
+
+### Empty subtrees
+
+An aggregate-count query against an empty Merk returns `count = 0` with a
+trivial proof (the empty-tree marker). Asking for `AggregateCountOnRange` on a
+path that does not resolve to a tree at all is an error
+(`Error::PathNotFound(...)`), the same as any other query.
+
+### Why this is `O(log n)` regardless of count
+
+Every diagram above has at most:
+
+- One walk per bound (so 1 or 2 walks of depth `O(log n)`),
+- A constant number of fully-inside subtree roots per level (the "right
+ siblings" hanging off the left walk and "left siblings" hanging off the
+ right walk).
+
+Each of those is a single proof-node Push. Therefore the proof's node count is
+`O(log n)`, and crucially does **not** depend on the answer's value. Counting
+a billion-key range can be done with the same proof size as counting a
+hundred-key range.
+
+## Verifier shape walk
+
+The verifier is **two-phase**, not just a "count everything visible" pass.
+Without this discipline a malicious prover could:
+
+1. Send a single `Push(Hash(expected_root))` for a non-empty tree, and
+ receive `(expected_root, 0)` for any range — root hash matches, count is
+ trivially zero.
+2. Replace an in-range collapsed subtree with a hash carrying the *same*
+ `node_hash` but no count, undercounting by the missing subtree count.
+3. Attach extra `KVDigestCount` children below a keyless leaf node.
+ `Tree::hash()` for those node types is computed only from their
+ embedded fields and ignores any reconstructed children, so the root
+ hash stays valid — but a verifier that summed every visited node would
+ credit the bogus children as `+1` each.
+4. Lie about the structural count of an outside subtree to skew an
+ ancestor boundary node's `own_count` derivation, over- or under-
+ counting `NonCounted`-aware boundary contributions.
+
+To rule out all four, the verifier:
+
+1. **Phase 1** — decode the proof bytes into a `ProofTree` via
+ `execute_with_options`. The visit-node closure performs only a coarse
+ allowlist (`HashWithCount` / `KVDigestCount`; **plain `Hash` is not
+ accepted in count proofs**) and **does not count anything**. (We
+ disable the AVL balance check for this proof shape — count proofs
+ intentionally collapse one side to height 1 while descending the
+ other.)
+2. **Phase 2** — walk the reconstructed tree with the same inherited
+ exclusive subtree-key bounds the prover used (`(None, None)` at the
+ root). At each position, call `classify_subtree(bounds, range)` and
+ bind the proof-tree node type to the classification, returning the pair
+ `(in_range_count, structural_count)` where `structural_count` is the
+ merk-recorded aggregate count of this subtree (used by the parent's
+ `own_count` derivation):
+
+ | Classification | Required node | Children allowed? | `(in_range, structural)` |
+ |----------------|-----------------------------------------------------------------------|-------------------------|-----------------------------------------------------------------------------------------------------------|
+ | `Disjoint` | leaf `HashWithCount(_, _, _, count)` | **no** (must be a leaf) | `(0, count)` |
+ | `Contained` | leaf `HashWithCount(_, _, _, count)` | **no** (must be a leaf) | `(count, count)` — `count` is the merk's aggregate, which already excludes `NonCounted` entries (own = 0) |
+ | `Boundary` | `KVDigestCount(key, _, aggregate)` with `key` strictly inside `bounds` | yes — recurse | `own_count = aggregate − left_struct − right_struct`; in-range = `left_in + right_in + (own_count if range.contains(key) else 0)`; structural = `aggregate` |
+
+3. Counts are summed with `checked_add`; the boundary `own_count` uses
+ `checked_sub` (so a malformed proof claiming children's structural
+ counts that exceed the parent's aggregate is rejected, not silently
+ saturated).
+
+Because every leaf-shape position is forced to be a leaf, attack 3
+(smuggled counted children under a keyless node) is rejected. Because every
+`Contained` and `Disjoint` position must hold `HashWithCount` (and its
+count is bound to the parent's hash via `node_hash_with_count`), attacks 2
+and 4 are both rejected — outside subtrees can't lie about their
+structural count any more than inside ones can. Because the root's
+`(None, None)` bounds against any bounded inner range classify as
+`Boundary` (requiring `KVDigestCount`), attack 1 is rejected.
+
+The shape walk is independent of the chain-hash check: even a proof whose
+reconstructed root happens to match the expected root will be rejected if
+its shape diverges from what `classify_subtree` expects.
+
+## Decode safety
+
+`QueryItem::AggregateCountOnRange(Box<QueryItem>)` is the only recursive
+variant in the enum. To prevent a small malicious payload of repeated
+variant-10 bytes from exhausting the stack inside the bincode or serde
+decoder before any validation runs:
+
+- The bincode `Decode` / `BorrowDecode` impls dispatch through internal
+ `decode_with_depth` helpers with `MAX_QUERY_ITEM_DECODE_DEPTH = 4` (the
+ only legal nesting is one wrap, plus headroom). Exceeding the limit
+ errors with `"QueryItem nesting depth exceeded maximum during
+ deserialization"`.
+- The serde `Deserialize` impl deserializes the inner item via a
+ `NonAggregateInner` newtype wrapper whose `Field` enum **omits**
+ `AggregateCountOnRange`, so a nested-aggregate payload is rejected by
+ serde's enum dispatcher immediately, with no recursion through
+ `QueryItem::deserialize`.
+- Defense in depth: an inner `AggregateCountOnRange` is also rejected on
+ decode (in addition to being rejected by
+ `Query::validate_aggregate_count_on_range`).
+
+## Cost Model
+
+`AggregateCountOnRange` queries are designed to be cheap and predictable:
+
+- **Storage seeks:** `O(log n)`.
+- **Hash calls:** one per node in the proof.
+- **Proof bytes:** `O(log n) * (hash size + count varint size)`.
+
+There is no per-element cost component, because no elements are read or
+returned. This is the headline reason the API exists — a billion-element tree
+can be counted in a few hundred bytes of proof.
+
+The cost-tracking integration mirrors regular range queries, but with the
+"loaded bytes" component dominated by the proof shape rather than element
+payloads.
+
+## API Sketch
+
+```rust
+use grovedb::{Element, GroveDb, PathQuery, Query, SizedQuery};
+use grovedb_query::QueryItem;
+
+// "How many votes have keys between block 1_000 and 2_000 (exclusive)?"
+// Use the helper constructor to skip the boilerplate of building the Query
+// and SizedQuery by hand.
+let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![b"votes".to_vec()],
+ QueryItem::Range(1_000u64.to_be_bytes().to_vec()..2_000u64.to_be_bytes().to_vec()),
+);
+
+let proof_bytes = db
+ .prove_query(&path_query, None, grove_version)
+ .unwrap()
+ .expect("prove failed");
+
+// Verifier side — only needs the proof bytes + the trusted root hash.
+let (root, count) = GroveDb::verify_aggregate_count_query(
+ &proof_bytes, &path_query, grove_version,
+).expect("verify failed");
+
+assert_eq!(root, expected_root_hash);
+println!("votes in [1000, 2000): {}", count);
+```
+
+## Comparison Table
+
+| Feature | Regular `Query` | `AggregateSumQuery` | `AggregateCountOnRange` (this doc) |
+|----------------------------------|------------------------------|----------------------------------|---------------------------------------|
+| Returns | Elements / keys | Sum + matched key/value pairs | A single `u64` count |
+| Stops on | Limit, end of range | Sum limit and/or item limit | Range bounds (whole match counted) |
+| Subqueries allowed | Yes | No | **No** |
+| Other items in same `Query` | Yes | N/A (own struct) | **No** — must be the only item |
+| `limit` / `offset` honored | Yes | Yes (item limit) | **No** — rejected at validation |
+| Required tree type | Any | `SumTree`, `BigSumTree`, ... | Provable count trees only |
+| Proof size relative to result | O(result) | O(matched items) | **O(log n)** regardless of count |
+
+---
diff --git a/docs/book/src/query-system.md b/docs/book/src/query-system.md
index 03bcaf01a..564b0cf5c 100644
--- a/docs/book/src/query-system.md
+++ b/docs/book/src/query-system.md
@@ -50,9 +50,15 @@ pub enum QueryItem {
RangeAfter(RangeFrom<Vec<u8>>), // (start..) exclusive start
RangeAfterTo(Range<Vec<u8>>), // (start..end) exclusive both
RangeAfterToInclusive(RangeInclusive<Vec<u8>>), // (start..=end]
+ AggregateCountOnRange(Box<QueryItem>), // Count-only — see Aggregate Count Queries
}
```
+> **`AggregateCountOnRange`** is a terminal item: when present, it must be the **only**
+> item in the `Query`, and the query may not carry subqueries or pagination.
+> See [Aggregate Count Queries](aggregate-count-queries.md) for the full
+> contract — it is restricted to provable count trees.
+
Example queries:
Merk tree (sorted): `alice bob carol dave eve frank`
diff --git a/grovedb-bulk-append-tree/src/proof/mod.rs b/grovedb-bulk-append-tree/src/proof/mod.rs
index 7ee0a0d92..c523a69fc 100644
--- a/grovedb-bulk-append-tree/src/proof/mod.rs
+++ b/grovedb-bulk-append-tree/src/proof/mod.rs
@@ -135,6 +135,13 @@ fn query_to_ranges(query: &Query, total_count: u64) -> Result, B
}
(s, e)
}
+ QueryItem::AggregateCountOnRange(_) => {
+ return Err(BulkAppendError::InvalidInput(
+ "AggregateCountOnRange is only supported on provable count trees, \
+ not on BulkAppendTree"
+ .into(),
+ ));
+ }
};
ranges.push((start, end));
}
diff --git a/grovedb-dense-fixed-sized-merkle-tree/src/proof/mod.rs b/grovedb-dense-fixed-sized-merkle-tree/src/proof/mod.rs
index f7afa345d..8178f48be 100644
--- a/grovedb-dense-fixed-sized-merkle-tree/src/proof/mod.rs
+++ b/grovedb-dense-fixed-sized-merkle-tree/src/proof/mod.rs
@@ -116,6 +116,13 @@ pub(crate) fn query_to_positions(query: &Query, count: u16) -> Result,
positions.insert(p);
}
}
+ QueryItem::AggregateCountOnRange(_) => {
+ return Err(DenseMerkleError::InvalidProof(
+ "AggregateCountOnRange is only supported on provable count trees, \
+ not on dense fixed-size merkle trees"
+ .into(),
+ ));
+ }
}
}
diff --git a/grovedb-query/Cargo.toml b/grovedb-query/Cargo.toml
index db64d2a2b..33b93049f 100644
--- a/grovedb-query/Cargo.toml
+++ b/grovedb-query/Cargo.toml
@@ -26,6 +26,7 @@ grovedb-storage = { version = "4.0.0", path = "../storage", optional = true }
[dev-dependencies]
assert_matches = { workspace = true }
+serde_test = "1.0"
[features]
default = []
diff --git a/grovedb-query/src/proofs/encoding.rs b/grovedb-query/src/proofs/encoding.rs
index 8cfadb303..22c20b1d2 100644
--- a/grovedb-query/src/proofs/encoding.rs
+++ b/grovedb-query/src/proofs/encoding.rs
@@ -150,6 +150,13 @@ impl Encode for Op {
dest.write_all(value_hash)?;
count.encode_into(dest)?;
}
+ Op::Push(Node::HashWithCount(kv_hash, left_child_hash, right_child_hash, count)) => {
+ dest.write_all(&[0x1e])?;
+ dest.write_all(kv_hash)?;
+ dest.write_all(left_child_hash)?;
+ dest.write_all(right_child_hash)?;
+ count.encode_into(dest)?;
+ }
Op::Push(Node::KVValueHashFeatureTypeWithChildHash(
key,
value,
@@ -309,6 +316,18 @@ impl Encode for Op {
dest.write_all(value_hash)?;
count.encode_into(dest)?;
}
+ Op::PushInverted(Node::HashWithCount(
+ kv_hash,
+ left_child_hash,
+ right_child_hash,
+ count,
+ )) => {
+ dest.write_all(&[0x1f])?;
+ dest.write_all(kv_hash)?;
+ dest.write_all(left_child_hash)?;
+ dest.write_all(right_child_hash)?;
+ count.encode_into(dest)?;
+ }
Op::PushInverted(Node::KVValueHashFeatureTypeWithChildHash(
key,
value,
@@ -377,6 +396,9 @@ impl Encode for Op {
Op::Push(Node::KVDigestCount(key, _, count)) => {
2 + key.len() + HASH_LENGTH + count.encoding_length()?
}
+ Op::Push(Node::HashWithCount(_, _, _, count)) => {
+ 1 + 3 * HASH_LENGTH + count.encoding_length()?
+ }
Op::Push(Node::KVValueHashFeatureTypeWithChildHash(key, value, _, feature_type, _)) => {
let header = if value.len() < 65536 { 4 } else { 6 };
header
@@ -419,6 +441,9 @@ impl Encode for Op {
Op::PushInverted(Node::KVDigestCount(key, _, count)) => {
2 + key.len() + HASH_LENGTH + count.encoding_length()?
}
+ Op::PushInverted(Node::HashWithCount(_, _, _, count)) => {
+ 1 + 3 * HASH_LENGTH + count.encoding_length()?
+ }
Op::PushInverted(Node::KVValueHashFeatureTypeWithChildHash(
key,
value,
@@ -722,6 +747,38 @@ impl Decode for Op {
child_hash,
))
}
+ 0x1e => {
+ let mut kv_hash = [0; HASH_LENGTH];
+ input.read_exact(&mut kv_hash)?;
+ let mut left_child_hash = [0; HASH_LENGTH];
+ input.read_exact(&mut left_child_hash)?;
+ let mut right_child_hash = [0; HASH_LENGTH];
+ input.read_exact(&mut right_child_hash)?;
+ let count: u64 = Decode::decode(&mut input)?;
+
+ Self::Push(Node::HashWithCount(
+ kv_hash,
+ left_child_hash,
+ right_child_hash,
+ count,
+ ))
+ }
+ 0x1f => {
+ let mut kv_hash = [0; HASH_LENGTH];
+ input.read_exact(&mut kv_hash)?;
+ let mut left_child_hash = [0; HASH_LENGTH];
+ input.read_exact(&mut left_child_hash)?;
+ let mut right_child_hash = [0; HASH_LENGTH];
+ input.read_exact(&mut right_child_hash)?;
+ let count: u64 = Decode::decode(&mut input)?;
+
+ Self::PushInverted(Node::HashWithCount(
+ kv_hash,
+ left_child_hash,
+ right_child_hash,
+ count,
+ ))
+ }
0x1d => {
let key_len: u8 = Decode::decode(&mut input)?;
let mut key = vec![0; key_len as usize];
@@ -2217,4 +2274,98 @@ mod test {
let decoded = Op::decode(&bytes[..]).expect("decode failed");
assert_eq!(decoded, op);
}
+
+ #[test]
+ fn encode_decode_push_hash_with_count() {
+ // (kv_hash, left_child_hash, right_child_hash, count) — the
+ // self-verifying compressed-subtree variant for AggregateCountOnRange.
+ let op = Op::Push(Node::HashWithCount(
+ [0xAA; HASH_LENGTH],
+ [0xBB; HASH_LENGTH],
+ [0xCC; HASH_LENGTH],
+ 42,
+ ));
+ // 1 opcode + 3 * 32 hashes + varint(42) = 1 + 96 + 1 = 98
+ let expected_length = 1 + 3 * HASH_LENGTH + ed::Encode::encoding_length(&42u64).unwrap();
+ assert_eq!(op.encoding_length(), expected_length);
+
+ let mut bytes = vec![];
+ op.encode_into(&mut bytes).unwrap();
+ assert_eq!(bytes.len(), expected_length);
+ assert_eq!(bytes[0], 0x1e); // Push HashWithCount opcode
+
+ let decoded = Op::decode(&bytes[..]).expect("decode failed");
+ assert_eq!(decoded, op);
+ }
+
+ #[test]
+ fn encode_decode_push_inverted_hash_with_count() {
+ let op = Op::PushInverted(Node::HashWithCount(
+ [0x11; HASH_LENGTH],
+ [0x22; HASH_LENGTH],
+ [0x33; HASH_LENGTH],
+ u64::MAX,
+ ));
+ let expected_length = 1 + 3 * HASH_LENGTH + ed::Encode::encoding_length(&u64::MAX).unwrap();
+ assert_eq!(op.encoding_length(), expected_length);
+
+ let mut bytes = vec![];
+ op.encode_into(&mut bytes).unwrap();
+ assert_eq!(bytes.len(), expected_length);
+ assert_eq!(bytes[0], 0x1f); // PushInverted HashWithCount opcode
+
+ let decoded = Op::decode(&bytes[..]).expect("decode failed");
+ assert_eq!(decoded, op);
+ }
+
+ #[test]
+ fn encode_decode_hash_with_count_zero_count_zero_children() {
+ // count = 0 (encodes to a 1-byte varint), all-zero hashes — represents
+ // a leaf-shaped collapsed subtree with no children.
+ let op = Op::Push(Node::HashWithCount(
+ [0u8; HASH_LENGTH],
+ [0u8; HASH_LENGTH],
+ [0u8; HASH_LENGTH],
+ 0,
+ ));
+ let mut bytes = vec![];
+ op.encode_into(&mut bytes).unwrap();
+ assert_eq!(bytes[0], 0x1e);
+ let decoded = Op::decode(&bytes[..]).expect("decode failed");
+ assert_eq!(decoded, op);
+ }
+
+ #[test]
+ fn decoder_with_hash_with_count_mixed_with_other_count_nodes() {
+ // Round-trip a small Op stream containing HashWithCount alongside the
+ // existing count-bearing variants — exercises the Decoder iterator
+ // boundary handling for the new variants.
+ let ops = vec![
+ Op::Push(Node::HashWithCount(
+ [1; HASH_LENGTH],
+ [2; HASH_LENGTH],
+ [3; HASH_LENGTH],
+ 7,
+ )),
+ Op::Push(Node::KVDigestCount(vec![0xAB], [4; HASH_LENGTH], 1)),
+ Op::Parent,
+ Op::Push(Node::Hash([5; HASH_LENGTH])),
+ Op::Child,
+ Op::PushInverted(Node::HashWithCount(
+ [6; HASH_LENGTH],
+ [7; HASH_LENGTH],
+ [8; HASH_LENGTH],
+ 12345,
+ )),
+ ];
+
+ let mut encoded = vec![];
+ for op in &ops {
+ op.encode_into(&mut encoded).unwrap();
+ }
+
+ let decoder = Decoder::new(&encoded);
+ let decoded_ops: Result<Vec<Op>, _> = decoder.collect();
+ assert_eq!(decoded_ops.unwrap(), ops);
+ }
}
diff --git a/grovedb-query/src/proofs/mod.rs b/grovedb-query/src/proofs/mod.rs
index 4fbf02834..d49eb2e4a 100644
--- a/grovedb-query/src/proofs/mod.rs
+++ b/grovedb-query/src/proofs/mod.rs
@@ -127,6 +127,30 @@ pub enum Node {
///
/// Contains: `(key, value, value_hash, feature_type, child_hash)`
KVValueHashFeatureTypeWithChildHash(Vec<u8>, Vec<u8>, CryptoHash, TreeFeatureType, CryptoHash),
+
+ /// A self-verifying compressed subtree for `AggregateCountOnRange` proofs
+ /// against a `ProvableCountTree` / `ProvableCountSumTree`.
+ ///
+ /// Encodes the subtree's *root* node as `(kv_hash, left_child_hash,
+ /// right_child_hash, count)`. The verifier reconstructs the subtree's
+ /// root `node_hash` as
+ /// `node_hash_with_count(kv_hash, left_child_hash, right_child_hash, count)`
+ /// and uses that hash exactly as `Hash(...)` would. Because `count` is
+ /// part of that recomputation, a forged count produces a different hash
+ /// and the parent's Merkle-root check fails — the count is therefore
+ /// cryptographically committed by the parent's hash chain, not just
+ /// trusted on faith.
+ ///
+ /// Used to collapse an entire fully-inside subtree into a single proof
+ /// node: the verifier doesn't need any per-key information (the parent
+ /// boundary nodes already established that every key under here is
+ /// in-range), so we hand it the four hashes plus the count.
+ ///
+ /// `left_child_hash` / `right_child_hash` are the all-zero `NULL_HASH`
+ /// when the subtree's root has no left / right child respectively.
+ ///
+ /// Contains: `(kv_hash, left_child_hash, right_child_hash, count)`
+ HashWithCount(CryptoHash, CryptoHash, CryptoHash, u64),
}
use std::fmt;
@@ -185,6 +209,13 @@ impl fmt::Display for Node {
hex::encode(value_hash),
count
),
+ Node::HashWithCount(kv_hash, left_child_hash, right_child_hash, count) => format!(
+ "HashWithCount(kv_hash=HASH[{}], left=HASH[{}], right=HASH[{}], count={})",
+ hex::encode(kv_hash),
+ hex::encode(left_child_hash),
+ hex::encode(right_child_hash),
+ count
+ ),
Node::KVValueHashFeatureTypeWithChildHash(
key,
value,
diff --git a/grovedb-query/src/query.rs b/grovedb-query/src/query.rs
index df7917799..affce9604 100644
--- a/grovedb-query/src/query.rs
+++ b/grovedb-query/src/query.rs
@@ -303,6 +303,149 @@ impl Query {
}
}
+ /// Creates an aggregate-count-on-range query that counts the elements
+ /// matched by `range`. The resulting query has `AggregateCountOnRange(range)`
+ /// as its sole item, no subquery branches, and `left_to_right = true`
+ /// (counting is direction-agnostic).
+ ///
+ /// `range` must be a true range variant (`Range`, `RangeInclusive`,
+ /// `RangeFrom`, `RangeTo`, `RangeToInclusive`, `RangeAfter`, `RangeAfterTo`,
+ /// or `RangeAfterToInclusive`). Passing `Key`, `RangeFull`, or another
+ /// `AggregateCountOnRange` is allowed at construction time but will be
+ /// rejected by [`validate_aggregate_count_on_range`].
+ pub fn new_aggregate_count_on_range(range: QueryItem) -> Self {
+ Self {
+ items: vec![QueryItem::AggregateCountOnRange(Box::new(range))],
+ left_to_right: true,
+ ..Self::default()
+ }
+ }
+
+ /// If this query contains an `AggregateCountOnRange` item *anywhere* in
+ /// its `items` vec, returns a reference to the first such item (whether
+ /// the surrounding query is well-formed or not). Returns `None` only
+ /// when no item is an `AggregateCountOnRange`.
+ ///
+ /// This is intentionally a **detection-only** helper: malformed queries
+ /// like `items: [Key(...), AggregateCountOnRange(...)]` still report
+ /// `Some(...)` here so callers don't accidentally route them through
+ /// the regular-query path. Use
+ /// [`Self::validate_aggregate_count_on_range`] when you also need to
+ /// enforce the well-formedness rules (single item, allowed inner kind,
+ /// no subqueries, etc.).
+ pub fn aggregate_count_on_range(&self) -> Option<&QueryItem> {
+ self.items
+ .iter()
+ .find(|item| item.is_aggregate_count_on_range())
+ }
+
+ /// Returns `true` if any item in this query — including items inside
+ /// nested subquery branches — is an `AggregateCountOnRange`.
+ ///
+ /// `AggregateCountOnRange` is a *terminal* item: the canonical
+ /// well-formed query contains exactly one `AggregateCountOnRange` at
+ /// the top level and nothing else. This recursive detector exists so
+ /// the prover can validate up front: if any ACOR is present anywhere,
+ /// the query as a whole must satisfy
+ /// [`Self::validate_aggregate_count_on_range`] — otherwise a malformed
+ /// shape (e.g. ACOR hidden inside `default_subquery_branch.subquery`)
+ /// could slip past a top-level-only check and be silently routed
+ /// through the regular-proof path.
+ pub fn has_aggregate_count_on_range_anywhere(&self) -> bool {
+ if self.aggregate_count_on_range().is_some() {
+ return true;
+ }
+ if let Some(sub) = self.default_subquery_branch.subquery.as_deref()
+ && sub.has_aggregate_count_on_range_anywhere()
+ {
+ return true;
+ }
+ if let Some(branches) = &self.conditional_subquery_branches {
+ for branch in branches.values() {
+ if let Some(sub) = branch.subquery.as_deref()
+ && sub.has_aggregate_count_on_range_anywhere()
+ {
+ return true;
+ }
+ }
+ }
+ false
+ }
+
+ /// Validates the Query-level constraints that apply when an
+ /// `AggregateCountOnRange` is present. On success, returns a reference
+ /// to the inner `QueryItem` describing the range to count.
+ ///
+ /// Rules enforced (matching the constraints documented in the GroveDB
+ /// book chapter "Aggregate Count Queries"):
+ ///
+ /// 1. The query must contain exactly one item.
+ /// 2. That item must be `AggregateCountOnRange(_)`.
+ /// 3. The inner item must not be `Key` (use `has_raw` / `get_raw` for
+ /// existence tests).
+ /// 4. The inner item must not be `RangeFull` (read the parent
+ /// `Element::ProvableCountTree` / `Element::ProvableCountSumTree`
+ /// bytes directly for the unconditional total).
+ /// 5. The inner item must not itself be `AggregateCountOnRange`.
+ /// 6. `default_subquery_branch.subquery` and
+ /// `default_subquery_branch.subquery_path` must both be `None`.
+ /// 7. `conditional_subquery_branches` must be `None` or empty.
+ ///
+ /// `SizedQuery::limit` / `SizedQuery::offset` checks live at the
+ /// `PathQuery` / `SizedQuery` layer (see
+ /// [`SizedQuery::validate_aggregate_count_on_range`]).
+ pub fn validate_aggregate_count_on_range(&self) -> Result<&QueryItem, Error> {
+ if self.items.len() != 1 {
+ return Err(Error::InvalidOperation(
+ "AggregateCountOnRange must be the only item in the query",
+ ));
+ }
+ let inner = match &self.items[0] {
+ QueryItem::AggregateCountOnRange(inner) => inner.as_ref(),
+ _ => {
+ return Err(Error::InvalidOperation(
+ "validate_aggregate_count_on_range called on a query without an \
+ AggregateCountOnRange item",
+ ));
+ }
+ };
+ match inner {
+ QueryItem::Key(_) => {
+ return Err(Error::InvalidOperation(
+ "AggregateCountOnRange may not wrap Key — use has_raw / get_raw for \
+ existence tests",
+ ));
+ }
+ QueryItem::RangeFull(_) => {
+ return Err(Error::InvalidOperation(
+ "AggregateCountOnRange may not wrap RangeFull — read the parent \
+ ProvableCountTree element for the unconditional total",
+ ));
+ }
+ QueryItem::AggregateCountOnRange(_) => {
+ return Err(Error::InvalidOperation(
+ "AggregateCountOnRange may not wrap another AggregateCountOnRange",
+ ));
+ }
+ _ => {}
+ }
+ if self.default_subquery_branch.subquery.is_some()
+ || self.default_subquery_branch.subquery_path.is_some()
+ {
+ return Err(Error::InvalidOperation(
+ "AggregateCountOnRange queries may not carry a default subquery branch",
+ ));
+ }
+ if let Some(branches) = &self.conditional_subquery_branches
+ && !branches.is_empty()
+ {
+ return Err(Error::InvalidOperation(
+ "AggregateCountOnRange queries may not carry conditional subquery branches",
+ ));
+ }
+ Ok(inner)
+ }
+
/// Returns `true` if the given key would trigger a subquery (either via
/// the default subquery branch or a matching conditional branch).
pub fn has_subquery_on_key(&self, key: &[u8], in_path: bool) -> bool {
@@ -907,4 +1050,233 @@ mod tests {
"innermost query should have no further subquery"
);
}
+
+ // ---------- AggregateCountOnRange validation tests ----------
+ //
+ // These hit each numbered rule in `Query::validate_aggregate_count_on_range`
+ // independently. The happy path is also covered to ensure the success
+ // arm returns the inner range.
+
+ fn make_acor_query(inner: QueryItem) -> Query {
+ Query::new_aggregate_count_on_range(inner)
+ }
+
+ #[test]
+ fn validate_acor_happy_path_returns_inner() {
+ let q = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec()));
+ let inner = q
+ .validate_aggregate_count_on_range()
+ .expect("happy path should validate");
+ match inner {
+ QueryItem::Range(r) => {
+ assert_eq!(r.start, b"a".to_vec());
+ assert_eq!(r.end, b"z".to_vec());
+ }
+ _ => panic!("expected inner Range"),
+ }
+ }
+
+ #[test]
+ fn validate_acor_rejects_extra_items() {
+ let mut q = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec()));
+ q.items.push(QueryItem::Key(b"extra".to_vec()));
+ let err = q
+ .validate_aggregate_count_on_range()
+ .expect_err("two-item query must fail");
+ assert!(matches!(err, crate::error::Error::InvalidOperation(_)));
+ }
+
+ #[test]
+ fn validate_acor_rejects_non_acor_only_item() {
+ // A query with one item that isn't AggregateCountOnRange triggers the
+ // "validate called on a query without an AggregateCountOnRange item"
+ // branch.
+ let q = Query::new_single_query_item(QueryItem::Key(b"k".to_vec()));
+ let err = q
+ .validate_aggregate_count_on_range()
+ .expect_err("non-ACOR-only item must fail");
+ assert!(matches!(err, crate::error::Error::InvalidOperation(_)));
+ }
+
+ #[test]
+ fn validate_acor_rejects_inner_key() {
+ let q = make_acor_query(QueryItem::Key(b"k".to_vec()));
+ let err = q
+ .validate_aggregate_count_on_range()
+ .expect_err("inner Key must fail");
+ match err {
+ crate::error::Error::InvalidOperation(msg) => assert!(msg.contains("Key")),
+ _ => panic!("expected InvalidOperation"),
+ }
+ }
+
+ #[test]
+ fn validate_acor_rejects_inner_range_full() {
+ let q = make_acor_query(QueryItem::RangeFull(std::ops::RangeFull));
+ let err = q
+ .validate_aggregate_count_on_range()
+ .expect_err("inner RangeFull must fail");
+ match err {
+ crate::error::Error::InvalidOperation(msg) => assert!(msg.contains("RangeFull")),
+ _ => panic!("expected InvalidOperation"),
+ }
+ }
+
+ #[test]
+ fn validate_acor_rejects_nested_acor() {
+ // AggregateCountOnRange wrapping another AggregateCountOnRange.
+ let inner_acor = QueryItem::AggregateCountOnRange(Box::new(QueryItem::Range(
+ b"a".to_vec()..b"z".to_vec(),
+ )));
+ let q = make_acor_query(inner_acor);
+ let err = q
+ .validate_aggregate_count_on_range()
+ .expect_err("nested ACOR must fail");
+ match err {
+ crate::error::Error::InvalidOperation(msg) => {
+ assert!(msg.contains("AggregateCountOnRange"))
+ }
+ _ => panic!("expected InvalidOperation"),
+ }
+ }
+
+ #[test]
+ fn validate_acor_rejects_default_subquery_branch() {
+ let mut q = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec()));
+ q.default_subquery_branch = SubqueryBranch {
+ subquery_path: None,
+ subquery: Some(Box::new(Query::new())),
+ };
+ let err = q
+ .validate_aggregate_count_on_range()
+ .expect_err("default subquery branch must fail");
+ match err {
+ crate::error::Error::InvalidOperation(msg) => assert!(msg.contains("subquery")),
+ _ => panic!("expected InvalidOperation"),
+ }
+ }
+
+ #[test]
+ fn validate_acor_rejects_default_subquery_path() {
+ let mut q = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec()));
+ q.default_subquery_branch = SubqueryBranch {
+ subquery_path: Some(vec![b"x".to_vec()]),
+ subquery: None,
+ };
+ let err = q
+ .validate_aggregate_count_on_range()
+ .expect_err("subquery_path must fail");
+ match err {
+ crate::error::Error::InvalidOperation(msg) => assert!(msg.contains("subquery")),
+ _ => panic!("expected InvalidOperation"),
+ }
+ }
+
+ #[test]
+ fn validate_acor_rejects_conditional_subquery_branches() {
+ let mut q = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec()));
+ let mut branches = IndexMap::new();
+ branches.insert(
+ QueryItem::Key(b"k".to_vec()),
+ SubqueryBranch {
+ subquery_path: None,
+ subquery: Some(Box::new(Query::new())),
+ },
+ );
+ q.conditional_subquery_branches = Some(branches);
+ let err = q
+ .validate_aggregate_count_on_range()
+ .expect_err("conditional branches must fail");
+ match err {
+ crate::error::Error::InvalidOperation(msg) => {
+ assert!(msg.contains("conditional"));
+ }
+ _ => panic!("expected InvalidOperation"),
+ }
+ }
+
+ #[test]
+ fn validate_acor_accepts_empty_conditional_branches_map() {
+ // An empty `Some(IndexMap::new())` is treated as "no branches" by the
+ // validator (the rule enforces non-empty rejection only).
+ let mut q = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec()));
+ q.conditional_subquery_branches = Some(IndexMap::new());
+ let inner = q
+ .validate_aggregate_count_on_range()
+ .expect("empty conditional map must validate");
+ assert!(matches!(inner, QueryItem::Range(_)));
+ }
+
+ #[test]
+ fn aggregate_count_on_range_helper_detects_acor_anywhere_in_items() {
+ // Well-formed shape — single ACOR item.
+ let q = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec()));
+ assert!(q.aggregate_count_on_range().is_some());
+
+ // Two items including ACOR → still detected, so the routing layer
+ // can hand the malformed query to validate_aggregate_count_on_range
+ // for a precise error rather than silently treating it as a regular
+ // query.
+ let mut q2 = q.clone();
+ q2.items.push(QueryItem::Key(b"x".to_vec()));
+ assert!(
+ q2.aggregate_count_on_range().is_some(),
+ "ACOR + extra item must still be detected as ACOR-bearing"
+ );
+
+ // ACOR not at index 0 — also detected.
+ let mut q3 = Query::new_single_query_item(QueryItem::Key(b"x".to_vec()));
+ q3.items.push(QueryItem::AggregateCountOnRange(Box::new(
+ QueryItem::Range(b"a".to_vec()..b"z".to_vec()),
+ )));
+ assert!(q3.aggregate_count_on_range().is_some());
+
+ // No ACOR anywhere → None.
+ let q4 = Query::new_single_query_item(QueryItem::Key(b"x".to_vec()));
+ assert!(q4.aggregate_count_on_range().is_none());
+
+ // Empty items → None.
+ let q5 = Query::new();
+ assert!(q5.aggregate_count_on_range().is_none());
+ }
+
+ #[test]
+ fn has_aggregate_count_on_range_anywhere_walks_subqueries() {
+ // No ACOR anywhere → false.
+ let plain = Query::new_single_query_item(QueryItem::Range(b"a".to_vec()..b"z".to_vec()));
+ assert!(!plain.has_aggregate_count_on_range_anywhere());
+
+ // Top-level ACOR → true (covered by `aggregate_count_on_range` too).
+ let top = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec()));
+ assert!(top.has_aggregate_count_on_range_anywhere());
+
+ // ACOR hidden inside `default_subquery_branch.subquery` — the
+ // top-level-only `aggregate_count_on_range` would miss it, but the
+ // recursive helper finds it. This is the surface that the
+ // prove_query entry-point gate uses to refuse to run any
+ // ACOR-bearing query that isn't the canonical single-ACOR shape.
+ let inner_acor = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec()));
+ let mut hidden =
+ Query::new_single_query_item(QueryItem::Range(b"a".to_vec()..b"z".to_vec()));
+ hidden.set_subquery(inner_acor);
+ assert!(hidden.aggregate_count_on_range().is_none());
+ assert!(
+ hidden.has_aggregate_count_on_range_anywhere(),
+ "ACOR hidden in default subquery branch must be detected"
+ );
+
+ // ACOR hidden in a conditional subquery branch.
+ let inner_acor2 = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec()));
+ let mut conditional =
+ Query::new_single_query_item(QueryItem::Range(b"a".to_vec()..b"z".to_vec()));
+ conditional.add_conditional_subquery(
+ QueryItem::Key(b"k".to_vec()),
+ None,
+ Some(inner_acor2),
+ );
+ assert!(
+ conditional.has_aggregate_count_on_range_anywhere(),
+ "ACOR hidden in conditional subquery branch must be detected"
+ );
+ }
}
diff --git a/grovedb-query/src/query_item/intersect.rs b/grovedb-query/src/query_item/intersect.rs
index 1153e3a1d..22d414390 100644
--- a/grovedb-query/src/query_item/intersect.rs
+++ b/grovedb-query/src/query_item/intersect.rs
@@ -612,6 +612,7 @@ impl QueryItem {
start: RangeSetItem::ExclusiveStart(range.start().clone()),
end: RangeSetItem::Inclusive(range.end().clone()),
},
+ QueryItem::AggregateCountOnRange(inner) => inner.to_range_set(),
}
}
@@ -660,6 +661,7 @@ impl QueryItem {
start: RangeSetSimpleItemBorrowed::Exclusive(range.start()),
end: RangeSetSimpleItemBorrowed::Inclusive(range.end()),
}),
+ QueryItem::AggregateCountOnRange(inner) => inner.to_range_set_borrowed(),
}
}
diff --git a/grovedb-query/src/query_item/mod.rs b/grovedb-query/src/query_item/mod.rs
index 6525f2ad5..b42b9a939 100644
--- a/grovedb-query/src/query_item/mod.rs
+++ b/grovedb-query/src/query_item/mod.rs
@@ -75,6 +75,22 @@ pub enum QueryItem {
/// A range starting **after** a key and extending to another key,
/// **inclusive**.
RangeAfterToInclusive(RangeInclusive<Vec<u8>>),
+
+ /// A count-only meta-query that wraps another `QueryItem` describing the
+ /// range to count.
+ ///
+ /// When this variant appears in a `Query`, the query is interpreted as
+ /// "return the **number of elements** matched by the inner range" instead
+ /// of returning the elements themselves. The proof is shaped accordingly:
+ /// boundary nodes are emitted as `KVDigestCount`, fully-inside subtree
+ /// roots as `KVHashCount`, and fully-outside subtrees as opaque `Hash`.
+ ///
+ /// This variant is only valid against `ProvableCountTree` /
+ /// `ProvableCountSumTree` (and their `NonCounted*` wrapper variants), and
+ /// it must be the **only** item in the surrounding `Query` (no subqueries,
+ /// no pagination, no other range items). The inner `QueryItem` may not be
+ /// `Key`, `RangeFull`, or another `AggregateCountOnRange`.
+ AggregateCountOnRange(Box<QueryItem>),
}
#[cfg(feature = "serde")]
@@ -120,6 +136,12 @@ impl Serialize for QueryItem {
"RangeAfterToInclusive",
range_after_to_inclusive,
),
+ QueryItem::AggregateCountOnRange(inner) => serializer.serialize_newtype_variant(
+ "QueryItem",
+ 10,
+ "AggregateCountOnRange",
+ inner,
+ ),
}
}
}
@@ -143,6 +165,7 @@ impl<'de> Deserialize<'de> for QueryItem {
RangeAfter,
RangeAfterTo,
RangeAfterToInclusive,
+ AggregateCountOnRange,
}
struct QueryItemVisitor;
@@ -199,6 +222,19 @@ impl<'de> Deserialize<'de> for QueryItem {
let range_after_to_inclusive = variant_access.newtype_variant()?;
Ok(QueryItem::RangeAfterToInclusive(range_after_to_inclusive))
}
+ Field::AggregateCountOnRange => {
+ // Deserialize the inner via a wrapper that rejects
+ // the `AggregateCountOnRange` tag *before* recursing.
+ // This is the serde counterpart to the bincode
+ // depth-bounded decode + nested-rejection added in
+ // `Self::decode_with_depth`. Without it, a
+ // `serde`-feature client could send arbitrarily
+ // deep nested AggregateCountOnRange payloads and
+ // exhaust the stack inside `QueryItem::deserialize`
+ // before any validation runs.
+ let NonAggregateInner(inner) = variant_access.newtype_variant()?;
+ Ok(QueryItem::AggregateCountOnRange(Box::new(inner)))
+ }
}
}
}
@@ -214,12 +250,107 @@ impl<'de> Deserialize<'de> for QueryItem {
"RangeAfter",
"RangeAfterTo",
"RangeAfterToInclusive",
+ "AggregateCountOnRange",
];
deserializer.deserialize_enum("QueryItem", VARIANTS, QueryItemVisitor)
}
}
+/// Newtype wrapper used internally by the serde `Deserialize` impl when
+/// deserializing the *inner* item of an `AggregateCountOnRange`. The wrapper's
+/// `Deserialize` impl mirrors `QueryItem::deserialize` but rejects the
+/// `AggregateCountOnRange` field tag immediately — without recursing — so
+/// nested aggregate payloads cannot exhaust the stack via repeated variant-10
+/// recursion through `QueryItem::deserialize`.
+///
+/// Defense-in-depth: nested `AggregateCountOnRange` is also rejected by
+/// `Query::validate_aggregate_count_on_range`, but enforcing it at decode time
+/// matches the bincode side and prevents the DoS class on its own.
+#[cfg(feature = "serde")]
+struct NonAggregateInner(QueryItem);
+
+#[cfg(feature = "serde")]
+impl<'de> Deserialize<'de> for NonAggregateInner {
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: Deserializer<'de>,
+ {
+ // Field set excludes "AggregateCountOnRange"; encountering that tag
+ // produces a serde "unknown variant" error before any inner
+ // recursion can happen.
+ #[derive(Deserialize)]
+ #[serde(field_identifier, rename_all = "snake_case")]
+ enum Field {
+ Key,
+ Range,
+ RangeInclusive,
+ RangeFull,
+ RangeFrom,
+ RangeTo,
+ RangeToInclusive,
+ RangeAfter,
+ RangeAfterTo,
+ RangeAfterToInclusive,
+ }
+
+ struct V;
+ impl<'de> serde::de::Visitor<'de> for V {
+ type Value = NonAggregateInner;
+
+ fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ f.write_str("non-aggregate QueryItem variant")
+ }
+
+ fn visit_enum<A>(self, data: A) -> Result<Self::Value, A::Error>
+ where
+ A: serde::de::EnumAccess<'de>,
+ {
+ let (variant, va) = data.variant()?;
+ let inner = match variant {
+ Field::Key => QueryItem::Key(va.newtype_variant()?),
+ Field::Range => QueryItem::Range(va.newtype_variant()?),
+ Field::RangeInclusive => QueryItem::RangeInclusive(va.newtype_variant()?),
+ Field::RangeFull => {
+ va.unit_variant()?;
+ QueryItem::RangeFull(RangeFull)
+ }
+ Field::RangeFrom => QueryItem::RangeFrom(va.newtype_variant()?),
+ Field::RangeTo => QueryItem::RangeTo(va.newtype_variant()?),
+ Field::RangeToInclusive => {
+ let end: Vec<u8> = va.newtype_variant()?;
+ QueryItem::RangeToInclusive(..=end)
+ }
+ Field::RangeAfter => QueryItem::RangeAfter(va.newtype_variant()?),
+ Field::RangeAfterTo => QueryItem::RangeAfterTo(va.newtype_variant()?),
+ Field::RangeAfterToInclusive => {
+ QueryItem::RangeAfterToInclusive(va.newtype_variant()?)
+ }
+ };
+ Ok(NonAggregateInner(inner))
+ }
+ }
+
+ // The list excludes "AggregateCountOnRange" so a serde format that
+ // surfaces unknown variants by name (most do) gives a precise error
+ // for the nested case.
+ const NON_AGGREGATE_VARIANTS: &[&str] = &[
+ "Key",
+ "Range",
+ "RangeInclusive",
+ "RangeFull",
+ "RangeFrom",
+ "RangeTo",
+ "RangeToInclusive",
+ "RangeAfter",
+ "RangeAfterTo",
+ "RangeAfterToInclusive",
+ ];
+
+ deserializer.deserialize_enum("QueryItem", NON_AGGREGATE_VARIANTS, V)
+ }
+}
+
impl Encode for QueryItem {
fn encode(
&self,
@@ -270,14 +401,46 @@ impl Encode for QueryItem {
range.start().encode(encoder)?;
range.end().encode(encoder)
}
+ QueryItem::AggregateCountOnRange(inner) => {
+ encoder.writer().write(&[10])?;
+ inner.as_ref().encode(encoder)
+ }
}
}
}
+/// Maximum recursion depth allowed when decoding a `QueryItem` from bincode.
+///
+/// The only recursive variant today is `AggregateCountOnRange(Box<QueryItem>)`
+/// (variant 10). A malicious payload made of repeated variant-10 bytes
+/// would otherwise recurse arbitrarily deep before any validation runs and
+/// can stack-overflow the decoder. Since nested `AggregateCountOnRange` is
+/// always rejected by `Query::validate_aggregate_count_on_range` anyway,
+/// the only legal nesting depth here is **one** (the outer wrapper plus its
+/// non-aggregate inner range). We keep a small safety margin.
+pub(crate) const MAX_QUERY_ITEM_DECODE_DEPTH: usize = 4;
+
impl<Context> Decode<Context> for QueryItem {
fn decode<D: bincode::de::Decoder<Context = Context>>(
decoder: &mut D,
) -> Result<Self, DecodeError> {
+ Self::decode_with_depth(decoder, 0)
+ }
+}
+
+impl QueryItem {
+ /// Recursive bincode decode with an explicit depth counter. Used to bound
+ /// nested `AggregateCountOnRange` payloads (which would otherwise allow
+ /// stack exhaustion via repeated variant-10 bytes).
+ pub(crate) fn decode_with_depth<D: bincode::de::Decoder>(
+ decoder: &mut D,
+ depth: usize,
+ ) -> Result<Self, DecodeError> {
+ if depth > MAX_QUERY_ITEM_DECODE_DEPTH {
+ return Err(DecodeError::Other(
+ "QueryItem nesting depth exceeded maximum during deserialization",
+ ));
+ }
let variant_id = u8::decode(decoder)?;
match variant_id {
@@ -322,9 +485,22 @@ impl Decode for QueryItem {
let end = Vec::<u8>::decode(decoder)?;
Ok(QueryItem::RangeAfterToInclusive(start..=end))
}
+ 10 => {
+ let inner = QueryItem::decode_with_depth(decoder, depth + 1)?;
+ // Defense-in-depth: nested AggregateCountOnRange is invalid
+ // by validation rules, so we also reject it at decode time.
+ // The depth guard above remains the primary stack-overflow
+ // mitigation for malicious deeper nesting.
+ if matches!(inner, QueryItem::AggregateCountOnRange(_)) {
+ return Err(DecodeError::Other(
+ "AggregateCountOnRange must not wrap another AggregateCountOnRange",
+ ));
+ }
+ Ok(QueryItem::AggregateCountOnRange(Box::new(inner)))
+ }
_ => Err(DecodeError::UnexpectedVariant {
type_name: "QueryItem",
- allowed: &bincode::error::AllowedEnumVariants::Range { min: 0, max: 9 },
+ allowed: &bincode::error::AllowedEnumVariants::Range { min: 0, max: 10 },
found: variant_id as u32,
}),
}
@@ -335,6 +511,24 @@ impl<'de, Context> BorrowDecode<'de, Context> for QueryItem {
fn borrow_decode>(
decoder: &mut D,
) -> Result {
+ Self::borrow_decode_with_depth(decoder, 0)
+ }
+}
+
+impl QueryItem {
+ /// Recursive bincode borrow-decode with an explicit depth counter.
+ /// Mirrors [`Self::decode_with_depth`] for the borrowed-decoder path; same
+ /// `MAX_QUERY_ITEM_DECODE_DEPTH` and same nested-`AggregateCountOnRange`
+ /// rejection apply.
+ pub(crate) fn borrow_decode_with_depth<'de, D: bincode::de::BorrowDecoder<'de>>(
+ decoder: &mut D,
+ depth: usize,
+ ) -> Result<Self, DecodeError> {
+ if depth > MAX_QUERY_ITEM_DECODE_DEPTH {
+ return Err(DecodeError::Other(
+ "QueryItem nesting depth exceeded maximum during deserialization",
+ ));
+ }
let variant_id = u8::decode(decoder)?;
match variant_id {
@@ -379,9 +573,18 @@ impl<'de, Context> BorrowDecode<'de, Context> for QueryItem {
let end = Vec::<u8>::borrow_decode(decoder)?;
Ok(QueryItem::RangeAfterToInclusive(start..=end))
}
+ 10 => {
+ let inner = QueryItem::borrow_decode_with_depth(decoder, depth + 1)?;
+ if matches!(inner, QueryItem::AggregateCountOnRange(_)) {
+ return Err(DecodeError::Other(
+ "AggregateCountOnRange must not wrap another AggregateCountOnRange",
+ ));
+ }
+ Ok(QueryItem::AggregateCountOnRange(Box::new(inner)))
+ }
_ => Err(DecodeError::UnexpectedVariant {
type_name: "QueryItem",
- allowed: &bincode::error::AllowedEnumVariants::Range { min: 0, max: 9 },
+ allowed: &bincode::error::AllowedEnumVariants::Range { min: 0, max: 10 },
found: variant_id as u32,
}),
}
@@ -427,6 +630,9 @@ impl fmt::Display for QueryItem {
hex_to_ascii(range.start()),
hex_to_ascii(range.end())
),
+ QueryItem::AggregateCountOnRange(inner) => {
+ write!(f, "AggregateCountOnRange({})", inner)
+ }
}
}
}
@@ -437,6 +643,7 @@ impl QueryItem {
match self {
QueryItem::Key(key) => key.len() as u32,
QueryItem::RangeFull(_) => 0u32,
+ QueryItem::AggregateCountOnRange(inner) => inner.processing_footprint(),
_ => {
self.lower_bound().0.map_or(0u32, |x| x.len() as u32)
+ self.upper_bound().0.map_or(0u32, |x| x.len() as u32)
@@ -458,11 +665,12 @@ impl QueryItem {
QueryItem::RangeAfter(range) => (Some(range.start.as_ref()), true),
QueryItem::RangeAfterTo(range) => (Some(range.start.as_ref()), true),
QueryItem::RangeAfterToInclusive(range) => (Some(range.start().as_ref()), true),
+ QueryItem::AggregateCountOnRange(inner) => inner.lower_bound(),
}
}
/// Returns `true` if this query item has no lower bound (extends to -inf).
- pub const fn lower_unbounded(&self) -> bool {
+ pub fn lower_unbounded(&self) -> bool {
match self {
QueryItem::Key(_) => false,
QueryItem::Range(_) => false,
@@ -474,6 +682,7 @@ impl QueryItem {
QueryItem::RangeAfter(_) => false,
QueryItem::RangeAfterTo(_) => false,
QueryItem::RangeAfterToInclusive(_) => false,
+ QueryItem::AggregateCountOnRange(inner) => inner.lower_unbounded(),
}
}
@@ -491,11 +700,12 @@ impl QueryItem {
QueryItem::RangeAfter(_) => (None, true),
QueryItem::RangeAfterTo(range) => (Some(range.end.as_ref()), false),
QueryItem::RangeAfterToInclusive(range) => (Some(range.end().as_ref()), true),
+ QueryItem::AggregateCountOnRange(inner) => inner.upper_bound(),
}
}
/// Returns `true` if this query item has no upper bound (extends to +inf).
- pub const fn upper_unbounded(&self) -> bool {
+ pub fn upper_unbounded(&self) -> bool {
match self {
QueryItem::Key(_) => false,
QueryItem::Range(_) => false,
@@ -507,6 +717,7 @@ impl QueryItem {
QueryItem::RangeAfter(_) => true,
QueryItem::RangeAfterTo(_) => false,
QueryItem::RangeAfterToInclusive(_) => false,
+ QueryItem::AggregateCountOnRange(inner) => inner.upper_unbounded(),
}
}
@@ -535,6 +746,7 @@ impl QueryItem {
QueryItem::RangeAfter(_) => 7,
QueryItem::RangeAfterTo(_) => 8,
QueryItem::RangeAfterToInclusive(_) => 9,
+ QueryItem::AggregateCountOnRange(_) => 10,
}
}
@@ -544,7 +756,8 @@ impl QueryItem {
}
/// Returns `true` if this query item is any kind of range (not a single
- /// key).
+ /// key). `AggregateCountOnRange` counts as a range — it describes a range
+ /// to count over.
pub const fn is_range(&self) -> bool {
matches!(
self,
@@ -557,6 +770,7 @@ impl QueryItem {
| QueryItem::RangeAfter(_)
| QueryItem::RangeAfterTo(_)
| QueryItem::RangeAfterToInclusive(_)
+ | QueryItem::AggregateCountOnRange(_)
)
}
@@ -566,12 +780,30 @@ impl QueryItem {
}
/// Returns `true` if this query item is a range with at least one unbounded
- /// end (e.g., `RangeFull`, `RangeFrom`, `RangeTo`, etc.).
- pub const fn is_unbounded_range(&self) -> bool {
- !matches!(
- self,
- QueryItem::Key(_) | QueryItem::Range(_) | QueryItem::RangeInclusive(_)
- )
+ /// end (e.g., `RangeFull`, `RangeFrom`, `RangeTo`, etc.). For
+ /// `AggregateCountOnRange`, delegates to the inner item.
+ pub fn is_unbounded_range(&self) -> bool {
+ match self {
+ QueryItem::AggregateCountOnRange(inner) => inner.is_unbounded_range(),
+ _ => !matches!(
+ self,
+ QueryItem::Key(_) | QueryItem::Range(_) | QueryItem::RangeInclusive(_)
+ ),
+ }
+ }
+
+ /// Returns `true` if this query item is the count-only meta-variant.
+ pub const fn is_aggregate_count_on_range(&self) -> bool {
+ matches!(self, QueryItem::AggregateCountOnRange(_))
+ }
+
+ /// If this is `AggregateCountOnRange`, returns a reference to the inner
+ /// `QueryItem` describing the range to count. Otherwise returns `None`.
+ pub fn aggregate_count_inner(&self) -> Option<&QueryItem> {
+ match self {
+ QueryItem::AggregateCountOnRange(inner) => Some(inner.as_ref()),
+ _ => None,
+ }
}
/// Enumerates all distinct keys in this query item. Only works for `Key`,
@@ -775,6 +1007,7 @@ impl QueryItem {
iter.seek_for_prev(end)
}
}
+ QueryItem::AggregateCountOnRange(inner) => inner.seek_for_iter(iter, left_to_right),
}
}
@@ -867,6 +1100,9 @@ impl QueryItem {
}
}
}
+ QueryItem::AggregateCountOnRange(inner) => {
+ return inner.iter_is_valid_for_type(iter, limit, aggregate_limit, left_to_right);
+ }
};
is_valid.wrap_with_cost(cost)
@@ -986,4 +1222,169 @@ mod test {
);
assert!(QueryItem::Range(vec![20]..vec![30]) > QueryItem::Range(vec![10]..vec![20]));
}
+
+ // ---------- decode-depth + nested-AggregateCountOnRange rejection ----------
+
+ use super::MAX_QUERY_ITEM_DECODE_DEPTH;
+
+ fn bincode_config() -> bincode::config::Configuration<
+ bincode::config::BigEndian,
+ bincode::config::Fixint,
+ bincode::config::NoLimit,
+ > {
+ bincode::config::standard()
+ .with_big_endian()
+ .with_fixed_int_encoding()
+ .with_no_limit()
+ }
+
+ #[test]
+ fn decode_rejects_nested_aggregate_count_on_range() {
+ // A two-level nest: AggregateCountOnRange(AggregateCountOnRange(Range)).
+ let nested = QueryItem::AggregateCountOnRange(Box::new(QueryItem::AggregateCountOnRange(
+ Box::new(QueryItem::Range(b"a".to_vec()..b"z".to_vec())),
+ )));
+ let bytes = bincode::encode_to_vec(&nested, bincode_config()).expect("encode succeeds");
+ let result: Result<(QueryItem, _), _> =
+ bincode::decode_from_slice(&bytes, bincode_config());
+ let err = result.expect_err("nested AggregateCountOnRange must be rejected at decode");
+ let msg = format!("{:?}", err);
+ assert!(
+ msg.contains("AggregateCountOnRange") || msg.contains("nesting depth"),
+ "expected nested-rejection message, got: {msg}"
+ );
+ }
+
+ #[test]
+ fn decode_caps_depth_for_malicious_payload() {
+ // Construct a raw byte payload of (MAX_QUERY_ITEM_DECODE_DEPTH + 2)
+ // copies of the AggregateCountOnRange variant byte (10) followed by
+ // a base item. This bypasses the constructor-level nested rejection
+ // but should hit the depth guard. We use Range as the eventual base
+ // (variants 0..=9 don't recurse). Since variant 10 reads the next
+ // byte as a recursive QueryItem, repeated 10s recurse without
+ // bound — exactly the stack-exhaustion case the depth guard
+ // prevents.
+ let depth_to_try = MAX_QUERY_ITEM_DECODE_DEPTH + 2;
+ let mut payload: Vec<u8> = Vec::new();
+ for _ in 0..depth_to_try {
+ payload.push(10u8); // AggregateCountOnRange variant tag
+ }
+ // Innermost: Range(b"a", b"z"). Variant tag 1, then encoded start +
+ // end Vecs in big-endian fixed-int config.
+ payload.push(1u8);
+ let inner = QueryItem::Range(b"a".to_vec()..b"z".to_vec());
+ let inner_bytes = bincode::encode_to_vec(&inner, bincode_config()).unwrap();
+ // inner_bytes already starts with the variant tag (1), strip it.
+ payload.extend_from_slice(&inner_bytes[1..]);
+
+ let result: Result<(QueryItem, _), _> =
+ bincode::decode_from_slice(&payload, bincode_config());
+ let err = result.expect_err("payload exceeding max depth must be rejected");
+ let msg = format!("{:?}", err);
+ assert!(
+ msg.contains("nesting depth") || msg.contains("AggregateCountOnRange"),
+ "expected depth-rejection message, got: {msg}"
+ );
+ }
+
+ #[test]
+ fn decode_accepts_valid_one_level_aggregate_count_on_range() {
+ // Single-level wrap with a non-aggregate inner. This is the only
+ // legal shape after validation; decoding must succeed.
+ let q = QueryItem::AggregateCountOnRange(Box::new(QueryItem::Range(
+ b"a".to_vec()..b"z".to_vec(),
+ )));
+ let bytes = bincode::encode_to_vec(&q, bincode_config()).unwrap();
+ let (decoded, _): (QueryItem, _) = bincode::decode_from_slice(&bytes, bincode_config())
+ .expect("single-level wrap must decode");
+ assert_eq!(q, decoded);
+ }
+
+ // ---------- serde-feature: nested AggregateCountOnRange rejection ----------
+ //
+ // The bincode path is depth-bounded above. Mirror the same defense for the
+ // serde path so serde-feature clients can't bypass the protection — the
+ // inner item is deserialized through `NonAggregateInner`, whose enum
+ // field set excludes `AggregateCountOnRange`, so any nested payload is
+ // rejected immediately by serde without recursion through
+ // `QueryItem::deserialize`.
+ //
+ // We use `serde_test`'s token-level driver here rather than a textual
+ // format because the existing `Serialize` impl emits variant tags in
+ // PascalCase (`"AggregateCountOnRange"`) while the existing `Field` enum
+ // uses `rename_all = "snake_case"` — a pre-existing mismatch unrelated
+ // to this PR that breaks JSON round-trip but is invisible to formats
+ // that don't carry variant names textually. Using token streams sidesteps
+ // that issue and lets us validate the rejection contract directly.
+
+ #[cfg(feature = "serde")]
+ #[test]
+ fn serde_decode_rejects_nested_aggregate_count_on_range() {
+ // Replay the token sequence for an outer AggregateCountOnRange whose
+ // inner is itself an AggregateCountOnRange. The outer dispatch
+ // selects the AggregateCountOnRange variant and tries to deserialize
+ // the inner via `NonAggregateInner`, which does not list
+ // `aggregate_count_on_range` in its field set — serde_test surfaces
+ // this as an "unknown variant" error.
+ use serde_test::{assert_de_tokens_error, Token};
+ assert_de_tokens_error::(
+ &[
+ Token::NewtypeVariant {
+ name: "QueryItem",
+ variant: "aggregate_count_on_range",
+ },
+ Token::NewtypeVariant {
+ name: "QueryItem",
+ variant: "aggregate_count_on_range",
+ },
+ ],
+ // Exact wording comes from serde's `field_identifier`
+ // dispatcher rejecting an out-of-set tag — the field set lives
+ // in `NonAggregateInner`'s `Field` enum, which deliberately
+ // omits `aggregate_count_on_range`.
+ "unknown field `aggregate_count_on_range`, expected one of \
+ `key`, `range`, `range_inclusive`, `range_full`, `range_from`, \
+ `range_to`, `range_to_inclusive`, `range_after`, `range_after_to`, \
+ `range_after_to_inclusive`",
+ );
+ }
+
+ #[cfg(feature = "serde")]
+ #[test]
+ fn serde_decode_accepts_valid_one_level_aggregate_count_on_range() {
+ // Outer `AggregateCountOnRange` wrapping a non-aggregate `Range`
+ // succeeds: the inner dispatch goes through `NonAggregateInner`,
+ // finds `range`, and the resulting Range is wrapped back up.
+ use serde_test::{assert_de_tokens, Token};
+ let expected = QueryItem::AggregateCountOnRange(Box::new(QueryItem::Range(
+ b"a".to_vec()..b"z".to_vec(),
+ )));
+ assert_de_tokens(
+ &expected,
+ &[
+ Token::NewtypeVariant {
+ name: "QueryItem",
+ variant: "aggregate_count_on_range",
+ },
+ Token::NewtypeVariant {
+ name: "QueryItem",
+ variant: "range",
+ },
+ Token::Struct {
+ name: "Range",
+ len: 2,
+ },
+ Token::Str("start"),
+ Token::Seq { len: Some(1) },
+ Token::U8(b'a'),
+ Token::SeqEnd,
+ Token::Str("end"),
+ Token::Seq { len: Some(1) },
+ Token::U8(b'z'),
+ Token::SeqEnd,
+ Token::StructEnd,
+ ],
+ );
+ }
}
diff --git a/grovedb/src/debugger.rs b/grovedb/src/debugger.rs
index 86312f97d..c6e7cad6c 100644
--- a/grovedb/src/debugger.rs
+++ b/grovedb/src/debugger.rs
@@ -550,6 +550,26 @@ fn merk_proof_node_to_grovedbg(node: Node) -> Result {
+ use grovedb_merk::tree::node_hash_with_count;
+ let computed_node_hash =
+ node_hash_with_count(&kv_hash, &left_child_hash, &right_child_hash, count).unwrap();
+ MerkProofNode::KVValueHashFeatureType(
+ vec![],
+ grovedbg_types::Element::Item {
+ value: vec![],
+ element_flags: None,
+ },
+ computed_node_hash,
+ grovedbg_types::TreeFeatureType::ProvableCountedMerkNode(count),
+ )
+ }
})
}
diff --git a/grovedb/src/operations/proof/aggregate_count.rs b/grovedb/src/operations/proof/aggregate_count.rs
new file mode 100644
index 000000000..6920c78c5
--- /dev/null
+++ b/grovedb/src/operations/proof/aggregate_count.rs
@@ -0,0 +1,367 @@
+//! GroveDB-side prove/verify glue for `AggregateCountOnRange` queries.
+//!
+//! The merk-level pieces live in `grovedb_merk::proofs::query::aggregate_count`
+//! (proof generation in `Merk::prove_aggregate_count_on_range`, proof
+//! verification in `verify_aggregate_count_on_range_proof`). This module
+//! adds the GroveDB-level *envelope* handling: a verifier that walks the
+//! multi-layer `GroveDBProof` chain (parent merk → ... → leaf merk),
+//! verifies the path-element existence proofs at each non-leaf layer, and
+//! delegates to the merk-level count verifier at the leaf.
+//!
+//! The proof generator side is wired directly into
+//! [`GroveDb::prove_subqueries`] / [`GroveDb::prove_subqueries_v1`] — see
+//! the "Aggregate-count short-circuit" branches there.
+
+use grovedb_merk::{
+ proofs::{
+ query::{aggregate_count::verify_aggregate_count_on_range_proof, QueryProofVerify},
+ Query as MerkQuery,
+ },
+ tree::{combine_hash, value_hash},
+ CryptoHash,
+};
+use grovedb_version::{check_grovedb_v0, version::GroveVersion};
+
+use crate::{
+ operations::proof::{
+ GroveDBProof, GroveDBProofV0, GroveDBProofV1, LayerProof, MerkOnlyLayerProof, ProofBytes,
+ },
+ Element, Error, GroveDb, PathQuery,
+};
+
+impl GroveDb {
+ /// Verify a serialized `prove_query` proof against an
+ /// `AggregateCountOnRange` `PathQuery`, returning the GroveDB root hash
+ /// and the verified count.
+ ///
+ /// `path_query` must satisfy
+ /// [`PathQuery::validate_aggregate_count_on_range`] — a single
+ /// `AggregateCountOnRange(_)` item, no subqueries, no pagination, and an
+ /// inner range that isn't `Key`, `RangeFull`, or another
+ /// `AggregateCountOnRange`. Any other shape is rejected up front with
+ /// `Error::InvalidQuery` before any bytes are decoded.
+ ///
+ /// Returns:
+ /// - `root_hash` — the reconstructed GroveDB root hash. The caller is
+ /// responsible for comparing this against their trusted root hash.
+ /// - `count` — the number of keys in the inner range that were committed
+ /// by the proof.
+ ///
+ /// Cryptographic guarantees:
+ /// - At each non-leaf layer, a regular single-key merk proof
+ /// demonstrates that the next path element exists with the recorded
+ /// value bytes; the verifier checks the chain
+ /// `combine_hash(H(value), lower_hash) == parent_proof_hash` so a
+ /// forged path is impossible without a root-hash mismatch.
+ /// - At the leaf layer, the count is committed by `HashWithCount`'s
+ /// `node_hash_with_count(kv_hash, left, right, count)` recomputation —
+ /// tampering with the count produces a different reconstructed merk
+ /// root, and the chain check above then fails.
+ pub fn verify_aggregate_count_query(
+ proof: &[u8],
+ path_query: &PathQuery,
+ grove_version: &GroveVersion,
+ ) -> Result<(CryptoHash, u64), Error> {
+ check_grovedb_v0!(
+ "verify_aggregate_count_query",
+ grove_version
+ .grovedb_versions
+ .operations
+ .proof
+ .verify_query_with_options
+ );
+
+ let inner_range = path_query.validate_aggregate_count_on_range()?.clone();
+
+ // Decode the GroveDBProof envelope using the same config the prover
+ // uses on the way out (matches `prove_query`).
+ let config = bincode::config::standard()
+ .with_big_endian()
+ .with_limit::<{ 256 * 1024 * 1024 }>();
+ let grovedb_proof: GroveDBProof = bincode::decode_from_slice(proof, config)
+ .map_err(|e| Error::CorruptedData(format!("unable to decode proof: {}", e)))?
+ .0;
+
+ let path_keys: Vec<&[u8]> = path_query.path.iter().map(|p| p.as_slice()).collect();
+
+ match grovedb_proof {
+ GroveDBProof::V0(GroveDBProofV0 { root_layer, .. }) => verify_v0_layer(
+ &root_layer,
+ path_query,
+ &path_keys,
+ 0,
+ &inner_range,
+ grove_version,
+ ),
+ GroveDBProof::V1(GroveDBProofV1 { root_layer }) => verify_v1_layer(
+ &root_layer,
+ path_query,
+ &path_keys,
+ 0,
+ &inner_range,
+ grove_version,
+ ),
+ }
+ }
+}
+
+/// Walk a V0 (`MerkOnlyLayerProof`) envelope. At each non-leaf depth we
+/// verify the single-key existence proof for `path[depth]` and descend into
+/// the matching lower layer; at the leaf depth we delegate to the merk
+/// count verifier.
+fn verify_v0_layer(
+ layer: &MerkOnlyLayerProof,
+ path_query: &PathQuery,
+ path_keys: &[&[u8]],
+ depth: usize,
+ inner_range: &grovedb_merk::proofs::query::QueryItem,
+ grove_version: &GroveVersion,
+) -> Result<(CryptoHash, u64), Error> {
+ if depth == path_keys.len() {
+ // Leaf layer: count proof.
+ return verify_count_leaf(&layer.merk_proof, inner_range, path_query);
+ }
+
+ // Non-leaf: build a single-key merk query and verify.
+ let next_key = path_keys[depth].to_vec();
+ let (proven_value_bytes, parent_root_hash, parent_proof_hash) =
+ verify_single_key_layer_proof_v0(&layer.merk_proof, &next_key, path_query)?;
+
+ // Descend.
+ let lower_layer = layer.lower_layers.get(&next_key).ok_or_else(|| {
+ Error::InvalidProof(
+ path_query.clone(),
+ format!(
+ "aggregate-count proof missing lower layer for path key {}",
+ hex::encode(&next_key)
+ ),
+ )
+ })?;
+ let (lower_hash, count) = verify_v0_layer(
+ lower_layer,
+ path_query,
+ path_keys,
+ depth + 1,
+ inner_range,
+ grove_version,
+ )?;
+
+ // Chain check: combine_hash(H(tree_value), lower_hash) must equal the
+ // value_hash recorded by the parent merk for this tree element.
+ enforce_lower_chain(
+ path_query,
+ &next_key,
+ &proven_value_bytes,
+ &lower_hash,
+ &parent_proof_hash,
+ grove_version,
+ )?;
+
+ Ok((parent_root_hash, count))
+}
+
+/// Walk a V1 (`LayerProof`) envelope. Mirrors `verify_v0_layer`; the V1
+/// envelope wraps merk proof bytes in `ProofBytes::Merk(_)` and we reject
+/// any other tree-specific proof variant for count queries (they're not
+/// applicable to provable count trees).
+fn verify_v1_layer(
+ layer: &LayerProof,
+ path_query: &PathQuery,
+ path_keys: &[&[u8]],
+ depth: usize,
+ inner_range: &grovedb_merk::proofs::query::QueryItem,
+ grove_version: &GroveVersion,
+) -> Result<(CryptoHash, u64), Error> {
+ let merk_bytes = match &layer.merk_proof {
+ ProofBytes::Merk(b) => b.as_slice(),
+ other => {
+ return Err(Error::InvalidProof(
+ path_query.clone(),
+ format!(
+ "aggregate-count proof has unexpected non-merk leaf bytes: {:?}",
+ std::mem::discriminant(other)
+ ),
+ ));
+ }
+ };
+
+ if depth == path_keys.len() {
+ return verify_count_leaf(merk_bytes, inner_range, path_query);
+ }
+
+ let next_key = path_keys[depth].to_vec();
+ let (proven_value_bytes, parent_root_hash, parent_proof_hash) =
+ verify_single_key_layer_proof_v0(merk_bytes, &next_key, path_query)?;
+
+ let lower_layer = layer.lower_layers.get(&next_key).ok_or_else(|| {
+ Error::InvalidProof(
+ path_query.clone(),
+ format!(
+ "aggregate-count proof missing lower layer for path key {}",
+ hex::encode(&next_key)
+ ),
+ )
+ })?;
+ let (lower_hash, count) = verify_v1_layer(
+ lower_layer,
+ path_query,
+ path_keys,
+ depth + 1,
+ inner_range,
+ grove_version,
+ )?;
+
+ enforce_lower_chain(
+ path_query,
+ &next_key,
+ &proven_value_bytes,
+ &lower_hash,
+ &parent_proof_hash,
+ grove_version,
+ )?;
+
+ Ok((parent_root_hash, count))
+}
+
+/// Verify the leaf layer: bytes are the encoded count-proof Op stream;
+/// the inner range is the same one the prover counted over.
+fn verify_count_leaf(
+ leaf_bytes: &[u8],
+ inner_range: &grovedb_merk::proofs::query::QueryItem,
+ path_query: &PathQuery,
+) -> Result<(CryptoHash, u64), Error> {
+ let (root_hash, count) = verify_aggregate_count_on_range_proof(leaf_bytes, inner_range)
+ .unwrap()
+ .map_err(|e| {
+ Error::InvalidProof(
+ path_query.clone(),
+ format!("aggregate-count leaf proof failed to verify: {}", e),
+ )
+ })?;
+ Ok((root_hash, count))
+}
+
+/// Verify a non-leaf layer that should contain a single-key proof for
+/// `target_key`. Returns `(proven_value_bytes, this_layer_root_hash,
+/// proof_hash_recorded_for_target)`.
+///
+/// The "proof_hash" is the value_hash committed by the merk proof for the
+/// target key — this is the hash the verifier will compare against
+/// `combine_hash(H(child_tree_value), lower_layer_root_hash)` to enforce
+/// the chain.
+fn verify_single_key_layer_proof_v0(
+ merk_bytes: &[u8],
+ target_key: &[u8],
+ path_query: &PathQuery,
+) -> Result<(Vec<u8>, CryptoHash, CryptoHash), Error> {
+ let level_query = MerkQuery {
+ items: vec![grovedb_merk::proofs::query::QueryItem::Key(
+ target_key.to_vec(),
+ )],
+ left_to_right: true,
+ ..Default::default()
+ };
+
+ let (root_hash, merk_result) = level_query
+ .execute_proof(merk_bytes, None, true, 0)
+ .unwrap()
+ .map_err(|e| {
+ Error::InvalidProof(
+ path_query.clone(),
+ format!(
+ "non-leaf single-key proof for {} failed to verify: {}",
+ hex::encode(target_key),
+ e
+ ),
+ )
+ })?;
+
+ // Find the result row for our target key and pull the value + proof_hash.
+ let proved = merk_result
+ .result_set
+ .iter()
+ .find(|p| p.key == target_key)
+ .ok_or_else(|| {
+ Error::InvalidProof(
+ path_query.clone(),
+ format!(
+ "non-leaf proof did not contain the expected key {}",
+ hex::encode(target_key)
+ ),
+ )
+ })?;
+
+ let value_bytes = proved.value.clone().ok_or_else(|| {
+ Error::InvalidProof(
+ path_query.clone(),
+ format!(
+ "non-leaf proof for key {} returned no value bytes",
+ hex::encode(target_key)
+ ),
+ )
+ })?;
+
+ Ok((value_bytes, root_hash, proved.proof))
+}
+
+/// Enforce the layer-chain hash equality: the parent merk's recorded
+/// value_hash for the tree element must equal `combine_hash(H(value),
+/// lower_layer_root_hash)`. This is what makes the count cryptographically
+/// bound to the GroveDB root hash — the leaf count proof's reconstructed
+/// `lower_hash` must agree with the parent's commitment, transitively up to
+/// the root.
+///
+/// Intermediate path elements may be any tree type — the GroveDB grove can
+/// route through Normal/Sum/Count/etc. trees on the way down to the
+/// provable-count leaf. The leaf-level tree-type check is enforced by the
+/// merk prover (`Merk::prove_aggregate_count_on_range`); here we only
+/// require that each non-leaf element on the path *is* some non-empty tree,
+/// since only trees have a lower layer to chain into.
+fn enforce_lower_chain(
+ path_query: &PathQuery,
+ target_key: &[u8],
+ proven_value_bytes: &[u8],
+ lower_hash: &CryptoHash,
+ parent_proof_hash: &CryptoHash,
+ grove_version: &GroveVersion,
+) -> Result<(), Error> {
+ let element = Element::deserialize(proven_value_bytes, grove_version)
+ .map_err(|e| {
+ Error::InvalidProof(
+ path_query.clone(),
+ format!(
+ "non-leaf proof's element at key {} failed to deserialize: {}",
+ hex::encode(target_key),
+ e
+ ),
+ )
+ })?
+ .into_underlying();
+ if !element.is_any_tree() {
+ return Err(Error::InvalidProof(
+ path_query.clone(),
+ format!(
+ "aggregate-count proof's path element at key {} is not a tree element \
+ (got {:?}); count queries can only descend through tree elements",
+ hex::encode(target_key),
+ std::mem::discriminant(&element)
+ ),
+ ));
+ }
+
+ let value_h = value_hash(proven_value_bytes).value().to_owned();
+ let combined = combine_hash(&value_h, lower_hash).value().to_owned();
+ if combined != *parent_proof_hash {
+ return Err(Error::InvalidProof(
+ path_query.clone(),
+ format!(
+ "aggregate-count proof chain mismatch at key {}: parent recorded value_hash \
+ {} but combine_hash(H(value), lower_root) is {}",
+ hex::encode(target_key),
+ hex::encode(parent_proof_hash),
+ hex::encode(combined)
+ ),
+ ));
+ }
+ Ok(())
+}
diff --git a/grovedb/src/operations/proof/generate.rs b/grovedb/src/operations/proof/generate.rs
index eb21e2203..34e0593ce 100644
--- a/grovedb/src/operations/proof/generate.rs
+++ b/grovedb/src/operations/proof/generate.rs
@@ -109,6 +109,22 @@ impl GroveDb {
prove_options: Option<ProveOptions>,
grove_version: &GroveVersion,
) -> CostResult<Vec<u8>, Error> {
+ // Aggregate-count gate: validate at entry so malformed ACOR
+ // queries (invalid inner range, ACOR-hidden-in-subquery, etc.) are
+ // rejected up front instead of being skipped when the recursive
+ // prover never reaches the ACOR-bearing leaf — for example because
+ // the path doesn't exist. Without this gate, `prove_query` would
+ // happily return a regular path/absence proof for an invalid
+ // aggregate-count request.
+ if path_query
+ .query
+ .query
+ .has_aggregate_count_on_range_anywhere()
+ && let Err(e) = path_query.validate_aggregate_count_on_range()
+ {
+ return Err(e).wrap_with_cost(OperationCost::default());
+ }
+
match grove_version
.grovedb_versions
.operations
@@ -269,6 +285,37 @@ impl GroveDb {
*overall_limit
};
+ // Aggregate-count short-circuit: if any item at this level is an
+ // `AggregateCountOnRange`, the surrounding `PathQuery` must validate
+ // as a well-formed aggregate-count query. We do **not** route on a
+ // partial match (e.g. a query with extra items, subqueries, or an
+ // illegal inner) — those would silently produce a count proof for
+ // the wrong shape. Instead we run the same validation the verifier
+ // runs and let it surface the precise error.
+ if query
+ .items
+ .iter()
+ .any(QueryItem::is_aggregate_count_on_range)
+ {
+ let inner_range = cost_return_on_error_no_add!(
+ cost,
+ path_query.validate_aggregate_count_on_range().cloned()
+ );
+ let (count_ops, _count) = cost_return_on_error!(
+ &mut cost,
+ subtree
+ .prove_aggregate_count_on_range(&inner_range, grove_version)
+ .map_err(Error::MerkError)
+ );
+ let mut serialized = Vec::with_capacity(128);
+ encode_into(count_ops.iter(), &mut serialized);
+ return Ok(MerkOnlyLayerProof {
+ merk_proof: serialized,
+ lower_layers: BTreeMap::new(),
+ })
+ .wrap_with_cost(cost);
+ }
+
let mut merk_proof = cost_return_on_error!(
&mut cost,
self.generate_merk_proof(
@@ -1012,6 +1059,35 @@ impl GroveDb {
*overall_limit
};
+ // Aggregate-count short-circuit (v1 path). Same validation contract
+ // as v0: any AggregateCountOnRange at this level requires the
+ // surrounding PathQuery to validate as a well-formed aggregate-count
+ // query. The count-proof bytes are wrapped in `ProofBytes::Merk`
+ // since they share the merk Op stream encoding.
+ if query
+ .items
+ .iter()
+ .any(QueryItem::is_aggregate_count_on_range)
+ {
+ let inner_range = cost_return_on_error_no_add!(
+ cost,
+ path_query.validate_aggregate_count_on_range().cloned()
+ );
+ let (count_ops, _count) = cost_return_on_error!(
+ &mut cost,
+ subtree
+ .prove_aggregate_count_on_range(&inner_range, grove_version)
+ .map_err(Error::MerkError)
+ );
+ let mut serialized = Vec::with_capacity(128);
+ encode_into(count_ops.iter(), &mut serialized);
+ return Ok(LayerProof {
+ merk_proof: ProofBytes::Merk(serialized),
+ lower_layers: BTreeMap::new(),
+ })
+ .wrap_with_cost(cost);
+ }
+
let mut merk_proof = cost_return_on_error!(
&mut cost,
self.generate_merk_proof(
@@ -1862,6 +1938,12 @@ impl GroveDb {
}
}
}
+ QueryItem::AggregateCountOnRange(_) => {
+ return Err(Error::InvalidInput(
+ "AggregateCountOnRange is only supported on provable count trees, \
+ not on dense fixed-size merkle trees",
+ ));
+ }
}
}
@@ -1980,6 +2062,12 @@ impl GroveDb {
}
}
}
+ QueryItem::AggregateCountOnRange(_) => {
+ return Err(Error::InvalidInput(
+ "AggregateCountOnRange is only supported on provable count trees, \
+ not on MMR trees",
+ ));
+ }
}
}
@@ -2048,6 +2136,12 @@ impl GroveDb {
min_start = min_start.min(s.saturating_add(1));
max_end = max_end.max(e.saturating_add(1));
}
+ QueryItem::AggregateCountOnRange(_) => {
+ return Err(Error::InvalidInput(
+ "AggregateCountOnRange is only supported on provable count trees, \
+ not on BulkAppendTree",
+ ));
+ }
}
}
@@ -2087,7 +2181,7 @@ impl GroveDb {
mod tests {
use grovedb_merk::proofs::query::QueryItem;
- use crate::GroveDb;
+ use crate::{Error, GroveDb};
/// Helper: encode a u16 as big-endian bytes.
fn be_u16(v: u16) -> Vec<u8> {
@@ -2225,4 +2319,59 @@ mod tests {
end
);
}
+
+ // -----------------------------------------------------------------------
+ // AggregateCountOnRange rejection on non-provable-count tree types.
+ //
+ // `AggregateCountOnRange` is only meaningful against `ProvableCountTree`
+ // and `ProvableCountSumTree` (their nodes commit a count via
+ // `node_hash_with_count`). Dense, MMR, and BulkAppendTree have no such
+ // commitment, so the index-resolution helpers must reject the variant
+ // outright rather than silently fall through.
+ // -----------------------------------------------------------------------
+
+ #[test]
+ fn dense_tree_rejects_aggregate_count_on_range() {
+ let inner = QueryItem::RangeInclusive(be_u16(0)..=be_u16(5));
+ let items = vec![QueryItem::AggregateCountOnRange(Box::new(inner))];
+ let err = GroveDb::query_items_to_positions(&items, 100)
+ .expect_err("dense tree must reject AggregateCountOnRange");
+ match err {
+ Error::InvalidInput(msg) => assert!(
+ msg.contains("dense fixed-size") || msg.contains("provable count"),
+ "unexpected message: {msg}"
+ ),
+ other => panic!("expected InvalidInput, got {:?}", other),
+ }
+ }
+
+ #[test]
+ fn mmr_tree_rejects_aggregate_count_on_range() {
+ let inner = QueryItem::RangeInclusive(be_u64(0)..=be_u64(5));
+ let items = vec![QueryItem::AggregateCountOnRange(Box::new(inner))];
+ let err = GroveDb::query_items_to_leaf_indices(&items, 7)
+ .expect_err("MMR must reject AggregateCountOnRange");
+ match err {
+ Error::InvalidInput(msg) => assert!(
+ msg.contains("MMR") || msg.contains("provable count"),
+ "unexpected message: {msg}"
+ ),
+ other => panic!("expected InvalidInput, got {:?}", other),
+ }
+ }
+
+ #[test]
+ fn bulk_append_tree_rejects_aggregate_count_on_range() {
+ let inner = QueryItem::RangeInclusive(be_u64(0)..=be_u64(5));
+ let items = vec![QueryItem::AggregateCountOnRange(Box::new(inner))];
+ let err = GroveDb::query_items_to_range(&items, 100)
+ .expect_err("BulkAppendTree must reject AggregateCountOnRange");
+ match err {
+ Error::InvalidInput(msg) => assert!(
+ msg.contains("BulkAppendTree") || msg.contains("provable count"),
+ "unexpected message: {msg}"
+ ),
+ other => panic!("expected InvalidInput, got {:?}", other),
+ }
+ }
}
diff --git a/grovedb/src/operations/proof/mod.rs b/grovedb/src/operations/proof/mod.rs
index 1b9729f33..c10681c4b 100644
--- a/grovedb/src/operations/proof/mod.rs
+++ b/grovedb/src/operations/proof/mod.rs
@@ -1,5 +1,7 @@
//! Proof operations
+#[cfg(feature = "minimal")]
+mod aggregate_count;
#[cfg(feature = "minimal")]
mod generate;
/// Utility functions for proof display and conversion.
@@ -738,6 +740,13 @@ fn node_to_string(node: &Node) -> Result {
feature_type,
hex::encode(child_hash)
),
+ Node::HashWithCount(kv_hash, left_child_hash, right_child_hash, count) => format!(
+ "HashWithCount(kv_hash=HASH[{}], left=HASH[{}], right=HASH[{}], count={})",
+ hex::encode(kv_hash),
+ hex::encode(left_child_hash),
+ hex::encode(right_child_hash),
+ count
+ ),
};
Ok(s)
}
diff --git a/grovedb/src/operations/proof/verify.rs b/grovedb/src/operations/proof/verify.rs
index 64583f1e1..1f8120893 100644
--- a/grovedb/src/operations/proof/verify.rs
+++ b/grovedb/src/operations/proof/verify.rs
@@ -1230,6 +1230,12 @@ impl GroveDb {
min_start = min_start.min(s.saturating_add(1));
max_end = max_end.max(e.saturating_add(1));
}
+ QueryItem::AggregateCountOnRange(_) => {
+ return Err(Error::InvalidInput(
+ "AggregateCountOnRange is only supported on provable count trees, \
+ not on BulkAppendTree",
+ ));
+ }
}
}
@@ -1348,6 +1354,12 @@ impl GroveDb {
check_cap!(positions);
}
}
+ QueryItem::AggregateCountOnRange(_) => {
+ return Err(Error::InvalidInput(
+ "AggregateCountOnRange is only supported on provable count trees, \
+ not on this tree type",
+ ));
+ }
}
}
@@ -2665,7 +2677,8 @@ impl GroveDb {
| Node::KVDigestCount(..)
| Node::Hash(_)
| Node::KVHash(_)
- | Node::KVHashCount(..) => None,
+ | Node::KVHashCount(..)
+ | Node::HashWithCount(..) => None,
}
}
diff --git a/grovedb/src/query/mod.rs b/grovedb/src/query/mod.rs
index 3fd9ecc75..2c1c0c585 100644
--- a/grovedb/src/query/mod.rs
+++ b/grovedb/src/query/mod.rs
@@ -114,6 +114,43 @@ impl SizedQuery {
offset: None,
}
}
+
+ /// Validates that this sized query is a well-formed
+ /// `AggregateCountOnRange` query. On success, returns a reference to the
+ /// inner range item (the `QueryItem` wrapped by `AggregateCountOnRange`).
+ ///
+ /// This is the `SizedQuery`-level entry point: it forwards to
+ /// [`Query::validate_aggregate_count_on_range`] and additionally rejects
+ /// any non-`None` `limit` or `offset` (counting is an aggregate over the
+ /// full match set — pagination would silently change the answer).
+ pub fn validate_aggregate_count_on_range(&self) -> Result<&QueryItem, Error> {
+ if self.limit.is_some() {
+ return Err(Error::InvalidQuery(
+ "AggregateCountOnRange queries may not set SizedQuery::limit",
+ ));
+ }
+ if self.offset.is_some() {
+ return Err(Error::InvalidQuery(
+ "AggregateCountOnRange queries may not set SizedQuery::offset",
+ ));
+ }
+ self.query
+ .validate_aggregate_count_on_range()
+ .map_err(query_validation_error_to_static_str)
+ .map_err(Error::InvalidQuery)
+ }
+}
+
+/// Converts a `Query::validate_aggregate_count_on_range` error into a
+/// `&'static str`. Validation only ever returns
+/// `grovedb_query::error::Error::InvalidOperation(&'static str)`, so this is
+/// just a projection of that variant; any other error variant (which would
+/// indicate an unrelated bug) is forwarded as a generic catch-all label.
+fn query_validation_error_to_static_str(e: grovedb_query::error::Error) -> &'static str {
+ match e {
+ grovedb_query::error::Error::InvalidOperation(msg) => msg,
+ _ => "AggregateCountOnRange query validation failed",
+ }
}
impl PathQuery {
@@ -144,6 +181,31 @@ impl PathQuery {
Self { path, query }
}
+ /// Construct a `PathQuery` for an aggregate-count-on-range query against
+ /// the subtree at `path`. `range` is the inner `QueryItem` describing the
+ /// keys to count over; see [`Query::new_aggregate_count_on_range`] for the
+ /// allowed range variants.
+ pub fn new_aggregate_count_on_range(path: Vec<Vec<u8>>, range: QueryItem) -> Self {
+ Self::new_unsized(path, Query::new_aggregate_count_on_range(range))
+ }
+
+ /// Validates that this `PathQuery` is a well-formed
+ /// `AggregateCountOnRange` query. On success, returns a reference to the
+ /// inner range item.
+ ///
+ /// Forwards to [`SizedQuery::validate_aggregate_count_on_range`].
+ pub fn validate_aggregate_count_on_range(&self) -> Result<&QueryItem, Error> {
+ self.query.validate_aggregate_count_on_range()
+ }
+
+ /// Returns `true` if this `PathQuery`'s underlying query carries an
+ /// `AggregateCountOnRange` item (whether well-formed or not). Use
+ /// [`Self::validate_aggregate_count_on_range`] when you also need
+ /// well-formedness.
+ pub fn has_aggregate_count_on_range(&self) -> bool {
+ self.query.query.aggregate_count_on_range().is_some()
+ }
+
/// The max depth of the query, this is the maximum layers we could get back
/// from grovedb
/// If the max depth can not be calculated we get None
@@ -731,7 +793,7 @@ mod tests {
query::{HasSubquery, SinglePathSubquery},
query_result_type::QueryResultType,
tests::{common::compare_result_tuples, make_deep_tree, TEST_LEAF},
- Element, GroveDb, PathQuery, SizedQuery,
+ Element, Error, GroveDb, PathQuery, SizedQuery,
};
#[test]
@@ -2407,4 +2469,99 @@ mod tests {
assert!(result.is_ok());
assert!(!result.unwrap());
}
+
+ // ---------- SizedQuery / PathQuery AggregateCountOnRange validation ----------
+
+ #[test]
+ fn sized_query_validate_acor_rejects_limit() {
+ let mut sq = SizedQuery::new(
+ Query::new_aggregate_count_on_range(QueryItem::Range(b"a".to_vec()..b"z".to_vec())),
+ Some(10),
+ None,
+ );
+ let err = sq
+ .validate_aggregate_count_on_range()
+ .expect_err("limit must fail");
+ match err {
+ Error::InvalidQuery(msg) => assert!(msg.contains("limit")),
+ _ => panic!("expected InvalidQuery"),
+ }
+
+ // Removing the limit but keeping offset should still fail.
+ sq.limit = None;
+ sq.offset = Some(5);
+ let err = sq
+ .validate_aggregate_count_on_range()
+ .expect_err("offset must fail");
+ match err {
+ Error::InvalidQuery(msg) => assert!(msg.contains("offset")),
+ _ => panic!("expected InvalidQuery"),
+ }
+ }
+
+ #[test]
+ fn sized_query_validate_acor_forwards_query_level_errors() {
+ // SizedQuery validation should forward Query-level rejections (here:
+ // inner Key) as InvalidQuery.
+ let sq = SizedQuery::new(
+ Query::new_aggregate_count_on_range(QueryItem::Key(b"k".to_vec())),
+ None,
+ None,
+ );
+ let err = sq
+ .validate_aggregate_count_on_range()
+ .expect_err("inner Key must fail");
+ match err {
+ Error::InvalidQuery(msg) => assert!(msg.contains("Key")),
+ _ => panic!("expected InvalidQuery"),
+ }
+ }
+
+ #[test]
+ fn sized_query_validate_acor_happy_path() {
+ let sq = SizedQuery::new(
+ Query::new_aggregate_count_on_range(QueryItem::Range(b"a".to_vec()..b"z".to_vec())),
+ None,
+ None,
+ );
+ let inner = sq
+ .validate_aggregate_count_on_range()
+ .expect("happy path must validate");
+ assert!(matches!(inner, QueryItem::Range(_)));
+ }
+
+ #[test]
+ fn path_query_validate_acor_forwards_to_sized_query() {
+ // PathQuery::validate_aggregate_count_on_range delegates to
+ // SizedQuery::validate_aggregate_count_on_range — exercise both error
+ // and happy paths through the public PathQuery surface.
+ let pq = PathQuery::new_aggregate_count_on_range(
+ vec![b"path".to_vec()],
+ QueryItem::Range(b"a".to_vec()..b"z".to_vec()),
+ );
+ let inner = pq
+ .validate_aggregate_count_on_range()
+ .expect("happy path through PathQuery must validate");
+ assert!(matches!(inner, QueryItem::Range(_)));
+
+ // Forward limit rejection.
+ let mut pq_bad = pq.clone();
+ pq_bad.query.limit = Some(1);
+ let err = pq_bad
+ .validate_aggregate_count_on_range()
+ .expect_err("limit must fail");
+ assert!(matches!(err, Error::InvalidQuery(_)));
+ }
+
+ #[test]
+ fn path_query_has_aggregate_count_on_range_recognizes_helper_constructor() {
+ let pq = PathQuery::new_aggregate_count_on_range(
+ vec![b"path".to_vec()],
+ QueryItem::Range(b"a".to_vec()..b"z".to_vec()),
+ );
+ assert!(pq.has_aggregate_count_on_range());
+
+ let pq_regular = PathQuery::new_single_key(vec![b"p".to_vec()], b"k".to_vec());
+ assert!(!pq_regular.has_aggregate_count_on_range());
+ }
}
diff --git a/grovedb/src/tests/aggregate_count_query_tests.rs b/grovedb/src/tests/aggregate_count_query_tests.rs
new file mode 100644
index 000000000..f991e03fa
--- /dev/null
+++ b/grovedb/src/tests/aggregate_count_query_tests.rs
@@ -0,0 +1,1233 @@
+//! End-to-end GroveDB tests for `AggregateCountOnRange` queries.
+//!
+//! These exercise the full prove → encode → decode → verify pipeline against
+//! both `ProvableCountTree` and `ProvableCountSumTree` (and their
+//! `NonCounted*` wrappers, which are exercised as the *parent* tree rather than the queried one),
+//! at various path depths and across the full set of allowed range variants.
+
+#[cfg(test)]
+mod tests {
+ use grovedb_merk::proofs::query::QueryItem;
+ use grovedb_version::version::{v2::GROVE_V2, GroveVersion};
+
+ use crate::{
+ tests::{make_test_grovedb, TEST_LEAF},
+ Element, GroveDb, PathQuery, SizedQuery,
+ };
+
+ /// Insert the 15 single-byte keys "a".."o" into a `ProvableCountTree`
+ /// rooted at `[TEST_LEAF, "ct"]`. Returns the GroveDB and the resulting
+ /// root hash.
+ fn setup_15_key_provable_count_tree(
+ grove_version: &GroveVersion,
+ ) -> (crate::tests::TempGroveDb, [u8; 32]) {
+ let db = make_test_grovedb(grove_version);
+ db.insert(
+ [TEST_LEAF].as_ref(),
+ b"ct",
+ Element::empty_provable_count_tree(),
+ None,
+ None,
+ grove_version,
+ )
+ .unwrap()
+ .expect("insert ct");
+ for c in b'a'..=b'o' {
+ db.insert(
+ [TEST_LEAF, b"ct"].as_ref(),
+ &[c],
+ Element::new_item(vec![c]),
+ None,
+ None,
+ grove_version,
+ )
+ .unwrap()
+ .expect("insert leaf");
+ }
+ let root = db
+ .grove_db
+ .root_hash(None, grove_version)
+ .unwrap()
+ .expect("root_hash");
+ (db, root)
+ }
+
+ fn setup_15_key_provable_count_sum_tree(
+ grove_version: &GroveVersion,
+ ) -> (crate::tests::TempGroveDb, [u8; 32]) {
+ let db = make_test_grovedb(grove_version);
+ db.insert(
+ [TEST_LEAF].as_ref(),
+ b"cst",
+ Element::empty_provable_count_sum_tree(),
+ None,
+ None,
+ grove_version,
+ )
+ .unwrap()
+ .expect("insert cst");
+ for c in b'a'..=b'o' {
+ db.insert(
+ [TEST_LEAF, b"cst"].as_ref(),
+ &[c],
+ // `Item` plays the role of a non-sum element inside a count
+ // sum tree — we're testing count semantics, not sum.
+ Element::new_item(vec![c]),
+ None,
+ None,
+ grove_version,
+ )
+ .unwrap()
+ .expect("insert leaf");
+ }
+ let root = db
+ .grove_db
+ .root_hash(None, grove_version)
+ .unwrap()
+ .expect("root_hash");
+ (db, root)
+ }
+
+ /// Round-trip helper: build a path_query, prove it, verify it, assert
+ /// `(root, count)` matches what we expect.
+ fn round_trip(
+ db: &crate::tests::TempGroveDb,
+ expected_root: [u8; 32],
+ path: Vec>,
+ inner_range: QueryItem,
+ expected_count: u64,
+ grove_version: &GroveVersion,
+ ) {
+ let path_query = PathQuery::new_aggregate_count_on_range(path, inner_range);
+ let proof = db
+ .grove_db
+ .prove_query(&path_query, None, grove_version)
+ .unwrap()
+ .expect("prove_query should succeed");
+ let (root, count) =
+ GroveDb::verify_aggregate_count_query(&proof, &path_query, grove_version)
+ .expect("verify should succeed");
+ assert_eq!(root, expected_root, "verifier reconstructed wrong root");
+ assert_eq!(count, expected_count, "verifier returned wrong count");
+ }
+
+ #[test]
+ fn provable_count_tree_range_inclusive() {
+ let v = GroveVersion::latest();
+ let (db, root) = setup_15_key_provable_count_tree(v);
+ round_trip(
+ &db,
+ root,
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()),
+ 10,
+ v,
+ );
+ }
+
+ #[test]
+ fn provable_count_tree_range_exclusive() {
+ let v = GroveVersion::latest();
+ let (db, root) = setup_15_key_provable_count_tree(v);
+ round_trip(
+ &db,
+ root,
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::Range(b"c".to_vec()..b"l".to_vec()),
+ 9,
+ v,
+ );
+ }
+
+ #[test]
+ fn provable_count_tree_range_from() {
+ let v = GroveVersion::latest();
+ let (db, root) = setup_15_key_provable_count_tree(v);
+ round_trip(
+ &db,
+ root,
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeFrom(b"c".to_vec()..),
+ 13,
+ v,
+ );
+ }
+
+ #[test]
+ fn provable_count_tree_range_after() {
+ let v = GroveVersion::latest();
+ let (db, root) = setup_15_key_provable_count_tree(v);
+ round_trip(
+ &db,
+ root,
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeAfter(b"b".to_vec()..),
+ 13,
+ v,
+ );
+ }
+
+ #[test]
+ fn provable_count_tree_range_to_inclusive() {
+ let v = GroveVersion::latest();
+ let (db, root) = setup_15_key_provable_count_tree(v);
+ round_trip(
+ &db,
+ root,
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeToInclusive(..=b"e".to_vec()),
+ 5,
+ v,
+ );
+ }
+
+ #[test]
+ fn provable_count_tree_range_below_all() {
+ let v = GroveVersion::latest();
+ let (db, root) = setup_15_key_provable_count_tree(v);
+ round_trip(
+ &db,
+ root,
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeInclusive(vec![0x00]..=vec![0x10]),
+ 0,
+ v,
+ );
+ }
+
+ #[test]
+ fn provable_count_sum_tree_range_inclusive() {
+ let v = GroveVersion::latest();
+ let (db, root) = setup_15_key_provable_count_sum_tree(v);
+ round_trip(
+ &db,
+ root,
+ vec![TEST_LEAF.to_vec(), b"cst".to_vec()],
+ QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()),
+ 10,
+ v,
+ );
+ }
+
+ #[test]
+ fn rejects_invalid_range_at_construction() {
+ // A path-query with an inner Key item should be rejected at
+ // validation time, before any proof generation runs.
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::Key(b"c".to_vec()),
+ );
+ let err = path_query.validate_aggregate_count_on_range();
+ assert!(err.is_err(), "Key inner should be rejected");
+ }
+
+ #[test]
+ fn rejects_inner_range_full() {
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeFull(std::ops::RangeFull),
+ );
+ assert!(path_query.validate_aggregate_count_on_range().is_err());
+ }
+
+ #[test]
+ fn rejects_against_normal_tree() {
+ // Querying a NormalTree with AggregateCountOnRange should fail at
+ // proof time with an InvalidProofError from the merk layer. We need
+ // at least one element in the target normal tree so that the
+ // multi-layer proof generator actually recurses into it (empty
+ // trees are returned as result rows without a lower-layer descent).
+ let v = GroveVersion::latest();
+ let db = make_test_grovedb(v);
+ db.insert(
+ [TEST_LEAF].as_ref(),
+ b"x",
+ Element::new_item(b"y".to_vec()),
+ None,
+ None,
+ v,
+ )
+ .unwrap()
+ .expect("seed normal tree");
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec()],
+ QueryItem::Range(b"a".to_vec()..b"z".to_vec()),
+ );
+ let proof_result = db.grove_db.prove_query(&path_query, None, v).unwrap();
+ assert!(
+ proof_result.is_err(),
+ "expected prove_query to fail on NormalTree, got {:?}",
+ proof_result.ok().map(|b| b.len())
+ );
+ }
+
+ #[test]
+ fn count_forgery_is_caught_at_grovedb_level() {
+ // End-to-end version of the merk-level forgery test: parse the
+ // GroveDB envelope, descend to the leaf merk proof, find a real
+ // HashWithCount op at a true op boundary, bump its count, re-encode
+ // — and the GroveDB verifier should reject the resulting proof
+ // (root mismatch in the layer chain).
+ //
+ // We parse rather than scan-for-byte to ensure we are mutating an
+ // actual count varint and not, say, a 0x1e byte that happens to live
+ // inside one of the embedded 32-byte hashes.
+ let v = GroveVersion::latest();
+ let (db, _expected_root) = setup_15_key_provable_count_tree(v);
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()),
+ );
+ let proof = db
+ .grove_db
+ .prove_query(&path_query, None, v)
+ .unwrap()
+ .expect("prove_query should succeed");
+
+ let tampered = tamper_leaf_count(&proof, &path_query)
+ .expect("expected at least one HashWithCount in the leaf merk proof");
+
+ let verify_result = GroveDb::verify_aggregate_count_query(&tampered, &path_query, v);
+ assert!(
+ verify_result.is_err(),
+ "tampered count must be rejected at the GroveDB verifier level, got {:?}",
+ verify_result.map(|(_, c)| c)
+ );
+ }
+
+ /// Decode the GroveDB proof envelope, walk down to the leaf merk proof
+ /// bytes (V0: `MerkOnlyLayerProof`; V1: `LayerProof` with
+ /// `ProofBytes::Merk`), parse the merk proof into ops at true op
+ /// boundaries, increment the `count` of the first `HashWithCount` op,
+ /// and re-encode the whole envelope.
+ ///
+ /// Returns `None` if no `HashWithCount` is present in the leaf merk
+ /// proof — the test treats that as an invalid precondition.
+ fn tamper_leaf_count(proof: &[u8], path_query: &PathQuery) -> Option<Vec<u8>> {
+ use bincode::config;
+ use grovedb_merk::proofs::{encoding::encode_into, Decoder, Node, Op};
+
+ use crate::operations::proof::{
+ GroveDBProof, GroveDBProofV0, GroveDBProofV1, LayerProof, MerkOnlyLayerProof,
+ ProofBytes,
+ };
+
+ let cfg = config::standard()
+ .with_big_endian()
+ .with_limit::<{ 256 * 1024 * 1024 }>();
+ let (mut decoded, _): (GroveDBProof, _) = bincode::decode_from_slice(proof, cfg).ok()?;
+
+ // Descend through the path layers to obtain a mutable ref to the
+ // leaf merk proof bytes.
+ let leaf_bytes: &mut Vec<u8> = match &mut decoded {
+ GroveDBProof::V0(GroveDBProofV0 { root_layer, .. }) => {
+ let mut layer: &mut MerkOnlyLayerProof = root_layer;
+ for key in &path_query.path {
+ layer = layer.lower_layers.get_mut(key)?;
+ }
+ &mut layer.merk_proof
+ }
+ GroveDBProof::V1(GroveDBProofV1 { root_layer }) => {
+ let mut layer: &mut LayerProof = root_layer;
+ for key in &path_query.path {
+ layer = layer.lower_layers.get_mut(key)?;
+ }
+ match &mut layer.merk_proof {
+ ProofBytes::Merk(b) => b,
+ _ => return None,
+ }
+ }
+ };
+
+ // Parse the merk proof into ops, mutate the first HashWithCount,
+ // re-encode.
+ let mut ops: Vec<Op> = Vec::new();
+ for op in Decoder::new(leaf_bytes) {
+ ops.push(op.ok()?);
+ }
+
+ let mut tampered = false;
+ for op in ops.iter_mut() {
+ match op {
+ Op::Push(Node::HashWithCount(_, _, _, count))
+ | Op::PushInverted(Node::HashWithCount(_, _, _, count)) => {
+ *count = count.wrapping_add(1);
+ tampered = true;
+ break;
+ }
+ _ => {}
+ }
+ }
+ if !tampered {
+ return None;
+ }
+
+ let mut new_leaf = Vec::new();
+ encode_into(ops.iter(), &mut new_leaf);
+ *leaf_bytes = new_leaf;
+
+ bincode::encode_to_vec(
+ decoded,
+ config::standard().with_big_endian().with_no_limit(),
+ )
+ .ok()
+ }
+
+ /// Build a 3-layer path: TEST_LEAF -> "outer" (NormalTree) ->
+ /// "inner" (ProvableCountTree) populated with 5 keys "a".."e".
+ fn setup_three_layer_provable_count_tree(
+ grove_version: &GroveVersion,
+ ) -> (crate::tests::TempGroveDb, [u8; 32]) {
+ let db = make_test_grovedb(grove_version);
+ db.insert(
+ [TEST_LEAF].as_ref(),
+ b"outer",
+ Element::empty_tree(),
+ None,
+ None,
+ grove_version,
+ )
+ .unwrap()
+ .expect("insert outer");
+ db.insert(
+ [TEST_LEAF, b"outer"].as_ref(),
+ b"inner",
+ Element::empty_provable_count_tree(),
+ None,
+ None,
+ grove_version,
+ )
+ .unwrap()
+ .expect("insert inner");
+ for c in b'a'..=b'e' {
+ db.insert(
+ [TEST_LEAF, b"outer", b"inner"].as_ref(),
+ &[c],
+ Element::new_item(vec![c]),
+ None,
+ None,
+ grove_version,
+ )
+ .unwrap()
+ .expect("insert leaf");
+ }
+ let root = db
+ .grove_db
+ .root_hash(None, grove_version)
+ .unwrap()
+ .expect("root_hash");
+ (db, root)
+ }
+
+ #[test]
+ fn three_layer_path_round_trip() {
+ // Exercises the multi-layer chain enforcement: layer 0 proves TEST_LEAF
+ // exists, layer 1 proves "outer" exists in TEST_LEAF, layer 2 proves
+ // "inner" exists in outer, layer 3 is the count proof on inner.
+ let v = GroveVersion::latest();
+ let (db, root) = setup_three_layer_provable_count_tree(v);
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"outer".to_vec(), b"inner".to_vec()],
+ QueryItem::RangeInclusive(b"b".to_vec()..=b"d".to_vec()),
+ );
+ let proof = db
+ .grove_db
+ .prove_query(&path_query, None, v)
+ .unwrap()
+ .expect("prove_query should succeed");
+ let (got_root, got_count) = GroveDb::verify_aggregate_count_query(&proof, &path_query, v)
+ .expect("verify should succeed");
+ assert_eq!(got_root, root, "verifier root must match GroveDB root");
+ assert_eq!(got_count, 3, "expected count of {{b, c, d}}");
+ }
+
+ /// Helper for non-leaf-layer proof mutation tests: decode the V1
+ /// envelope, walk to the TEST_LEAF non-leaf merk proof bytes, run
+ /// `mutate` over its parsed ops, re-encode the merk proof and the
+ /// envelope. Returns the mutated bytes.
+ fn mutate_test_leaf_layer_ops(
+ proof: &[u8],
+ mutate: impl FnOnce(&mut Vec<Op>),
+ ) -> Vec<u8> {
+ use grovedb_merk::proofs::{encoding::encode_into, Decoder, Op};
+
+ use crate::operations::proof::{GroveDBProof, GroveDBProofV1, ProofBytes};
+
+ let mut decoded = decode_envelope(proof);
+ let GroveDBProof::V1(GroveDBProofV1 { root_layer }) = &mut decoded else {
+ panic!("expected V1 envelope");
+ };
+ let test_leaf_layer = root_layer
+ .lower_layers
+ .get_mut(&TEST_LEAF.to_vec())
+ .expect("TEST_LEAF lower layer");
+ let bytes = match &mut test_leaf_layer.merk_proof {
+ ProofBytes::Merk(b) => b,
+ _ => panic!("expected Merk bytes at TEST_LEAF non-leaf"),
+ };
+ let mut ops: Vec<Op> = Decoder::new(bytes)
+ .map(|r| r.expect("decode existing op"))
+ .collect();
+ mutate(&mut ops);
+ let mut new_bytes = Vec::new();
+ encode_into(ops.iter(), &mut new_bytes);
+ *bytes = new_bytes;
+ reencode_envelope(decoded)
+ }
+
+ #[test]
+ fn non_leaf_proof_without_target_key_is_rejected() {
+ // Mutate the TEST_LEAF non-leaf proof: replace the KV op carrying
+ // the "ct" key with a Hash op carrying that node's hash. Phase 1
+ // decodes successfully, the merk single-key verifier returns Ok
+ // with an empty result_set (no KV with matching key), and the
+ // GroveDB-level verifier surfaces "did not contain the expected
+ // key" via the `ok_or_else` arm.
+ use grovedb_merk::proofs::{Node, Op};
+
+ let v = GroveVersion::latest();
+ let (db, _root) = setup_15_key_provable_count_tree(v);
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()),
+ );
+ let proof = db
+ .grove_db
+ .prove_query(&path_query, None, v)
+ .unwrap()
+ .expect("prove_query should succeed");
+ let mutated = mutate_test_leaf_layer_ops(&proof, |ops| {
+ for op in ops.iter_mut() {
+ let key_match = matches!(
+ op,
+ Op::Push(
+ Node::KV(k, _)
+ | Node::KVValueHash(k, _, _)
+ | Node::KVValueHashFeatureType(k, _, _, _)
+ | Node::KVValueHashFeatureTypeWithChildHash(k, _, _, _, _)
+ )
+ | Op::PushInverted(
+ Node::KV(k, _)
+ | Node::KVValueHash(k, _, _)
+ | Node::KVValueHashFeatureType(k, _, _, _)
+ | Node::KVValueHashFeatureTypeWithChildHash(k, _, _, _, _)
+ ) if k == b"ct"
+ );
+ if key_match {
+ *op = Op::Push(Node::Hash([0u8; 32]));
+ return;
+ }
+ }
+ panic!("test setup: no `ct` KV op found in non-leaf proof");
+ });
+ let err = GroveDb::verify_aggregate_count_query(&mutated, &path_query, v)
+ .expect_err("missing target key in non-leaf proof must be rejected");
+ match err {
+ crate::Error::InvalidProof(_, msg) => assert!(
+ // Either Phase 2 catches "did not contain the expected key"
+ // or the upstream merk single-key verifier fails first
+ // because the swapped Hash makes the proof invalid; either
+ // outcome closes the surface.
+ msg.contains("did not contain the expected key")
+ || msg.contains("non-leaf single-key proof"),
+ "unexpected message: {msg}"
+ ),
+ other => panic!("expected InvalidProof, got {:?}", other),
+ }
+ }
+
+ #[test]
+ fn non_leaf_proof_with_kv_replaced_by_kvdigest_is_rejected() {
+ // Replace "ct" KV in the non-leaf proof with a KVDigest variant
+ // (key + value_hash, no value). The result_set will contain "ct"
+ // but with `value = None`, hitting the "no value bytes" arm of
+ // `verify_single_key_layer_proof_v0`.
+ use grovedb_merk::proofs::{Node, Op};
+
+ let v = GroveVersion::latest();
+ let (db, _root) = setup_15_key_provable_count_tree(v);
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()),
+ );
+ let proof = db
+ .grove_db
+ .prove_query(&path_query, None, v)
+ .unwrap()
+ .expect("prove_query should succeed");
+ let mutated = mutate_test_leaf_layer_ops(&proof, |ops| {
+ for op in ops.iter_mut() {
+ let replaced = match op {
+ Op::Push(Node::KVValueHash(k, _, vh))
+ | Op::PushInverted(Node::KVValueHash(k, _, vh))
+ if k == b"ct" =>
+ {
+ Some((k.clone(), *vh))
+ }
+ Op::Push(Node::KVValueHashFeatureType(k, _, vh, _))
+ | Op::PushInverted(Node::KVValueHashFeatureType(k, _, vh, _))
+ if k == b"ct" =>
+ {
+ Some((k.clone(), *vh))
+ }
+ Op::Push(Node::KVValueHashFeatureTypeWithChildHash(k, _, vh, _, _))
+ | Op::PushInverted(Node::KVValueHashFeatureTypeWithChildHash(k, _, vh, _, _))
+ if k == b"ct" =>
+ {
+ Some((k.clone(), *vh))
+ }
+ _ => None,
+ };
+ if let Some((k, vh)) = replaced {
+ *op = Op::Push(Node::KVDigest(k, vh));
+ return;
+ }
+ }
+ panic!("test setup: no `ct` KVValueHash-flavored op found in non-leaf proof");
+ });
+ let result = GroveDb::verify_aggregate_count_query(&mutated, &path_query, v);
+ // Either we hit the "no value bytes" arm (line 295-302) or the
+ // merk single-key verifier itself rejects the type swap. Both
+ // are valid — both close the attack surface.
+ match result {
+ Err(crate::Error::InvalidProof(_, _)) => {}
+ other => panic!("expected InvalidProof, got {:?}", other),
+ }
+ }
+
+ #[test]
+ fn non_leaf_proof_with_undeserializable_value_is_rejected() {
+ // Mutate the "ct" KV node's value bytes to garbage that fails
+ // `Element::deserialize`. The merk single-key verifier still
+ // returns Ok (it just hashes the bytes — it doesn't deserialize),
+ // so enforce_lower_chain hits the deserialize-failure arm.
+ use grovedb_merk::proofs::{Node, Op};
+
+ let v = GroveVersion::latest();
+ let (db, _root) = setup_15_key_provable_count_tree(v);
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()),
+ );
+ let proof = db
+ .grove_db
+ .prove_query(&path_query, None, v)
+ .unwrap()
+ .expect("prove_query should succeed");
+ // Garbage that no Element variant tag matches.
+ let garbage: Vec<u8> = vec![0xff, 0xff, 0xff];
+ let mutated = mutate_test_leaf_layer_ops(&proof, |ops| {
+ for op in ops.iter_mut() {
+ let replaced = match op {
+ Op::Push(Node::KVValueHash(k, val, _))
+ | Op::PushInverted(Node::KVValueHash(k, val, _))
+ if k == b"ct" =>
+ {
+ *val = garbage.clone();
+ true
+ }
+ Op::Push(Node::KVValueHashFeatureType(k, val, _, _))
+ | Op::PushInverted(Node::KVValueHashFeatureType(k, val, _, _))
+ if k == b"ct" =>
+ {
+ *val = garbage.clone();
+ true
+ }
+ Op::Push(Node::KVValueHashFeatureTypeWithChildHash(k, val, _, _, _))
+ | Op::PushInverted(Node::KVValueHashFeatureTypeWithChildHash(
+ k,
+ val,
+ _,
+ _,
+ _,
+ )) if k == b"ct" => {
+ *val = garbage.clone();
+ true
+ }
+ _ => false,
+ };
+ if replaced {
+ return;
+ }
+ }
+ panic!("test setup: no `ct` value-bearing KV op found in non-leaf proof");
+ });
+ let result = GroveDb::verify_aggregate_count_query(&mutated, &path_query, v);
+ // Either the deserialize arm fires (line 330-338) or the chain
+ // mismatch fires first (because mutating value bytes also breaks
+ // the value_hash binding committed by the parent). Either rejects.
+ assert!(
+ matches!(result, Err(crate::Error::InvalidProof(_, _))),
+ "mutated value bytes must be rejected, got {:?}",
+ result.map(|(_, c)| c)
+ );
+ }
+
+ #[test]
+ fn non_leaf_proof_with_non_tree_element_is_rejected() {
+ // Mutate the "ct" value bytes to a serialized non-tree Element
+ // (Item). This deserializes successfully, but enforce_lower_chain's
+ // `is_any_tree()` guard rejects: aggregate-count proofs can only
+ // descend through tree elements.
+ use grovedb_merk::proofs::{Node, Op};
+
+ let v = GroveVersion::latest();
+ let (db, _root) = setup_15_key_provable_count_tree(v);
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()),
+ );
+ let proof = db
+ .grove_db
+ .prove_query(&path_query, None, v)
+ .unwrap()
+ .expect("prove_query should succeed");
+ let item_bytes = Element::new_item(vec![0xab, 0xcd])
+ .serialize(v)
+ .expect("serialize item");
+ let mutated = mutate_test_leaf_layer_ops(&proof, |ops| {
+ for op in ops.iter_mut() {
+ let replaced = match op {
+ Op::Push(Node::KVValueHash(k, val, _))
+ | Op::PushInverted(Node::KVValueHash(k, val, _))
+ if k == b"ct" =>
+ {
+ *val = item_bytes.clone();
+ true
+ }
+ Op::Push(Node::KVValueHashFeatureType(k, val, _, _))
+ | Op::PushInverted(Node::KVValueHashFeatureType(k, val, _, _))
+ if k == b"ct" =>
+ {
+ *val = item_bytes.clone();
+ true
+ }
+ Op::Push(Node::KVValueHashFeatureTypeWithChildHash(k, val, _, _, _))
+ | Op::PushInverted(Node::KVValueHashFeatureTypeWithChildHash(
+ k,
+ val,
+ _,
+ _,
+ _,
+ )) if k == b"ct" => {
+ *val = item_bytes.clone();
+ true
+ }
+ _ => false,
+ };
+ if replaced {
+ return;
+ }
+ }
+ panic!("test setup: no `ct` value-bearing KV op found in non-leaf proof");
+ });
+ let result = GroveDb::verify_aggregate_count_query(&mutated, &path_query, v);
+ // Either the non-tree branch fires (line 341-349) or the chain
+ // hash check fails first (value_hash for the swapped item bytes
+ // diverges from the parent's commitment). Either rejects.
+ assert!(
+ matches!(result, Err(crate::Error::InvalidProof(_, _))),
+ "non-tree element on path must be rejected, got {:?}",
+ result.map(|(_, c)| c)
+ );
+ }
+
+ #[test]
+ fn aggregate_count_with_missing_path_and_invalid_inner_is_rejected_at_entry() {
+ // Codex finding: validation only fires inside `prove_subqueries` when
+ // the recursion reaches the ACOR-bearing leaf level. If the path
+ // doesn't exist (e.g. "missing" key under TEST_LEAF), the recursive
+ // prover never sees the ACOR item and the malformed query is allowed
+ // to return a regular path/absence proof. Fix: validate at the
+ // `prove_query` entry point, before any recursive dispatch.
+ let v = GroveVersion::latest();
+ let db = make_test_grovedb(v);
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"missing".to_vec()],
+ // QueryItem::Key as the inner range is invalid for ACOR.
+ QueryItem::Key(b"k".to_vec()),
+ );
+ let prove_result = db.grove_db.prove_query(&path_query, None, v).unwrap();
+ match prove_result {
+ Err(crate::Error::InvalidQuery(msg)) => {
+ assert!(
+ msg.contains("AggregateCountOnRange may not wrap Key"),
+ "expected ACOR-Key rejection, got: {msg}"
+ );
+ }
+ other => panic!(
+ "malformed ACOR with non-existent path must be rejected at entry, got {:?}",
+ other.map(|b| b.len())
+ ),
+ }
+ }
+
+ #[test]
+ fn aggregate_count_hidden_in_subquery_branch_is_rejected_at_entry() {
+ // Codex's broader concern: an `AggregateCountOnRange` smuggled
+ // inside a `default_subquery_branch.subquery` is also invalid (ACOR
+ // is terminal — it cannot be reached via a normal subquery path)
+ // and must be rejected up front. The recursive detector
+ // `has_aggregate_count_on_range_anywhere` finds the hidden ACOR;
+ // top-level `validate_aggregate_count_on_range` then rejects
+ // because the surrounding query isn't the canonical single-ACOR
+ // shape.
+ let v = GroveVersion::latest();
+ let db = make_test_grovedb(v);
+ let inner_acor = QueryItem::AggregateCountOnRange(Box::new(QueryItem::Range(
+ b"a".to_vec()..b"z".to_vec(),
+ )));
+ let mut sub_query = grovedb_merk::proofs::Query::new();
+ sub_query.insert_item(inner_acor);
+ let mut top_query = grovedb_merk::proofs::Query::new();
+ top_query.insert_range_inclusive(b"a".to_vec()..=b"z".to_vec());
+ top_query.set_subquery(sub_query);
+ let path_query = PathQuery::new(
+ vec![TEST_LEAF.to_vec()],
+ SizedQuery::new(top_query, None, None),
+ );
+ let prove_result = db.grove_db.prove_query(&path_query, None, v).unwrap();
+ assert!(
+ matches!(prove_result, Err(crate::Error::InvalidQuery(_))),
+ "ACOR hidden in subquery branch must be rejected at entry, got {:?}",
+ prove_result.map(|b| b.len())
+ );
+ }
+
+ #[test]
+ fn corrupted_path_layer_byte_is_rejected() {
+ // Tamper with a non-leaf-layer byte (a tree-element value byte) and
+ // verify that the chain enforcement catches it. We pick a byte deep
+ // enough that it lands inside one of the parent merk's KV value bytes.
+ let v = GroveVersion::latest();
+ let (db, _root) = setup_three_layer_provable_count_tree(v);
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"outer".to_vec(), b"inner".to_vec()],
+ QueryItem::RangeInclusive(b"b".to_vec()..=b"d".to_vec()),
+ );
+ let mut proof = db
+ .grove_db
+ .prove_query(&path_query, None, v)
+ .unwrap()
+ .expect("prove_query should succeed");
+        // Flip a byte well inside the proof — the exact location doesn't
+        // matter as long as it isn't the bincode envelope length prefix.
+        // The midpoint is well past the envelope, inside the merk proof bytes.
+ let target = proof.len() / 2;
+ proof[target] = proof[target].wrapping_add(1);
+ let verify_result = GroveDb::verify_aggregate_count_query(&proof, &path_query, v);
+ assert!(
+ verify_result.is_err(),
+ "tampered proof byte must be rejected, got {:?}",
+ verify_result.map(|(_, c)| c)
+ );
+ }
+
+ #[test]
+ fn provable_count_tree_works_on_grove_v2_envelope() {
+ // GROVE_V2 dispatches to the V0 prove_query_non_serialized path, which
+ // produces a `MerkOnlyLayerProof` envelope rather than V1's
+ // `LayerProof`. Verify the same prove → verify cycle works through that
+ // envelope.
+ let v: &GroveVersion = &GROVE_V2;
+ let (db, root) = setup_15_key_provable_count_tree(v);
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()),
+ );
+ let proof = db
+ .grove_db
+ .prove_query(&path_query, None, v)
+ .unwrap()
+ .expect("prove_query (v0 envelope) should succeed");
+ let (got_root, got_count) = GroveDb::verify_aggregate_count_query(&proof, &path_query, v)
+ .expect("verify should succeed against v0 envelope");
+ assert_eq!(got_root, root);
+ assert_eq!(got_count, 10);
+ }
+
+ #[test]
+ fn verify_rejects_malformed_path_query_at_entry() {
+ // Even before any proof bytes are decoded, the verifier rejects a
+ // path_query that isn't a well-formed AggregateCountOnRange query.
+ let v = GroveVersion::latest();
+ let bad_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec()],
+ QueryItem::Key(b"k".to_vec()), // inner Key is not allowed
+ );
+ // Any proof bytes are fine — validation happens before decoding.
+ let dummy_proof = vec![0u8; 16];
+ let err = GroveDb::verify_aggregate_count_query(&dummy_proof, &bad_query, v)
+ .expect_err("malformed path_query must be rejected up front");
+ let s = format!("{:?}", err);
+ assert!(
+ s.contains("Key") || s.contains("InvalidQuery"),
+ "got: {}",
+ s
+ );
+ }
+
+ #[test]
+ fn validate_at_construction_rejects_nested_aggregate_count_on_range() {
+ // Nested AggregateCountOnRange is rejected at validation time.
+ let pq = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::AggregateCountOnRange(Box::new(QueryItem::Range(
+ b"a".to_vec()..b"z".to_vec(),
+ ))),
+ );
+ assert!(pq.validate_aggregate_count_on_range().is_err());
+ }
+
+ /// `Element::NonCounted` wrappers tell the parent tree to **skip** the
+ /// wrapped element when aggregating its own count.
+ /// `AggregateCountOnRange` honors that: NonCounted children are
+ /// excluded from the result.
+ ///
+ /// Mechanics — every node in a `ProvableCountTree` carries an
+ /// own_count of 1 (normal) or 0 (NonCounted). The merk-recorded
+ /// aggregate at any subtree = sum of own_counts in the subtree
+ /// (NonCounted entries contribute 0). The verifier's shape walk
+ /// derives each boundary node's own_count as
+ /// `node_aggregate − left_struct − right_struct` and credits **only
+ /// own_count** to the in-range total when the key falls in range.
+ /// For a NonCounted leaf, own_count = 0 and the wrapped key
+ /// contributes nothing. The structural counts threaded through the
+ /// walk are hash-bound at every step (every count-bearing proof node
+ /// feeds its count into `node_hash_with_count`), so a malicious
+ /// prover can't lie about a NonCounted node's status without
+ /// breaking the parent's hash chain.
+ #[test]
+ fn non_counted_children_are_excluded_from_aggregate_count() {
+ use crate::tests::TEST_LEAF;
+
+ let v = GroveVersion::latest();
+ let db = make_test_grovedb(v);
+ db.insert(
+ [TEST_LEAF].as_ref(),
+ b"ct",
+ Element::empty_provable_count_tree(),
+ None,
+ None,
+ v,
+ )
+ .unwrap()
+ .expect("insert ct");
+
+ // Five regular items — each contributes 1.
+ for c in [b'a', b'b', b'c', b'd', b'e'] {
+ db.insert(
+ [TEST_LEAF, b"ct"].as_ref(),
+ &[c],
+ Element::new_item(vec![c]),
+ None,
+ None,
+ v,
+ )
+ .unwrap()
+ .expect("insert regular item");
+ }
+
+ // One NonCounted-wrapped item, key "f" — in-range but contributes
+ // 0 (own_count = 0).
+ let nc_item =
+ Element::new_non_counted(Element::new_item(b"hidden".to_vec())).expect("wrap ok");
+ db.insert([TEST_LEAF, b"ct"].as_ref(), b"f", nc_item, None, None, v)
+ .unwrap()
+ .expect("insert NonCounted item");
+
+ let root = db.grove_db.root_hash(None, v).unwrap().expect("root_hash");
+
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeInclusive(b"a".to_vec()..=b"z".to_vec()),
+ );
+ let proof = db
+ .grove_db
+ .prove_query(&path_query, None, v)
+ .unwrap()
+ .expect("prove");
+ let (got_root, got_count) =
+ GroveDb::verify_aggregate_count_query(&proof, &path_query, v).expect("verify");
+ assert_eq!(got_root, root, "root mismatch");
+ assert_eq!(
+ got_count, 5,
+ "NonCounted-wrapped child must be excluded from the aggregate count"
+ );
+ }
+
+ /// Pin observable cost numbers + proof byte size for a known input so
+ /// regressions in the proof shape (extra unnecessary nodes, missing
+ /// short-circuit, etc.) show up as a test failure instead of as a
+ /// silent perf hit. Values are exact for the 15-key
+ /// `ProvableCountTree` + `RangeInclusive("c"..="l")` setup; if the
+ /// proof shape changes intentionally, update them here.
+ #[test]
+ fn proof_size_snapshot_for_15_key_closed_range() {
+ let v = GroveVersion::latest();
+ let (db, _root) = setup_15_key_provable_count_tree(v);
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()),
+ );
+ let proof = db
+ .grove_db
+ .prove_query(&path_query, None, v)
+ .unwrap()
+ .expect("prove");
+
+ // Snapshot the proof byte size. The current shape produces a small
+ // deterministic byte stream; if this drifts upward without
+ // intent, the proof shape may have regressed.
+ //
+ // The acceptable range is conservative — we only require the
+ // proof stays bounded by what an O(log n) shape predicts for a
+ // 4-level tree (a few hundred bytes is the right ballpark; many
+ // KB would indicate the count short-circuit didn't fire). The
+ // *current* size is around 650 bytes; a few hundred bytes of
+ // headroom in either direction tolerates encoding tweaks but
+ // catches gross regressions.
+ let len = proof.len();
+ assert!(
+ (300..=900).contains(&len),
+ "aggregate-count proof size {} bytes is outside the expected \
+ [300, 900] window for a 15-key 2-layer query — proof shape \
+ may have regressed",
+ len
+ );
+
+ // Round-trip through the verifier as a sanity check that the
+ // pinned shape is still verifiable.
+ let (_root, count) =
+ GroveDb::verify_aggregate_count_query(&proof, &path_query, v).expect("verify");
+ assert_eq!(count, 10);
+ }
+
+ /// Re-encode a (possibly mutated) `GroveDBProof` envelope using the same
+ /// bincode config the prover uses on the way out.
+ fn reencode_envelope(decoded: crate::operations::proof::GroveDBProof) -> Vec {
+ bincode::encode_to_vec(
+ decoded,
+ bincode::config::standard()
+ .with_big_endian()
+ .with_no_limit(),
+ )
+ .expect("re-encode envelope")
+ }
+
+ fn decode_envelope(proof: &[u8]) -> crate::operations::proof::GroveDBProof {
+ bincode::decode_from_slice(
+ proof,
+ bincode::config::standard()
+ .with_big_endian()
+ .with_limit::<{ 256 * 1024 * 1024 }>(),
+ )
+ .expect("decode envelope")
+ .0
+ }
+
+ #[test]
+ fn v1_envelope_with_non_merk_proof_bytes_is_rejected() {
+ // The verifier's V1 layer walker only accepts `ProofBytes::Merk(_)`
+ // for aggregate-count proofs (other tree types — MMR / BulkAppend /
+ // Dense / CommitmentTree — cannot host provable count subtrees). If
+ // we swap the leaf layer's bytes for an `MMR(_)` variant, verification
+ // must fail with an `InvalidProof` error rather than silently
+ // succeed or panic.
+ use crate::operations::proof::{GroveDBProof, GroveDBProofV1, ProofBytes};
+
+ let v = GroveVersion::latest();
+ let (db, _root) = setup_15_key_provable_count_tree(v);
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()),
+ );
+ let proof = db
+ .grove_db
+ .prove_query(&path_query, None, v)
+ .unwrap()
+ .expect("prove_query should succeed");
+
+ let mut decoded = decode_envelope(&proof);
+ let GroveDBProof::V1(GroveDBProofV1 { root_layer }) = &mut decoded else {
+ panic!("expected V1 envelope on latest GroveVersion");
+ };
+
+ // Walk to the leaf layer (depth = path.len()) and swap its bytes
+ // for an MMR variant.
+ let leaf_layer = root_layer
+ .lower_layers
+ .get_mut(&TEST_LEAF.to_vec())
+ .expect("TEST_LEAF lower layer")
+ .lower_layers
+ .get_mut(&b"ct".to_vec())
+ .expect("ct lower layer");
+ leaf_layer.merk_proof = ProofBytes::MMR(vec![0u8; 8]);
+
+ let reencoded = reencode_envelope(decoded);
+ let err = GroveDb::verify_aggregate_count_query(&reencoded, &path_query, v)
+ .expect_err("non-Merk leaf bytes must be rejected");
+ match err {
+ crate::Error::InvalidProof(_, msg) => {
+ assert!(
+ msg.contains("non-merk"),
+ "expected non-merk rejection, got: {msg}"
+ );
+ }
+ other => panic!("expected InvalidProof, got {:?}", other),
+ }
+ }
+
+ #[test]
+ fn v1_envelope_with_missing_lower_layer_is_rejected() {
+ // The verifier expects a `lower_layers` entry for each non-leaf
+ // path key. If the prover (or an attacker) drops one, verification
+ // must fail rather than silently descend through a stub.
+ use crate::operations::proof::{GroveDBProof, GroveDBProofV1};
+
+ let v = GroveVersion::latest();
+ let (db, _root) = setup_15_key_provable_count_tree(v);
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()),
+ );
+ let proof = db
+ .grove_db
+ .prove_query(&path_query, None, v)
+ .unwrap()
+ .expect("prove_query should succeed");
+
+ let mut decoded = decode_envelope(&proof);
+ let GroveDBProof::V1(GroveDBProofV1 { root_layer }) = &mut decoded else {
+ panic!("expected V1 envelope on latest GroveVersion");
+ };
+ let test_leaf_layer = root_layer
+ .lower_layers
+ .get_mut(&TEST_LEAF.to_vec())
+ .expect("TEST_LEAF lower layer");
+ // Drop the leaf layer's pointer entry.
+ let removed = test_leaf_layer.lower_layers.remove(&b"ct".to_vec());
+ assert!(removed.is_some(), "test setup: ct layer should exist");
+
+ let reencoded = reencode_envelope(decoded);
+ let err = GroveDb::verify_aggregate_count_query(&reencoded, &path_query, v)
+ .expect_err("missing lower_layer must be rejected");
+ match err {
+ crate::Error::InvalidProof(_, msg) => {
+ assert!(
+ msg.contains("missing lower layer"),
+ "expected missing-lower-layer rejection, got: {msg}"
+ );
+ }
+ other => panic!("expected InvalidProof, got {:?}", other),
+ }
+ }
+
+ #[test]
+ fn v1_envelope_with_malformed_leaf_count_proof_is_rejected() {
+ // Replace the leaf merk proof bytes with a single Push(Hash(...))
+ // op stream. Phase 1 of the count verifier rejects plain `Hash` as
+ // a non-allowlisted node type, so `verify_count_leaf` surfaces an
+ // `InvalidProof` error via its `.map_err(...)` arm rather than
+ // ever reaching the chain check.
+ use std::collections::LinkedList;
+
+ use grovedb_merk::proofs::{encoding::encode_into, Node, Op};
+
+ use crate::operations::proof::{GroveDBProof, GroveDBProofV1, ProofBytes};
+
+ let v = GroveVersion::latest();
+ let (db, _root) = setup_15_key_provable_count_tree(v);
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()),
+ );
+ let proof = db
+ .grove_db
+ .prove_query(&path_query, None, v)
+ .unwrap()
+ .expect("prove_query should succeed");
+
+ let mut decoded = decode_envelope(&proof);
+ let GroveDBProof::V1(GroveDBProofV1 { root_layer }) = &mut decoded else {
+ panic!("expected V1 envelope");
+ };
+ let leaf_layer = root_layer
+ .lower_layers
+ .get_mut(&TEST_LEAF.to_vec())
+ .expect("TEST_LEAF lower layer")
+ .lower_layers
+ .get_mut(&b"ct".to_vec())
+ .expect("ct lower layer");
+
+ // Build a malformed (but parseable) merk proof: a single Push(Hash)
+ // that the count verifier's Phase 1 rejects.
+ let mut ops: LinkedList = LinkedList::new();
+ ops.push_back(Op::Push(Node::Hash([0u8; 32])));
+ let mut bad_bytes = Vec::new();
+ encode_into(ops.iter(), &mut bad_bytes);
+ leaf_layer.merk_proof = ProofBytes::Merk(bad_bytes);
+
+ let reencoded = reencode_envelope(decoded);
+ let err = GroveDb::verify_aggregate_count_query(&reencoded, &path_query, v)
+ .expect_err("malformed leaf count proof must be rejected");
+ match err {
+ crate::Error::InvalidProof(_, msg) => {
+ assert!(
+ msg.contains("aggregate-count leaf proof failed to verify"),
+ "expected leaf-verify failure message, got: {msg}"
+ );
+ }
+ other => panic!("expected InvalidProof, got {:?}", other),
+ }
+ }
+
+ #[test]
+ fn v1_envelope_with_corrupted_non_leaf_merk_bytes_is_rejected() {
+ // Mutate the non-leaf merk proof bytes (the layer that proves
+ // existence of the "ct" tree element under TEST_LEAF). The
+ // single-key proof verification at that layer should fail before
+ // we ever descend to the leaf count proof.
+ use crate::operations::proof::{GroveDBProof, GroveDBProofV1, ProofBytes};
+
+ let v = GroveVersion::latest();
+ let (db, _root) = setup_15_key_provable_count_tree(v);
+ let path_query = PathQuery::new_aggregate_count_on_range(
+ vec![TEST_LEAF.to_vec(), b"ct".to_vec()],
+ QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()),
+ );
+ let proof = db
+ .grove_db
+ .prove_query(&path_query, None, v)
+ .unwrap()
+ .expect("prove_query should succeed");
+
+ let mut decoded = decode_envelope(&proof);
+ let GroveDBProof::V1(GroveDBProofV1 { root_layer }) = &mut decoded else {
+ panic!("expected V1 envelope");
+ };
+        // Corrupt the TEST_LEAF non-leaf merk proof bytes by replacing them
+        // with a 1-byte payload, which fails to decode as a proof op stream.
+ let test_leaf_layer = root_layer
+ .lower_layers
+ .get_mut(&TEST_LEAF.to_vec())
+ .expect("TEST_LEAF lower layer");
+ match &mut test_leaf_layer.merk_proof {
+ ProofBytes::Merk(b) => {
+ *b = vec![0xff];
+ }
+ other => panic!(
+ "expected Merk bytes at non-leaf, got discriminant {:?}",
+ std::mem::discriminant(other)
+ ),
+ }
+
+ let reencoded = reencode_envelope(decoded);
+ let err = GroveDb::verify_aggregate_count_query(&reencoded, &path_query, v)
+ .expect_err("corrupted non-leaf merk bytes must be rejected");
+ match err {
+ crate::Error::InvalidProof(_, _) => {}
+ other => panic!("expected InvalidProof, got {:?}", other),
+ }
+ }
+}
diff --git a/grovedb/src/tests/mod.rs b/grovedb/src/tests/mod.rs
index 75f6db21f..1aded513f 100644
--- a/grovedb/src/tests/mod.rs
+++ b/grovedb/src/tests/mod.rs
@@ -6,6 +6,7 @@ mod query_tests;
mod sum_tree_tests;
+mod aggregate_count_query_tests;
mod batch_coverage_tests;
mod batch_delete_tree_tests;
mod batch_rejection_tests;
diff --git a/grovedb/src/tests/provable_count_sum_tree_tests.rs b/grovedb/src/tests/provable_count_sum_tree_tests.rs
index e4cb6aff9..8bee9f4b9 100644
--- a/grovedb/src/tests/provable_count_sum_tree_tests.rs
+++ b/grovedb/src/tests/provable_count_sum_tree_tests.rs
@@ -80,6 +80,9 @@ mod tests {
Node::KVRefValueHashCount(k, ..) => k.clone(),
Node::KVHashCount(..) => vec![],
Node::Hash(_) | Node::KVHash(_) => vec![],
+ // HashWithCount is keyless (collapsed subtree representation
+ // for AggregateCountOnRange proofs).
+ Node::HashWithCount(..) => vec![],
};
results.push((key, count));
}
diff --git a/merk/benches/branch_queries.rs b/merk/benches/branch_queries.rs
index 69067f501..382a671fe 100644
--- a/merk/benches/branch_queries.rs
+++ b/merk/benches/branch_queries.rs
@@ -233,7 +233,7 @@ fn get_key_from_node(node: &Node) -> Option> {
Node::KVRefValueHash(key, ..) => Some(key.clone()),
Node::KVCount(key, ..) => Some(key.clone()),
Node::KVRefValueHashCount(key, ..) => Some(key.clone()),
- Node::Hash(_) | Node::KVHash(_) | Node::KVHashCount(..) => None,
+ Node::Hash(_) | Node::KVHash(_) | Node::KVHashCount(..) | Node::HashWithCount(..) => None,
}
}
diff --git a/merk/src/merk/chunks.rs b/merk/src/merk/chunks.rs
index f74fb005c..6e383ce08 100644
--- a/merk/src/merk/chunks.rs
+++ b/merk/src/merk/chunks.rs
@@ -487,6 +487,9 @@ mod test {
Node::KVCount(..) => counts.kv += 1,
Node::KVHashCount(..) => counts.kv_hash += 1,
Node::KVRefValueHashCount(..) => counts.kv_ref_value_hash += 1,
+ // HashWithCount is hash-equivalent to Hash for the verifier;
+ // count it under `hash` for the test counter.
+ Node::HashWithCount(..) => counts.hash += 1,
};
});
diff --git a/merk/src/merk/prove.rs b/merk/src/merk/prove.rs
index 79c668f18..151098cf8 100644
--- a/merk/src/merk/prove.rs
+++ b/merk/src/merk/prove.rs
@@ -139,6 +139,51 @@ where
.map_ok(|(proof, _, status, ..)| (proof, status.limit))
})
}
+
+ /// Generate a count-only proof for an `AggregateCountOnRange` query.
+ ///
+ /// `inner_range` is the `QueryItem` wrapped by `AggregateCountOnRange`
+ /// (the caller is expected to have already validated and stripped the
+ /// wrapper at the `Query` level via
+ /// `Query::validate_aggregate_count_on_range`).
+ ///
+ /// The merk's `tree_type` must be one of `ProvableCountTree` or
+ /// `ProvableCountSumTree` (regardless of whether the merk is empty).
+ /// Any other tree type is rejected with `Error::InvalidProofError`
+ /// before any walking happens.
+ ///
+ /// On a tree-type-valid but empty Merk this returns
+ /// `(empty proof, count = 0)` — an empty subtree is a valid input for a
+ /// count query and the answer is unambiguously zero.
+ pub fn prove_aggregate_count_on_range(
+ &self,
+ inner_range: &QueryItem,
+ grove_version: &GroveVersion,
+ ) -> CostResult<(LinkedList, u64), Error> {
+ let tree_type = self.tree_type;
+ if !matches!(
+ tree_type,
+ crate::TreeType::ProvableCountTree | crate::TreeType::ProvableCountSumTree
+ ) {
+ return Err(Error::InvalidProofError(format!(
+ "AggregateCountOnRange is only valid against ProvableCountTree or \
+ ProvableCountSumTree, got {:?}",
+ tree_type
+ )))
+ .wrap_with_cost(Default::default());
+ }
+ self.use_tree_mut(|maybe_tree| match maybe_tree {
+ None => Ok((LinkedList::new(), 0u64)).wrap_with_cost(Default::default()),
+ Some(tree) => {
+ let mut ref_walker = RefWalker::new(tree, self.source());
+ ref_walker.create_aggregate_count_on_range_proof(
+ inner_range,
+ tree_type,
+ grove_version,
+ )
+ }
+ })
+ }
}
type Proof = (LinkedList, Option);
diff --git a/merk/src/proofs/branch/mod.rs b/merk/src/proofs/branch/mod.rs
index 7fa4e081c..3d8f27e36 100644
--- a/merk/src/proofs/branch/mod.rs
+++ b/merk/src/proofs/branch/mod.rs
@@ -120,7 +120,9 @@ impl TrunkQueryResult {
| Node::KVRefValueHash(key, ..)
| Node::KVCount(key, ..)
| Node::KVRefValueHashCount(key, ..) => Some(key.clone()),
- Node::Hash(_) | Node::KVHash(_) | Node::KVHashCount(..) => None,
+ Node::Hash(_) | Node::KVHash(_) | Node::KVHashCount(..) | Node::HashWithCount(..) => {
+ None
+ }
}
}
@@ -383,7 +385,9 @@ impl BranchQueryResult {
| Node::KVRefValueHash(key, ..)
| Node::KVCount(key, ..)
| Node::KVRefValueHashCount(key, ..) => Some(key.clone()),
- Node::Hash(_) | Node::KVHash(_) | Node::KVHashCount(..) => None,
+ Node::Hash(_) | Node::KVHash(_) | Node::KVHashCount(..) | Node::HashWithCount(..) => {
+ None
+ }
}
}
}
diff --git a/merk/src/proofs/query/aggregate_count.rs b/merk/src/proofs/query/aggregate_count.rs
new file mode 100644
index 000000000..8cd493986
--- /dev/null
+++ b/merk/src/proofs/query/aggregate_count.rs
@@ -0,0 +1,1591 @@
+//! Proof generation and verification for `AggregateCountOnRange` queries.
+//!
+//! This module implements the count-only proof shape described in the GroveDB
+//! book chapter "Aggregate Count Queries". It is intentionally **separate**
+//! from `create_proof_internal`: regular proofs always descend into a queried
+//! subtree, but count proofs *stop* at fully-inside subtree roots and emit a
+//! single `HashWithCount` op for the entire collapsed subtree.
+//!
+//! The proof targets a `ProvableCountTree` or `ProvableCountSumTree` (or
+//! their `NonCounted*` wrapper variants — wrappers only affect whether the
+//! tree contributes to its parent's count, not its own internal count
+//! mechanics). On any other tree type the entry point returns
+//! `Error::InvalidProofError`.
+
+use std::collections::LinkedList;
+
+use grovedb_costs::{cost_return_on_error, CostResult, CostsExt, OperationCost};
+use grovedb_version::version::GroveVersion;
+
+use crate::{
+ proofs::{
+ query::QueryItem,
+ tree::{execute_with_options, Tree as ProofTree},
+ Decoder, Node, Op,
+ },
+ tree::{kv::ValueDefinedCostType, AggregateData, Fetch, RefWalker},
+ CryptoHash, Error, TreeType,
+};
+
+/// All-zero `CryptoHash`, used in `Node::HashWithCount` for missing children.
+const NULL_HASH: CryptoHash = [0u8; 32];
+
+/// How a subtree's possible-key window relates to the inner range we're
+/// counting over.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum SubtreeClassification {
+ /// Every possible key in this subtree falls **outside** the range.
+ Disjoint,
+ /// Every possible key in this subtree falls **inside** the range.
+ Contained,
+ /// The subtree straddles a range boundary (or directly contains one).
+ Boundary,
+}
+
+/// Classify a subtree relative to the inner range.
+///
+/// `subtree_lo_excl` and `subtree_hi_excl` are the **exclusive** bounds on
+/// what keys can appear under the subtree (derived from ancestors during the
+/// walk; both `None` at the root). The range bounds come from the inner
+/// `QueryItem`'s `lower_bound` / `upper_bound`.
+///
+/// The comparisons treat `subtree_hi_excl` as exclusive (subtree keys are
+/// strictly < `subtree_hi_excl`) and `subtree_lo_excl` as exclusive (subtree
+/// keys are strictly > `subtree_lo_excl`). For the range bounds, the
+/// inclusivity flag returned by `lower_bound`/`upper_bound` is **not**
+/// load-bearing for the disjoint/contained tests below — see the inline
+/// proofs.
+fn classify_subtree(
+ subtree_lo_excl: Option<&[u8]>,
+ subtree_hi_excl: Option<&[u8]>,
+ range: &QueryItem,
+) -> SubtreeClassification {
+ let (range_lo, _range_lo_excl) = range.lower_bound();
+ let (range_hi, _range_hi_incl) = range.upper_bound();
+
+ // Disjoint-LEFT: subtree entirely below the range.
+ //
+ // Subtree keys are < subtree_hi_excl. If subtree_hi_excl <= range_lo,
+ // every subtree key < subtree_hi_excl <= range_lo is also < range_lo,
+ // so excluded regardless of whether range_lo is inclusive or exclusive.
+ if let (Some(s_hi), Some(r_lo)) = (subtree_hi_excl, range_lo)
+ && s_hi <= r_lo
+ {
+ return SubtreeClassification::Disjoint;
+ }
+
+ // Disjoint-RIGHT: subtree entirely above the range.
+ //
+ // Subtree keys are > subtree_lo_excl. If subtree_lo_excl >= range_hi,
+ // every subtree key > subtree_lo_excl >= range_hi is also > range_hi,
+ // so excluded regardless of whether range_hi is inclusive or exclusive.
+ if let (Some(s_lo), Some(r_hi)) = (subtree_lo_excl, range_hi)
+ && s_lo >= r_hi
+ {
+ return SubtreeClassification::Disjoint;
+ }
+
+ // Contained: subtree (s_lo, s_hi) ⊆ range.
+ //
+ // Lower side: every subtree key > s_lo. If s_lo >= r_lo, every subtree
+ // key > s_lo >= r_lo, so > r_lo, satisfying both inclusive and exclusive
+ // r_lo. If subtree has no lower bound (s_lo = -inf) but range does, the
+ // subtree could include arbitrarily small keys → not contained.
+ let lower_contained = match range_lo {
+ None => true,
+ Some(r_lo) => match subtree_lo_excl {
+ Some(s_lo) => s_lo >= r_lo,
+ None => false,
+ },
+ };
+ // Upper side: every subtree key < s_hi. If s_hi <= r_hi, every subtree
+ // key < s_hi <= r_hi, so < r_hi, satisfying both inclusive and exclusive
+ // r_hi. (We forgo the slightly tighter "s_hi <= r_hi+1" optimization for
+ // inclusive r_hi because we don't have key arithmetic.)
+ let upper_contained = match range_hi {
+ None => true,
+ Some(r_hi) => match subtree_hi_excl {
+ Some(s_hi) => s_hi <= r_hi,
+ None => false,
+ },
+ };
+
+ if lower_contained && upper_contained {
+ SubtreeClassification::Contained
+ } else {
+ SubtreeClassification::Boundary
+ }
+}
+
+/// Returns true if `tree_type` is one of the two tree types that can host an
+/// `AggregateCountOnRange` proof: `ProvableCountTree` or
+/// `ProvableCountSumTree`. All other tree types return false.
+fn is_provable_count_bearing(tree_type: TreeType) -> bool {
+ matches!(
+ tree_type,
+ TreeType::ProvableCountTree | TreeType::ProvableCountSumTree
+ )
+}
+
+/// Pull the count out of a `ProvableCount` / `ProvableCountAndSum` aggregate.
+/// Returns `Err(InvalidProofError)` for any other variant — the entry point
+/// has already gated `tree_type`, so reaching the error means the tree's
+/// in-memory state disagrees with its declared type.
+fn provable_count_from_aggregate(data: AggregateData) -> Result {
+ match data {
+ AggregateData::ProvableCount(c) => Ok(c),
+ AggregateData::ProvableCountAndSum(c, _) => Ok(c),
+ other => Err(Error::InvalidProofError(format!(
+ "expected ProvableCount aggregate data on a provable count tree, got {:?}",
+ other
+ ))),
+ }
+}
+
+impl RefWalker<'_, S>
+where
+ S: Fetch + Sized + Clone,
+{
+ /// Generate a count-only proof for an `AggregateCountOnRange` query.
+ ///
+ /// `inner_range` is the `QueryItem` wrapped by `AggregateCountOnRange`
+ /// (already stripped at the caller). `tree_type` must be one of
+ /// `ProvableCountTree` or `ProvableCountSumTree`; any other tree type is
+ /// rejected with `Error::InvalidProofError` before any walking happens.
+ ///
+ /// The returned tuple is `(proof_ops, count)`:
+ /// - `proof_ops` is the linear stream the verifier will replay to
+ /// reconstruct the tree's root hash.
+ /// - `count` is the prover-side computed count (the verifier independently
+ /// recomputes it from the proof and compares against the expected root
+ /// hash; this value is returned as a convenience, not as ground truth).
+ pub fn create_aggregate_count_on_range_proof(
+ &mut self,
+ inner_range: &QueryItem,
+ tree_type: TreeType,
+ grove_version: &GroveVersion,
+ ) -> CostResult<(LinkedList, u64), Error> {
+ if !is_provable_count_bearing(tree_type) {
+ return Err(Error::InvalidProofError(format!(
+ "AggregateCountOnRange is only valid against ProvableCountTree or \
+ ProvableCountSumTree, got {:?}",
+ tree_type
+ )))
+ .wrap_with_cost(OperationCost::default());
+ }
+
+ let mut cost = OperationCost::default();
+ let mut ops = LinkedList::new();
+ let count = cost_return_on_error!(
+ &mut cost,
+ emit_count_proof(self, inner_range, None, None, &mut ops, grove_version)
+ );
+ Ok((ops, count)).wrap_with_cost(cost)
+ }
+}
+
+/// Recursive proof emitter. Always called on a non-empty subtree.
+///
+/// At entry, `subtree_lo_excl` / `subtree_hi_excl` are the inherited
+/// exclusive key bounds for the subtree this walker points at (both `None`
+/// at the root call).
+fn emit_count_proof(
+ walker: &mut RefWalker<'_, S>,
+ range: &QueryItem,
+ subtree_lo_excl: Option<&[u8]>,
+ subtree_hi_excl: Option<&[u8]>,
+ ops: &mut LinkedList,
+ grove_version: &GroveVersion,
+) -> CostResult
+where
+ S: Fetch + Sized + Clone,
+{
+ let mut cost = OperationCost::default();
+
+ // Step 1: classify the current subtree against the inner range.
+ let class = classify_subtree(subtree_lo_excl, subtree_hi_excl, range);
+
+ if matches!(
+ class,
+ SubtreeClassification::Disjoint | SubtreeClassification::Contained
+ ) {
+ // Whole subtree is either entirely outside or entirely inside the
+ // range. Either way we emit a single self-verifying
+ // `HashWithCount(kv_hash, left_child_hash, right_child_hash, count)`
+ // op for the subtree's root.
+ //
+ // Why HashWithCount even for Disjoint subtrees (rather than the
+ // smaller `Hash(node_hash)` that an in-range count would never
+ // need)? Because the parent's `own_count` is computed by the
+ // verifier as `parent_aggregate − left_struct − right_struct` (see
+ // `verify_count_shape`), so the *structural* count of every child
+ // — including disjoint outside subtrees — has to be
+ // cryptographically bound to the parent's hash chain. The only
+ // node type that carries a hash-bound count is `HashWithCount`
+ // (its four committed fields recompute `node_hash_with_count` and
+ // would diverge under any count tampering). Plain `Hash(node_hash)`
+ // carries no count, so a malicious prover could lie about the
+ // structural count and skew the parent's `own_count`
+ // derivation — leading to silent over/under-counts at boundary
+ // ancestors.
+ let aggregate = match walker.tree().aggregate_data() {
+ Ok(a) => a,
+ Err(e) => {
+ return Err(Error::InvalidProofError(format!("aggregate_data: {}", e)))
+ .wrap_with_cost(cost);
+ }
+ };
+ let subtree_count = match provable_count_from_aggregate(aggregate) {
+ Ok(c) => c,
+ Err(e) => return Err(e).wrap_with_cost(cost),
+ };
+ let kv_hash = *walker.tree().kv_hash();
+ let left_child_hash = walker
+ .tree()
+ .link(true)
+ .map(|l| *l.hash())
+ .unwrap_or(NULL_HASH);
+ let right_child_hash = walker
+ .tree()
+ .link(false)
+ .map(|l| *l.hash())
+ .unwrap_or(NULL_HASH);
+ ops.push_back(Op::Push(Node::HashWithCount(
+ kv_hash,
+ left_child_hash,
+ right_child_hash,
+ subtree_count,
+ )));
+ // For the prover-side in-range total: Contained contributes its
+ // entire subtree count (which already excludes NonCounted entries
+ // because their stored aggregate is 0); Disjoint contributes 0.
+ let in_range_contribution = match class {
+ SubtreeClassification::Contained => subtree_count,
+ SubtreeClassification::Disjoint => 0,
+ SubtreeClassification::Boundary => unreachable!(),
+ };
+ return Ok(in_range_contribution).wrap_with_cost(cost);
+ }
+ // class == Boundary — fall through to descent + KVDigestCount emission.
+
+ // Step 2: snapshot what we need from the current node before walking.
+ // walk(true/false) takes &mut self.tree, so we must drop any existing
+ // borrows on walker.tree() before calling it.
+ let node_key: Vec = walker.tree().key().to_vec();
+ let node_value_hash: CryptoHash = *walker.tree().value_hash();
+ let node_count: u64 = match walker
+ .tree()
+ .aggregate_data()
+ .map_err(|e| Error::InvalidProofError(format!("aggregate_data: {}", e)))
+ {
+ Ok(data) => match provable_count_from_aggregate(data) {
+ Ok(c) => c,
+ Err(e) => return Err(e).wrap_with_cost(cost),
+ },
+ Err(e) => return Err(e).wrap_with_cost(cost),
+ };
+
+ // Snapshot each child link's structural aggregate count from the link
+ // itself (avoids loading the child for this lookup). The verifier needs
+ // these to compute `own_count = node_count − left_struct − right_struct`
+ // at this boundary node.
+ let left_link_aggregate: u64 = walker
+ .tree()
+ .link(true)
+ .map(|l| l.aggregate_data().as_count_u64())
+ .unwrap_or(0);
+ let right_link_aggregate: u64 = walker
+ .tree()
+ .link(false)
+ .map(|l| l.aggregate_data().as_count_u64())
+ .unwrap_or(0);
+ let left_link_present = walker.tree().link(true).is_some();
+ let right_link_present = walker.tree().link(false).is_some();
+
+ let mut total: u64 = 0;
+
+ // Step 3: handle the LEFT child. Both Disjoint and Contained require a
+ // one-level walk so the recursive Disjoint/Contained arm can emit a
+ // self-verifying `HashWithCount` (plain `Hash` is no longer used here
+ // — see the Disjoint branch comment above).
+ let left_emitted = if left_link_present {
+ let left_lo = subtree_lo_excl;
+ let left_hi: Option<&[u8]> = Some(node_key.as_slice());
+ let walked = cost_return_on_error!(
+ &mut cost,
+ walker.walk(
+ true,
+ None::<&fn(&[u8], &GroveVersion) -> Option>,
+ grove_version,
+ )
+ );
+ let mut left_walker = match walked {
+ Some(lw) => lw,
+ None => {
+ return Err(Error::CorruptedState(
+ "tree.link(true) was Some but walk(true) returned None",
+ ))
+ .wrap_with_cost(cost)
+ }
+ };
+ let n = cost_return_on_error!(
+ &mut cost,
+ emit_count_proof(
+ &mut left_walker,
+ range,
+ left_lo,
+ left_hi,
+ ops,
+ grove_version,
+ )
+ );
+ total = total.saturating_add(n);
+ true
+ } else {
+ false
+ };
+
+ // Step 4: emit the current node as a boundary KVDigestCount + attach left
+ // as its left child. The node's own contribution to the in-range count
+ // is `own_count` (0 for `NonCounted`-wrapped, 1 for normal), derived as
+ // `node_count − left_struct − right_struct`. This is what makes
+ // NonCounted entries fall out of the count: a NonCounted leaf has
+ // node_count = 0 and no children, so own_count = 0.
+ ops.push_back(Op::Push(Node::KVDigestCount(
+ node_key.clone(),
+ node_value_hash,
+ node_count,
+ )));
+ if left_emitted {
+ ops.push_back(Op::Parent);
+ }
+ if range.contains(&node_key) {
+ let own_count = node_count
+ .saturating_sub(left_link_aggregate)
+ .saturating_sub(right_link_aggregate);
+ total = total.saturating_add(own_count);
+ }
+
+ // Step 5: handle the RIGHT child. Same descent pattern as LEFT.
+ let right_emitted = if right_link_present {
+ let right_lo: Option<&[u8]> = Some(node_key.as_slice());
+ let right_hi = subtree_hi_excl;
+ let walked = cost_return_on_error!(
+ &mut cost,
+ walker.walk(
+ false,
+ None::<&fn(&[u8], &GroveVersion) -> Option>,
+ grove_version,
+ )
+ );
+ let mut right_walker = match walked {
+ Some(rw) => rw,
+ None => {
+ return Err(Error::CorruptedState(
+ "tree.link(false) was Some but walk(false) returned None",
+ ))
+ .wrap_with_cost(cost)
+ }
+ };
+ let n = cost_return_on_error!(
+ &mut cost,
+ emit_count_proof(
+ &mut right_walker,
+ range,
+ right_lo,
+ right_hi,
+ ops,
+ grove_version,
+ )
+ );
+ total = total.saturating_add(n);
+ true
+ } else {
+ false
+ };
+
+ if right_emitted {
+ ops.push_back(Op::Child);
+ }
+
+ Ok(total).wrap_with_cost(cost)
+}
+
+/// Verify a count-only proof for an `AggregateCountOnRange` query.
+///
+/// `proof_bytes` is the encoded `Vec` produced by
+/// [`Merk::prove_aggregate_count_on_range`]; `inner_range` is the same
+/// `QueryItem` the prover counted over (caller-supplied — typically extracted
+/// from the verifier's `PathQuery`).
+///
+/// On success returns `(merk_root_hash, count)`:
+/// - `merk_root_hash` is the root hash of the reconstructed merk; the
+/// caller must compare it against the expected root hash to complete
+/// verification.
+/// - `count` is the number of keys in the inner range, computed by replaying
+/// the prover's classification walk against the reconstructed proof tree.
+///
+/// **Two-phase verification.** Allowlisting node types alone is unsound:
+/// a malicious prover can substitute `Hash` for an in-range subtree (to
+/// undercount), attach extra `KVDigestCount` children below a keyless
+/// `Hash` / `HashWithCount` (to overcount, since their hash recomputation
+/// ignores attached children and the root hash would still match), or send
+/// a single `Push(Hash(expected_root))` for a non-empty tree (to receive a
+/// count of 0 with the trusted root). To prevent all three, this function:
+///
+/// 1. Decodes the proof into a `ProofTree` via `execute_with_options` with
+/// the AVL balance check disabled (count proofs intentionally collapse
+/// one side to height 1) and **does not** count anything in the
+/// `visit_node` callback.
+/// 2. Walks the reconstructed tree with the same inherited exclusive
+/// subtree-key bounds the prover used (`(None, None)` at the root).
+/// At each position it calls `classify_subtree(bounds, inner_range)` and
+/// requires the proof-tree node type to match the classification:
+/// - `Disjoint` → must be a leaf `Hash(_)`. Contributes 0.
+/// - `Contained` → must be a leaf `HashWithCount(...)`. Contributes its
+/// count.
+/// - `Boundary` → must be `KVDigestCount(key, ...)` with `key` strictly
+/// inside `bounds`. Recurse left with `(lo, key)` and right with
+/// `(key, hi)`; add 1 if `inner_range.contains(key)`.
+///
+/// Counts are summed with `checked_add`; an overflow is treated as proof
+/// corruption (`u64::MAX` keys is not a real merk shape). The caller is
+/// still responsible for verifying the returned `merk_root_hash` against
+/// their trusted root.
+///
+/// **Empty merk case.** An empty merk is represented by an empty proof byte
+/// stream and yields `(NULL_HASH, 0)`. Callers chaining this in a
+/// multi-layer proof should recognize that shape explicitly.
+pub fn verify_aggregate_count_on_range_proof(
+    proof_bytes: &[u8],
+    inner_range: &QueryItem,
+) -> CostResult<(CryptoHash, u64), Error> {
+    if proof_bytes.is_empty() {
+        // Empty merk → empty proof → count = 0, hash = NULL_HASH. This
+        // matches the prover-side behavior of returning an empty op stream
+        // for an empty subtree.
+        return Ok((NULL_HASH, 0u64)).wrap_with_cost(OperationCost::default());
+    }
+
+    let mut cost = OperationCost::default();
+    let decoder = Decoder::new(proof_bytes);
+
+    // Phase 1: reconstruct the proof tree. The visit_node closure only
+    // performs a coarse allowlist; the per-position type/shape check happens
+    // in Phase 2 below. We still reject blatantly wrong node types here so
+    // execute() bails early on garbage input.
+    // (Annotation restored: `execute_with_options` yields the decoded
+    // `ProofTree`, which Phase 2 consumes by reference.)
+    let tree_result: CostResult<ProofTree, Error> =
+        execute_with_options(decoder, false, false, |node| match node {
+            // The count proof emits only `HashWithCount` (for collapsed
+            // Disjoint or Contained subtrees) and `KVDigestCount` (for
+            // Boundary nodes). Plain `Hash(_)` is no longer used here
+            // because the structural count it would otherwise stand in
+            // for is needed by the verifier's `own_count` derivation and
+            // would not be hash-bound.
+            Node::HashWithCount(_, _, _, _) | Node::KVDigestCount(_, _, _) => Ok(()),
+            other => Err(Error::InvalidProofError(format!(
+                "unexpected node type in aggregate count proof: {}",
+                other
+            ))),
+        });
+    let tree = cost_return_on_error!(&mut cost, tree_result);
+
+    // Phase 2: shape-check + count by replaying the prover's classification
+    // walk. This binds each leaf node's type to the (subtree_bounds × range)
+    // classification, so the only valid count is the one a faithful prover
+    // would have produced for this exact range.
+    let (count, _structural) = match verify_count_shape(&tree, inner_range, None, None) {
+        Ok(pair) => pair,
+        Err(e) => return Err(e).wrap_with_cost(cost),
+    };
+
+    let root_hash = tree.hash().unwrap_add_cost(&mut cost);
+    Ok((root_hash, count)).wrap_with_cost(cost)
+}
+
+/// Recursive shape-walk over the reconstructed proof tree. Returns the
+/// pair `(in_range_count, structural_count)`:
+///
+/// - `in_range_count` — number of keys in the subtree that fall inside the
+/// inner range AND have a non-zero own-count (i.e. are not
+/// `NonCounted`-wrapped). This is what bubbles up to the verifier's
+/// return value.
+/// - `structural_count` — the merk-recorded aggregate count of this subtree
+/// (counting normal entries as 1 and `NonCounted` entries as 0). The
+/// parent uses it to compute its own `own_count` as
+/// `parent_node_count − left_struct − right_struct` (since
+/// `parent_node_count = own + left_struct + right_struct`).
+///
+/// The structural count of every child is **cryptographically bound** to
+/// the parent's hash chain because every count-bearing node in a count
+/// proof (`KVDigestCount`, `HashWithCount`) has its count fed into
+/// `node_hash_with_count` for hash recomputation. Plain `Hash(_)` would
+/// not carry a bound count and is therefore not allowed in count proofs;
+/// see the prover-side comment in `emit_count_proof` for the full
+/// justification.
+///
+/// At each node:
+///
+/// - Compute the expected classification from the inherited subtree bounds
+/// and the inner range.
+/// - Require the node's type to match the classification (and reject any
+/// children attached under a leaf-shape classification — a malicious
+/// prover could otherwise hide counted children under a `HashWithCount`
+/// leaf, since its hash recomputation ignores reconstructed children).
+/// - Recurse with tightened bounds at `Boundary` nodes, summing with
+/// `checked_add` and computing `own_count` via `checked_sub`.
+fn verify_count_shape(
+    tree: &ProofTree,
+    range: &QueryItem,
+    lo: Option<&[u8]>,
+    hi: Option<&[u8]>,
+) -> Result<(u64, u64), Error> {
+    // The classification depends only on the inherited bounds and the query
+    // range — never on prover-supplied node data — so an honest prover's
+    // node type at every position is fully determined before we look at it.
+    let class = classify_subtree(lo, hi, range);
+    match class {
+        SubtreeClassification::Disjoint => match &tree.node {
+            Node::HashWithCount(_, _, _, count) => {
+                if tree.left.is_some() || tree.right.is_some() {
+                    return Err(Error::InvalidProofError(
+                        "aggregate-count proof: HashWithCount node at a Disjoint position \
+                         must be a leaf"
+                            .to_string(),
+                    ));
+                }
+                // Disjoint subtree contributes 0 to the in-range count but
+                // its full structural count to the parent's `own_count`
+                // computation.
+                Ok((0, *count))
+            }
+            other => Err(Error::InvalidProofError(format!(
+                "aggregate-count proof: expected HashWithCount at Disjoint position, got {}",
+                other
+            ))),
+        },
+        SubtreeClassification::Contained => match &tree.node {
+            Node::HashWithCount(_, _, _, count) => {
+                if tree.left.is_some() || tree.right.is_some() {
+                    return Err(Error::InvalidProofError(
+                        "aggregate-count proof: HashWithCount node at a Contained position \
+                         must be a leaf"
+                            .to_string(),
+                    ));
+                }
+                // Contained subtree's structural count (which excludes
+                // NonCounted entries because their stored aggregate is 0)
+                // is exactly its in-range count.
+                Ok((*count, *count))
+            }
+            other => Err(Error::InvalidProofError(format!(
+                "aggregate-count proof: expected HashWithCount at Contained position, got {}",
+                other
+            ))),
+        },
+        SubtreeClassification::Boundary => match &tree.node {
+            Node::KVDigestCount(key, _, aggregate) => {
+                if !key_strictly_inside(key.as_slice(), lo, hi) {
+                    return Err(Error::InvalidProofError(format!(
+                        "aggregate-count proof: KVDigestCount key {} falls outside its \
+                         inherited subtree bounds (lo={:?}, hi={:?})",
+                        hex::encode(key),
+                        lo.map(hex::encode),
+                        hi.map(hex::encode),
+                    )));
+                }
+                let key_slice = key.as_slice();
+                // Recurse with bounds tightened at this node's key, mirroring
+                // the prover's left/right descent windows exactly.
+                let (left_in, left_struct) = match &tree.left {
+                    Some(child) => verify_count_shape(&child.tree, range, lo, Some(key_slice))?,
+                    None => (0, 0),
+                };
+                let (right_in, right_struct) = match &tree.right {
+                    Some(child) => verify_count_shape(&child.tree, range, Some(key_slice), hi)?,
+                    None => (0, 0),
+                };
+                // own_count = aggregate − left_struct − right_struct.
+                // Saturating sub here would silently mask a malformed
+                // proof (children claiming more keys than the parent's
+                // aggregate), so use checked_sub and reject.
+                let own_count = aggregate
+                    .checked_sub(left_struct)
+                    .and_then(|s| s.checked_sub(right_struct))
+                    .ok_or_else(|| {
+                        Error::InvalidProofError(format!(
+                            "aggregate-count proof: child structural counts ({} + {}) exceed \
+                             parent's aggregate count ({}) at key {}",
+                            left_struct,
+                            right_struct,
+                            aggregate,
+                            hex::encode(key)
+                        ))
+                    })?;
+                // The node itself counts only when its key is in range AND
+                // its own_count is non-zero (NonCounted entries yield 0).
+                let self_contribution = if range.contains(key_slice) {
+                    own_count
+                } else {
+                    0
+                };
+                let in_range = left_in
+                    .checked_add(right_in)
+                    .and_then(|s| s.checked_add(self_contribution))
+                    .ok_or_else(|| {
+                        Error::InvalidProofError(
+                            "aggregate-count proof: in-range count overflowed u64".to_string(),
+                        )
+                    })?;
+                Ok((in_range, *aggregate))
+            }
+            other => Err(Error::InvalidProofError(format!(
+                "aggregate-count proof: expected KVDigestCount at Boundary position, got {}",
+                other
+            ))),
+        },
+    }
+}
+
+/// Checks that `key` lies strictly inside the exclusive window `(lo, hi)`,
+/// where a `None` bound stands for `-inf` / `+inf`. The shape walk uses this
+/// to validate that a `Boundary` `KVDigestCount` carries a key consistent
+/// with the subtree window it was emitted under.
+fn key_strictly_inside(key: &[u8], lo: Option<&[u8]>, hi: Option<&[u8]>) -> bool {
+    let above_lower = match lo {
+        Some(bound) => key > bound,
+        None => true,
+    };
+    let below_upper = match hi {
+        Some(bound) => key < bound,
+        None => true,
+    };
+    above_lower && below_upper
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+    /// Test helper: `QueryItem` for the inclusive byte-key range `[lo, hi]`.
+    fn range_inclusive(lo: &[u8], hi: &[u8]) -> QueryItem {
+        QueryItem::RangeInclusive(lo.to_vec()..=hi.to_vec())
+    }
+
+    /// Test helper: `QueryItem` matching every key (`..`).
+    fn range_full() -> QueryItem {
+        QueryItem::RangeFull(std::ops::RangeFull)
+    }
+
+    /// Test helper: `QueryItem` for the inclusive-lower open range `[lo, +inf)`.
+    fn range_from(lo: &[u8]) -> QueryItem {
+        QueryItem::RangeFrom(lo.to_vec()..)
+    }
+
+    /// Test helper: `QueryItem` for the exclusive-lower open range `(lo, +inf)`.
+    fn range_after(lo: &[u8]) -> QueryItem {
+        QueryItem::RangeAfter(lo.to_vec()..)
+    }
+
+ #[test]
+ fn classify_disjoint_below() {
+ let r = range_inclusive(b"d", b"f");
+ // subtree (None, b"c") — keys < "c", entirely below ["d", "f"].
+ assert_eq!(
+ classify_subtree(None, Some(b"c"), &r),
+ SubtreeClassification::Disjoint,
+ );
+ }
+
+ #[test]
+ fn classify_disjoint_above() {
+ let r = range_inclusive(b"d", b"f");
+ // subtree (b"g", None) — keys > "g", entirely above ["d", "f"].
+ assert_eq!(
+ classify_subtree(Some(b"g"), None, &r),
+ SubtreeClassification::Disjoint,
+ );
+ }
+
+ #[test]
+ fn classify_disjoint_at_lower_boundary_inclusive() {
+ let r = range_inclusive(b"d", b"f");
+ // subtree (None, b"d") — keys < "d", just below the inclusive bound.
+ assert_eq!(
+ classify_subtree(None, Some(b"d"), &r),
+ SubtreeClassification::Disjoint,
+ );
+ }
+
+ #[test]
+ fn classify_disjoint_at_upper_boundary_inclusive() {
+ let r = range_inclusive(b"d", b"f");
+ // subtree (b"f", None) — keys > "f", just above the inclusive bound.
+ assert_eq!(
+ classify_subtree(Some(b"f"), None, &r),
+ SubtreeClassification::Disjoint,
+ );
+ }
+
+ #[test]
+ fn classify_contained_simple() {
+ let r = range_inclusive(b"a", b"z");
+ // subtree (b"d", b"f") — keys in ("d", "f"), all in ["a", "z"].
+ assert_eq!(
+ classify_subtree(Some(b"d"), Some(b"f"), &r),
+ SubtreeClassification::Contained,
+ );
+ }
+
+ #[test]
+ fn classify_contained_full_range_full_subtree() {
+ let r = range_full();
+ // The full range matches everything — even an unbounded subtree is
+ // contained.
+ assert_eq!(
+ classify_subtree(None, None, &r),
+ SubtreeClassification::Contained,
+ );
+ }
+
+ #[test]
+ fn classify_boundary_overlapping_lower() {
+ let r = range_inclusive(b"d", b"f");
+ // subtree (b"c", b"e") — keys in ("c", "e"), straddles the lower bound.
+ assert_eq!(
+ classify_subtree(Some(b"c"), Some(b"e"), &r),
+ SubtreeClassification::Boundary,
+ );
+ }
+
+ #[test]
+ fn classify_boundary_overlapping_upper() {
+ let r = range_inclusive(b"d", b"f");
+ // subtree (b"e", b"g") — keys in ("e", "g"), straddles the upper bound.
+ assert_eq!(
+ classify_subtree(Some(b"e"), Some(b"g"), &r),
+ SubtreeClassification::Boundary,
+ );
+ }
+
+ #[test]
+ fn classify_boundary_unbounded_below_with_bounded_range() {
+ let r = range_from(b"d");
+ // subtree (None, b"e") — could include keys < "d", so boundary.
+ assert_eq!(
+ classify_subtree(None, Some(b"e"), &r),
+ SubtreeClassification::Boundary,
+ );
+ }
+
+ #[test]
+ fn classify_contained_range_after_exclusive() {
+ let r = range_after(b"b");
+ // RangeAfter(b"b") = (b, +inf). subtree (b"b", b"e") — keys > "b" and
+ // < "e", all in (b, +inf). Contained.
+ assert_eq!(
+ classify_subtree(Some(b"b"), Some(b"e"), &r),
+ SubtreeClassification::Contained,
+ );
+ }
+
+ // ---------- end-to-end integration tests on a real merk ----------
+ //
+ // These tests build a small ProvableCountTree, generate count proofs
+ // through the merk-level API, then verify them with the count verifier.
+ // They cover the four documented categories: open-range (lower-only and
+ // upper-only) and closed-range (inclusive and after-to-inclusive). Empty
+ // tree and single-bound edge cases are also exercised.
+
+ use grovedb_costs::CostsExt as _;
+ use grovedb_version::version::GroveVersion;
+
+ use crate::{
+ proofs::{encode_into, Op as ProofOp},
+ test_utils::TempMerk,
+ tree::{Op, TreeFeatureType::ProvableCountedMerkNode},
+ Merk, TreeType,
+ };
+
+ /// Build a fresh `ProvableCountTree` populated with single-byte keys
+ /// "a".."o" (15 keys) — same shape as the running example in the book
+ /// chapter's "Closed ranges" section. Returns the merk and its current
+ /// root hash.
+    fn make_15_key_provable_count_tree(grove_version: &GroveVersion) -> (TempMerk, [u8; 32]) {
+        let mut merk = TempMerk::new_with_tree_type(grove_version, TreeType::ProvableCountTree);
+        // Single-byte keys "a".."o"; generic args restored (each key is a
+        // `Vec<u8>` produced by `vec![c]`).
+        let keys: Vec<Vec<u8>> = (b'a'..=b'o').map(|c| vec![c]).collect();
+        let entries: Vec<(Vec<u8>, Op)> = keys
+            .iter()
+            .enumerate()
+            .map(|(i, k)| {
+                (
+                    k.clone(),
+                    Op::Put(vec![i as u8], ProvableCountedMerkNode(1)),
+                )
+            })
+            .collect();
+        merk.apply::<_, Vec<_>>(&entries, &[], None, grove_version)
+            .unwrap()
+            .expect("apply should succeed");
+        merk.commit(grove_version);
+        let root_hash = merk.root_hash().unwrap();
+        (merk, root_hash)
+    }
+
+ /// Encode a `LinkedList` into the wire format that the verifier
+ /// consumes.
+    /// (Generic args restored: the op list holds `ProofOp`s and the wire
+    /// format is a byte vector consumed by the verifier as `&[u8]`.)
+    fn encode_proof(ops: &LinkedList<ProofOp>) -> Vec<u8> {
+        let mut bytes = Vec::with_capacity(128);
+        encode_into(ops.iter(), &mut bytes);
+        bytes
+    }
+
+ /// Round-trip helper: prove the inner range, encode the proof, verify it,
+ /// assert the recovered root hash matches and the recovered count matches
+ /// `expected_count`.
+    // NOTE(review): the storage type parameter of `Merk<…>` was stripped in
+    // transit (`&Merk>`); `&TempMerk` (which derefs to the underlying Merk —
+    // every caller in this module passes one) is the minimal compilable
+    // restoration. Confirm against the original if a generic bound was meant.
+    fn round_trip(
+        merk: &TempMerk,
+        expected_root: [u8; 32],
+        inner_range: QueryItem,
+        expected_count: u64,
+        grove_version: &GroveVersion,
+    ) {
+        let (ops, prover_count) = merk
+            .prove_aggregate_count_on_range(&inner_range, grove_version)
+            .unwrap()
+            .expect("prove should succeed");
+        assert_eq!(
+            prover_count, expected_count,
+            "prover count mismatch for range {:?}",
+            inner_range
+        );
+        let bytes = encode_proof(&ops);
+        let (root, verifier_count) = verify_aggregate_count_on_range_proof(&bytes, &inner_range)
+            .unwrap()
+            .expect("verify should succeed");
+        assert_eq!(
+            root, expected_root,
+            "verifier reconstructed wrong root for range {:?}",
+            inner_range
+        );
+        assert_eq!(
+            verifier_count, expected_count,
+            "verifier count mismatch for range {:?}",
+            inner_range
+        );
+    }
+
+ #[test]
+ fn integration_open_range_from() {
+ let v = GroveVersion::latest();
+ let (merk, root) = make_15_key_provable_count_tree(v);
+ // RangeFrom("c"..) → keys c..o (13 keys).
+ round_trip(&merk, root, QueryItem::RangeFrom(b"c".to_vec()..), 13, v);
+ }
+
+ #[test]
+ fn integration_open_range_after() {
+ let v = GroveVersion::latest();
+ let (merk, root) = make_15_key_provable_count_tree(v);
+ // RangeAfter(("b", ..)) → keys c..o (13 keys), same set as RangeFrom("c"..)
+ // but proof shape differs — the boundary lands on "b" exclusive.
+ round_trip(&merk, root, QueryItem::RangeAfter(b"b".to_vec()..), 13, v);
+ }
+
+ #[test]
+ fn integration_open_range_to() {
+ let v = GroveVersion::latest();
+ let (merk, root) = make_15_key_provable_count_tree(v);
+ // RangeTo(..b"e") → keys a..d (4 keys, exclusive upper).
+ round_trip(&merk, root, QueryItem::RangeTo(..b"e".to_vec()), 4, v);
+ }
+
+ #[test]
+ fn integration_open_range_to_inclusive() {
+ let v = GroveVersion::latest();
+ let (merk, root) = make_15_key_provable_count_tree(v);
+ // RangeToInclusive(..=b"e") → keys a..e (5 keys, inclusive upper).
+ round_trip(
+ &merk,
+ root,
+ QueryItem::RangeToInclusive(..=b"e".to_vec()),
+ 5,
+ v,
+ );
+ }
+
+ #[test]
+ fn integration_closed_range_inclusive() {
+ let v = GroveVersion::latest();
+ let (merk, root) = make_15_key_provable_count_tree(v);
+ // RangeInclusive("c"..="l") → 10 keys.
+ round_trip(
+ &merk,
+ root,
+ QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()),
+ 10,
+ v,
+ );
+ }
+
+ #[test]
+ fn integration_closed_range_exclusive() {
+ let v = GroveVersion::latest();
+ let (merk, root) = make_15_key_provable_count_tree(v);
+ // Range("c".."l") → c..k (9 keys, exclusive upper).
+ round_trip(
+ &merk,
+ root,
+ QueryItem::Range(b"c".to_vec()..b"l".to_vec()),
+ 9,
+ v,
+ );
+ }
+
+ #[test]
+ fn integration_closed_range_after_to_inclusive() {
+ let v = GroveVersion::latest();
+ let (merk, root) = make_15_key_provable_count_tree(v);
+ // RangeAfterToInclusive(("c", "l")) → keys d..l (9 keys: d..=l excluding c).
+ round_trip(
+ &merk,
+ root,
+ QueryItem::RangeAfterToInclusive(b"c".to_vec()..=b"l".to_vec()),
+ 9,
+ v,
+ );
+ }
+
+ #[test]
+ fn integration_closed_range_after_to_exclusive() {
+ let v = GroveVersion::latest();
+ let (merk, root) = make_15_key_provable_count_tree(v);
+ // RangeAfterTo(("c", "l")) → keys d..l (8 keys, both exclusive).
+ round_trip(
+ &merk,
+ root,
+ QueryItem::RangeAfterTo(b"c".to_vec()..b"l".to_vec()),
+ 8,
+ v,
+ );
+ }
+
+ #[test]
+ fn integration_range_below_all_keys() {
+ let v = GroveVersion::latest();
+ let (merk, root) = make_15_key_provable_count_tree(v);
+ // Entire range below the smallest key — should produce count = 0
+ // and a Disjoint proof at the root level.
+ round_trip(
+ &merk,
+ root,
+ QueryItem::RangeInclusive(vec![0x00]..=vec![0x10]),
+ 0,
+ v,
+ );
+ }
+
+ #[test]
+ fn integration_range_above_all_keys() {
+ let v = GroveVersion::latest();
+ let (merk, root) = make_15_key_provable_count_tree(v);
+ // Entire range above the largest key.
+ round_trip(
+ &merk,
+ root,
+ QueryItem::RangeInclusive(b"z".to_vec()..=vec![0xff]),
+ 0,
+ v,
+ );
+ }
+
+ #[test]
+ fn integration_empty_merk() {
+ let v = GroveVersion::latest();
+ let merk = TempMerk::new_with_tree_type(v, TreeType::ProvableCountTree);
+ let (ops, prover_count) = merk
+ .prove_aggregate_count_on_range(&QueryItem::Range(b"a".to_vec()..b"z".to_vec()), v)
+ .unwrap()
+ .expect("prove on empty merk should succeed");
+ assert_eq!(prover_count, 0);
+ // Empty proof means the verifier returns NULL_HASH and count = 0.
+ let bytes = encode_proof(&ops);
+ let (root, verifier_count) = verify_aggregate_count_on_range_proof(
+ &bytes,
+ &QueryItem::Range(b"a".to_vec()..b"z".to_vec()),
+ )
+ .unwrap()
+ .expect("verify on empty merk should succeed");
+ assert_eq!(root, NULL_HASH);
+ assert_eq!(verifier_count, 0);
+ }
+
+ #[test]
+ fn integration_rejected_on_normal_tree() {
+ let v = GroveVersion::latest();
+ let merk = TempMerk::new(v); // NormalTree
+ let err = merk
+ .prove_aggregate_count_on_range(&QueryItem::Range(b"a".to_vec()..b"z".to_vec()), v)
+ .unwrap();
+ assert!(
+ err.is_err(),
+ "expected an InvalidProofError on NormalTree, got Ok({:?})",
+ err.ok().map(|(_, c)| c)
+ );
+ }
+
+ #[test]
+ fn integration_count_forgery_is_rejected() {
+ // Demonstrates the cryptographic binding: tamper with the count in a
+ // HashWithCount op and the verifier's root-hash recomputation must
+ // diverge from the expected root.
+ let v = GroveVersion::latest();
+ let (merk, expected_root) = make_15_key_provable_count_tree(v);
+ let inner_range = QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec());
+ let (mut ops, _prover_count) = merk
+ .prove_aggregate_count_on_range(&inner_range, v)
+ .unwrap()
+ .expect("prove should succeed");
+
+ // Forge: bump the count on the first HashWithCount op we see.
+ let mut tampered = false;
+ for op in ops.iter_mut() {
+ if let ProofOp::Push(Node::HashWithCount(_, _, _, count))
+ | ProofOp::PushInverted(Node::HashWithCount(_, _, _, count)) = op
+ {
+ *count = count.saturating_add(1);
+ tampered = true;
+ break;
+ }
+ }
+ assert!(
+ tampered,
+ "test setup: expected at least one HashWithCount op"
+ );
+
+ let bytes = encode_proof(&ops);
+ let (root, _count) = verify_aggregate_count_on_range_proof(&bytes, &inner_range)
+ .unwrap()
+ .expect("verify should still complete (root mismatch is the caller's job)");
+ assert_ne!(
+ root, expected_root,
+ "tampered count must produce a different reconstructed root hash"
+ );
+ }
+
+ // ---------- attack tests for the shape-walk verifier ----------
+ //
+ // These three tests exercise attacks the old allowlist-only verifier let
+ // through. With the shape walk in `verify_count_shape`, each one is
+ // rejected before the caller's root-hash check.
+
+ /// A malicious prover sends a single `Push(Hash(expected_root))` for a
+ /// non-empty tree. Without the shape check this would return
+ /// `(expected_root, 0)` for any range. The shape check classifies the
+ /// root with `(None, None)` against a bounded inner range as `Boundary`,
+ /// expects `KVDigestCount`, and rejects.
+    #[test]
+    fn shape_walk_rejects_single_hash_undercount() {
+        let v = GroveVersion::latest();
+        let (merk, expected_root) = make_15_key_provable_count_tree(v);
+        let inner_range = QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec());
+
+        // Forged proof: a single Hash op carrying the genuine root hash.
+        // (Generic arg restored: the list holds `ProofOp`s.)
+        let mut forged: LinkedList<ProofOp> = LinkedList::new();
+        forged.push_back(ProofOp::Push(Node::Hash(expected_root)));
+        let bytes = encode_proof(&forged);
+
+        let result = verify_aggregate_count_on_range_proof(&bytes, &inner_range).unwrap();
+        let err = result.expect_err("single-Hash forgery must be rejected");
+        // keep merk alive for clarity in the test scope
+        let _ = merk;
+        // Plain `Hash` is no longer in the count-proof allowlist (it would
+        // carry an unbound structural count), so the rejection now lands
+        // in Phase 1's coarse allowlist rather than Phase 2's shape walk.
+        // Either error message is fine — the attack is rejected.
+        match err {
+            Error::InvalidProofError(msg) => {
+                assert!(
+                    msg.contains("unexpected node type")
+                        || msg.contains("expected KVDigestCount")
+                        || msg.contains("Boundary"),
+                    "unexpected message: {msg}"
+                );
+            }
+            other => panic!("expected InvalidProofError, got {other:?}"),
+        }
+    }
+
+ /// A malicious prover replaces an in-range `HashWithCount` subtree with
+ /// a `Hash` carrying that subtree's node_hash, undercounting by the
+ /// subtree's count. The hash chain still matches (same node_hash), so
+ /// the old allowlist verifier would have happily returned a wrong
+ /// count. The shape walk classifies that position as `Contained` and
+ /// requires `HashWithCount`, rejecting the swap.
+ #[test]
+ fn shape_walk_rejects_hash_swap_for_contained_subtree() {
+ let v = GroveVersion::latest();
+ let (merk, _root) = make_15_key_provable_count_tree(v);
+ let inner_range = QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec());
+ let (mut ops, _) = merk
+ .prove_aggregate_count_on_range(&inner_range, v)
+ .unwrap()
+ .expect("prove succeeds");
+
+ // Swap the first HashWithCount op for a Hash op carrying the
+ // computed node_hash for that subtree (so the chain check still
+ // matches and only the shape walk can detect the attack).
+ let mut swapped = false;
+ for op in ops.iter_mut() {
+ if let ProofOp::Push(Node::HashWithCount(kv_hash, l, r, c)) = op {
+ let node_hash = crate::tree::node_hash_with_count(kv_hash, l, r, *c).unwrap();
+ *op = ProofOp::Push(Node::Hash(node_hash));
+ swapped = true;
+ break;
+ }
+ }
+ assert!(
+ swapped,
+ "test setup: expected at least one HashWithCount op"
+ );
+
+ let bytes = encode_proof(&ops);
+ let result = verify_aggregate_count_on_range_proof(&bytes, &inner_range).unwrap();
+ assert!(
+ result.is_err(),
+ "HashWithCount→Hash swap on a Contained subtree must be rejected by the shape walk"
+ );
+ }
+
+ /// A malicious prover attaches a `KVDigestCount` child under a leaf
+ /// `HashWithCount`. Because `Tree::hash()` for `HashWithCount` is
+ /// computed from the four embedded fields and ignores any reconstructed
+ /// children, the root hash check passes — but a naive verifier that
+ /// counts every visited node would credit the bogus child as +1. The
+ /// shape walk requires `Contained` positions to be **leaves**, so it
+ /// rejects the smuggled-in child.
+    #[test]
+    fn shape_walk_rejects_keyless_node_with_attached_children() {
+        let v = GroveVersion::latest();
+        let (merk, _root) = make_15_key_provable_count_tree(v);
+        let inner_range = QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec());
+        let (mut ops, _honest_count) = merk
+            .prove_aggregate_count_on_range(&inner_range, v)
+            .unwrap()
+            .expect("prove succeeds");
+
+        // Smuggle a fake +1 child under the first HashWithCount op. After
+        // any HashWithCount(...), insert: Push(Hash(zero)) Parent — that
+        // attaches an extra hashed node as the LEFT child of the
+        // HashWithCount during reconstruction. Then add a fake
+        // Push(KVDigestCount) Child that would be picked up by an
+        // allowlist verifier counting visited keys.
+        //
+        // Concretely we splice 4 ops right after the HashWithCount:
+        //   Push(KVDigestCount(in_range_key, value_hash, 1))
+        //   Parent (attach KVDigestCount as the LEFT child of HashWithCount)
+        //   Push(Hash([0; 32]))
+        //   Child (attach Hash as the RIGHT child of HashWithCount)
+        //
+        // The HashWithCount's hash() ignores these children, so the root
+        // hash recomputation is unaffected. The shape walk catches the
+        // Contained-position-with-children violation.
+        // (Generic arg restored: the list holds `ProofOp`s.)
+        let mut new_ops: LinkedList<ProofOp> = LinkedList::new();
+        let mut spliced = false;
+        for op in ops.iter() {
+            new_ops.push_back(op.clone());
+            if !spliced && matches!(op, ProofOp::Push(Node::HashWithCount(_, _, _, _))) {
+                let in_range_key = b"d".to_vec();
+                new_ops.push_back(ProofOp::Push(Node::KVDigestCount(
+                    in_range_key,
+                    [0u8; 32],
+                    1,
+                )));
+                new_ops.push_back(ProofOp::Parent);
+                new_ops.push_back(ProofOp::Push(Node::Hash([0u8; 32])));
+                new_ops.push_back(ProofOp::Child);
+                spliced = true;
+            }
+        }
+        assert!(
+            spliced,
+            "test setup: expected to splice into a HashWithCount"
+        );
+        ops = new_ops;
+
+        let bytes = encode_proof(&ops);
+        let result = verify_aggregate_count_on_range_proof(&bytes, &inner_range).unwrap();
+        assert!(
+            result.is_err(),
+            "attaching children under HashWithCount must be rejected (root hash alone wouldn't catch it)"
+        );
+    }
+
+    /// `HashWithCount` is only safe inside the dedicated aggregate-count
+    /// verifier (which shape-checks the collapsed subtree). The plain
+    /// `Query::execute_proof` verifier must reject it on sight — otherwise
+    /// a malicious prover could include `HashWithCount` in a regular
+    /// query proof, attach fake KV children to it (whose pushes the
+    /// verifier would credit as query results via `execute_node`), and
+    /// have the parent's hash chain still verify because
+    /// `Tree::hash()` for `HashWithCount` ignores attached children.
+    #[test]
+    fn regular_query_verifier_rejects_hash_with_count_node() {
+        use crate::proofs::query::QueryProofVerify;
+        let v = GroveVersion::latest();
+
+        // Build a small regular (non-counted) merk and a range query covering it.
+        let mut merk = TempMerk::new(v);
+        for i in 0u8..5 {
+            merk.apply::<_, Vec<_>>(
+                &[(
+                    vec![i],
+                    Op::Put(vec![i], crate::TreeFeatureType::BasicMerkNode),
+                )],
+                &[],
+                None,
+                v,
+            )
+            .unwrap()
+            .expect("apply");
+        }
+        merk.commit(v);
+        let q = crate::proofs::query::Query::new_single_query_item(QueryItem::Range(
+            vec![0u8]..vec![5u8],
+        ));
+
+        // Generate an honest proof, then splice a `HashWithCount` push at the
+        // front. The exact op sequence doesn't matter for what we're testing —
+        // we just need the regular verifier to refuse to process the proof
+        // as soon as it encounters a `HashWithCount` op.
+        let (mut ops, _) = merk
+            .prove_unchecked_query_items(&[QueryItem::Range(vec![0u8]..vec![5u8])], None, true, v)
+            .unwrap()
+            .expect("prove");
+        ops.push_front(ProofOp::Push(Node::HashWithCount(
+            [0u8; 32], [0u8; 32], [0u8; 32], 0,
+        )));
+        let bytes = encode_proof(&ops);
+
+        let result = q.execute_proof(&bytes, None, true, 0).unwrap();
+        let err = result.expect_err("regular query verifier must reject HashWithCount on sight");
+        let msg = format!("{}", err);
+        assert!(
+            msg.contains("HashWithCount") || msg.contains("aggregate-count"),
+            "expected HashWithCount-rejection message, got: {msg}"
+        );
+    }
+
+    // ---------- byte-mutation fuzzer ----------
+    //
+    // Stronger forgery-resistance check than the three hand-crafted attack
+    // tests above: enumerate every byte of an honest proof, flip it to
+    // each of three different values, and assert the verifier never
+    // produces a "silent forgery" — i.e. an `Ok((root, count))` where
+    // the root **matches** the honest one but the count **differs**.
+    //
+    // Three safe outcomes per mutation:
+    // - **Rejection** — Phase 1 decode error, or Phase 2 shape mismatch.
+    // - **Divergence** — `Ok((root', _))` where `root' != honest_root`,
+    //   so any caller comparing against their trusted root catches it.
+    // - **Same outcome** — `Ok((honest_root, honest_count))`. This can
+    //   happen for non-canonical re-encodings (e.g. swapping
+    //   `Push` ↔ `PushInverted` doesn't change the reconstructed tree's
+    //   root or the shape walk's count). Harmless: the verifier is
+    //   deterministic on (root, count), and that pair is what the
+    //   caller acts on.
+    //
+    // The **unsafe** outcome is `Ok((honest_root, count'))` where
+    // `count' != honest_count`. The hash chain binds count via
+    // `node_hash_with_count`, so this should be impossible — the test
+    // panics if it ever happens.
+    //
+    // We also assert each safe branch fires at least once as a sanity
+    // check that the test is actually exercising the surface.
+    #[test]
+    fn fuzz_byte_mutation_no_silent_forgery() {
+        let v = GroveVersion::latest();
+        let (merk, honest_root) = make_15_key_provable_count_tree(v);
+        let inner_range = QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec());
+        let (ops, honest_count) = merk
+            .prove_aggregate_count_on_range(&inner_range, v)
+            .unwrap()
+            .expect("prove");
+        let honest_bytes = encode_proof(&ops);
+        assert!(!honest_bytes.is_empty());
+
+        let mut rejected = 0usize;
+        let mut diverged = 0usize;
+        let mut same_outcome = 0usize;
+        let mut total = 0usize;
+
+        // Three mutations per byte: +1, +0x55, and XOR 0xff (0xff is a sentinel).
+        let deltas: [u8; 3] = [1, 0x55, 0xff];
+        for byte_idx in 0..honest_bytes.len() {
+            for &delta in &deltas {
+                let mut bytes = honest_bytes.clone();
+                let original = bytes[byte_idx];
+                let mutated = if delta == 0xff {
+                    original ^ 0xff
+                } else {
+                    original.wrapping_add(delta)
+                };
+                if mutated == original {
+                    continue; // no-op, don't count (defensive; can't occur for these deltas)
+                }
+                bytes[byte_idx] = mutated;
+                total += 1;
+
+                let result = verify_aggregate_count_on_range_proof(&bytes, &inner_range).unwrap();
+                match result {
+                    Err(_) => rejected += 1,
+                    Ok((root, count)) => {
+                        if root == honest_root {
+                            // Same root — the verifier MUST also produce
+                            // the same count, otherwise we have a silent
+                            // count-forgery: the caller would accept the
+                            // forged count thinking it's the honest one.
+                            assert_eq!(
+                                count, honest_count,
+                                "SILENT FORGERY at byte index {} (delta=0x{:02x}): \
+                                 verifier returned the honest root but a wrong count \
+                                 ({} != {}). The hash chain should bind count.",
+                                byte_idx, delta, count, honest_count
+                            );
+                            same_outcome += 1;
+                        } else {
+                            // Different root — caller's root check catches it.
+                            diverged += 1;
+                        }
+                    }
+                }
+            }
+        }
+
+        // Sanity: each safe branch should fire at least once on a real proof.
+        assert!(
+            rejected > 0,
+            "expected at least one mutation to be rejected outright"
+        );
+        assert!(
+            diverged > 0,
+            "expected at least one mutation to diverge the root hash"
+        );
+        // `same_outcome` may legitimately be zero on some encoders, so we
+        // don't require it. We just require no silent forgery occurred,
+        // which the inner assert_eq! guarantees.
+        let _ = same_outcome;
+        assert_eq!(rejected + diverged + same_outcome, total);
+    }
+
+    // ---------- randomized round-trip property test ----------
+    //
+    // Build merks with varying sizes and key shapes from a deterministic
+    // RNG, run a bunch of randomly-chosen ranges through the prove → encode
+    // → verify pipeline, and assert the verifier's count agrees with a
+    // ground-truth count computed by directly intersecting the inserted
+    // keys with the range. Catches silent miscounts that the fixed
+    // examples above would miss (off-by-one, edge-of-tree, exact-bound
+    // matches against multi-byte keys, etc.).
+    #[test]
+    fn fuzz_random_trees_and_ranges_round_trip() {
+        // Tiny custom xorshift RNG so we don't have to add a dev-dep.
+        struct XorShift(u64);
+        impl XorShift {
+            fn next_u64(&mut self) -> u64 {
+                let mut x = self.0;
+                x ^= x << 13;
+                x ^= x >> 7;
+                x ^= x << 17;
+                self.0 = x;
+                x
+            }
+            fn gen_range(&mut self, lo: usize, hi: usize) -> usize {
+                lo + (self.next_u64() as usize) % (hi - lo)
+            }
+            fn gen_key(&mut self, max_len: usize) -> Vec<u8> {
+                let len = 1 + self.gen_range(0, max_len);
+                (0..len).map(|_| (self.next_u64() & 0xff) as u8).collect()
+            }
+        }
+
+        let v = GroveVersion::latest();
+        let mut rng = XorShift(0xDEAD_BEEF_C0FFEE);
+        let trials = 16;
+        for trial in 0..trials {
+            let key_count = rng.gen_range(1, 64);
+            let mut keys: Vec<Vec<u8>> = (0..key_count).map(|_| rng.gen_key(8)).collect();
+            keys.sort();
+            keys.dedup();
+
+            let mut merk = TempMerk::new_with_tree_type(v, TreeType::ProvableCountTree);
+            let entries: Vec<(Vec<u8>, Op)> = keys
+                .iter()
+                .map(|k| (k.clone(), Op::Put(vec![0xAB], ProvableCountedMerkNode(1))))
+                .collect();
+            merk.apply::<_, Vec<_>>(&entries, &[], None, v)
+                .unwrap()
+                .expect("apply");
+            merk.commit(v);
+            let root = merk.root_hash().unwrap();
+
+            // Try several random ranges per tree, picking shapes that
+            // exercise both bounded and half-bounded variants.
+            for sub_trial in 0..6 {
+                let lo = rng.gen_key(8);
+                let hi = rng.gen_key(8);
+                let (lo, hi) = if lo <= hi { (lo, hi) } else { (hi, lo) };
+
+                let inner_range = match sub_trial % 6 {
+                    0 => QueryItem::Range(lo.clone()..hi.clone()),
+                    1 => QueryItem::RangeInclusive(lo.clone()..=hi.clone()),
+                    2 => QueryItem::RangeFrom(lo.clone()..),
+                    3 => QueryItem::RangeAfter(lo.clone()..),
+                    4 => QueryItem::RangeTo(..hi.clone()),
+                    _ => QueryItem::RangeToInclusive(..=hi.clone()),
+                };
+
+                let expected = keys
+                    .iter()
+                    .filter(|k| inner_range.contains(k.as_slice()))
+                    .count() as u64;
+
+                let (ops, prover_count) = merk
+                    .prove_aggregate_count_on_range(&inner_range, v)
+                    .unwrap()
+                    .expect("prove");
+                assert_eq!(
+                    prover_count, expected,
+                    "trial {} sub {}: prover count mismatch for range {:?}",
+                    trial, sub_trial, inner_range
+                );
+                let bytes = encode_proof(&ops);
+                let (vroot, vcount) = verify_aggregate_count_on_range_proof(&bytes, &inner_range)
+                    .unwrap()
+                    .expect("verify");
+                assert_eq!(
+                    vroot, root,
+                    "trial {} sub {}: verifier root mismatch",
+                    trial, sub_trial
+                );
+                assert_eq!(
+                    vcount, expected,
+                    "trial {} sub {}: verifier count mismatch for range {:?}",
+                    trial, sub_trial, inner_range
+                );
+            }
+        }
+    }
+
+    // ---------- shape-walk rejection of malformed proof shapes ----------
+    //
+    // These tests synthesize op streams that are well-formed bytes (Phase 1
+    // decode succeeds) but violate the structural invariants the shape walk
+    // requires (Phase 2 rejection). They exist to lock down the defensive
+    // error branches in `verify_count_shape` so future refactors that
+    // accidentally relax them are caught by the test suite.
+
+    /// `HashWithCount` is only valid as a leaf in the proof tree. If the
+    /// prover attaches children to a Disjoint-position `HashWithCount`,
+    /// the shape walk must reject — even though the parent's hash chain
+    /// (which uses `Tree::hash()` for `HashWithCount`, computed from the
+    /// four embedded fields and ignoring children) would still verify.
+    #[test]
+    fn shape_walk_rejects_disjoint_hashwithcount_with_children() {
+        let v = GroveVersion::latest();
+        let (merk, _root) = make_15_key_provable_count_tree(v);
+        // RangeAfter("o") → all 15 keys are below; the entire tree is
+        // Disjoint relative to the inner range, so the honest proof is a
+        // single Push(HashWithCount(...)).
+        let inner_range = QueryItem::RangeAfter(b"o".to_vec()..);
+        let (mut ops, _) = merk
+            .prove_aggregate_count_on_range(&inner_range, v)
+            .unwrap()
+            .expect("prove succeeds");
+
+        // Splice in another HashWithCount as the child (no key, so no
+        // ordering constraint at Phase 1) so we exercise Phase 2's
+        // leaf-only assertion at the Disjoint position.
+        let mut spliced = LinkedList::<ProofOp>::new();
+        let mut done = false;
+        for op in ops.iter() {
+            spliced.push_back(op.clone());
+            if !done && matches!(op, ProofOp::Push(Node::HashWithCount(_, _, _, _))) {
+                spliced.push_back(ProofOp::Push(Node::HashWithCount(
+                    [0u8; 32], [0u8; 32], [0u8; 32], 1,
+                )));
+                spliced.push_back(ProofOp::Parent);
+                done = true;
+            }
+        }
+        assert!(done, "test setup: expected at least one HashWithCount op");
+        ops = spliced;
+
+        let bytes = encode_proof(&ops);
+        let result = verify_aggregate_count_on_range_proof(&bytes, &inner_range).unwrap();
+        let err = result.expect_err("Disjoint HashWithCount with children must be rejected");
+        match err {
+            Error::InvalidProofError(msg) => assert!(
+                msg.contains("Disjoint position must be a leaf"),
+                "unexpected message: {msg}"
+            ),
+            other => panic!("expected InvalidProofError, got {:?}", other),
+        }
+    }
+
+    /// At a Disjoint position the shape walk requires `HashWithCount` (only
+    /// node type with a hash-bound count). A `Hash` op there would carry an
+    /// untrusted structural count for the parent's `own_count` derivation,
+    /// so it must be rejected.
+    #[test]
+    fn shape_walk_rejects_non_hashwithcount_at_disjoint() {
+        let v = GroveVersion::latest();
+        let (merk, _root) = make_15_key_provable_count_tree(v);
+        let inner_range = QueryItem::RangeAfter(b"o".to_vec()..);
+        let (mut ops, _) = merk
+            .prove_aggregate_count_on_range(&inner_range, v)
+            .unwrap()
+            .expect("prove succeeds");
+
+        // Swap the Disjoint HashWithCount for a plain Hash carrying the SAME node hash.
+        let mut swapped = false;
+        for op in ops.iter_mut() {
+            if let ProofOp::Push(Node::HashWithCount(kv, l, r, c)) = op {
+                let node_hash = crate::tree::node_hash_with_count(kv, l, r, *c).unwrap();
+                *op = ProofOp::Push(Node::Hash(node_hash));
+                swapped = true;
+                break;
+            }
+        }
+        assert!(swapped, "test setup: expected a HashWithCount op to swap");
+
+        let bytes = encode_proof(&ops);
+        let result = verify_aggregate_count_on_range_proof(&bytes, &inner_range).unwrap();
+        // Phase 1 rejects plain Hash via the allowlist; Phase 2 would also
+        // reject "expected HashWithCount at Disjoint position". Either is fine.
+        let err = result.expect_err("plain Hash at Disjoint must be rejected");
+        match err {
+            Error::InvalidProofError(_) => {}
+            other => panic!("expected InvalidProofError, got {:?}", other),
+        }
+    }
+
+    /// At a Boundary position the shape walk requires the node's key to
+    /// fall strictly inside the inherited subtree bounds. A prover that
+    /// emits a `KVDigestCount` whose key is outside those bounds is trying
+    /// to confuse the recursion's bound tracking — it must be rejected.
+    #[test]
+    fn shape_walk_rejects_kvdigestcount_outside_inherited_bounds() {
+        let v = GroveVersion::latest();
+        let (merk, _root) = make_15_key_provable_count_tree(v);
+        let inner_range = QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec());
+        let (mut ops, _) = merk
+            .prove_aggregate_count_on_range(&inner_range, v)
+            .unwrap()
+            .expect("prove succeeds");
+
+        // Rewrite the first Boundary KVDigestCount's key to [0xff, 0xff] — far
+        // past any real key ('a'..'o'), violating the inherited (lo, hi) bounds
+        // at the verifier's recursion frame even though the bytes still decode.
+        let mut rewrote = false;
+        for op in ops.iter_mut() {
+            if let ProofOp::Push(Node::KVDigestCount(key, _, _)) = op {
+                *key = vec![0xff, 0xff];
+                rewrote = true;
+                break;
+            }
+        }
+        assert!(rewrote, "test setup: expected a KVDigestCount to rewrite");
+
+        let bytes = encode_proof(&ops);
+        let result = verify_aggregate_count_on_range_proof(&bytes, &inner_range).unwrap();
+        let err = result.expect_err("KVDigestCount outside bounds must be rejected");
+        match err {
+            Error::InvalidProofError(_) => {}
+            other => panic!("expected InvalidProofError, got {:?}", other),
+        }
+    }
+}
diff --git a/merk/src/proofs/query/mod.rs b/merk/src/proofs/query/mod.rs
index 22352d5ce..1fd556a2f 100644
--- a/merk/src/proofs/query/mod.rs
+++ b/merk/src/proofs/query/mod.rs
@@ -5,11 +5,16 @@ pub use grovedb_query::*;
#[cfg(test)]
mod merk_integration_tests;
+#[cfg(feature = "minimal")]
+pub mod aggregate_count;
#[cfg(any(feature = "minimal", feature = "verify"))]
mod map;
#[cfg(any(feature = "minimal", feature = "verify"))]
mod verify;
+#[cfg(feature = "minimal")]
+pub use aggregate_count::verify_aggregate_count_on_range_proof;
+
#[cfg(feature = "minimal")]
use grovedb_costs::{cost_return_on_error, CostContext, CostResult, CostsExt, OperationCost};
#[cfg(feature = "minimal")]
diff --git a/merk/src/proofs/query/verify.rs b/merk/src/proofs/query/verify.rs
index 4a11b67fe..822fec0fc 100644
--- a/merk/src/proofs/query/verify.rs
+++ b/merk/src/proofs/query/verify.rs
@@ -485,6 +485,25 @@ impl QueryProofVerify for Query {
)));
}
}
+ Node::HashWithCount(..) => {
+ // `HashWithCount` is only safe inside the dedicated
+ // aggregate-count verifier, which shape-checks each
+ // collapsed subtree against the queried range. The plain
+ // query verifier does no such shape check, and
+ // `Tree::hash()` for a `HashWithCount` recomputes its
+ // hash from the embedded `(kv_hash, l, r, count)` while
+ // *ignoring* any reconstructed children. A malicious
+ // prover could therefore hang fake KV pushes under a
+ // `HashWithCount`, satisfy `execute_node` from those
+ // pushes (so they appear as query results) while still
+ // preserving the parent's hash chain. Fail fast here so
+ // the regular query path can never accept one.
+ return Err(Error::InvalidProofError(
+ "HashWithCount node is only valid in aggregate-count proofs; \
+ encountered in regular query verification"
+ .to_string(),
+ ));
+ }
}
last_push = Some(node.clone());
diff --git a/merk/src/proofs/tree.rs b/merk/src/proofs/tree.rs
index b733c68ef..09cffe090 100644
--- a/merk/src/proofs/tree.rs
+++ b/merk/src/proofs/tree.rs
@@ -128,6 +128,20 @@ impl Tree {
match &self.node {
Node::Hash(hash) => (*hash).wrap_with_cost(Default::default()),
+ // HashWithCount is self-verifying: the verifier recomputes
+ // node_hash_with_count(kv_hash, left_child_hash, right_child_hash, count)
+ // from the four committed fields. If the prover lied about `count`
+ // the recomputed hash diverges from the parent's expectation and
+ // the parent's Merkle-root check fails — so the count is bound to
+ // the proof, not just trusted on faith.
+ //
+ // The embedded child hashes (not the reconstructed-Tree's
+ // children) are what the original subtree's node_hash was computed
+ // from, so we use them directly here even though `self` is treated
+ // as a leaf in the proof Tree.
+ Node::HashWithCount(kv_hash, left_child_hash, right_child_hash, count) => {
+ node_hash_with_count(kv_hash, left_child_hash, right_child_hash, *count)
+ }
Node::KVHash(kv_hash) => compute_hash(self, *kv_hash),
Node::KV(key, value) => kv_hash(key.as_slice(), value.as_slice())
.flat_map(|kv_hash| compute_hash(self, kv_hash)),
@@ -377,8 +391,8 @@ impl Tree {
}
/// Returns the key from this tree node if it's a KV-type node with a key.
- /// Returns None for Hash, KVHash, or KVHashCount node types (which only
- /// have hashes, not keys).
+ /// Returns None for Hash, KVHash, KVHashCount, or HashWithCount node
+ /// types (which only have hashes, not keys).
#[cfg(any(feature = "minimal", feature = "verify"))]
pub fn key(&self) -> Option<&[u8]> {
match &self.node {
@@ -392,7 +406,9 @@ impl Tree {
| Node::KVCount(key, ..)
| Node::KVRefValueHashCount(key, ..) => Some(key.as_slice()),
// These nodes don't have keys, only hashes
- Node::Hash(_) | Node::KVHash(_) | Node::KVHashCount(..) => None,
+ Node::Hash(_) | Node::KVHash(_) | Node::KVHashCount(..) | Node::HashWithCount(..) => {
+ None
+ }
}
}
@@ -404,6 +420,7 @@ impl Tree {
Ok((*feature_type).into())
}
Node::KVCount(_, _, count) => Ok(AggregateData::ProvableCount(*count)),
+ Node::HashWithCount(.., count) => Ok(AggregateData::ProvableCount(*count)),
Node::KV(..) | Node::KVValueHash(..) => Ok(AggregateData::NoAggregateData),
_ => Err(Error::InvalidProofError(
"Cannot extract aggregate data from this node type".to_string(),
@@ -500,7 +517,36 @@ pub const MAX_PROOF_TREE_HEIGHT: usize = 92;
///
/// Enforces a limit of [`MAX_PROOF_OPS`] operations to prevent
/// denial-of-service from malicious proofs.
-pub fn execute<I, F>(ops: I, collapse: bool, mut visit_node: F) -> CostResult<Tree, Error>
+///
+/// Equivalent to [`execute_with_options(ops, collapse, true, visit_node)`] —
+/// i.e. enforces the root-level AVL height-balance check after reconstruction.
+pub fn execute<I, F>(ops: I, collapse: bool, visit_node: F) -> CostResult<Tree, Error>
+where
+    I: IntoIterator<Item = Result<Op, Error>>,
+    F: FnMut(&Node) -> Result<(), Error>,
+{
+    execute_with_options(ops, collapse, true, visit_node)
+}
+
+#[cfg(any(feature = "minimal", feature = "verify"))]
+/// Executes a proof exactly like [`execute`] but lets the caller opt out of
+/// the root-level AVL balance check.
+///
+/// Existing query / chunk / branch verifiers always pass `verify_avl_balance
+/// = true` (via [`execute`]). The aggregate-count verifier passes `false`
+/// because count proofs intentionally collapse fully-inside subtrees into a
+/// single `HashWithCount` op (height = 1) while still descending the boundary
+/// path on the other side, so the reconstructed tree's root will routinely
+/// have child heights differing by more than one — that's expected, not
+/// proof corruption. The cryptographic guarantees (hash-chain reconstruction,
+/// boundary-key checks, count commitment via `node_hash_with_count`) are all
+/// independent of AVL balance.
+pub fn execute_with_options<I, F>(
+    ops: I,
+    collapse: bool,
+    verify_avl_balance: bool,
+    mut visit_node: F,
+) -> CostResult<Tree, Error>
where
+    I: IntoIterator<Item = Result<Op, Error>>,
F: FnMut(&Node) -> Result<(), Error>,
@@ -687,9 +733,10 @@ where
let tree = stack.pop().unwrap();
- if tree.child_heights.0.max(tree.child_heights.1)
- - tree.child_heights.0.min(tree.child_heights.1)
- > 1
+ if verify_avl_balance
+ && tree.child_heights.0.max(tree.child_heights.1)
+ - tree.child_heights.0.min(tree.child_heights.1)
+ > 1
{
return Err(Error::InvalidProofError(
"Expected proof to result in a valid avl tree".to_string(),