diff --git a/docs/book/book.toml b/docs/book/book.toml index 3b6f2cf38..1f8d7090b 100644 --- a/docs/book/book.toml +++ b/docs/book/book.toml @@ -12,4 +12,4 @@ command = "mdbook-mermaid" [output.html] additional-css = ["lang-selector.css"] -additional-js = ["mermaid.min.js", "mermaid-init.js", "lang-selector.js"] +additional-js = ["mermaid.min.js", "mermaid-fixup.js", "mermaid-init.js", "lang-selector.js"] diff --git a/docs/book/mermaid-fixup.js b/docs/book/mermaid-fixup.js new file mode 100644 index 000000000..f23681ebe --- /dev/null +++ b/docs/book/mermaid-fixup.js @@ -0,0 +1,26 @@ +// Client-side fallback: converts `
<pre><code class="language-mermaid">...`
+// blocks (raw mdbook output when the mdbook-mermaid preprocessor isn't run)
+// into `<pre class="mermaid">...</pre>` blocks that mermaid.js will render.
+//
+// Safe to leave enabled even when the preprocessor IS run — preprocessor
+// output already uses `<pre class="mermaid">`, so the selector below finds
+// nothing and the script is a no-op.
+(() => {
+    function fixup() {
+        const blocks = document.querySelectorAll('pre > code.language-mermaid');
+        blocks.forEach((code) => {
+            const pre = code.parentElement;
+            const replacement = document.createElement('pre');
+            replacement.className = 'mermaid';
+            // textContent decodes HTML entities (&lt; → <, &amp; → &, etc.)
+            replacement.textContent = code.textContent;
+            pre.replaceWith(replacement);
+        });
+    }
+
+    if (document.readyState === 'loading') {
+        document.addEventListener('DOMContentLoaded', fixup);
+    } else {
+        fixup();
+    }
+})();
diff --git a/docs/book/src/SUMMARY.md b/docs/book/src/SUMMARY.md
index 8d4d6e522..6b091779d 100644
--- a/docs/book/src/SUMMARY.md
+++ b/docs/book/src/SUMMARY.md
@@ -11,6 +11,7 @@
 - [The Proof System](proof-system.md)
 - [The Query System](query-system.md)
 - [Aggregate Sum Queries](aggregate-sum-queries.md)
+- [Aggregate Count Queries](aggregate-count-queries.md)
 - [Batch Operations](batch-operations.md)
 - [Cost Tracking](cost-tracking.md)
 - [The MMR Tree](mmr-tree.md)
diff --git a/docs/book/src/aggregate-count-queries.md b/docs/book/src/aggregate-count-queries.md
new file mode 100644
index 000000000..97a8aa0d4
--- /dev/null
+++ b/docs/book/src/aggregate-count-queries.md
@@ -0,0 +1,671 @@
+# Aggregate Count Queries
+
+## Overview
+
+An **Aggregate Count Query** lets a caller ask a single, very specific question:
+
+> "How many elements in this subtree fall inside this key range?"
+
+The answer comes back as a `u64`, and on a **ProvableCountTree** or
+**ProvableCountSumTree** it can be returned together with a cryptographic proof
+that anyone holding the tree's root hash can verify — without ever materializing
+the elements themselves.
+
+Where regular queries return key/value pairs and aggregate-sum queries return
+running totals of `SumItem` values, an aggregate-count query returns only a
+**count** and a proof of that count.
+
+It is implemented as a new `QueryItem` variant:
+
+```rust
+pub enum QueryItem {
+    Key(Vec<u8>),
+    Range(Range<Vec<u8>>),
+    // ... existing variants ...
+    RangeAfterToInclusive(RangeInclusive<Vec<u8>>),
+
+    /// Count the elements matched by the inner range, without returning them.
+    /// Only valid on ProvableCountTree / ProvableCountSumTree (and their
+    /// `NonCounted` wrapper variants).
+    AggregateCountOnRange(Box<QueryItem>),
+```
+
+The wrapped `QueryItem` is the **range to count over** — it must be one of the
+true range variants: `Range`, `RangeInclusive`, `RangeFrom`, `RangeTo`,
+`RangeToInclusive`, `RangeAfter`, `RangeAfterTo`, `RangeAfterToInclusive`.
+The single-key (`Key`), full-range (`RangeFull`), and self-nested
+(`AggregateCountOnRange`) variants are all **rejected**.
+
+> **Why are `Key` and `RangeFull` rejected?**
+>
+> - **`Key(k)`** would always return `0` or `1` — an existence test. Callers
+>   should use the existing `GroveDb::has_raw` / `GroveDb::get_raw` (or their
+>   provable variants) instead. Routing existence checks through this API
+>   would force a count-shaped result type and proof shape on a question that
+>   already has a much cheaper, narrower answer.
+> - **`RangeFull`** has its answer already exposed by the parent's
+>   `Element::ProvableCountTree(_, count, _)` /
+>   `Element::ProvableCountSumTree(_, count, _, _)` bytes, which are
+>   hash-verified by the parent Merk's proof. Going through
+>   `AggregateCountOnRange(RangeFull)` would always produce a strictly heavier
+>   proof for an answer the caller can read directly.
+>
+> In short, `AggregateCountOnRange` exists for the case the rest of the API
+> can't already answer cheaply: counting a **bounded sub-range** of keys.
+
+## Why this works only on Provable Count Trees
+
+GroveDB has six tree types that track a count:
+
+| Tree type                | Count tracked? | Count in node hash? | AggregateCountOnRange allowed? |
+|--------------------------|:--------------:|:-------------------:|:-----------------------:|
+| `CountTree`              | yes            | no                  | **no**                  |
+| `CountSumTree`           | yes            | no                  | **no**                  |
+| `ProvableCountTree`      | yes            | **yes**             | **yes**                 |
+| `ProvableCountSumTree`   | yes            | **yes** (count only)| **yes**                 |
+| `NonCountedProvableCountTree`    | yes (via wrapper) | yes (inner)    | **yes**                 |
+| `NonCountedProvableCountSumTree` | yes (via wrapper) | yes (inner)    | **yes**                 |
+
+Only the **provable** variants bake the count into the node hash via
+`node_hash_with_count(kv_hash, left, right, count)`. Because every node's count
+participates in the Merkle root, a verifier holding only the root hash can
+reconstruct enough of the tree from a proof to **trust** the counts that appear
+inside.
+
+Plain `CountTree` and `CountSumTree` track counts in storage as a convenience
+for the executing node, but those counts are not in the hash. A "proof" of
+their count would be unverifiable, so we reject `AggregateCountOnRange` against them
+at query-construction time.
+
+The two `NonCounted*` wrapper variants are accepted because the wrapper only
+tells the **parent** tree to skip this element when aggregating its own count;
+the inner tree is still a fully-fledged provable count tree.
+
+## Query-Level Constraints
+
+`AggregateCountOnRange` is a **terminal** query item. When it appears, the surrounding
+`Query` is reduced to a single, well-defined operation: "count, then return."
+
+```rust
+pub struct Query {
+    pub items: Vec<QueryItem>,
+    pub default_subquery_branch: SubqueryBranch,
+    pub conditional_subquery_branches: Option<IndexMap<QueryItem, SubqueryBranch>>,
+    pub left_to_right: bool,
+    pub add_parent_tree_on_subquery: bool,
+}
+```
+
+If any `QueryItem::AggregateCountOnRange(_)` appears in `items`, the query is only
+well-formed when **all** of the following hold:
+
+1. `items.len() == 1` — no other range items, no other counts, no mixing.
+2. The inner `QueryItem` is **not** `Key` (use `has_raw` / `get_raw` for
+   existence tests — see the note above).
+3. The inner `QueryItem` is **not** `RangeFull` (use the parent element to read
+   the unconditional total — see the note above).
+4. The inner `QueryItem` is not itself another `AggregateCountOnRange`.
+5. `default_subquery_branch.subquery.is_none()` and `subquery_path.is_none()`.
+6. `conditional_subquery_branches.is_none()` (or empty).
+7. The targeted subtree's `TreeType` is one of the four allowed variants above.
+8. The enclosing `SizedQuery` does not set a `limit` or `offset`. Counting is an
+   aggregate over the matched range — pagination would silently change the
+   answer and is therefore rejected.
+9. `left_to_right` is **ignored** (counting is direction-agnostic). It is not
+   an error to set it, but it has no effect on the returned count or proof.
+
+Violating constraints 1–8 returns `Error::InvalidQuery(...)` with a message
+that names the offending field, before any I/O is performed.
+
+## API surface
+
+`AggregateCountOnRange` queries go through the **same** `prove_query` entry
+point as every other `PathQuery` — only the verifier is dedicated:
+
+```rust
+// Prove side — unchanged from regular queries:
+GroveDb::prove_query(&path_query, prove_options, grove_version)
+    -> CostResult<Vec<u8>, Error>
+
+// Verify side — dedicated, returns (root_hash, count):
+GroveDb::verify_aggregate_count_query(proof, &path_query, grove_version)
+    -> Result<(CryptoHash, u64), Error>
+```
+
+A bare tuple is used for the result rather than a wrapper struct because
+the count is already a `u64` and the `path_query` itself echoes the inner
+range — there is nothing else to return.
+
+> **Note on `NonCounted` children.** `Element::NonCounted` wrappers tell
+> the parent tree to skip the wrapped element when aggregating its own
+> count. `AggregateCountOnRange` honors this: every node in a
+> `ProvableCountTree` carries an own-count of 1 (normal) or 0
+> (`NonCounted`-wrapped), and the verifier credits only the **own-count**
+> to the in-range total when the boundary key falls in range. So
+> `NonCounted` children are excluded from the result, matching the
+> tree's own aggregate.
+>
+> Mechanically the verifier derives each boundary node's own-count from
+> its committed aggregate as
+> `aggregate − left_struct − right_struct` (see the "Verifier shape
+> walk" section). For a `NonCounted` leaf, `aggregate = 0` and there are
+> no children, so own-count = 0 and the key contributes nothing.
+
+## How the Proof is Built
+
+For a `ProvableCountTree`, every node hash already commits to the count of its
+own subtree via `node_hash_with_count(kv_hash, left, right, count)`. The proof
+generator's job is to produce just enough structure that the verifier can:
+
+1. Reconstruct the **root hash** of the queried Merk and check it against the
+   expected hash.
+2. Compute the answer **count** from the count fields embedded along the way.
+
+To do that, every proof node has a role; we use a small vocabulary of
+proof-node types — three from the existing proof system, plus one new
+self-verifying node added specifically for this proof shape:
+
+| Role in proof              | Proof node type                                                              | What it carries                                                | Why we picked it                                                                                                         |
+|----------------------------|------------------------------------------------------------------------------|----------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------|
+| **On-path / boundary**     | `KVDigestCount(key, value_hash, count)`                                       | key + value digest + subtree count                             | the verifier needs the **key** to test "is it in the range?", and the count is hash-bound via `node_hash_with_count` so it can also be used as the structural count of this subtree by ancestor own-count derivation |
+| **Fully-inside root**      | `HashWithCount(kv_hash, left_child_hash, right_child_hash, count)`            | the four fields needed to recompute `node_hash_with_count`     | one op per collapsed subtree, **and self-verifying** — see security note below                                          |
+| **Fully-outside**          | `HashWithCount(kv_hash, left_child_hash, right_child_hash, count)` (same)     | same shape as the inside variant                               | the structural count of an outside subtree is needed by the boundary parent's `own_count = aggregate − left − right` derivation; only `HashWithCount` carries a *hash-bound* count, so we use it for outside subtrees too. Plain `Hash(_)` would not bind a count and is therefore not used in count proofs. |
+| **Empty side**             | (the empty-tree sentinel, no `Push` needed)                                   | —                                                              | a missing child contributes hash = 0 and count = 0 to the parent                                                          |
+
+> **Why `HashWithCount` is self-verifying.** The `count` value carried by a
+> `HashWithCount` op is *bound* to the parent merk's hash chain, not trusted
+> on faith. The verifier computes
+> `node_hash_with_count(kv_hash, left_child_hash, right_child_hash, count)`
+> from the four committed fields and uses the result as the subtree's
+> committed `node_hash` for the parent's hash recomputation. If the prover
+> lied about `count`, the recomputed `node_hash` diverges from what the
+> parent committed, and the parent's Merkle-root check fails. (An earlier
+> draft of this design used `HashWithCount(node_hash, count)` only — that
+> form was rejected during review because the count would have been
+> trustlessly attached metadata, with no cryptographic binding. See the
+> "Verifier shape walk" section below for the second half of the
+> security story.)
+
+### Walking running example
+
+We'll use this 7-key `ProvableCountTree` as the running example through every
+diagram below. Counts shown next to each node are "size of the subtree rooted
+here":
+
+```mermaid
+graph TD
+    d["d
count = 7"] + b["b
count = 3"] + f["f
count = 3"] + a["a
count = 1"] + c["c
count = 1"] + e["e
count = 1"] + g["g
count = 1"] + d --> b + d --> f + b --> a + b --> c + f --> e + f --> g + + style d fill:#fef9e7,stroke:#f39c12,stroke-width:2px +``` + +Below, each per-case diagram colours nodes by the role table above: + +- 🟢 **green** = `HashWithCount` (fully-inside, contributes count, not descended) +- 🟡 **yellow** = `KVDigestCount` (on-path / boundary, key tested for in-range) +- ⚪ **gray** = `HashWithCount` used as a fully-outside subtree (carries the + structural count needed by the boundary parent's `own_count` derivation, + but its key is not in range so it contributes 0 to the in-range total) + +--- + +### Case 1 — Open ranges (one bound) + +These are the variants with a single bound: `RangeFrom(a..)`, `RangeTo(..b)`, +`RangeToInclusive(..=b)`, `RangeAfter((a, ..))`. Conceptually we walk down to +that one bound, partitioning each subtree along the way into "fully on the +included side" or "fully on the excluded side". + +#### Example — `RangeFrom("c"..)` → keys ≥ "c" + +Expected: `{c, d, e, f, g}`, count = 5. + +```mermaid +graph TD + d["d
KVDigestCount
key = d, vh, count = 7"] + b["b
KVDigestCount
key = b, vh, count = 3"] + f["f
HashWithCount
kv_hash, l, r, count = 3"] + aH["a
HashWithCount
kv_hash, l, r, count = 1"] + c["c
KVDigestCount
key = c, vh, count = 1"] + d --> b + d --> f + b --> aH + b --> c + + style d fill:#fef9e7,stroke:#f39c12,stroke-width:2px + style b fill:#fef9e7,stroke:#f39c12,stroke-width:2px + style c fill:#fef9e7,stroke:#f39c12,stroke-width:2px + style f fill:#d5f5e3,stroke:#27ae60,stroke-width:2px + style aH fill:#e8e8e8,stroke:#999,stroke-dasharray:5 5 +``` + +Why each role: + +- **d, b, c** — boundary nodes on the walk to the lower bound `"c"`. Each is + `KVDigestCount` because the verifier must test its key against `>= "c"`. +- **a** — left child of `b`; "a" < "c", so its entire subtree is excluded + from the in-range total. Sent as a `HashWithCount` (no key) — the verifier + needs the structural count = 1 to derive `b`'s `own_count`, and this is + the only proof-node type that binds the count to `b`'s hash chain. The + `a` subtree contributes 0 to the in-range total (its key is not tested). +- **f** — right child of `d`; "d" < "f" and we're including everything ≥ "c", + so the entire `f` subtree (including its descendants) is in-range. + We don't need to descend — `f` is sent as a single `HashWithCount` op + whose `(kv_hash, left_child_hash, right_child_hash, count)` lets the + verifier recompute `f.node_hash` self-contained, and contributes the full + subtree count of 3 directly. **The original tree's `e` and `g` children + do not appear as separate proof ops** — their hashes live inside the + `HashWithCount`'s `left_child_hash` / `right_child_hash` fields. + +Verifier total: + +| Node | In range? 
| Contribution | +|------|-----------|--------------| +| d (KVDigestCount, key="d") | "d" ≥ "c" | **+1** | +| b (KVDigestCount, key="b") | "b" < "c" | +0 | +| a (HashWithCount, count=1) | (outside, key not tested) | +0 | +| c (KVDigestCount, key="c") | "c" ≥ "c" | **+1** | +| f (HashWithCount, count=3) | (whole subtree in range) | **+3** | + +→ **count = 5** ✓ + +#### Example — `RangeAfter(("b", ..))` → keys > "b" + +Same expected match set `{c, d, e, f, g}`, count = 5 — but the boundary +walk stops one level higher (at `b` instead of `c`), and the in-range test +flips from `>=` to `>`. + +```mermaid +graph TD + d["d
KVDigestCount
key = d, vh, count = 7"] + b["b
KVDigestCount
key = b, vh, count = 3"] + f["f
HashWithCount
kv_hash, l, r, count = 3"] + aH["a
HashWithCount
kv_hash, l, r, count = 1"] + c["c
HashWithCount
kv_hash, l, r, count = 1"] + d --> b + d --> f + b --> aH + b --> c + + style d fill:#fef9e7,stroke:#f39c12,stroke-width:2px + style b fill:#fef9e7,stroke:#f39c12,stroke-width:2px + style c fill:#d5f5e3,stroke:#27ae60,stroke-width:2px + style f fill:#d5f5e3,stroke:#27ae60,stroke-width:2px + style aH fill:#e8e8e8,stroke:#999,stroke-dasharray:5 5 +``` + +Why each role differs from the previous example: + +- **b** is now the boundary's terminus, not `c`. It is still `KVDigestCount` + because the verifier needs the key to apply the in-range test — but the + test is now `> "b"`, so `b` itself **fails** and contributes 0. +- **c** is the right child of `b`. Every key in `c`'s subtree is `> "b"` + (here, just the leaf `c` itself), so the whole subtree is in-range. We + don't descend; `c` becomes `HashWithCount` (no key needed — its + `(kv_hash, l, r, count)` self-contains everything the verifier needs) + and contributes its count of 1 directly. Compare to the previous example + where `c` was a boundary node tested against `>= "c"`. +- **a** plays the same role as before — fully outside, sent as + `HashWithCount` so its structural count of 1 is hash-bound to `b`. + Contributes 0 to the in-range total (key not tested). **f's + original-tree children (`e`, `g`) do not appear as separate proof ops** + — they live inside `f`'s `HashWithCount` fields. + +Verifier total: + +| Node | In range? 
| Contribution | +|------|-----------|--------------| +| d (KVDigestCount, key="d") | "d" > "b" | **+1** | +| b (KVDigestCount, key="b") | "b" > "b" → no | +0 | +| a (HashWithCount, count=1) | (outside, key not tested) | +0 | +| c (HashWithCount, count=1) | (whole subtree in range) | **+1** | +| f (HashWithCount, count=3) | (whole subtree in range) | **+3** | + +→ **count = 5** ✓ + +> **Take-away:** the *match set* is the same as `RangeFrom("c"..)`, but the +> *proof shape* is slightly cheaper — one fewer `KVDigestCount` and one extra +> `HashWithCount` — because the bound aligns with an internal node rather than +> a leaf. The generator picks the shape based on where the bound key lives +> in the tree, not on what the user wrote. + +The same pattern, mirrored, applies to `RangeTo(..b)` and +`RangeToInclusive(..=b)` (upper-bound variants — boundary walk goes right, +fully-inside subtrees hang off the left of each step). The only differences +across all four open-range variants are which side of each split is +"fully-included" and whether the boundary key itself counts (`>=` vs `>` +for the lower side, `<` vs `<=` for the upper side). + +--- + +### Case 2 — Closed ranges (both bounds) + +These are the variants with both a lower and upper bound: `Range(a..b)`, +`RangeInclusive(a..=b)`, `RangeAfterTo((a, b))`, `RangeAfterToInclusive((a, ..=b))`. + +The proof has **two** boundary walks meeting at the lowest common ancestor of +the two bounds. Subtrees fully between the two bounds appear as +`HashWithCount`; subtrees fully outside both bounds **also** appear as +`HashWithCount` (the structural count is needed by the boundary parent's +`own_count` derivation, and only `HashWithCount` binds that count to the +parent's hash chain). + +To make the structure interesting we'll use a slightly bigger example tree +than for Case 1 — 15 keys (`a` through `o`), 4 levels deep, balanced as a +perfect binary tree. Counts shown are subtree sizes: + +```mermaid +graph TD + h["h
count = 15"] + d["d
count = 7"] + l["l
count = 7"] + b["b
count = 3"] + f["f
count = 3"] + j["j
count = 3"] + n["n
count = 3"] + a["a
count = 1"] + c["c
count = 1"] + e["e
count = 1"] + g["g
count = 1"] + i["i
count = 1"] + k["k
count = 1"] + m["m
count = 1"] + o["o
count = 1"] + h --> d + h --> l + d --> b + d --> f + l --> j + l --> n + b --> a + b --> c + f --> e + f --> g + j --> i + j --> k + n --> m + n --> o + + style h fill:#fef9e7,stroke:#f39c12,stroke-width:2px +``` + +#### Example — `RangeInclusive("c"..="l")` → keys ∈ [c, l] + +Expected: `{c, d, e, f, g, h, i, j, k, l}`, count = 10. + +```mermaid +graph TD + h["h
KVDigestCount
key = h, vh, count = 15"] + d["d
KVDigestCount
key = d, vh, count = 7"] + l["l
KVDigestCount
key = l, vh, count = 7"] + b["b
KVDigestCount
key = b, vh, count = 3"] + f["f
HashWithCount
kv_hash, l, r, count = 3"] + j["j
HashWithCount
kv_hash, l, r, count = 3"] + nH["n subtree
HashWithCount
kv_hash, l, r, count = 3"] + aH["a
HashWithCount
kv_hash, l, r, count = 1"] + c["c
KVDigestCount
key = c, vh, count = 1"] + h --> d + h --> l + d --> b + d --> f + l --> j + l --> nH + b --> aH + b --> c + + style h fill:#fef9e7,stroke:#f39c12,stroke-width:2px + style d fill:#fef9e7,stroke:#f39c12,stroke-width:2px + style l fill:#fef9e7,stroke:#f39c12,stroke-width:2px + style b fill:#fef9e7,stroke:#f39c12,stroke-width:2px + style c fill:#fef9e7,stroke:#f39c12,stroke-width:2px + style f fill:#d5f5e3,stroke:#27ae60,stroke-width:2px + style j fill:#d5f5e3,stroke:#27ae60,stroke-width:2px + style aH fill:#e8e8e8,stroke:#999,stroke-dasharray:5 5 + style nH fill:#e8e8e8,stroke:#999,stroke-dasharray:5 5 +``` + +Why each role: + +- **h** — LCA of `"c"` and `"l"`. Sits above both walks, so it's a + `KVDigestCount` and the verifier tests its key against `[c, l]`. +- **d** — on the left walk (down to lower bound `c`). `KVDigestCount`, + key tested. +- **l** — on the right walk (down to upper bound `l`); also the upper bound + itself. `KVDigestCount`, key tested (it passes — `l ≤ l`). +- **b** — on the left walk (`b < c`, so we have to descend further to find + the lower bound). `KVDigestCount`, key tested (it fails — `b < c`). +- **c** — the lower bound itself. `KVDigestCount`, key tested (it passes — + `c ≥ c`). +- **a** — left of `b`; "a" < "c", entire subtree outside. Sent as + `HashWithCount` carrying `(kv_hash, l, r, count = 1)` so its structural + count is hash-bound to `b`. Contributes 0 to the in-range total. +- **n** — right of `l`; entire subtree has keys > "l". The whole `n` + subtree (n, m, o) collapses to a single `HashWithCount` carrying + `(kv_hash, l, r, count = 3)` so its structural count is hash-bound to + `l`. Contributes 0 to the in-range total. +- **f** — right child of `d`. Every key under `f` is `> "d"` and `≤ "g" < "l"`, + so the entire subtree is in-range. We do not descend; `f` becomes a single + `HashWithCount` op carrying `(kv_hash, left_child_hash, right_child_hash, + count=3)` and contributes 3 directly. 
**Its original-tree children `e` + and `g` do not appear as separate proof ops** — their hashes are inside + `f`'s `HashWithCount` fields. +- **j** — left child of `l`. Same shape as `f`: every key under `j` is + `≥ "i" > "c"` and `≤ "k" < "l"`, so the entire subtree is in-range. + `HashWithCount`, contributes count = 3. `i` and `k` likewise live inside + `j`'s embedded child hashes. + +> **Each collapsed subtree is one Push op.** Because `HashWithCount` +> embeds its `(kv_hash, left_child_hash, right_child_hash, count)` +> directly, every fully-inside subtree contributes exactly **one** proof +> op regardless of its depth in the original tree. The proof for this +> 15-key range scan in a 4-level tree is just **9 push ops** (h, d, b, c, +> a, f, l, j, n) plus the structural Parent/Child ops — barely more than +> the 7-key example in Case 1. This is what "O(log n) regardless of +> count" looks like in practice: deeper trees do not blow up the proof. + +Verifier total: + +| Node | In range? 
| Contribution | +|------|-----------|--------------| +| h (KVDigestCount, key="h") | "c" ≤ "h" ≤ "l" | **+1** | +| d (KVDigestCount, key="d") | "c" ≤ "d" ≤ "l" | **+1** | +| b (KVDigestCount, key="b") | "b" < "c" → no | +0 | +| a (HashWithCount, count=1) | (outside, key not tested) | +0 | +| c (KVDigestCount, key="c") | "c" ≤ "c" ≤ "l" | **+1** | +| f (HashWithCount, count=3) | (whole subtree in range) | **+3** | +| l (KVDigestCount, key="l") | "c" ≤ "l" ≤ "l" | **+1** | +| j (HashWithCount, count=3) | (whole subtree in range) | **+3** | +| n (HashWithCount, count=3) | (outside, key not tested) | +0 | + +→ **count = 10** ✓ + +#### Variant differences + +The four closed-range variants differ only in **whether each boundary key +itself counts**, not in the proof shape: + +| Variant | Lower test | Upper test | +|----------------------------------|------------|------------| +| `Range(a..b)` | key ≥ a | key < b | +| `RangeInclusive(a..=b)` | key ≥ a | key ≤ b | +| `RangeAfterTo((a, b))` | key > a | key < b | +| `RangeAfterToInclusive((a, ..=b))` | key > a | key ≤ b | + +The verifier applies the relevant test at each boundary `KVDigestCount`. The +generator does not need to know which variant is in play — it always emits the +same shape, and the inclusivity flags travel with the query for the verifier. + +--- + +### Empty subtrees + +An aggregate-count query against an empty Merk returns `count = 0` with a +trivial proof (the empty-tree marker). Asking for `AggregateCountOnRange` on a +path that does not resolve to a tree at all is an error +(`Error::PathNotFound(...)`), the same as any other query. + +### Why this is `O(log n)` regardless of count + +Every diagram above has at most: + +- One walk per bound (so 1 or 2 walks of depth `O(log n)`), +- A constant number of fully-inside subtree roots per level (the "right + siblings" hanging off the left walk and "left siblings" hanging off the + right walk). + +Each of those is a single proof-node Push. 
Therefore the proof's node count is +`O(log n)`, and crucially does **not** depend on the answer's value. Counting +a billion-key range can be done with the same proof size as counting a +hundred-key range. + +## Verifier shape walk + +The verifier is **two-phase**, not just a "count everything visible" pass. +Without this discipline a malicious prover could: + +1. Send a single `Push(Hash(expected_root))` for a non-empty tree, and + receive `(expected_root, 0)` for any range — root hash matches, count is + trivially zero. +2. Replace an in-range collapsed subtree with a hash carrying the *same* + `node_hash` but no count, undercounting by the missing subtree count. +3. Attach extra `KVDigestCount` children below a keyless leaf node. + `Tree::hash()` for those node types is computed only from their + embedded fields and ignores any reconstructed children, so the root + hash stays valid — but a verifier that summed every visited node would + credit the bogus children as `+1` each. +4. Lie about the structural count of an outside subtree to skew an + ancestor boundary node's `own_count` derivation, over- or under- + counting `NonCounted`-aware boundary contributions. + +To rule out all four, the verifier: + +1. **Phase 1** — decode the proof bytes into a `ProofTree` via + `execute_with_options`. The visit-node closure performs only a coarse + allowlist (`HashWithCount` / `KVDigestCount`; **plain `Hash` is not + accepted in count proofs**) and **does not count anything**. (We + disable the AVL balance check for this proof shape — count proofs + intentionally collapse one side to height 1 while descending the + other.) +2. **Phase 2** — walk the reconstructed tree with the same inherited + exclusive subtree-key bounds the prover used (`(None, None)` at the + root). 
At each position, call `classify_subtree(bounds, range)` and + bind the proof-tree node type to the classification, returning the pair + `(in_range_count, structural_count)` where `structural_count` is the + merk-recorded aggregate count of this subtree (used by the parent's + `own_count` derivation): + + | Classification | Required node | Children allowed? | `(in_range, structural)` | + |----------------|-----------------------------------------------------------------------|-------------------------|-----------------------------------------------------------------------------------------------------------| + | `Disjoint` | leaf `HashWithCount(_, _, _, count)` | **no** (must be a leaf) | `(0, count)` | + | `Contained` | leaf `HashWithCount(_, _, _, count)` | **no** (must be a leaf) | `(count, count)` — `count` is the merk's aggregate, which already excludes `NonCounted` entries (own = 0) | + | `Boundary` | `KVDigestCount(key, _, aggregate)` with `key` strictly inside `bounds` | yes — recurse | `own_count = aggregate − left_struct − right_struct`; in-range = `left_in + right_in + (own_count if range.contains(key) else 0)`; structural = `aggregate` | + +3. Counts are summed with `checked_add`; the boundary `own_count` uses + `checked_sub` (so a malformed proof claiming children's structural + counts that exceed the parent's aggregate is rejected, not silently + saturated). + +Because every leaf-shape position is forced to be a leaf, attack 3 +(smuggled counted children under a keyless node) is rejected. Because every +`Contained` and `Disjoint` position must hold `HashWithCount` (and its +count is bound to the parent's hash via `node_hash_with_count`), attacks 2 +and 4 are both rejected — outside subtrees can't lie about their +structural count any more than inside ones can. Because the root's +`(None, None)` bounds against any bounded inner range classify as +`Boundary` (requiring `KVDigestCount`), attack 1 is rejected. 
+ +The shape walk is independent of the chain-hash check: even a proof whose +reconstructed root happens to match the expected root will be rejected if +its shape diverges from what `classify_subtree` expects. + +## Decode safety + +`QueryItem::AggregateCountOnRange(Box)` is the only recursive +variant in the enum. To prevent a small malicious payload of repeated +variant-10 bytes from exhausting the stack inside the bincode or serde +decoder before any validation runs: + +- The bincode `Decode` / `BorrowDecode` impls dispatch through internal + `decode_with_depth` helpers with `MAX_QUERY_ITEM_DECODE_DEPTH = 4` (the + only legal nesting is one wrap, plus headroom). Exceeding the limit + errors with `"QueryItem nesting depth exceeded maximum during + deserialization"`. +- The serde `Deserialize` impl deserializes the inner item via a + `NonAggregateInner` newtype wrapper whose `Field` enum **omits** + `AggregateCountOnRange`, so a nested-aggregate payload is rejected by + serde's enum dispatcher immediately, with no recursion through + `QueryItem::deserialize`. +- Defense in depth: an inner `AggregateCountOnRange` is also rejected on + decode (in addition to being rejected by + `Query::validate_aggregate_count_on_range`). + +## Cost Model + +`AggregateCountOnRange` queries are designed to be cheap and predictable: + +- **Storage seeks:** `O(log n)`. +- **Hash calls:** one per node in the proof. +- **Proof bytes:** `O(log n) * (hash size + count varint size)`. + +There is no per-element cost component, because no elements are read or +returned. This is the headline reason the API exists — a billion-element tree +can be counted in a few hundred bytes of proof. + +The cost-tracking integration mirrors regular range queries, but with the +"loaded bytes" component dominated by the proof shape rather than element +payloads. 
+ +## API Sketch + +```rust +use grovedb::{Element, GroveDb, PathQuery, Query, SizedQuery}; +use grovedb_query::QueryItem; + +// "How many votes have keys between block 1_000 and 2_000 (exclusive)?" +// Use the helper constructor to skip the boilerplate of building the Query +// and SizedQuery by hand. +let path_query = PathQuery::new_aggregate_count_on_range( + vec![b"votes".to_vec()], + QueryItem::Range(1_000u64.to_be_bytes().to_vec()..2_000u64.to_be_bytes().to_vec()), +); + +let proof_bytes = db + .prove_query(&path_query, None, grove_version) + .unwrap() + .expect("prove failed"); + +// Verifier side — only needs the proof bytes + the trusted root hash. +let (root, count) = GroveDb::verify_aggregate_count_query( + &proof_bytes, &path_query, grove_version, +).expect("verify failed"); + +assert_eq!(root, expected_root_hash); +println!("votes in [1000, 2000): {}", count); +``` + +## Comparison Table + +| Feature | Regular `Query` | `AggregateSumQuery` | `AggregateCountOnRange` (this doc) | +|----------------------------------|------------------------------|----------------------------------|---------------------------------------| +| Returns | Elements / keys | Sum + matched key/value pairs | A single `u64` count | +| Stops on | Limit, end of range | Sum limit and/or item limit | Range bounds (whole match counted) | +| Subqueries allowed | Yes | No | **No** | +| Other items in same `Query` | Yes | N/A (own struct) | **No** — must be the only item | +| `limit` / `offset` honored | Yes | Yes (item limit) | **No** — rejected at validation | +| Required tree type | Any | `SumTree`, `BigSumTree`, ... 
| Provable count trees only | +| Proof size relative to result | O(result) | O(matched items) | **O(log n)** regardless of count | + +--- diff --git a/docs/book/src/query-system.md b/docs/book/src/query-system.md index 03bcaf01a..564b0cf5c 100644 --- a/docs/book/src/query-system.md +++ b/docs/book/src/query-system.md @@ -50,9 +50,15 @@ pub enum QueryItem { RangeAfter(RangeFrom>), // (start..) exclusive start RangeAfterTo(Range>), // (start..end) exclusive both RangeAfterToInclusive(RangeInclusive>), // (start..=end] + AggregateCountOnRange(Box), // Count-only — see Aggregate Count Queries } ``` +> **`AggregateCountOnRange`** is a terminal item: when present, it must be the **only** +> item in the `Query`, and the query may not carry subqueries or pagination. +> See [Aggregate Count Queries](aggregate-count-queries.md) for the full +> contract — it is restricted to provable count trees. + Example queries: Merk tree (sorted): `alice bob carol dave eve frank` diff --git a/grovedb-bulk-append-tree/src/proof/mod.rs b/grovedb-bulk-append-tree/src/proof/mod.rs index 7ee0a0d92..c523a69fc 100644 --- a/grovedb-bulk-append-tree/src/proof/mod.rs +++ b/grovedb-bulk-append-tree/src/proof/mod.rs @@ -135,6 +135,13 @@ fn query_to_ranges(query: &Query, total_count: u64) -> Result, B } (s, e) } + QueryItem::AggregateCountOnRange(_) => { + return Err(BulkAppendError::InvalidInput( + "AggregateCountOnRange is only supported on provable count trees, \ + not on BulkAppendTree" + .into(), + )); + } }; ranges.push((start, end)); } diff --git a/grovedb-dense-fixed-sized-merkle-tree/src/proof/mod.rs b/grovedb-dense-fixed-sized-merkle-tree/src/proof/mod.rs index f7afa345d..8178f48be 100644 --- a/grovedb-dense-fixed-sized-merkle-tree/src/proof/mod.rs +++ b/grovedb-dense-fixed-sized-merkle-tree/src/proof/mod.rs @@ -116,6 +116,13 @@ pub(crate) fn query_to_positions(query: &Query, count: u16) -> Result, positions.insert(p); } } + QueryItem::AggregateCountOnRange(_) => { + return 
Err(DenseMerkleError::InvalidProof( + "AggregateCountOnRange is only supported on provable count trees, \ + not on dense fixed-size merkle trees" + .into(), + )); + } } } diff --git a/grovedb-query/Cargo.toml b/grovedb-query/Cargo.toml index db64d2a2b..33b93049f 100644 --- a/grovedb-query/Cargo.toml +++ b/grovedb-query/Cargo.toml @@ -26,6 +26,7 @@ grovedb-storage = { version = "4.0.0", path = "../storage", optional = true } [dev-dependencies] assert_matches = { workspace = true } +serde_test = "1.0" [features] default = [] diff --git a/grovedb-query/src/proofs/encoding.rs b/grovedb-query/src/proofs/encoding.rs index 8cfadb303..22c20b1d2 100644 --- a/grovedb-query/src/proofs/encoding.rs +++ b/grovedb-query/src/proofs/encoding.rs @@ -150,6 +150,13 @@ impl Encode for Op { dest.write_all(value_hash)?; count.encode_into(dest)?; } + Op::Push(Node::HashWithCount(kv_hash, left_child_hash, right_child_hash, count)) => { + dest.write_all(&[0x1e])?; + dest.write_all(kv_hash)?; + dest.write_all(left_child_hash)?; + dest.write_all(right_child_hash)?; + count.encode_into(dest)?; + } Op::Push(Node::KVValueHashFeatureTypeWithChildHash( key, value, @@ -309,6 +316,18 @@ impl Encode for Op { dest.write_all(value_hash)?; count.encode_into(dest)?; } + Op::PushInverted(Node::HashWithCount( + kv_hash, + left_child_hash, + right_child_hash, + count, + )) => { + dest.write_all(&[0x1f])?; + dest.write_all(kv_hash)?; + dest.write_all(left_child_hash)?; + dest.write_all(right_child_hash)?; + count.encode_into(dest)?; + } Op::PushInverted(Node::KVValueHashFeatureTypeWithChildHash( key, value, @@ -377,6 +396,9 @@ impl Encode for Op { Op::Push(Node::KVDigestCount(key, _, count)) => { 2 + key.len() + HASH_LENGTH + count.encoding_length()? } + Op::Push(Node::HashWithCount(_, _, _, count)) => { + 1 + 3 * HASH_LENGTH + count.encoding_length()? 
+ } Op::Push(Node::KVValueHashFeatureTypeWithChildHash(key, value, _, feature_type, _)) => { let header = if value.len() < 65536 { 4 } else { 6 }; header @@ -419,6 +441,9 @@ impl Encode for Op { Op::PushInverted(Node::KVDigestCount(key, _, count)) => { 2 + key.len() + HASH_LENGTH + count.encoding_length()? } + Op::PushInverted(Node::HashWithCount(_, _, _, count)) => { + 1 + 3 * HASH_LENGTH + count.encoding_length()? + } Op::PushInverted(Node::KVValueHashFeatureTypeWithChildHash( key, value, @@ -722,6 +747,38 @@ impl Decode for Op { child_hash, )) } + 0x1e => { + let mut kv_hash = [0; HASH_LENGTH]; + input.read_exact(&mut kv_hash)?; + let mut left_child_hash = [0; HASH_LENGTH]; + input.read_exact(&mut left_child_hash)?; + let mut right_child_hash = [0; HASH_LENGTH]; + input.read_exact(&mut right_child_hash)?; + let count: u64 = Decode::decode(&mut input)?; + + Self::Push(Node::HashWithCount( + kv_hash, + left_child_hash, + right_child_hash, + count, + )) + } + 0x1f => { + let mut kv_hash = [0; HASH_LENGTH]; + input.read_exact(&mut kv_hash)?; + let mut left_child_hash = [0; HASH_LENGTH]; + input.read_exact(&mut left_child_hash)?; + let mut right_child_hash = [0; HASH_LENGTH]; + input.read_exact(&mut right_child_hash)?; + let count: u64 = Decode::decode(&mut input)?; + + Self::PushInverted(Node::HashWithCount( + kv_hash, + left_child_hash, + right_child_hash, + count, + )) + } 0x1d => { let key_len: u8 = Decode::decode(&mut input)?; let mut key = vec![0; key_len as usize]; @@ -2217,4 +2274,98 @@ mod test { let decoded = Op::decode(&bytes[..]).expect("decode failed"); assert_eq!(decoded, op); } + + #[test] + fn encode_decode_push_hash_with_count() { + // (kv_hash, left_child_hash, right_child_hash, count) — the + // self-verifying compressed-subtree variant for AggregateCountOnRange. 
+ let op = Op::Push(Node::HashWithCount( + [0xAA; HASH_LENGTH], + [0xBB; HASH_LENGTH], + [0xCC; HASH_LENGTH], + 42, + )); + // 1 opcode + 3 * 32 hashes + varint(42) = 1 + 96 + 1 = 98 + let expected_length = 1 + 3 * HASH_LENGTH + ed::Encode::encoding_length(&42u64).unwrap(); + assert_eq!(op.encoding_length(), expected_length); + + let mut bytes = vec![]; + op.encode_into(&mut bytes).unwrap(); + assert_eq!(bytes.len(), expected_length); + assert_eq!(bytes[0], 0x1e); // Push HashWithCount opcode + + let decoded = Op::decode(&bytes[..]).expect("decode failed"); + assert_eq!(decoded, op); + } + + #[test] + fn encode_decode_push_inverted_hash_with_count() { + let op = Op::PushInverted(Node::HashWithCount( + [0x11; HASH_LENGTH], + [0x22; HASH_LENGTH], + [0x33; HASH_LENGTH], + u64::MAX, + )); + let expected_length = 1 + 3 * HASH_LENGTH + ed::Encode::encoding_length(&u64::MAX).unwrap(); + assert_eq!(op.encoding_length(), expected_length); + + let mut bytes = vec![]; + op.encode_into(&mut bytes).unwrap(); + assert_eq!(bytes.len(), expected_length); + assert_eq!(bytes[0], 0x1f); // PushInverted HashWithCount opcode + + let decoded = Op::decode(&bytes[..]).expect("decode failed"); + assert_eq!(decoded, op); + } + + #[test] + fn encode_decode_hash_with_count_zero_count_zero_children() { + // count = 0 (encodes to a 1-byte varint), all-zero hashes — represents + // a leaf-shaped collapsed subtree with no children. + let op = Op::Push(Node::HashWithCount( + [0u8; HASH_LENGTH], + [0u8; HASH_LENGTH], + [0u8; HASH_LENGTH], + 0, + )); + let mut bytes = vec![]; + op.encode_into(&mut bytes).unwrap(); + assert_eq!(bytes[0], 0x1e); + let decoded = Op::decode(&bytes[..]).expect("decode failed"); + assert_eq!(decoded, op); + } + + #[test] + fn decoder_with_hash_with_count_mixed_with_other_count_nodes() { + // Round-trip a small Op stream containing HashWithCount alongside the + // existing count-bearing variants — exercises the Decoder iterator + // boundary handling for the new variants. 
+ let ops = vec![ + Op::Push(Node::HashWithCount( + [1; HASH_LENGTH], + [2; HASH_LENGTH], + [3; HASH_LENGTH], + 7, + )), + Op::Push(Node::KVDigestCount(vec![0xAB], [4; HASH_LENGTH], 1)), + Op::Parent, + Op::Push(Node::Hash([5; HASH_LENGTH])), + Op::Child, + Op::PushInverted(Node::HashWithCount( + [6; HASH_LENGTH], + [7; HASH_LENGTH], + [8; HASH_LENGTH], + 12345, + )), + ]; + + let mut encoded = vec![]; + for op in &ops { + op.encode_into(&mut encoded).unwrap(); + } + + let decoder = Decoder::new(&encoded); + let decoded_ops: Result, _> = decoder.collect(); + assert_eq!(decoded_ops.unwrap(), ops); + } } diff --git a/grovedb-query/src/proofs/mod.rs b/grovedb-query/src/proofs/mod.rs index 4fbf02834..d49eb2e4a 100644 --- a/grovedb-query/src/proofs/mod.rs +++ b/grovedb-query/src/proofs/mod.rs @@ -127,6 +127,30 @@ pub enum Node { /// /// Contains: `(key, value, value_hash, feature_type, child_hash)` KVValueHashFeatureTypeWithChildHash(Vec, Vec, CryptoHash, TreeFeatureType, CryptoHash), + + /// A self-verifying compressed subtree for `AggregateCountOnRange` proofs + /// against a `ProvableCountTree` / `ProvableCountSumTree`. + /// + /// Encodes the subtree's *root* node as `(kv_hash, left_child_hash, + /// right_child_hash, count)`. The verifier reconstructs the subtree's + /// root `node_hash` as + /// `node_hash_with_count(kv_hash, left_child_hash, right_child_hash, count)` + /// and uses that hash exactly as `Hash(...)` would. Because `count` is + /// part of that recomputation, a forged count produces a different hash + /// and the parent's Merkle-root check fails — the count is therefore + /// cryptographically committed by the parent's hash chain, not just + /// trusted on faith. + /// + /// Used to collapse an entire fully-inside subtree into a single proof + /// node: the verifier doesn't need any per-key information (the parent + /// boundary nodes already established that every key under here is + /// in-range), so we hand it the four hashes plus the count. 
+ /// + /// `left_child_hash` / `right_child_hash` are the all-zero `NULL_HASH` + /// when the subtree's root has no left / right child respectively. + /// + /// Contains: `(kv_hash, left_child_hash, right_child_hash, count)` + HashWithCount(CryptoHash, CryptoHash, CryptoHash, u64), } use std::fmt; @@ -185,6 +209,13 @@ impl fmt::Display for Node { hex::encode(value_hash), count ), + Node::HashWithCount(kv_hash, left_child_hash, right_child_hash, count) => format!( + "HashWithCount(kv_hash=HASH[{}], left=HASH[{}], right=HASH[{}], count={})", + hex::encode(kv_hash), + hex::encode(left_child_hash), + hex::encode(right_child_hash), + count + ), Node::KVValueHashFeatureTypeWithChildHash( key, value, diff --git a/grovedb-query/src/query.rs b/grovedb-query/src/query.rs index df7917799..affce9604 100644 --- a/grovedb-query/src/query.rs +++ b/grovedb-query/src/query.rs @@ -303,6 +303,149 @@ impl Query { } } + /// Creates an aggregate-count-on-range query that counts the elements + /// matched by `range`. The resulting query has `AggregateCountOnRange(range)` + /// as its sole item, no subquery branches, and `left_to_right = true` + /// (counting is direction-agnostic). + /// + /// `range` must be a true range variant (`Range`, `RangeInclusive`, + /// `RangeFrom`, `RangeTo`, `RangeToInclusive`, `RangeAfter`, `RangeAfterTo`, + /// or `RangeAfterToInclusive`). Passing `Key`, `RangeFull`, or another + /// `AggregateCountOnRange` is allowed at construction time but will be + /// rejected by [`validate_aggregate_count_on_range`]. + pub fn new_aggregate_count_on_range(range: QueryItem) -> Self { + Self { + items: vec![QueryItem::AggregateCountOnRange(Box::new(range))], + left_to_right: true, + ..Self::default() + } + } + + /// If this query contains an `AggregateCountOnRange` item *anywhere* in + /// its `items` vec, returns a reference to the first such item (whether + /// the surrounding query is well-formed or not). 
Returns `None` only + /// when no item is an `AggregateCountOnRange`. + /// + /// This is intentionally a **detection-only** helper: malformed queries + /// like `items: [Key(...), AggregateCountOnRange(...)]` still report + /// `Some(...)` here so callers don't accidentally route them through + /// the regular-query path. Use + /// [`Self::validate_aggregate_count_on_range`] when you also need to + /// enforce the well-formedness rules (single item, allowed inner kind, + /// no subqueries, etc.). + pub fn aggregate_count_on_range(&self) -> Option<&QueryItem> { + self.items + .iter() + .find(|item| item.is_aggregate_count_on_range()) + } + + /// Returns `true` if any item in this query — including items inside + /// nested subquery branches — is an `AggregateCountOnRange`. + /// + /// `AggregateCountOnRange` is a *terminal* item: the canonical + /// well-formed query contains exactly one `AggregateCountOnRange` at + /// the top level and nothing else. This recursive detector exists so + /// the prover can validate up front: if any ACOR is present anywhere, + /// the query as a whole must satisfy + /// [`Self::validate_aggregate_count_on_range`] — otherwise a malformed + /// shape (e.g. ACOR hidden inside `default_subquery_branch.subquery`) + /// could slip past a top-level-only check and be silently routed + /// through the regular-proof path. + pub fn has_aggregate_count_on_range_anywhere(&self) -> bool { + if self.aggregate_count_on_range().is_some() { + return true; + } + if let Some(sub) = self.default_subquery_branch.subquery.as_deref() + && sub.has_aggregate_count_on_range_anywhere() + { + return true; + } + if let Some(branches) = &self.conditional_subquery_branches { + for branch in branches.values() { + if let Some(sub) = branch.subquery.as_deref() + && sub.has_aggregate_count_on_range_anywhere() + { + return true; + } + } + } + false + } + + /// Validates the Query-level constraints that apply when an + /// `AggregateCountOnRange` is present. 
On success, returns a reference + /// to the inner `QueryItem` describing the range to count. + /// + /// Rules enforced (matching the constraints documented in the GroveDB + /// book chapter "Aggregate Count Queries"): + /// + /// 1. The query must contain exactly one item. + /// 2. That item must be `AggregateCountOnRange(_)`. + /// 3. The inner item must not be `Key` (use `has_raw` / `get_raw` for + /// existence tests). + /// 4. The inner item must not be `RangeFull` (read the parent + /// `Element::ProvableCountTree` / `Element::ProvableCountSumTree` + /// bytes directly for the unconditional total). + /// 5. The inner item must not itself be `AggregateCountOnRange`. + /// 6. `default_subquery_branch.subquery` and + /// `default_subquery_branch.subquery_path` must both be `None`. + /// 7. `conditional_subquery_branches` must be `None` or empty. + /// + /// `SizedQuery::limit` / `SizedQuery::offset` checks live at the + /// `PathQuery` / `SizedQuery` layer (see + /// [`SizedQuery::validate_aggregate_count_on_range`]). 
+ pub fn validate_aggregate_count_on_range(&self) -> Result<&QueryItem, Error> { + if self.items.len() != 1 { + return Err(Error::InvalidOperation( + "AggregateCountOnRange must be the only item in the query", + )); + } + let inner = match &self.items[0] { + QueryItem::AggregateCountOnRange(inner) => inner.as_ref(), + _ => { + return Err(Error::InvalidOperation( + "validate_aggregate_count_on_range called on a query without an \ + AggregateCountOnRange item", + )); + } + }; + match inner { + QueryItem::Key(_) => { + return Err(Error::InvalidOperation( + "AggregateCountOnRange may not wrap Key — use has_raw / get_raw for \ + existence tests", + )); + } + QueryItem::RangeFull(_) => { + return Err(Error::InvalidOperation( + "AggregateCountOnRange may not wrap RangeFull — read the parent \ + ProvableCountTree element for the unconditional total", + )); + } + QueryItem::AggregateCountOnRange(_) => { + return Err(Error::InvalidOperation( + "AggregateCountOnRange may not wrap another AggregateCountOnRange", + )); + } + _ => {} + } + if self.default_subquery_branch.subquery.is_some() + || self.default_subquery_branch.subquery_path.is_some() + { + return Err(Error::InvalidOperation( + "AggregateCountOnRange queries may not carry a default subquery branch", + )); + } + if let Some(branches) = &self.conditional_subquery_branches + && !branches.is_empty() + { + return Err(Error::InvalidOperation( + "AggregateCountOnRange queries may not carry conditional subquery branches", + )); + } + Ok(inner) + } + /// Returns `true` if the given key would trigger a subquery (either via /// the default subquery branch or a matching conditional branch). pub fn has_subquery_on_key(&self, key: &[u8], in_path: bool) -> bool { @@ -907,4 +1050,233 @@ mod tests { "innermost query should have no further subquery" ); } + + // ---------- AggregateCountOnRange validation tests ---------- + // + // These hit each numbered rule in `Query::validate_aggregate_count_on_range` + // independently. 
The happy path is also covered to ensure the success + // arm returns the inner range. + + fn make_acor_query(inner: QueryItem) -> Query { + Query::new_aggregate_count_on_range(inner) + } + + #[test] + fn validate_acor_happy_path_returns_inner() { + let q = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec())); + let inner = q + .validate_aggregate_count_on_range() + .expect("happy path should validate"); + match inner { + QueryItem::Range(r) => { + assert_eq!(r.start, b"a".to_vec()); + assert_eq!(r.end, b"z".to_vec()); + } + _ => panic!("expected inner Range"), + } + } + + #[test] + fn validate_acor_rejects_extra_items() { + let mut q = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec())); + q.items.push(QueryItem::Key(b"extra".to_vec())); + let err = q + .validate_aggregate_count_on_range() + .expect_err("two-item query must fail"); + assert!(matches!(err, crate::error::Error::InvalidOperation(_))); + } + + #[test] + fn validate_acor_rejects_non_acor_only_item() { + // A query with one item that isn't AggregateCountOnRange triggers the + // "validate called on a query without an AggregateCountOnRange item" + // branch. 
+ let q = Query::new_single_query_item(QueryItem::Key(b"k".to_vec())); + let err = q + .validate_aggregate_count_on_range() + .expect_err("non-ACOR-only item must fail"); + assert!(matches!(err, crate::error::Error::InvalidOperation(_))); + } + + #[test] + fn validate_acor_rejects_inner_key() { + let q = make_acor_query(QueryItem::Key(b"k".to_vec())); + let err = q + .validate_aggregate_count_on_range() + .expect_err("inner Key must fail"); + match err { + crate::error::Error::InvalidOperation(msg) => assert!(msg.contains("Key")), + _ => panic!("expected InvalidOperation"), + } + } + + #[test] + fn validate_acor_rejects_inner_range_full() { + let q = make_acor_query(QueryItem::RangeFull(std::ops::RangeFull)); + let err = q + .validate_aggregate_count_on_range() + .expect_err("inner RangeFull must fail"); + match err { + crate::error::Error::InvalidOperation(msg) => assert!(msg.contains("RangeFull")), + _ => panic!("expected InvalidOperation"), + } + } + + #[test] + fn validate_acor_rejects_nested_acor() { + // AggregateCountOnRange wrapping another AggregateCountOnRange. 
+ let inner_acor = QueryItem::AggregateCountOnRange(Box::new(QueryItem::Range( + b"a".to_vec()..b"z".to_vec(), + ))); + let q = make_acor_query(inner_acor); + let err = q + .validate_aggregate_count_on_range() + .expect_err("nested ACOR must fail"); + match err { + crate::error::Error::InvalidOperation(msg) => { + assert!(msg.contains("AggregateCountOnRange")) + } + _ => panic!("expected InvalidOperation"), + } + } + + #[test] + fn validate_acor_rejects_default_subquery_branch() { + let mut q = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec())); + q.default_subquery_branch = SubqueryBranch { + subquery_path: None, + subquery: Some(Box::new(Query::new())), + }; + let err = q + .validate_aggregate_count_on_range() + .expect_err("default subquery branch must fail"); + match err { + crate::error::Error::InvalidOperation(msg) => assert!(msg.contains("subquery")), + _ => panic!("expected InvalidOperation"), + } + } + + #[test] + fn validate_acor_rejects_default_subquery_path() { + let mut q = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec())); + q.default_subquery_branch = SubqueryBranch { + subquery_path: Some(vec![b"x".to_vec()]), + subquery: None, + }; + let err = q + .validate_aggregate_count_on_range() + .expect_err("subquery_path must fail"); + match err { + crate::error::Error::InvalidOperation(msg) => assert!(msg.contains("subquery")), + _ => panic!("expected InvalidOperation"), + } + } + + #[test] + fn validate_acor_rejects_conditional_subquery_branches() { + let mut q = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec())); + let mut branches = IndexMap::new(); + branches.insert( + QueryItem::Key(b"k".to_vec()), + SubqueryBranch { + subquery_path: None, + subquery: Some(Box::new(Query::new())), + }, + ); + q.conditional_subquery_branches = Some(branches); + let err = q + .validate_aggregate_count_on_range() + .expect_err("conditional branches must fail"); + match err { + crate::error::Error::InvalidOperation(msg) => { + 
assert!(msg.contains("conditional")); + } + _ => panic!("expected InvalidOperation"), + } + } + + #[test] + fn validate_acor_accepts_empty_conditional_branches_map() { + // An empty `Some(IndexMap::new())` is treated as "no branches" by the + // validator (the rule enforces non-empty rejection only). + let mut q = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec())); + q.conditional_subquery_branches = Some(IndexMap::new()); + let inner = q + .validate_aggregate_count_on_range() + .expect("empty conditional map must validate"); + assert!(matches!(inner, QueryItem::Range(_))); + } + + #[test] + fn aggregate_count_on_range_helper_detects_acor_anywhere_in_items() { + // Well-formed shape — single ACOR item. + let q = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec())); + assert!(q.aggregate_count_on_range().is_some()); + + // Two items including ACOR → still detected, so the routing layer + // can hand the malformed query to validate_aggregate_count_on_range + // for a precise error rather than silently treating it as a regular + // query. + let mut q2 = q.clone(); + q2.items.push(QueryItem::Key(b"x".to_vec())); + assert!( + q2.aggregate_count_on_range().is_some(), + "ACOR + extra item must still be detected as ACOR-bearing" + ); + + // ACOR not at index 0 — also detected. + let mut q3 = Query::new_single_query_item(QueryItem::Key(b"x".to_vec())); + q3.items.push(QueryItem::AggregateCountOnRange(Box::new( + QueryItem::Range(b"a".to_vec()..b"z".to_vec()), + ))); + assert!(q3.aggregate_count_on_range().is_some()); + + // No ACOR anywhere → None. + let q4 = Query::new_single_query_item(QueryItem::Key(b"x".to_vec())); + assert!(q4.aggregate_count_on_range().is_none()); + + // Empty items → None. + let q5 = Query::new(); + assert!(q5.aggregate_count_on_range().is_none()); + } + + #[test] + fn has_aggregate_count_on_range_anywhere_walks_subqueries() { + // No ACOR anywhere → false. 
+ let plain = Query::new_single_query_item(QueryItem::Range(b"a".to_vec()..b"z".to_vec())); + assert!(!plain.has_aggregate_count_on_range_anywhere()); + + // Top-level ACOR → true (covered by `aggregate_count_on_range` too). + let top = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec())); + assert!(top.has_aggregate_count_on_range_anywhere()); + + // ACOR hidden inside `default_subquery_branch.subquery` — the + // top-level-only `aggregate_count_on_range` would miss it, but the + // recursive helper finds it. This is the surface that the + // prove_query entry-point gate uses to refuse to run any + // ACOR-bearing query that isn't the canonical single-ACOR shape. + let inner_acor = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec())); + let mut hidden = + Query::new_single_query_item(QueryItem::Range(b"a".to_vec()..b"z".to_vec())); + hidden.set_subquery(inner_acor); + assert!(hidden.aggregate_count_on_range().is_none()); + assert!( + hidden.has_aggregate_count_on_range_anywhere(), + "ACOR hidden in default subquery branch must be detected" + ); + + // ACOR hidden in a conditional subquery branch. 
+ let inner_acor2 = make_acor_query(QueryItem::Range(b"a".to_vec()..b"z".to_vec())); + let mut conditional = + Query::new_single_query_item(QueryItem::Range(b"a".to_vec()..b"z".to_vec())); + conditional.add_conditional_subquery( + QueryItem::Key(b"k".to_vec()), + None, + Some(inner_acor2), + ); + assert!( + conditional.has_aggregate_count_on_range_anywhere(), + "ACOR hidden in conditional subquery branch must be detected" + ); + } } diff --git a/grovedb-query/src/query_item/intersect.rs b/grovedb-query/src/query_item/intersect.rs index 1153e3a1d..22d414390 100644 --- a/grovedb-query/src/query_item/intersect.rs +++ b/grovedb-query/src/query_item/intersect.rs @@ -612,6 +612,7 @@ impl QueryItem { start: RangeSetItem::ExclusiveStart(range.start().clone()), end: RangeSetItem::Inclusive(range.end().clone()), }, + QueryItem::AggregateCountOnRange(inner) => inner.to_range_set(), } } @@ -660,6 +661,7 @@ impl QueryItem { start: RangeSetSimpleItemBorrowed::Exclusive(range.start()), end: RangeSetSimpleItemBorrowed::Inclusive(range.end()), }), + QueryItem::AggregateCountOnRange(inner) => inner.to_range_set_borrowed(), } } diff --git a/grovedb-query/src/query_item/mod.rs b/grovedb-query/src/query_item/mod.rs index 6525f2ad5..b42b9a939 100644 --- a/grovedb-query/src/query_item/mod.rs +++ b/grovedb-query/src/query_item/mod.rs @@ -75,6 +75,22 @@ pub enum QueryItem { /// A range starting **after** a key and extending to another key, /// **inclusive**. RangeAfterToInclusive(RangeInclusive>), + + /// A count-only meta-query that wraps another `QueryItem` describing the + /// range to count. + /// + /// When this variant appears in a `Query`, the query is interpreted as + /// "return the **number of elements** matched by the inner range" instead + /// of returning the elements themselves. The proof is shaped accordingly: + /// boundary nodes are emitted as `KVDigestCount`, while fully-inside and + /// fully-outside subtrees are each collapsed to a single `HashWithCount` + /// root whose count is committed by the parent's hash chain. 
+ /// + /// This variant is only valid against `ProvableCountTree` / + /// `ProvableCountSumTree` (and their `NonCounted*` wrapper variants), and + /// it must be the **only** item in the surrounding `Query` (no subqueries, + /// no pagination, no other range items). The inner `QueryItem` may not be + /// `Key`, `RangeFull`, or another `AggregateCountOnRange`. + AggregateCountOnRange(Box), } #[cfg(feature = "serde")] @@ -120,6 +136,12 @@ impl Serialize for QueryItem { "RangeAfterToInclusive", range_after_to_inclusive, ), + QueryItem::AggregateCountOnRange(inner) => serializer.serialize_newtype_variant( + "QueryItem", + 10, + "AggregateCountOnRange", + inner, + ), } } } @@ -143,6 +165,7 @@ impl<'de> Deserialize<'de> for QueryItem { RangeAfter, RangeAfterTo, RangeAfterToInclusive, + AggregateCountOnRange, } struct QueryItemVisitor; @@ -199,6 +222,19 @@ impl<'de> Deserialize<'de> for QueryItem { let range_after_to_inclusive = variant_access.newtype_variant()?; Ok(QueryItem::RangeAfterToInclusive(range_after_to_inclusive)) } + Field::AggregateCountOnRange => { + // Deserialize the inner via a wrapper that rejects + // the `AggregateCountOnRange` tag *before* recursing. + // This is the serde counterpart to the bincode + // depth-bounded decode + nested-rejection added in + // `Self::decode_with_depth`. Without it, a + // `serde`-feature client could send arbitrarily + // deep nested AggregateCountOnRange payloads and + // exhaust the stack inside `QueryItem::deserialize` + // before any validation runs. 
+ let NonAggregateInner(inner) = variant_access.newtype_variant()?; + Ok(QueryItem::AggregateCountOnRange(Box::new(inner))) + } } } } @@ -214,12 +250,107 @@ impl<'de> Deserialize<'de> for QueryItem { "RangeAfter", "RangeAfterTo", "RangeAfterToInclusive", + "AggregateCountOnRange", ]; deserializer.deserialize_enum("QueryItem", VARIANTS, QueryItemVisitor) } } +/// Newtype wrapper used internally by the serde `Deserialize` impl when +/// deserializing the *inner* item of an `AggregateCountOnRange`. The wrapper's +/// `Deserialize` impl mirrors `QueryItem::deserialize` but rejects the +/// `AggregateCountOnRange` field tag immediately — without recursing — so +/// nested aggregate payloads cannot exhaust the stack via repeated variant-10 +/// recursion through `QueryItem::deserialize`. +/// +/// Defense-in-depth: nested `AggregateCountOnRange` is also rejected by +/// `Query::validate_aggregate_count_on_range`, but enforcing it at decode time +/// matches the bincode side and prevents the DoS class on its own. +#[cfg(feature = "serde")] +struct NonAggregateInner(QueryItem); + +#[cfg(feature = "serde")] +impl<'de> Deserialize<'de> for NonAggregateInner { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + // Field set excludes "AggregateCountOnRange"; encountering that tag + // produces a serde "unknown variant" error before any inner + // recursion can happen. 
+ #[derive(Deserialize)] + #[serde(field_identifier, rename_all = "snake_case")] + enum Field { + Key, + Range, + RangeInclusive, + RangeFull, + RangeFrom, + RangeTo, + RangeToInclusive, + RangeAfter, + RangeAfterTo, + RangeAfterToInclusive, + } + + struct V; + impl<'de> serde::de::Visitor<'de> for V { + type Value = NonAggregateInner; + + fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.write_str("non-aggregate QueryItem variant") + } + + fn visit_enum(self, data: A) -> Result + where + A: serde::de::EnumAccess<'de>, + { + let (variant, va) = data.variant()?; + let inner = match variant { + Field::Key => QueryItem::Key(va.newtype_variant()?), + Field::Range => QueryItem::Range(va.newtype_variant()?), + Field::RangeInclusive => QueryItem::RangeInclusive(va.newtype_variant()?), + Field::RangeFull => { + va.unit_variant()?; + QueryItem::RangeFull(RangeFull) + } + Field::RangeFrom => QueryItem::RangeFrom(va.newtype_variant()?), + Field::RangeTo => QueryItem::RangeTo(va.newtype_variant()?), + Field::RangeToInclusive => { + let end: Vec = va.newtype_variant()?; + QueryItem::RangeToInclusive(..=end) + } + Field::RangeAfter => QueryItem::RangeAfter(va.newtype_variant()?), + Field::RangeAfterTo => QueryItem::RangeAfterTo(va.newtype_variant()?), + Field::RangeAfterToInclusive => { + QueryItem::RangeAfterToInclusive(va.newtype_variant()?) + } + }; + Ok(NonAggregateInner(inner)) + } + } + + // The list excludes "AggregateCountOnRange" so a serde format that + // surfaces unknown variants by name (most do) gives a precise error + // for the nested case. 
+ const NON_AGGREGATE_VARIANTS: &[&str] = &[ + "Key", + "Range", + "RangeInclusive", + "RangeFull", + "RangeFrom", + "RangeTo", + "RangeToInclusive", + "RangeAfter", + "RangeAfterTo", + "RangeAfterToInclusive", + ]; + + deserializer.deserialize_enum("QueryItem", NON_AGGREGATE_VARIANTS, V) + } +} + impl Encode for QueryItem { fn encode( &self, @@ -270,14 +401,46 @@ impl Encode for QueryItem { range.start().encode(encoder)?; range.end().encode(encoder) } + QueryItem::AggregateCountOnRange(inner) => { + encoder.writer().write(&[10])?; + inner.as_ref().encode(encoder) + } } } } +/// Maximum recursion depth allowed when decoding a `QueryItem` from bincode. +/// +/// The only recursive variant today is `AggregateCountOnRange(Box)` +/// (variant 10). A malicious payload made of repeated variant-10 bytes +/// would otherwise recurse arbitrarily deep before any validation runs and +/// can stack-overflow the decoder. Since nested `AggregateCountOnRange` is +/// always rejected by `Query::validate_aggregate_count_on_range` anyway, +/// the only legal nesting depth here is **one** (the outer wrapper plus its +/// non-aggregate inner range). We keep a small safety margin. +pub(crate) const MAX_QUERY_ITEM_DECODE_DEPTH: usize = 4; + impl Decode for QueryItem { fn decode>( decoder: &mut D, ) -> Result { + Self::decode_with_depth(decoder, 0) + } +} + +impl QueryItem { + /// Recursive bincode decode with an explicit depth counter. Used to bound + /// nested `AggregateCountOnRange` payloads (which would otherwise allow + /// stack exhaustion via repeated variant-10 bytes). 
+ pub(crate) fn decode_with_depth( + decoder: &mut D, + depth: usize, + ) -> Result { + if depth > MAX_QUERY_ITEM_DECODE_DEPTH { + return Err(DecodeError::Other( + "QueryItem nesting depth exceeded maximum during deserialization", + )); + } let variant_id = u8::decode(decoder)?; match variant_id { @@ -322,9 +485,22 @@ impl Decode for QueryItem { let end = Vec::::decode(decoder)?; Ok(QueryItem::RangeAfterToInclusive(start..=end)) } + 10 => { + let inner = QueryItem::decode_with_depth(decoder, depth + 1)?; + // Defense-in-depth: nested AggregateCountOnRange is invalid + // by validation rules, so we also reject it at decode time. + // The depth guard above remains the primary stack-overflow + // mitigation for malicious deeper nesting. + if matches!(inner, QueryItem::AggregateCountOnRange(_)) { + return Err(DecodeError::Other( + "AggregateCountOnRange must not wrap another AggregateCountOnRange", + )); + } + Ok(QueryItem::AggregateCountOnRange(Box::new(inner))) + } _ => Err(DecodeError::UnexpectedVariant { type_name: "QueryItem", - allowed: &bincode::error::AllowedEnumVariants::Range { min: 0, max: 9 }, + allowed: &bincode::error::AllowedEnumVariants::Range { min: 0, max: 10 }, found: variant_id as u32, }), } @@ -335,6 +511,24 @@ impl<'de, Context> BorrowDecode<'de, Context> for QueryItem { fn borrow_decode>( decoder: &mut D, ) -> Result { + Self::borrow_decode_with_depth(decoder, 0) + } +} + +impl QueryItem { + /// Recursive bincode borrow-decode with an explicit depth counter. + /// Mirrors [`Self::decode_with_depth`] for the borrowed-decoder path; same + /// `MAX_QUERY_ITEM_DECODE_DEPTH` and same nested-`AggregateCountOnRange` + /// rejection apply. 
+ pub(crate) fn borrow_decode_with_depth<'de, D: bincode::de::BorrowDecoder<'de>>( + decoder: &mut D, + depth: usize, + ) -> Result { + if depth > MAX_QUERY_ITEM_DECODE_DEPTH { + return Err(DecodeError::Other( + "QueryItem nesting depth exceeded maximum during deserialization", + )); + } let variant_id = u8::decode(decoder)?; match variant_id { @@ -379,9 +573,18 @@ impl<'de, Context> BorrowDecode<'de, Context> for QueryItem { let end = Vec::::borrow_decode(decoder)?; Ok(QueryItem::RangeAfterToInclusive(start..=end)) } + 10 => { + let inner = QueryItem::borrow_decode_with_depth(decoder, depth + 1)?; + if matches!(inner, QueryItem::AggregateCountOnRange(_)) { + return Err(DecodeError::Other( + "AggregateCountOnRange must not wrap another AggregateCountOnRange", + )); + } + Ok(QueryItem::AggregateCountOnRange(Box::new(inner))) + } _ => Err(DecodeError::UnexpectedVariant { type_name: "QueryItem", - allowed: &bincode::error::AllowedEnumVariants::Range { min: 0, max: 9 }, + allowed: &bincode::error::AllowedEnumVariants::Range { min: 0, max: 10 }, found: variant_id as u32, }), } @@ -427,6 +630,9 @@ impl fmt::Display for QueryItem { hex_to_ascii(range.start()), hex_to_ascii(range.end()) ), + QueryItem::AggregateCountOnRange(inner) => { + write!(f, "AggregateCountOnRange({})", inner) + } } } } @@ -437,6 +643,7 @@ impl QueryItem { match self { QueryItem::Key(key) => key.len() as u32, QueryItem::RangeFull(_) => 0u32, + QueryItem::AggregateCountOnRange(inner) => inner.processing_footprint(), _ => { self.lower_bound().0.map_or(0u32, |x| x.len() as u32) + self.upper_bound().0.map_or(0u32, |x| x.len() as u32) @@ -458,11 +665,12 @@ impl QueryItem { QueryItem::RangeAfter(range) => (Some(range.start.as_ref()), true), QueryItem::RangeAfterTo(range) => (Some(range.start.as_ref()), true), QueryItem::RangeAfterToInclusive(range) => (Some(range.start().as_ref()), true), + QueryItem::AggregateCountOnRange(inner) => inner.lower_bound(), } } /// Returns `true` if this query item has no 
lower bound (extends to -inf). - pub const fn lower_unbounded(&self) -> bool { + pub fn lower_unbounded(&self) -> bool { match self { QueryItem::Key(_) => false, QueryItem::Range(_) => false, @@ -474,6 +682,7 @@ impl QueryItem { QueryItem::RangeAfter(_) => false, QueryItem::RangeAfterTo(_) => false, QueryItem::RangeAfterToInclusive(_) => false, + QueryItem::AggregateCountOnRange(inner) => inner.lower_unbounded(), } } @@ -491,11 +700,12 @@ impl QueryItem { QueryItem::RangeAfter(_) => (None, true), QueryItem::RangeAfterTo(range) => (Some(range.end.as_ref()), false), QueryItem::RangeAfterToInclusive(range) => (Some(range.end().as_ref()), true), + QueryItem::AggregateCountOnRange(inner) => inner.upper_bound(), } } /// Returns `true` if this query item has no upper bound (extends to +inf). - pub const fn upper_unbounded(&self) -> bool { + pub fn upper_unbounded(&self) -> bool { match self { QueryItem::Key(_) => false, QueryItem::Range(_) => false, @@ -507,6 +717,7 @@ impl QueryItem { QueryItem::RangeAfter(_) => true, QueryItem::RangeAfterTo(_) => false, QueryItem::RangeAfterToInclusive(_) => false, + QueryItem::AggregateCountOnRange(inner) => inner.upper_unbounded(), } } @@ -535,6 +746,7 @@ impl QueryItem { QueryItem::RangeAfter(_) => 7, QueryItem::RangeAfterTo(_) => 8, QueryItem::RangeAfterToInclusive(_) => 9, + QueryItem::AggregateCountOnRange(_) => 10, } } @@ -544,7 +756,8 @@ impl QueryItem { } /// Returns `true` if this query item is any kind of range (not a single - /// key). + /// key). `AggregateCountOnRange` counts as a range — it describes a range + /// to count over. 
pub const fn is_range(&self) -> bool { matches!( self, @@ -557,6 +770,7 @@ impl QueryItem { | QueryItem::RangeAfter(_) | QueryItem::RangeAfterTo(_) | QueryItem::RangeAfterToInclusive(_) + | QueryItem::AggregateCountOnRange(_) ) } @@ -566,12 +780,30 @@ impl QueryItem { } /// Returns `true` if this query item is a range with at least one unbounded - /// end (e.g., `RangeFull`, `RangeFrom`, `RangeTo`, etc.). - pub const fn is_unbounded_range(&self) -> bool { - !matches!( - self, - QueryItem::Key(_) | QueryItem::Range(_) | QueryItem::RangeInclusive(_) - ) + /// end (e.g., `RangeFull`, `RangeFrom`, `RangeTo`, etc.). For + /// `AggregateCountOnRange`, delegates to the inner item. + pub fn is_unbounded_range(&self) -> bool { + match self { + QueryItem::AggregateCountOnRange(inner) => inner.is_unbounded_range(), + _ => !matches!( + self, + QueryItem::Key(_) | QueryItem::Range(_) | QueryItem::RangeInclusive(_) + ), + } + } + + /// Returns `true` if this query item is the count-only meta-variant. + pub const fn is_aggregate_count_on_range(&self) -> bool { + matches!(self, QueryItem::AggregateCountOnRange(_)) + } + + /// If this is `AggregateCountOnRange`, returns a reference to the inner + /// `QueryItem` describing the range to count. Otherwise returns `None`. + pub fn aggregate_count_inner(&self) -> Option<&QueryItem> { + match self { + QueryItem::AggregateCountOnRange(inner) => Some(inner.as_ref()), + _ => None, + } } /// Enumerates all distinct keys in this query item. 
Only works for `Key`, @@ -775,6 +1007,7 @@ impl QueryItem { iter.seek_for_prev(end) } } + QueryItem::AggregateCountOnRange(inner) => inner.seek_for_iter(iter, left_to_right), } } @@ -867,6 +1100,9 @@ impl QueryItem { } } } + QueryItem::AggregateCountOnRange(inner) => { + return inner.iter_is_valid_for_type(iter, limit, aggregate_limit, left_to_right); + } }; is_valid.wrap_with_cost(cost) @@ -986,4 +1222,169 @@ mod test { ); assert!(QueryItem::Range(vec![20]..vec![30]) > QueryItem::Range(vec![10]..vec![20])); } + + // ---------- decode-depth + nested-AggregateCountOnRange rejection ---------- + + use super::MAX_QUERY_ITEM_DECODE_DEPTH; + + fn bincode_config() -> bincode::config::Configuration< + bincode::config::BigEndian, + bincode::config::Fixint, + bincode::config::NoLimit, + > { + bincode::config::standard() + .with_big_endian() + .with_fixed_int_encoding() + .with_no_limit() + } + + #[test] + fn decode_rejects_nested_aggregate_count_on_range() { + // A two-level nest: AggregateCountOnRange(AggregateCountOnRange(Range)). + let nested = QueryItem::AggregateCountOnRange(Box::new(QueryItem::AggregateCountOnRange( + Box::new(QueryItem::Range(b"a".to_vec()..b"z".to_vec())), + ))); + let bytes = bincode::encode_to_vec(&nested, bincode_config()).expect("encode succeeds"); + let result: Result<(QueryItem, _), _> = + bincode::decode_from_slice(&bytes, bincode_config()); + let err = result.expect_err("nested AggregateCountOnRange must be rejected at decode"); + let msg = format!("{:?}", err); + assert!( + msg.contains("AggregateCountOnRange") || msg.contains("nesting depth"), + "expected nested-rejection message, got: {msg}" + ); + } + + #[test] + fn decode_caps_depth_for_malicious_payload() { + // Construct a raw byte payload of (MAX_QUERY_ITEM_DECODE_DEPTH + 2) + // copies of the AggregateCountOnRange variant byte (10) followed by + // a base item. This bypasses the constructor-level nested rejection + // but should hit the depth guard. 
We use Range as the eventual base + // (variants 0..=9 don't recurse). Since variant 10 reads the next + // byte as a recursive QueryItem, repeated 10s recurse without + // bound — exactly the stack-exhaustion case the depth guard + // prevents. + let depth_to_try = MAX_QUERY_ITEM_DECODE_DEPTH + 2; + let mut payload: Vec = Vec::new(); + for _ in 0..depth_to_try { + payload.push(10u8); // AggregateCountOnRange variant tag + } + // Innermost: Range(b"a", b"z"). Variant tag 1, then encoded start + + // end Vecs in big-endian fixed-int config. + payload.push(1u8); + let inner = QueryItem::Range(b"a".to_vec()..b"z".to_vec()); + let inner_bytes = bincode::encode_to_vec(&inner, bincode_config()).unwrap(); + // inner_bytes already starts with the variant tag (1), strip it. + payload.extend_from_slice(&inner_bytes[1..]); + + let result: Result<(QueryItem, _), _> = + bincode::decode_from_slice(&payload, bincode_config()); + let err = result.expect_err("payload exceeding max depth must be rejected"); + let msg = format!("{:?}", err); + assert!( + msg.contains("nesting depth") || msg.contains("AggregateCountOnRange"), + "expected depth-rejection message, got: {msg}" + ); + } + + #[test] + fn decode_accepts_valid_one_level_aggregate_count_on_range() { + // Single-level wrap with a non-aggregate inner. This is the only + // legal shape after validation; decoding must succeed. + let q = QueryItem::AggregateCountOnRange(Box::new(QueryItem::Range( + b"a".to_vec()..b"z".to_vec(), + ))); + let bytes = bincode::encode_to_vec(&q, bincode_config()).unwrap(); + let (decoded, _): (QueryItem, _) = bincode::decode_from_slice(&bytes, bincode_config()) + .expect("single-level wrap must decode"); + assert_eq!(q, decoded); + } + + // ---------- serde-feature: nested AggregateCountOnRange rejection ---------- + // + // The bincode path is depth-bounded above. 
Mirror the same defense for the + // serde path so serde-feature clients can't bypass the protection — the + // inner item is deserialized through `NonAggregateInner`, whose enum + // field set excludes `AggregateCountOnRange`, so any nested payload is + // rejected immediately by serde without recursion through + // `QueryItem::deserialize`. + // + // We use `serde_test`'s token-level driver here rather than a textual + // format because the existing `Serialize` impl emits variant tags in + // PascalCase (`"AggregateCountOnRange"`) while the existing `Field` enum + // uses `rename_all = "snake_case"` — a pre-existing mismatch unrelated + // to this PR that breaks JSON round-trip but is invisible to formats + // that don't carry variant names textually. Using token streams sidesteps + // that issue and lets us validate the rejection contract directly. + + #[cfg(feature = "serde")] + #[test] + fn serde_decode_rejects_nested_aggregate_count_on_range() { + // Replay the token sequence for an outer AggregateCountOnRange whose + // inner is itself an AggregateCountOnRange. The outer dispatch + // selects the AggregateCountOnRange variant and tries to deserialize + // the inner via `NonAggregateInner`, which does not list + // `aggregate_count_on_range` in its field set — serde_test surfaces + // this as an "unknown variant" error. + use serde_test::{assert_de_tokens_error, Token}; + assert_de_tokens_error::( + &[ + Token::NewtypeVariant { + name: "QueryItem", + variant: "aggregate_count_on_range", + }, + Token::NewtypeVariant { + name: "QueryItem", + variant: "aggregate_count_on_range", + }, + ], + // Exact wording comes from serde's `field_identifier` + // dispatcher rejecting an out-of-set tag — the field set lives + // in `NonAggregateInner`'s `Field` enum, which deliberately + // omits `aggregate_count_on_range`. 
+ "unknown field `aggregate_count_on_range`, expected one of \ + `key`, `range`, `range_inclusive`, `range_full`, `range_from`, \ + `range_to`, `range_to_inclusive`, `range_after`, `range_after_to`, \ + `range_after_to_inclusive`", + ); + } + + #[cfg(feature = "serde")] + #[test] + fn serde_decode_accepts_valid_one_level_aggregate_count_on_range() { + // Outer `AggregateCountOnRange` wrapping a non-aggregate `Range` + // succeeds: the inner dispatch goes through `NonAggregateInner`, + // finds `range`, and the resulting Range is wrapped back up. + use serde_test::{assert_de_tokens, Token}; + let expected = QueryItem::AggregateCountOnRange(Box::new(QueryItem::Range( + b"a".to_vec()..b"z".to_vec(), + ))); + assert_de_tokens( + &expected, + &[ + Token::NewtypeVariant { + name: "QueryItem", + variant: "aggregate_count_on_range", + }, + Token::NewtypeVariant { + name: "QueryItem", + variant: "range", + }, + Token::Struct { + name: "Range", + len: 2, + }, + Token::Str("start"), + Token::Seq { len: Some(1) }, + Token::U8(b'a'), + Token::SeqEnd, + Token::Str("end"), + Token::Seq { len: Some(1) }, + Token::U8(b'z'), + Token::SeqEnd, + Token::StructEnd, + ], + ); + } } diff --git a/grovedb/src/debugger.rs b/grovedb/src/debugger.rs index 86312f97d..c6e7cad6c 100644 --- a/grovedb/src/debugger.rs +++ b/grovedb/src/debugger.rs @@ -550,6 +550,26 @@ fn merk_proof_node_to_grovedbg(node: Node) -> Result { + use grovedb_merk::tree::node_hash_with_count; + let computed_node_hash = + node_hash_with_count(&kv_hash, &left_child_hash, &right_child_hash, count).unwrap(); + MerkProofNode::KVValueHashFeatureType( + vec![], + grovedbg_types::Element::Item { + value: vec![], + element_flags: None, + }, + computed_node_hash, + grovedbg_types::TreeFeatureType::ProvableCountedMerkNode(count), + ) + } }) } diff --git a/grovedb/src/operations/proof/aggregate_count.rs b/grovedb/src/operations/proof/aggregate_count.rs new file mode 100644 index 000000000..6920c78c5 --- /dev/null +++ 
b/grovedb/src/operations/proof/aggregate_count.rs @@ -0,0 +1,367 @@ +//! GroveDB-side prove/verify glue for `AggregateCountOnRange` queries. +//! +//! The merk-level pieces live in `grovedb_merk::proofs::query::aggregate_count` +//! (proof generation in `Merk::prove_aggregate_count_on_range`, proof +//! verification in `verify_aggregate_count_on_range_proof`). This module +//! adds the GroveDB-level *envelope* handling: a verifier that walks the +//! multi-layer `GroveDBProof` chain (parent merk → ... → leaf merk), +//! verifies the path-element existence proofs at each non-leaf layer, and +//! delegates to the merk-level count verifier at the leaf. +//! +//! The proof generator side is wired directly into +//! [`GroveDb::prove_subqueries`] / [`GroveDb::prove_subqueries_v1`] — see +//! the "Aggregate-count short-circuit" branches there. + +use grovedb_merk::{ + proofs::{ + query::{aggregate_count::verify_aggregate_count_on_range_proof, QueryProofVerify}, + Query as MerkQuery, + }, + tree::{combine_hash, value_hash}, + CryptoHash, +}; +use grovedb_version::{check_grovedb_v0, version::GroveVersion}; + +use crate::{ + operations::proof::{ + GroveDBProof, GroveDBProofV0, GroveDBProofV1, LayerProof, MerkOnlyLayerProof, ProofBytes, + }, + Element, Error, GroveDb, PathQuery, +}; + +impl GroveDb { + /// Verify a serialized `prove_query` proof against an + /// `AggregateCountOnRange` `PathQuery`, returning the GroveDB root hash + /// and the verified count. + /// + /// `path_query` must satisfy + /// [`PathQuery::validate_aggregate_count_on_range`] — a single + /// `AggregateCountOnRange(_)` item, no subqueries, no pagination, and an + /// inner range that isn't `Key`, `RangeFull`, or another + /// `AggregateCountOnRange`. Any other shape is rejected up front with + /// `Error::InvalidQuery` before any bytes are decoded. + /// + /// Returns: + /// - `root_hash` — the reconstructed GroveDB root hash. 
The caller is + /// responsible for comparing this against their trusted root hash. + /// - `count` — the number of keys in the inner range that were committed + /// by the proof. + /// + /// Cryptographic guarantees: + /// - At each non-leaf layer, a regular single-key merk proof + /// demonstrates that the next path element exists with the recorded + /// value bytes; the verifier checks the chain + /// `combine_hash(H(value), lower_hash) == parent_proof_hash` so a + /// forged path is impossible without a root-hash mismatch. + /// - At the leaf layer, the count is committed by `HashWithCount`'s + /// `node_hash_with_count(kv_hash, left, right, count)` recomputation — + /// tampering with the count produces a different reconstructed merk + /// root, and the chain check above then fails. + pub fn verify_aggregate_count_query( + proof: &[u8], + path_query: &PathQuery, + grove_version: &GroveVersion, + ) -> Result<(CryptoHash, u64), Error> { + check_grovedb_v0!( + "verify_aggregate_count_query", + grove_version + .grovedb_versions + .operations + .proof + .verify_query_with_options + ); + + let inner_range = path_query.validate_aggregate_count_on_range()?.clone(); + + // Decode the GroveDBProof envelope using the same config the prover + // uses on the way out (matches `prove_query`). + let config = bincode::config::standard() + .with_big_endian() + .with_limit::<{ 256 * 1024 * 1024 }>(); + let grovedb_proof: GroveDBProof = bincode::decode_from_slice(proof, config) + .map_err(|e| Error::CorruptedData(format!("unable to decode proof: {}", e)))? + .0; + + let path_keys: Vec<&[u8]> = path_query.path.iter().map(|p| p.as_slice()).collect(); + + match grovedb_proof { + GroveDBProof::V0(GroveDBProofV0 { root_layer, .. 
}) => verify_v0_layer( + &root_layer, + path_query, + &path_keys, + 0, + &inner_range, + grove_version, + ), + GroveDBProof::V1(GroveDBProofV1 { root_layer }) => verify_v1_layer( + &root_layer, + path_query, + &path_keys, + 0, + &inner_range, + grove_version, + ), + } + } +} + +/// Walk a V0 (`MerkOnlyLayerProof`) envelope. At each non-leaf depth we +/// verify the single-key existence proof for `path[depth]` and descend into +/// the matching lower layer; at the leaf depth we delegate to the merk +/// count verifier. +fn verify_v0_layer( + layer: &MerkOnlyLayerProof, + path_query: &PathQuery, + path_keys: &[&[u8]], + depth: usize, + inner_range: &grovedb_merk::proofs::query::QueryItem, + grove_version: &GroveVersion, +) -> Result<(CryptoHash, u64), Error> { + if depth == path_keys.len() { + // Leaf layer: count proof. + return verify_count_leaf(&layer.merk_proof, inner_range, path_query); + } + + // Non-leaf: build a single-key merk query and verify. + let next_key = path_keys[depth].to_vec(); + let (proven_value_bytes, parent_root_hash, parent_proof_hash) = + verify_single_key_layer_proof_v0(&layer.merk_proof, &next_key, path_query)?; + + // Descend. + let lower_layer = layer.lower_layers.get(&next_key).ok_or_else(|| { + Error::InvalidProof( + path_query.clone(), + format!( + "aggregate-count proof missing lower layer for path key {}", + hex::encode(&next_key) + ), + ) + })?; + let (lower_hash, count) = verify_v0_layer( + lower_layer, + path_query, + path_keys, + depth + 1, + inner_range, + grove_version, + )?; + + // Chain check: combine_hash(H(tree_value), lower_hash) must equal the + // value_hash recorded by the parent merk for this tree element. + enforce_lower_chain( + path_query, + &next_key, + &proven_value_bytes, + &lower_hash, + &parent_proof_hash, + grove_version, + )?; + + Ok((parent_root_hash, count)) +} + +/// Walk a V1 (`LayerProof`) envelope. 
Mirrors `verify_v0_layer`; the V1 +/// envelope wraps merk proof bytes in `ProofBytes::Merk(_)` and we reject +/// any other tree-specific proof variant for count queries (they're not +/// applicable to provable count trees). +fn verify_v1_layer( + layer: &LayerProof, + path_query: &PathQuery, + path_keys: &[&[u8]], + depth: usize, + inner_range: &grovedb_merk::proofs::query::QueryItem, + grove_version: &GroveVersion, +) -> Result<(CryptoHash, u64), Error> { + let merk_bytes = match &layer.merk_proof { + ProofBytes::Merk(b) => b.as_slice(), + other => { + return Err(Error::InvalidProof( + path_query.clone(), + format!( + "aggregate-count proof has unexpected non-merk leaf bytes: {:?}", + std::mem::discriminant(other) + ), + )); + } + }; + + if depth == path_keys.len() { + return verify_count_leaf(merk_bytes, inner_range, path_query); + } + + let next_key = path_keys[depth].to_vec(); + let (proven_value_bytes, parent_root_hash, parent_proof_hash) = + verify_single_key_layer_proof_v0(merk_bytes, &next_key, path_query)?; + + let lower_layer = layer.lower_layers.get(&next_key).ok_or_else(|| { + Error::InvalidProof( + path_query.clone(), + format!( + "aggregate-count proof missing lower layer for path key {}", + hex::encode(&next_key) + ), + ) + })?; + let (lower_hash, count) = verify_v1_layer( + lower_layer, + path_query, + path_keys, + depth + 1, + inner_range, + grove_version, + )?; + + enforce_lower_chain( + path_query, + &next_key, + &proven_value_bytes, + &lower_hash, + &parent_proof_hash, + grove_version, + )?; + + Ok((parent_root_hash, count)) +} + +/// Verify the leaf layer: bytes are the encoded count-proof Op stream; +/// the inner range is the same one the prover counted over. 
+fn verify_count_leaf( + leaf_bytes: &[u8], + inner_range: &grovedb_merk::proofs::query::QueryItem, + path_query: &PathQuery, +) -> Result<(CryptoHash, u64), Error> { + let (root_hash, count) = verify_aggregate_count_on_range_proof(leaf_bytes, inner_range) + .unwrap() + .map_err(|e| { + Error::InvalidProof( + path_query.clone(), + format!("aggregate-count leaf proof failed to verify: {}", e), + ) + })?; + Ok((root_hash, count)) +} + +/// Verify a non-leaf layer that should contain a single-key proof for +/// `target_key`. Returns `(proven_value_bytes, this_layer_root_hash, +/// proof_hash_recorded_for_target)`. +/// +/// The "proof_hash" is the value_hash committed by the merk proof for the +/// target key — this is the hash the verifier will compare against +/// `combine_hash(H(child_tree_value), lower_layer_root_hash)` to enforce +/// the chain. +fn verify_single_key_layer_proof_v0( + merk_bytes: &[u8], + target_key: &[u8], + path_query: &PathQuery, +) -> Result<(Vec, CryptoHash, CryptoHash), Error> { + let level_query = MerkQuery { + items: vec![grovedb_merk::proofs::query::QueryItem::Key( + target_key.to_vec(), + )], + left_to_right: true, + ..Default::default() + }; + + let (root_hash, merk_result) = level_query + .execute_proof(merk_bytes, None, true, 0) + .unwrap() + .map_err(|e| { + Error::InvalidProof( + path_query.clone(), + format!( + "non-leaf single-key proof for {} failed to verify: {}", + hex::encode(target_key), + e + ), + ) + })?; + + // Find the result row for our target key and pull the value + proof_hash. 
+ let proved = merk_result + .result_set + .iter() + .find(|p| p.key == target_key) + .ok_or_else(|| { + Error::InvalidProof( + path_query.clone(), + format!( + "non-leaf proof did not contain the expected key {}", + hex::encode(target_key) + ), + ) + })?; + + let value_bytes = proved.value.clone().ok_or_else(|| { + Error::InvalidProof( + path_query.clone(), + format!( + "non-leaf proof for key {} returned no value bytes", + hex::encode(target_key) + ), + ) + })?; + + Ok((value_bytes, root_hash, proved.proof)) +} + +/// Enforce the layer-chain hash equality: the parent merk's recorded +/// value_hash for the tree element must equal `combine_hash(H(value), +/// lower_layer_root_hash)`. This is what makes the count cryptographically +/// bound to the GroveDB root hash — the leaf count proof's reconstructed +/// `lower_hash` must agree with the parent's commitment, transitively up to +/// the root. +/// +/// Intermediate path elements may be any tree type — the GroveDB grove can +/// route through Normal/Sum/Count/etc. trees on the way down to the +/// provable-count leaf. The leaf-level tree-type check is enforced by the +/// merk prover (`Merk::prove_aggregate_count_on_range`); here we only +/// require that each non-leaf element on the path *is* some non-empty tree, +/// since only trees have a lower layer to chain into. +fn enforce_lower_chain( + path_query: &PathQuery, + target_key: &[u8], + proven_value_bytes: &[u8], + lower_hash: &CryptoHash, + parent_proof_hash: &CryptoHash, + grove_version: &GroveVersion, +) -> Result<(), Error> { + let element = Element::deserialize(proven_value_bytes, grove_version) + .map_err(|e| { + Error::InvalidProof( + path_query.clone(), + format!( + "non-leaf proof's element at key {} failed to deserialize: {}", + hex::encode(target_key), + e + ), + ) + })? 
+ .into_underlying(); + if !element.is_any_tree() { + return Err(Error::InvalidProof( + path_query.clone(), + format!( + "aggregate-count proof's path element at key {} is not a tree element \ + (got {:?}); count queries can only descend through tree elements", + hex::encode(target_key), + std::mem::discriminant(&element) + ), + )); + } + + let value_h = value_hash(proven_value_bytes).value().to_owned(); + let combined = combine_hash(&value_h, lower_hash).value().to_owned(); + if combined != *parent_proof_hash { + return Err(Error::InvalidProof( + path_query.clone(), + format!( + "aggregate-count proof chain mismatch at key {}: parent recorded value_hash \ + {} but combine_hash(H(value), lower_root) is {}", + hex::encode(target_key), + hex::encode(parent_proof_hash), + hex::encode(combined) + ), + )); + } + Ok(()) +} diff --git a/grovedb/src/operations/proof/generate.rs b/grovedb/src/operations/proof/generate.rs index eb21e2203..34e0593ce 100644 --- a/grovedb/src/operations/proof/generate.rs +++ b/grovedb/src/operations/proof/generate.rs @@ -109,6 +109,22 @@ impl GroveDb { prove_options: Option, grove_version: &GroveVersion, ) -> CostResult { + // Aggregate-count gate: validate at entry so malformed ACOR + // queries (invalid inner range, ACOR-hidden-in-subquery, etc.) are + // rejected up front instead of being skipped when the recursive + // prover never reaches the ACOR-bearing leaf — for example because + // the path doesn't exist. Without this gate, `prove_query` would + // happily return a regular path/absence proof for an invalid + // aggregate-count request. 
+ if path_query + .query + .query + .has_aggregate_count_on_range_anywhere() + && let Err(e) = path_query.validate_aggregate_count_on_range() + { + return Err(e).wrap_with_cost(OperationCost::default()); + } + match grove_version .grovedb_versions .operations @@ -269,6 +285,37 @@ impl GroveDb { *overall_limit }; + // Aggregate-count short-circuit: if any item at this level is an + // `AggregateCountOnRange`, the surrounding `PathQuery` must validate + // as a well-formed aggregate-count query. We do **not** route on a + // partial match (e.g. a query with extra items, subqueries, or an + // illegal inner) — those would silently produce a count proof for + // the wrong shape. Instead we run the same validation the verifier + // runs and let it surface the precise error. + if query + .items + .iter() + .any(QueryItem::is_aggregate_count_on_range) + { + let inner_range = cost_return_on_error_no_add!( + cost, + path_query.validate_aggregate_count_on_range().cloned() + ); + let (count_ops, _count) = cost_return_on_error!( + &mut cost, + subtree + .prove_aggregate_count_on_range(&inner_range, grove_version) + .map_err(Error::MerkError) + ); + let mut serialized = Vec::with_capacity(128); + encode_into(count_ops.iter(), &mut serialized); + return Ok(MerkOnlyLayerProof { + merk_proof: serialized, + lower_layers: BTreeMap::new(), + }) + .wrap_with_cost(cost); + } + let mut merk_proof = cost_return_on_error!( &mut cost, self.generate_merk_proof( @@ -1012,6 +1059,35 @@ impl GroveDb { *overall_limit }; + // Aggregate-count short-circuit (v1 path). Same validation contract + // as v0: any AggregateCountOnRange at this level requires the + // surrounding PathQuery to validate as a well-formed aggregate-count + // query. The count-proof bytes are wrapped in `ProofBytes::Merk` + // since they share the merk Op stream encoding. 
+ if query + .items + .iter() + .any(QueryItem::is_aggregate_count_on_range) + { + let inner_range = cost_return_on_error_no_add!( + cost, + path_query.validate_aggregate_count_on_range().cloned() + ); + let (count_ops, _count) = cost_return_on_error!( + &mut cost, + subtree + .prove_aggregate_count_on_range(&inner_range, grove_version) + .map_err(Error::MerkError) + ); + let mut serialized = Vec::with_capacity(128); + encode_into(count_ops.iter(), &mut serialized); + return Ok(LayerProof { + merk_proof: ProofBytes::Merk(serialized), + lower_layers: BTreeMap::new(), + }) + .wrap_with_cost(cost); + } + let mut merk_proof = cost_return_on_error!( &mut cost, self.generate_merk_proof( @@ -1862,6 +1938,12 @@ impl GroveDb { } } } + QueryItem::AggregateCountOnRange(_) => { + return Err(Error::InvalidInput( + "AggregateCountOnRange is only supported on provable count trees, \ + not on dense fixed-size merkle trees", + )); + } } } @@ -1980,6 +2062,12 @@ impl GroveDb { } } } + QueryItem::AggregateCountOnRange(_) => { + return Err(Error::InvalidInput( + "AggregateCountOnRange is only supported on provable count trees, \ + not on MMR trees", + )); + } } } @@ -2048,6 +2136,12 @@ impl GroveDb { min_start = min_start.min(s.saturating_add(1)); max_end = max_end.max(e.saturating_add(1)); } + QueryItem::AggregateCountOnRange(_) => { + return Err(Error::InvalidInput( + "AggregateCountOnRange is only supported on provable count trees, \ + not on BulkAppendTree", + )); + } } } @@ -2087,7 +2181,7 @@ impl GroveDb { mod tests { use grovedb_merk::proofs::query::QueryItem; - use crate::GroveDb; + use crate::{Error, GroveDb}; /// Helper: encode a u16 as big-endian bytes. fn be_u16(v: u16) -> Vec { @@ -2225,4 +2319,59 @@ mod tests { end ); } + + // ----------------------------------------------------------------------- + // AggregateCountOnRange rejection on non-provable-count tree types. 
+ // + // `AggregateCountOnRange` is only meaningful against `ProvableCountTree` + // and `ProvableCountSumTree` (their nodes commit a count via + // `node_hash_with_count`). Dense, MMR, and BulkAppendTree have no such + // commitment, so the index-resolution helpers must reject the variant + // outright rather than silently fall through. + // ----------------------------------------------------------------------- + + #[test] + fn dense_tree_rejects_aggregate_count_on_range() { + let inner = QueryItem::RangeInclusive(be_u16(0)..=be_u16(5)); + let items = vec![QueryItem::AggregateCountOnRange(Box::new(inner))]; + let err = GroveDb::query_items_to_positions(&items, 100) + .expect_err("dense tree must reject AggregateCountOnRange"); + match err { + Error::InvalidInput(msg) => assert!( + msg.contains("dense fixed-size") || msg.contains("provable count"), + "unexpected message: {msg}" + ), + other => panic!("expected InvalidInput, got {:?}", other), + } + } + + #[test] + fn mmr_tree_rejects_aggregate_count_on_range() { + let inner = QueryItem::RangeInclusive(be_u64(0)..=be_u64(5)); + let items = vec![QueryItem::AggregateCountOnRange(Box::new(inner))]; + let err = GroveDb::query_items_to_leaf_indices(&items, 7) + .expect_err("MMR must reject AggregateCountOnRange"); + match err { + Error::InvalidInput(msg) => assert!( + msg.contains("MMR") || msg.contains("provable count"), + "unexpected message: {msg}" + ), + other => panic!("expected InvalidInput, got {:?}", other), + } + } + + #[test] + fn bulk_append_tree_rejects_aggregate_count_on_range() { + let inner = QueryItem::RangeInclusive(be_u64(0)..=be_u64(5)); + let items = vec![QueryItem::AggregateCountOnRange(Box::new(inner))]; + let err = GroveDb::query_items_to_range(&items, 100) + .expect_err("BulkAppendTree must reject AggregateCountOnRange"); + match err { + Error::InvalidInput(msg) => assert!( + msg.contains("BulkAppendTree") || msg.contains("provable count"), + "unexpected message: {msg}" + ), + other => 
panic!("expected InvalidInput, got {:?}", other), + } + } } diff --git a/grovedb/src/operations/proof/mod.rs b/grovedb/src/operations/proof/mod.rs index 1b9729f33..c10681c4b 100644 --- a/grovedb/src/operations/proof/mod.rs +++ b/grovedb/src/operations/proof/mod.rs @@ -1,5 +1,7 @@ //! Proof operations +#[cfg(feature = "minimal")] +mod aggregate_count; #[cfg(feature = "minimal")] mod generate; /// Utility functions for proof display and conversion. @@ -738,6 +740,13 @@ fn node_to_string(node: &Node) -> Result { feature_type, hex::encode(child_hash) ), + Node::HashWithCount(kv_hash, left_child_hash, right_child_hash, count) => format!( + "HashWithCount(kv_hash=HASH[{}], left=HASH[{}], right=HASH[{}], count={})", + hex::encode(kv_hash), + hex::encode(left_child_hash), + hex::encode(right_child_hash), + count + ), }; Ok(s) } diff --git a/grovedb/src/operations/proof/verify.rs b/grovedb/src/operations/proof/verify.rs index 64583f1e1..1f8120893 100644 --- a/grovedb/src/operations/proof/verify.rs +++ b/grovedb/src/operations/proof/verify.rs @@ -1230,6 +1230,12 @@ impl GroveDb { min_start = min_start.min(s.saturating_add(1)); max_end = max_end.max(e.saturating_add(1)); } + QueryItem::AggregateCountOnRange(_) => { + return Err(Error::InvalidInput( + "AggregateCountOnRange is only supported on provable count trees, \ + not on BulkAppendTree", + )); + } } } @@ -1348,6 +1354,12 @@ impl GroveDb { check_cap!(positions); } } + QueryItem::AggregateCountOnRange(_) => { + return Err(Error::InvalidInput( + "AggregateCountOnRange is only supported on provable count trees, \ + not on this tree type", + )); + } } } @@ -2665,7 +2677,8 @@ impl GroveDb { | Node::KVDigestCount(..) | Node::Hash(_) | Node::KVHash(_) - | Node::KVHashCount(..) => None, + | Node::KVHashCount(..) + | Node::HashWithCount(..) 
=> None, } } diff --git a/grovedb/src/query/mod.rs b/grovedb/src/query/mod.rs index 3fd9ecc75..2c1c0c585 100644 --- a/grovedb/src/query/mod.rs +++ b/grovedb/src/query/mod.rs @@ -114,6 +114,43 @@ impl SizedQuery { offset: None, } } + + /// Validates that this sized query is a well-formed + /// `AggregateCountOnRange` query. On success, returns a reference to the + /// inner range item (the `QueryItem` wrapped by `AggregateCountOnRange`). + /// + /// This is the `SizedQuery`-level entry point: it forwards to + /// [`Query::validate_aggregate_count_on_range`] and additionally rejects + /// any non-`None` `limit` or `offset` (counting is an aggregate over the + /// full match set — pagination would silently change the answer). + pub fn validate_aggregate_count_on_range(&self) -> Result<&QueryItem, Error> { + if self.limit.is_some() { + return Err(Error::InvalidQuery( + "AggregateCountOnRange queries may not set SizedQuery::limit", + )); + } + if self.offset.is_some() { + return Err(Error::InvalidQuery( + "AggregateCountOnRange queries may not set SizedQuery::offset", + )); + } + self.query + .validate_aggregate_count_on_range() + .map_err(query_validation_error_to_static_str) + .map_err(Error::InvalidQuery) + } +} + +/// Converts a `Query::validate_aggregate_count_on_range` error into a +/// `&'static str`. Validation only ever returns +/// `grovedb_query::error::Error::InvalidOperation(&'static str)`, so this is +/// just a projection of that variant; any other error variant (which would +/// indicate an unrelated bug) is forwarded as a generic catch-all label. +fn query_validation_error_to_static_str(e: grovedb_query::error::Error) -> &'static str { + match e { + grovedb_query::error::Error::InvalidOperation(msg) => msg, + _ => "AggregateCountOnRange query validation failed", + } } impl PathQuery { @@ -144,6 +181,31 @@ impl PathQuery { Self { path, query } } + /// Construct a `PathQuery` for an aggregate-count-on-range query against + /// the subtree at `path`. 
`range` is the inner `QueryItem` describing the + /// keys to count over; see [`Query::new_aggregate_count_on_range`] for the + /// allowed range variants. + pub fn new_aggregate_count_on_range(path: Vec>, range: QueryItem) -> Self { + Self::new_unsized(path, Query::new_aggregate_count_on_range(range)) + } + + /// Validates that this `PathQuery` is a well-formed + /// `AggregateCountOnRange` query. On success, returns a reference to the + /// inner range item. + /// + /// Forwards to [`SizedQuery::validate_aggregate_count_on_range`]. + pub fn validate_aggregate_count_on_range(&self) -> Result<&QueryItem, Error> { + self.query.validate_aggregate_count_on_range() + } + + /// Returns `true` if this `PathQuery`'s underlying query carries an + /// `AggregateCountOnRange` item (whether well-formed or not). Use + /// [`Self::validate_aggregate_count_on_range`] when you also need + /// well-formedness. + pub fn has_aggregate_count_on_range(&self) -> bool { + self.query.query.aggregate_count_on_range().is_some() + } + /// The max depth of the query, this is the maximum layers we could get back /// from grovedb /// If the max depth can not be calculated we get None @@ -731,7 +793,7 @@ mod tests { query::{HasSubquery, SinglePathSubquery}, query_result_type::QueryResultType, tests::{common::compare_result_tuples, make_deep_tree, TEST_LEAF}, - Element, GroveDb, PathQuery, SizedQuery, + Element, Error, GroveDb, PathQuery, SizedQuery, }; #[test] @@ -2407,4 +2469,99 @@ mod tests { assert!(result.is_ok()); assert!(!result.unwrap()); } + + // ---------- SizedQuery / PathQuery AggregateCountOnRange validation ---------- + + #[test] + fn sized_query_validate_acor_rejects_limit() { + let mut sq = SizedQuery::new( + Query::new_aggregate_count_on_range(QueryItem::Range(b"a".to_vec()..b"z".to_vec())), + Some(10), + None, + ); + let err = sq + .validate_aggregate_count_on_range() + .expect_err("limit must fail"); + match err { + Error::InvalidQuery(msg) => assert!(msg.contains("limit")), + 
_ => panic!("expected InvalidQuery"), + } + + // Removing the limit but keeping offset should still fail. + sq.limit = None; + sq.offset = Some(5); + let err = sq + .validate_aggregate_count_on_range() + .expect_err("offset must fail"); + match err { + Error::InvalidQuery(msg) => assert!(msg.contains("offset")), + _ => panic!("expected InvalidQuery"), + } + } + + #[test] + fn sized_query_validate_acor_forwards_query_level_errors() { + // SizedQuery validation should forward Query-level rejections (here: + // inner Key) as InvalidQuery. + let sq = SizedQuery::new( + Query::new_aggregate_count_on_range(QueryItem::Key(b"k".to_vec())), + None, + None, + ); + let err = sq + .validate_aggregate_count_on_range() + .expect_err("inner Key must fail"); + match err { + Error::InvalidQuery(msg) => assert!(msg.contains("Key")), + _ => panic!("expected InvalidQuery"), + } + } + + #[test] + fn sized_query_validate_acor_happy_path() { + let sq = SizedQuery::new( + Query::new_aggregate_count_on_range(QueryItem::Range(b"a".to_vec()..b"z".to_vec())), + None, + None, + ); + let inner = sq + .validate_aggregate_count_on_range() + .expect("happy path must validate"); + assert!(matches!(inner, QueryItem::Range(_))); + } + + #[test] + fn path_query_validate_acor_forwards_to_sized_query() { + // PathQuery::validate_aggregate_count_on_range delegates to + // SizedQuery::validate_aggregate_count_on_range — exercise both error + // and happy paths through the public PathQuery surface. + let pq = PathQuery::new_aggregate_count_on_range( + vec![b"path".to_vec()], + QueryItem::Range(b"a".to_vec()..b"z".to_vec()), + ); + let inner = pq + .validate_aggregate_count_on_range() + .expect("happy path through PathQuery must validate"); + assert!(matches!(inner, QueryItem::Range(_))); + + // Forward limit rejection. 
+ let mut pq_bad = pq.clone(); + pq_bad.query.limit = Some(1); + let err = pq_bad + .validate_aggregate_count_on_range() + .expect_err("limit must fail"); + assert!(matches!(err, Error::InvalidQuery(_))); + } + + #[test] + fn path_query_has_aggregate_count_on_range_recognizes_helper_constructor() { + let pq = PathQuery::new_aggregate_count_on_range( + vec![b"path".to_vec()], + QueryItem::Range(b"a".to_vec()..b"z".to_vec()), + ); + assert!(pq.has_aggregate_count_on_range()); + + let pq_regular = PathQuery::new_single_key(vec![b"p".to_vec()], b"k".to_vec()); + assert!(!pq_regular.has_aggregate_count_on_range()); + } } diff --git a/grovedb/src/tests/aggregate_count_query_tests.rs b/grovedb/src/tests/aggregate_count_query_tests.rs new file mode 100644 index 000000000..f991e03fa --- /dev/null +++ b/grovedb/src/tests/aggregate_count_query_tests.rs @@ -0,0 +1,1233 @@ +//! End-to-end GroveDB tests for `AggregateCountOnRange` queries. +//! +//! These exercise the full prove → encode → decode → verify pipeline against +//! both `ProvableCountTree` and `ProvableCountSumTree` (and their +//! `NonCounted*` wrappers via being the *parent* tree, not the queried one), +//! at various path depths and across the full set of allowed range variants. + +#[cfg(test)] +mod tests { + use grovedb_merk::proofs::query::QueryItem; + use grovedb_version::version::{v2::GROVE_V2, GroveVersion}; + + use crate::{ + tests::{make_test_grovedb, TEST_LEAF}, + Element, GroveDb, PathQuery, SizedQuery, + }; + + /// Insert the 15 single-byte keys "a".."o" into a `ProvableCountTree` + /// rooted at `[TEST_LEAF, "ct"]`. Returns the GroveDB and the resulting + /// root hash. 
+ fn setup_15_key_provable_count_tree( + grove_version: &GroveVersion, + ) -> (crate::tests::TempGroveDb, [u8; 32]) { + let db = make_test_grovedb(grove_version); + db.insert( + [TEST_LEAF].as_ref(), + b"ct", + Element::empty_provable_count_tree(), + None, + None, + grove_version, + ) + .unwrap() + .expect("insert ct"); + for c in b'a'..=b'o' { + db.insert( + [TEST_LEAF, b"ct"].as_ref(), + &[c], + Element::new_item(vec![c]), + None, + None, + grove_version, + ) + .unwrap() + .expect("insert leaf"); + } + let root = db + .grove_db + .root_hash(None, grove_version) + .unwrap() + .expect("root_hash"); + (db, root) + } + + fn setup_15_key_provable_count_sum_tree( + grove_version: &GroveVersion, + ) -> (crate::tests::TempGroveDb, [u8; 32]) { + let db = make_test_grovedb(grove_version); + db.insert( + [TEST_LEAF].as_ref(), + b"cst", + Element::empty_provable_count_sum_tree(), + None, + None, + grove_version, + ) + .unwrap() + .expect("insert cst"); + for c in b'a'..=b'o' { + db.insert( + [TEST_LEAF, b"cst"].as_ref(), + &[c], + // `Item` plays the role of a non-sum element inside a count + // sum tree — we're testing count semantics, not sum. + Element::new_item(vec![c]), + None, + None, + grove_version, + ) + .unwrap() + .expect("insert leaf"); + } + let root = db + .grove_db + .root_hash(None, grove_version) + .unwrap() + .expect("root_hash"); + (db, root) + } + + /// Round-trip helper: build a path_query, prove it, verify it, assert + /// `(root, count)` matches what we expect. 
+ fn round_trip( + db: &crate::tests::TempGroveDb, + expected_root: [u8; 32], + path: Vec>, + inner_range: QueryItem, + expected_count: u64, + grove_version: &GroveVersion, + ) { + let path_query = PathQuery::new_aggregate_count_on_range(path, inner_range); + let proof = db + .grove_db + .prove_query(&path_query, None, grove_version) + .unwrap() + .expect("prove_query should succeed"); + let (root, count) = + GroveDb::verify_aggregate_count_query(&proof, &path_query, grove_version) + .expect("verify should succeed"); + assert_eq!(root, expected_root, "verifier reconstructed wrong root"); + assert_eq!(count, expected_count, "verifier returned wrong count"); + } + + #[test] + fn provable_count_tree_range_inclusive() { + let v = GroveVersion::latest(); + let (db, root) = setup_15_key_provable_count_tree(v); + round_trip( + &db, + root, + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()), + 10, + v, + ); + } + + #[test] + fn provable_count_tree_range_exclusive() { + let v = GroveVersion::latest(); + let (db, root) = setup_15_key_provable_count_tree(v); + round_trip( + &db, + root, + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::Range(b"c".to_vec()..b"l".to_vec()), + 9, + v, + ); + } + + #[test] + fn provable_count_tree_range_from() { + let v = GroveVersion::latest(); + let (db, root) = setup_15_key_provable_count_tree(v); + round_trip( + &db, + root, + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeFrom(b"c".to_vec()..), + 13, + v, + ); + } + + #[test] + fn provable_count_tree_range_after() { + let v = GroveVersion::latest(); + let (db, root) = setup_15_key_provable_count_tree(v); + round_trip( + &db, + root, + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeAfter(b"b".to_vec()..), + 13, + v, + ); + } + + #[test] + fn provable_count_tree_range_to_inclusive() { + let v = GroveVersion::latest(); + let (db, root) = setup_15_key_provable_count_tree(v); + round_trip( + &db, + root, + 
vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeToInclusive(..=b"e".to_vec()), + 5, + v, + ); + } + + #[test] + fn provable_count_tree_range_below_all() { + let v = GroveVersion::latest(); + let (db, root) = setup_15_key_provable_count_tree(v); + round_trip( + &db, + root, + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeInclusive(vec![0x00]..=vec![0x10]), + 0, + v, + ); + } + + #[test] + fn provable_count_sum_tree_range_inclusive() { + let v = GroveVersion::latest(); + let (db, root) = setup_15_key_provable_count_sum_tree(v); + round_trip( + &db, + root, + vec![TEST_LEAF.to_vec(), b"cst".to_vec()], + QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()), + 10, + v, + ); + } + + #[test] + fn rejects_invalid_range_at_construction() { + // A path-query with an inner Key item should be rejected at + // validation time, before any proof generation runs. + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::Key(b"c".to_vec()), + ); + let err = path_query.validate_aggregate_count_on_range(); + assert!(err.is_err(), "Key inner should be rejected"); + } + + #[test] + fn rejects_inner_range_full() { + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeFull(std::ops::RangeFull), + ); + assert!(path_query.validate_aggregate_count_on_range().is_err()); + } + + #[test] + fn rejects_against_normal_tree() { + // Querying a NormalTree with AggregateCountOnRange should fail at + // proof time with an InvalidProofError from the merk layer. We need + // at least one element in the target normal tree so that the + // multi-layer proof generator actually recurses into it (empty + // trees are returned as result rows without a lower-layer descent). 
+ let v = GroveVersion::latest(); + let db = make_test_grovedb(v); + db.insert( + [TEST_LEAF].as_ref(), + b"x", + Element::new_item(b"y".to_vec()), + None, + None, + v, + ) + .unwrap() + .expect("seed normal tree"); + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec()], + QueryItem::Range(b"a".to_vec()..b"z".to_vec()), + ); + let proof_result = db.grove_db.prove_query(&path_query, None, v).unwrap(); + assert!( + proof_result.is_err(), + "expected prove_query to fail on NormalTree, got {:?}", + proof_result.ok().map(|b| b.len()) + ); + } + + #[test] + fn count_forgery_is_caught_at_grovedb_level() { + // End-to-end version of the merk-level forgery test: parse the + // GroveDB envelope, descend to the leaf merk proof, find a real + // HashWithCount op at a true op boundary, bump its count, re-encode + // — and the GroveDB verifier should reject the resulting proof + // (root mismatch in the layer chain). + // + // We parse rather than scan-for-byte to ensure we are mutating an + // actual count varint and not, say, a 0x1e byte that happens to live + // inside one of the embedded 32-byte hashes. 
+ let v = GroveVersion::latest(); + let (db, _expected_root) = setup_15_key_provable_count_tree(v); + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()), + ); + let proof = db + .grove_db + .prove_query(&path_query, None, v) + .unwrap() + .expect("prove_query should succeed"); + + let tampered = tamper_leaf_count(&proof, &path_query) + .expect("expected at least one HashWithCount in the leaf merk proof"); + + let verify_result = GroveDb::verify_aggregate_count_query(&tampered, &path_query, v); + assert!( + verify_result.is_err(), + "tampered count must be rejected at the GroveDB verifier level, got {:?}", + verify_result.map(|(_, c)| c) + ); + } + + /// Decode the GroveDB proof envelope, walk down to the leaf merk proof + /// bytes (V0: `MerkOnlyLayerProof`; V1: `LayerProof` with + /// `ProofBytes::Merk`), parse the merk proof into ops at true op + /// boundaries, increment the `count` of the first `HashWithCount` op, + /// and re-encode the whole envelope. + /// + /// Returns `None` if no `HashWithCount` is present in the leaf merk + /// proof — the test treats that as an invalid precondition. + fn tamper_leaf_count(proof: &[u8], path_query: &PathQuery) -> Option> { + use bincode::config; + use grovedb_merk::proofs::{encoding::encode_into, Decoder, Node, Op}; + + use crate::operations::proof::{ + GroveDBProof, GroveDBProofV0, GroveDBProofV1, LayerProof, MerkOnlyLayerProof, + ProofBytes, + }; + + let cfg = config::standard() + .with_big_endian() + .with_limit::<{ 256 * 1024 * 1024 }>(); + let (mut decoded, _): (GroveDBProof, _) = bincode::decode_from_slice(proof, cfg).ok()?; + + // Descend through the path layers to obtain a mutable ref to the + // leaf merk proof bytes. + let leaf_bytes: &mut Vec = match &mut decoded { + GroveDBProof::V0(GroveDBProofV0 { root_layer, .. 
}) => { + let mut layer: &mut MerkOnlyLayerProof = root_layer; + for key in &path_query.path { + layer = layer.lower_layers.get_mut(key)?; + } + &mut layer.merk_proof + } + GroveDBProof::V1(GroveDBProofV1 { root_layer }) => { + let mut layer: &mut LayerProof = root_layer; + for key in &path_query.path { + layer = layer.lower_layers.get_mut(key)?; + } + match &mut layer.merk_proof { + ProofBytes::Merk(b) => b, + _ => return None, + } + } + }; + + // Parse the merk proof into ops, mutate the first HashWithCount, + // re-encode. + let mut ops: Vec = Vec::new(); + for op in Decoder::new(leaf_bytes) { + ops.push(op.ok()?); + } + + let mut tampered = false; + for op in ops.iter_mut() { + match op { + Op::Push(Node::HashWithCount(_, _, _, count)) + | Op::PushInverted(Node::HashWithCount(_, _, _, count)) => { + *count = count.wrapping_add(1); + tampered = true; + break; + } + _ => {} + } + } + if !tampered { + return None; + } + + let mut new_leaf = Vec::new(); + encode_into(ops.iter(), &mut new_leaf); + *leaf_bytes = new_leaf; + + bincode::encode_to_vec( + decoded, + config::standard().with_big_endian().with_no_limit(), + ) + .ok() + } + + /// Build a 3-layer path: TEST_LEAF -> "outer" (NormalTree) -> + /// "inner" (ProvableCountTree) populated with 5 keys "a".."e". 
+ fn setup_three_layer_provable_count_tree( + grove_version: &GroveVersion, + ) -> (crate::tests::TempGroveDb, [u8; 32]) { + let db = make_test_grovedb(grove_version); + db.insert( + [TEST_LEAF].as_ref(), + b"outer", + Element::empty_tree(), + None, + None, + grove_version, + ) + .unwrap() + .expect("insert outer"); + db.insert( + [TEST_LEAF, b"outer"].as_ref(), + b"inner", + Element::empty_provable_count_tree(), + None, + None, + grove_version, + ) + .unwrap() + .expect("insert inner"); + for c in b'a'..=b'e' { + db.insert( + [TEST_LEAF, b"outer", b"inner"].as_ref(), + &[c], + Element::new_item(vec![c]), + None, + None, + grove_version, + ) + .unwrap() + .expect("insert leaf"); + } + let root = db + .grove_db + .root_hash(None, grove_version) + .unwrap() + .expect("root_hash"); + (db, root) + } + + #[test] + fn three_layer_path_round_trip() { + // Exercises the multi-layer chain enforcement: layer 0 proves TEST_LEAF + // exists, layer 1 proves "outer" exists in TEST_LEAF, layer 2 proves + // "inner" exists in outer, layer 3 is the count proof on inner. + let v = GroveVersion::latest(); + let (db, root) = setup_three_layer_provable_count_tree(v); + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"outer".to_vec(), b"inner".to_vec()], + QueryItem::RangeInclusive(b"b".to_vec()..=b"d".to_vec()), + ); + let proof = db + .grove_db + .prove_query(&path_query, None, v) + .unwrap() + .expect("prove_query should succeed"); + let (got_root, got_count) = GroveDb::verify_aggregate_count_query(&proof, &path_query, v) + .expect("verify should succeed"); + assert_eq!(got_root, root, "verifier root must match GroveDB root"); + assert_eq!(got_count, 3, "expected count of {{b, c, d}}"); + } + + /// Helper for non-leaf-layer proof mutation tests: decode the V1 + /// envelope, walk to the TEST_LEAF non-leaf merk proof bytes, run + /// `mutate` over its parsed ops, re-encode the merk proof and the + /// envelope. Returns the mutated bytes. 
+ fn mutate_test_leaf_layer_ops( + proof: &[u8], + mutate: impl FnOnce(&mut Vec), + ) -> Vec { + use grovedb_merk::proofs::{encoding::encode_into, Decoder, Op}; + + use crate::operations::proof::{GroveDBProof, GroveDBProofV1, ProofBytes}; + + let mut decoded = decode_envelope(proof); + let GroveDBProof::V1(GroveDBProofV1 { root_layer }) = &mut decoded else { + panic!("expected V1 envelope"); + }; + let test_leaf_layer = root_layer + .lower_layers + .get_mut(&TEST_LEAF.to_vec()) + .expect("TEST_LEAF lower layer"); + let bytes = match &mut test_leaf_layer.merk_proof { + ProofBytes::Merk(b) => b, + _ => panic!("expected Merk bytes at TEST_LEAF non-leaf"), + }; + let mut ops: Vec = Decoder::new(bytes) + .map(|r| r.expect("decode existing op")) + .collect(); + mutate(&mut ops); + let mut new_bytes = Vec::new(); + encode_into(ops.iter(), &mut new_bytes); + *bytes = new_bytes; + reencode_envelope(decoded) + } + + #[test] + fn non_leaf_proof_without_target_key_is_rejected() { + // Mutate the TEST_LEAF non-leaf proof: replace the KV op carrying + // the "ct" key with a Hash op carrying that node's hash. Phase 1 + // decodes successfully, the merk single-key verifier returns Ok + // with an empty result_set (no KV with matching key), and the + // GroveDB-level verifier surfaces "did not contain the expected + // key" via the `ok_or_else` arm. 
+ use grovedb_merk::proofs::{Node, Op}; + + let v = GroveVersion::latest(); + let (db, _root) = setup_15_key_provable_count_tree(v); + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()), + ); + let proof = db + .grove_db + .prove_query(&path_query, None, v) + .unwrap() + .expect("prove_query should succeed"); + let mutated = mutate_test_leaf_layer_ops(&proof, |ops| { + for op in ops.iter_mut() { + let key_match = matches!( + op, + Op::Push( + Node::KV(k, _) + | Node::KVValueHash(k, _, _) + | Node::KVValueHashFeatureType(k, _, _, _) + | Node::KVValueHashFeatureTypeWithChildHash(k, _, _, _, _) + ) + | Op::PushInverted( + Node::KV(k, _) + | Node::KVValueHash(k, _, _) + | Node::KVValueHashFeatureType(k, _, _, _) + | Node::KVValueHashFeatureTypeWithChildHash(k, _, _, _, _) + ) if k == b"ct" + ); + if key_match { + *op = Op::Push(Node::Hash([0u8; 32])); + return; + } + } + panic!("test setup: no `ct` KV op found in non-leaf proof"); + }); + let err = GroveDb::verify_aggregate_count_query(&mutated, &path_query, v) + .expect_err("missing target key in non-leaf proof must be rejected"); + match err { + crate::Error::InvalidProof(_, msg) => assert!( + // Either Phase 2 catches "did not contain the expected key" + // or the upstream merk single-key verifier fails first + // because the swapped Hash makes the proof invalid; either + // outcome closes the surface. + msg.contains("did not contain the expected key") + || msg.contains("non-leaf single-key proof"), + "unexpected message: {msg}" + ), + other => panic!("expected InvalidProof, got {:?}", other), + } + } + + #[test] + fn non_leaf_proof_with_kv_replaced_by_kvdigest_is_rejected() { + // Replace "ct" KV in the non-leaf proof with a KVDigest variant + // (key + value_hash, no value). 
The result_set will contain "ct" + // but with `value = None`, hitting the "no value bytes" arm of + // `verify_single_key_layer_proof_v0`. + use grovedb_merk::proofs::{Node, Op}; + + let v = GroveVersion::latest(); + let (db, _root) = setup_15_key_provable_count_tree(v); + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()), + ); + let proof = db + .grove_db + .prove_query(&path_query, None, v) + .unwrap() + .expect("prove_query should succeed"); + let mutated = mutate_test_leaf_layer_ops(&proof, |ops| { + for op in ops.iter_mut() { + let replaced = match op { + Op::Push(Node::KVValueHash(k, _, vh)) + | Op::PushInverted(Node::KVValueHash(k, _, vh)) + if k == b"ct" => + { + Some((k.clone(), *vh)) + } + Op::Push(Node::KVValueHashFeatureType(k, _, vh, _)) + | Op::PushInverted(Node::KVValueHashFeatureType(k, _, vh, _)) + if k == b"ct" => + { + Some((k.clone(), *vh)) + } + Op::Push(Node::KVValueHashFeatureTypeWithChildHash(k, _, vh, _, _)) + | Op::PushInverted(Node::KVValueHashFeatureTypeWithChildHash(k, _, vh, _, _)) + if k == b"ct" => + { + Some((k.clone(), *vh)) + } + _ => None, + }; + if let Some((k, vh)) = replaced { + *op = Op::Push(Node::KVDigest(k, vh)); + return; + } + } + panic!("test setup: no `ct` KVValueHash-flavored op found in non-leaf proof"); + }); + let result = GroveDb::verify_aggregate_count_query(&mutated, &path_query, v); + // Either we hit the "no value bytes" arm (line 295-302) or the + // merk single-key verifier itself rejects the type swap. Both + // are valid — both close the attack surface. + match result { + Err(crate::Error::InvalidProof(_, _)) => {} + other => panic!("expected InvalidProof, got {:?}", other), + } + } + + #[test] + fn non_leaf_proof_with_undeserializable_value_is_rejected() { + // Mutate the "ct" KV node's value bytes to garbage that fails + // `Element::deserialize`. 
The merk single-key verifier still + // returns Ok (it just hashes the bytes — it doesn't deserialize), + // so enforce_lower_chain hits the deserialize-failure arm. + use grovedb_merk::proofs::{Node, Op}; + + let v = GroveVersion::latest(); + let (db, _root) = setup_15_key_provable_count_tree(v); + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()), + ); + let proof = db + .grove_db + .prove_query(&path_query, None, v) + .unwrap() + .expect("prove_query should succeed"); + // Garbage that no Element variant tag matches. + let garbage: Vec = vec![0xff, 0xff, 0xff]; + let mutated = mutate_test_leaf_layer_ops(&proof, |ops| { + for op in ops.iter_mut() { + let replaced = match op { + Op::Push(Node::KVValueHash(k, val, _)) + | Op::PushInverted(Node::KVValueHash(k, val, _)) + if k == b"ct" => + { + *val = garbage.clone(); + true + } + Op::Push(Node::KVValueHashFeatureType(k, val, _, _)) + | Op::PushInverted(Node::KVValueHashFeatureType(k, val, _, _)) + if k == b"ct" => + { + *val = garbage.clone(); + true + } + Op::Push(Node::KVValueHashFeatureTypeWithChildHash(k, val, _, _, _)) + | Op::PushInverted(Node::KVValueHashFeatureTypeWithChildHash( + k, + val, + _, + _, + _, + )) if k == b"ct" => { + *val = garbage.clone(); + true + } + _ => false, + }; + if replaced { + return; + } + } + panic!("test setup: no `ct` value-bearing KV op found in non-leaf proof"); + }); + let result = GroveDb::verify_aggregate_count_query(&mutated, &path_query, v); + // Either the deserialize arm fires (line 330-338) or the chain + // mismatch fires first (because mutating value bytes also breaks + // the value_hash binding committed by the parent). Either rejects. 
+ assert!( + matches!(result, Err(crate::Error::InvalidProof(_, _))), + "mutated value bytes must be rejected, got {:?}", + result.map(|(_, c)| c) + ); + } + + #[test] + fn non_leaf_proof_with_non_tree_element_is_rejected() { + // Mutate the "ct" value bytes to a serialized non-tree Element + // (Item). This deserializes successfully, but enforce_lower_chain's + // `is_any_tree()` guard rejects: aggregate-count proofs can only + // descend through tree elements. + use grovedb_merk::proofs::{Node, Op}; + + let v = GroveVersion::latest(); + let (db, _root) = setup_15_key_provable_count_tree(v); + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()), + ); + let proof = db + .grove_db + .prove_query(&path_query, None, v) + .unwrap() + .expect("prove_query should succeed"); + let item_bytes = Element::new_item(vec![0xab, 0xcd]) + .serialize(v) + .expect("serialize item"); + let mutated = mutate_test_leaf_layer_ops(&proof, |ops| { + for op in ops.iter_mut() { + let replaced = match op { + Op::Push(Node::KVValueHash(k, val, _)) + | Op::PushInverted(Node::KVValueHash(k, val, _)) + if k == b"ct" => + { + *val = item_bytes.clone(); + true + } + Op::Push(Node::KVValueHashFeatureType(k, val, _, _)) + | Op::PushInverted(Node::KVValueHashFeatureType(k, val, _, _)) + if k == b"ct" => + { + *val = item_bytes.clone(); + true + } + Op::Push(Node::KVValueHashFeatureTypeWithChildHash(k, val, _, _, _)) + | Op::PushInverted(Node::KVValueHashFeatureTypeWithChildHash( + k, + val, + _, + _, + _, + )) if k == b"ct" => { + *val = item_bytes.clone(); + true + } + _ => false, + }; + if replaced { + return; + } + } + panic!("test setup: no `ct` value-bearing KV op found in non-leaf proof"); + }); + let result = GroveDb::verify_aggregate_count_query(&mutated, &path_query, v); + // Either the non-tree branch fires (line 341-349) or the chain + // hash check fails first (value_hash for the 
swapped item bytes + // diverges from the parent's commitment). Either rejects. + assert!( + matches!(result, Err(crate::Error::InvalidProof(_, _))), + "non-tree element on path must be rejected, got {:?}", + result.map(|(_, c)| c) + ); + } + + #[test] + fn aggregate_count_with_missing_path_and_invalid_inner_is_rejected_at_entry() { + // Codex finding: validation only fires inside `prove_subqueries` when + // the recursion reaches the ACOR-bearing leaf level. If the path + // doesn't exist (e.g. "missing" key under TEST_LEAF), the recursive + // prover never sees the ACOR item and the malformed query is allowed + // to return a regular path/absence proof. Fix: validate at the + // `prove_query` entry point, before any recursive dispatch. + let v = GroveVersion::latest(); + let db = make_test_grovedb(v); + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"missing".to_vec()], + // QueryItem::Key as the inner range is invalid for ACOR. + QueryItem::Key(b"k".to_vec()), + ); + let prove_result = db.grove_db.prove_query(&path_query, None, v).unwrap(); + match prove_result { + Err(crate::Error::InvalidQuery(msg)) => { + assert!( + msg.contains("AggregateCountOnRange may not wrap Key"), + "expected ACOR-Key rejection, got: {msg}" + ); + } + other => panic!( + "malformed ACOR with non-existent path must be rejected at entry, got {:?}", + other.map(|b| b.len()) + ), + } + } + + #[test] + fn aggregate_count_hidden_in_subquery_branch_is_rejected_at_entry() { + // Codex's broader concern: an `AggregateCountOnRange` smuggled + // inside a `default_subquery_branch.subquery` is also invalid (ACOR + // is terminal — it cannot be reached via a normal subquery path) + // and must be rejected up front. The recursive detector + // `has_aggregate_count_on_range_anywhere` finds the hidden ACOR; + // top-level `validate_aggregate_count_on_range` then rejects + // because the surrounding query isn't the canonical single-ACOR + // shape. 
+ let v = GroveVersion::latest(); + let db = make_test_grovedb(v); + let inner_acor = QueryItem::AggregateCountOnRange(Box::new(QueryItem::Range( + b"a".to_vec()..b"z".to_vec(), + ))); + let mut sub_query = grovedb_merk::proofs::Query::new(); + sub_query.insert_item(inner_acor); + let mut top_query = grovedb_merk::proofs::Query::new(); + top_query.insert_range_inclusive(b"a".to_vec()..=b"z".to_vec()); + top_query.set_subquery(sub_query); + let path_query = PathQuery::new( + vec![TEST_LEAF.to_vec()], + SizedQuery::new(top_query, None, None), + ); + let prove_result = db.grove_db.prove_query(&path_query, None, v).unwrap(); + assert!( + matches!(prove_result, Err(crate::Error::InvalidQuery(_))), + "ACOR hidden in subquery branch must be rejected at entry, got {:?}", + prove_result.map(|b| b.len()) + ); + } + + #[test] + fn corrupted_path_layer_byte_is_rejected() { + // Tamper with a non-leaf-layer byte (a tree-element value byte) and + // verify that the chain enforcement catches it. We pick a byte deep + // enough that it lands inside one of the parent merk's KV value bytes. + let v = GroveVersion::latest(); + let (db, _root) = setup_three_layer_provable_count_tree(v); + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"outer".to_vec(), b"inner".to_vec()], + QueryItem::RangeInclusive(b"b".to_vec()..=b"d".to_vec()), + ); + let mut proof = db + .grove_db + .prove_query(&path_query, None, v) + .unwrap() + .expect("prove_query should succeed"); + // Flip a byte well inside the proof — the exact location doesn't + // matter as long as it isn't the bincode envelope length prefix. + // Index 32 is past the envelope and into the first inner merk's bytes. 
+ let target = proof.len() / 2; + proof[target] = proof[target].wrapping_add(1); + let verify_result = GroveDb::verify_aggregate_count_query(&proof, &path_query, v); + assert!( + verify_result.is_err(), + "tampered proof byte must be rejected, got {:?}", + verify_result.map(|(_, c)| c) + ); + } + + #[test] + fn provable_count_tree_works_on_grove_v2_envelope() { + // GROVE_V2 dispatches to the V0 prove_query_non_serialized path, which + // produces a `MerkOnlyLayerProof` envelope rather than V1's + // `LayerProof`. Verify the same prove → verify cycle works through that + // envelope. + let v: &GroveVersion = &GROVE_V2; + let (db, root) = setup_15_key_provable_count_tree(v); + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()), + ); + let proof = db + .grove_db + .prove_query(&path_query, None, v) + .unwrap() + .expect("prove_query (v0 envelope) should succeed"); + let (got_root, got_count) = GroveDb::verify_aggregate_count_query(&proof, &path_query, v) + .expect("verify should succeed against v0 envelope"); + assert_eq!(got_root, root); + assert_eq!(got_count, 10); + } + + #[test] + fn verify_rejects_malformed_path_query_at_entry() { + // Even before any proof bytes are decoded, the verifier rejects a + // path_query that isn't a well-formed AggregateCountOnRange query. + let v = GroveVersion::latest(); + let bad_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec()], + QueryItem::Key(b"k".to_vec()), // inner Key is not allowed + ); + // Any proof bytes are fine — validation happens before decoding. 
+ let dummy_proof = vec![0u8; 16]; + let err = GroveDb::verify_aggregate_count_query(&dummy_proof, &bad_query, v) + .expect_err("malformed path_query must be rejected up front"); + let s = format!("{:?}", err); + assert!( + s.contains("Key") || s.contains("InvalidQuery"), + "got: {}", + s + ); + } + + #[test] + fn validate_at_construction_rejects_nested_aggregate_count_on_range() { + // Nested AggregateCountOnRange is rejected at validation time. + let pq = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::AggregateCountOnRange(Box::new(QueryItem::Range( + b"a".to_vec()..b"z".to_vec(), + ))), + ); + assert!(pq.validate_aggregate_count_on_range().is_err()); + } + + /// `Element::NonCounted` wrappers tell the parent tree to **skip** the + /// wrapped element when aggregating its own count. + /// `AggregateCountOnRange` honors that: NonCounted children are + /// excluded from the result. + /// + /// Mechanics — every node in a `ProvableCountTree` carries an + /// own_count of 1 (normal) or 0 (NonCounted). The merk-recorded + /// aggregate at any subtree = sum of own_counts in the subtree + /// (NonCounted entries contribute 0). The verifier's shape walk + /// derives each boundary node's own_count as + /// `node_aggregate − left_struct − right_struct` and credits **only + /// own_count** to the in-range total when the key falls in range. + /// For a NonCounted leaf, own_count = 0 and the wrapped key + /// contributes nothing. The structural counts threaded through the + /// walk are hash-bound at every step (every count-bearing proof node + /// feeds its count into `node_hash_with_count`), so a malicious + /// prover can't lie about a NonCounted node's status without + /// breaking the parent's hash chain. 
+ #[test] + fn non_counted_children_are_excluded_from_aggregate_count() { + use crate::tests::TEST_LEAF; + + let v = GroveVersion::latest(); + let db = make_test_grovedb(v); + db.insert( + [TEST_LEAF].as_ref(), + b"ct", + Element::empty_provable_count_tree(), + None, + None, + v, + ) + .unwrap() + .expect("insert ct"); + + // Five regular items — each contributes 1. + for c in [b'a', b'b', b'c', b'd', b'e'] { + db.insert( + [TEST_LEAF, b"ct"].as_ref(), + &[c], + Element::new_item(vec![c]), + None, + None, + v, + ) + .unwrap() + .expect("insert regular item"); + } + + // One NonCounted-wrapped item, key "f" — in-range but contributes + // 0 (own_count = 0). + let nc_item = + Element::new_non_counted(Element::new_item(b"hidden".to_vec())).expect("wrap ok"); + db.insert([TEST_LEAF, b"ct"].as_ref(), b"f", nc_item, None, None, v) + .unwrap() + .expect("insert NonCounted item"); + + let root = db.grove_db.root_hash(None, v).unwrap().expect("root_hash"); + + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeInclusive(b"a".to_vec()..=b"z".to_vec()), + ); + let proof = db + .grove_db + .prove_query(&path_query, None, v) + .unwrap() + .expect("prove"); + let (got_root, got_count) = + GroveDb::verify_aggregate_count_query(&proof, &path_query, v).expect("verify"); + assert_eq!(got_root, root, "root mismatch"); + assert_eq!( + got_count, 5, + "NonCounted-wrapped child must be excluded from the aggregate count" + ); + } + + /// Pin observable cost numbers + proof byte size for a known input so + /// regressions in the proof shape (extra unnecessary nodes, missing + /// short-circuit, etc.) show up as a test failure instead of as a + /// silent perf hit. Values are exact for the 15-key + /// `ProvableCountTree` + `RangeInclusive("c"..="l")` setup; if the + /// proof shape changes intentionally, update them here. 
+ #[test] + fn proof_size_snapshot_for_15_key_closed_range() { + let v = GroveVersion::latest(); + let (db, _root) = setup_15_key_provable_count_tree(v); + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()), + ); + let proof = db + .grove_db + .prove_query(&path_query, None, v) + .unwrap() + .expect("prove"); + + // Snapshot the proof byte size. The current shape produces a small + // deterministic byte stream; if this drifts upward without + // intent, the proof shape may have regressed. + // + // The acceptable range is conservative — we only require the + // proof stays bounded by what an O(log n) shape predicts for a + // 4-level tree (a few hundred bytes is the right ballpark; many + // KB would indicate the count short-circuit didn't fire). The + // *current* size is around 650 bytes; a few hundred bytes of + // headroom in either direction tolerates encoding tweaks but + // catches gross regressions. + let len = proof.len(); + assert!( + (300..=900).contains(&len), + "aggregate-count proof size {} bytes is outside the expected \ + [300, 900] window for a 15-key 2-layer query — proof shape \ + may have regressed", + len + ); + + // Round-trip through the verifier as a sanity check that the + // pinned shape is still verifiable. + let (_root, count) = + GroveDb::verify_aggregate_count_query(&proof, &path_query, v).expect("verify"); + assert_eq!(count, 10); + } + + /// Re-encode a (possibly mutated) `GroveDBProof` envelope using the same + /// bincode config the prover uses on the way out. 
+ fn reencode_envelope(decoded: crate::operations::proof::GroveDBProof) -> Vec { + bincode::encode_to_vec( + decoded, + bincode::config::standard() + .with_big_endian() + .with_no_limit(), + ) + .expect("re-encode envelope") + } + + fn decode_envelope(proof: &[u8]) -> crate::operations::proof::GroveDBProof { + bincode::decode_from_slice( + proof, + bincode::config::standard() + .with_big_endian() + .with_limit::<{ 256 * 1024 * 1024 }>(), + ) + .expect("decode envelope") + .0 + } + + #[test] + fn v1_envelope_with_non_merk_proof_bytes_is_rejected() { + // The verifier's V1 layer walker only accepts `ProofBytes::Merk(_)` + // for aggregate-count proofs (other tree types — MMR / BulkAppend / + // Dense / CommitmentTree — cannot host provable count subtrees). If + // we swap the leaf layer's bytes for an `MMR(_)` variant, verification + // must fail with an `InvalidProof` error rather than silently + // succeed or panic. + use crate::operations::proof::{GroveDBProof, GroveDBProofV1, ProofBytes}; + + let v = GroveVersion::latest(); + let (db, _root) = setup_15_key_provable_count_tree(v); + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()), + ); + let proof = db + .grove_db + .prove_query(&path_query, None, v) + .unwrap() + .expect("prove_query should succeed"); + + let mut decoded = decode_envelope(&proof); + let GroveDBProof::V1(GroveDBProofV1 { root_layer }) = &mut decoded else { + panic!("expected V1 envelope on latest GroveVersion"); + }; + + // Walk to the leaf layer (depth = path.len()) and swap its bytes + // for an MMR variant. 
+ let leaf_layer = root_layer + .lower_layers + .get_mut(&TEST_LEAF.to_vec()) + .expect("TEST_LEAF lower layer") + .lower_layers + .get_mut(&b"ct".to_vec()) + .expect("ct lower layer"); + leaf_layer.merk_proof = ProofBytes::MMR(vec![0u8; 8]); + + let reencoded = reencode_envelope(decoded); + let err = GroveDb::verify_aggregate_count_query(&reencoded, &path_query, v) + .expect_err("non-Merk leaf bytes must be rejected"); + match err { + crate::Error::InvalidProof(_, msg) => { + assert!( + msg.contains("non-merk"), + "expected non-merk rejection, got: {msg}" + ); + } + other => panic!("expected InvalidProof, got {:?}", other), + } + } + + #[test] + fn v1_envelope_with_missing_lower_layer_is_rejected() { + // The verifier expects a `lower_layers` entry for each non-leaf + // path key. If the prover (or an attacker) drops one, verification + // must fail rather than silently descend through a stub. + use crate::operations::proof::{GroveDBProof, GroveDBProofV1}; + + let v = GroveVersion::latest(); + let (db, _root) = setup_15_key_provable_count_tree(v); + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()), + ); + let proof = db + .grove_db + .prove_query(&path_query, None, v) + .unwrap() + .expect("prove_query should succeed"); + + let mut decoded = decode_envelope(&proof); + let GroveDBProof::V1(GroveDBProofV1 { root_layer }) = &mut decoded else { + panic!("expected V1 envelope on latest GroveVersion"); + }; + let test_leaf_layer = root_layer + .lower_layers + .get_mut(&TEST_LEAF.to_vec()) + .expect("TEST_LEAF lower layer"); + // Drop the leaf layer's pointer entry. 
+ let removed = test_leaf_layer.lower_layers.remove(&b"ct".to_vec()); + assert!(removed.is_some(), "test setup: ct layer should exist"); + + let reencoded = reencode_envelope(decoded); + let err = GroveDb::verify_aggregate_count_query(&reencoded, &path_query, v) + .expect_err("missing lower_layer must be rejected"); + match err { + crate::Error::InvalidProof(_, msg) => { + assert!( + msg.contains("missing lower layer"), + "expected missing-lower-layer rejection, got: {msg}" + ); + } + other => panic!("expected InvalidProof, got {:?}", other), + } + } + + #[test] + fn v1_envelope_with_malformed_leaf_count_proof_is_rejected() { + // Replace the leaf merk proof bytes with a single Push(Hash(...)) + // op stream. Phase 1 of the count verifier rejects plain `Hash` as + // a non-allowlisted node type, so `verify_count_leaf` surfaces an + // `InvalidProof` error via its `.map_err(...)` arm rather than + // ever reaching the chain check. + use std::collections::LinkedList; + + use grovedb_merk::proofs::{encoding::encode_into, Node, Op}; + + use crate::operations::proof::{GroveDBProof, GroveDBProofV1, ProofBytes}; + + let v = GroveVersion::latest(); + let (db, _root) = setup_15_key_provable_count_tree(v); + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()), + ); + let proof = db + .grove_db + .prove_query(&path_query, None, v) + .unwrap() + .expect("prove_query should succeed"); + + let mut decoded = decode_envelope(&proof); + let GroveDBProof::V1(GroveDBProofV1 { root_layer }) = &mut decoded else { + panic!("expected V1 envelope"); + }; + let leaf_layer = root_layer + .lower_layers + .get_mut(&TEST_LEAF.to_vec()) + .expect("TEST_LEAF lower layer") + .lower_layers + .get_mut(&b"ct".to_vec()) + .expect("ct lower layer"); + + // Build a malformed (but parseable) merk proof: a single Push(Hash) + // that the count verifier's Phase 1 rejects. 
+ let mut ops: LinkedList = LinkedList::new(); + ops.push_back(Op::Push(Node::Hash([0u8; 32]))); + let mut bad_bytes = Vec::new(); + encode_into(ops.iter(), &mut bad_bytes); + leaf_layer.merk_proof = ProofBytes::Merk(bad_bytes); + + let reencoded = reencode_envelope(decoded); + let err = GroveDb::verify_aggregate_count_query(&reencoded, &path_query, v) + .expect_err("malformed leaf count proof must be rejected"); + match err { + crate::Error::InvalidProof(_, msg) => { + assert!( + msg.contains("aggregate-count leaf proof failed to verify"), + "expected leaf-verify failure message, got: {msg}" + ); + } + other => panic!("expected InvalidProof, got {:?}", other), + } + } + + #[test] + fn v1_envelope_with_corrupted_non_leaf_merk_bytes_is_rejected() { + // Mutate the non-leaf merk proof bytes (the layer that proves + // existence of the "ct" tree element under TEST_LEAF). The + // single-key proof verification at that layer should fail before + // we ever descend to the leaf count proof. + use crate::operations::proof::{GroveDBProof, GroveDBProofV1, ProofBytes}; + + let v = GroveVersion::latest(); + let (db, _root) = setup_15_key_provable_count_tree(v); + let path_query = PathQuery::new_aggregate_count_on_range( + vec![TEST_LEAF.to_vec(), b"ct".to_vec()], + QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()), + ); + let proof = db + .grove_db + .prove_query(&path_query, None, v) + .unwrap() + .expect("prove_query should succeed"); + + let mut decoded = decode_envelope(&proof); + let GroveDBProof::V1(GroveDBProofV1 { root_layer }) = &mut decoded else { + panic!("expected V1 envelope"); + }; + // Corrupt the TEST_LEAF non-leaf merk proof bytes by truncating to + // a 1-byte payload, which fails to decode as a proof op stream. 
+ let test_leaf_layer = root_layer + .lower_layers + .get_mut(&TEST_LEAF.to_vec()) + .expect("TEST_LEAF lower layer"); + match &mut test_leaf_layer.merk_proof { + ProofBytes::Merk(b) => { + *b = vec![0xff]; + } + other => panic!( + "expected Merk bytes at non-leaf, got discriminant {:?}", + std::mem::discriminant(other) + ), + } + + let reencoded = reencode_envelope(decoded); + let err = GroveDb::verify_aggregate_count_query(&reencoded, &path_query, v) + .expect_err("corrupted non-leaf merk bytes must be rejected"); + match err { + crate::Error::InvalidProof(_, _) => {} + other => panic!("expected InvalidProof, got {:?}", other), + } + } +} diff --git a/grovedb/src/tests/mod.rs b/grovedb/src/tests/mod.rs index 75f6db21f..1aded513f 100644 --- a/grovedb/src/tests/mod.rs +++ b/grovedb/src/tests/mod.rs @@ -6,6 +6,7 @@ mod query_tests; mod sum_tree_tests; +mod aggregate_count_query_tests; mod batch_coverage_tests; mod batch_delete_tree_tests; mod batch_rejection_tests; diff --git a/grovedb/src/tests/provable_count_sum_tree_tests.rs b/grovedb/src/tests/provable_count_sum_tree_tests.rs index e4cb6aff9..8bee9f4b9 100644 --- a/grovedb/src/tests/provable_count_sum_tree_tests.rs +++ b/grovedb/src/tests/provable_count_sum_tree_tests.rs @@ -80,6 +80,9 @@ mod tests { Node::KVRefValueHashCount(k, ..) => k.clone(), Node::KVHashCount(..) => vec![], Node::Hash(_) | Node::KVHash(_) => vec![], + // HashWithCount is keyless (collapsed subtree representation + // for AggregateCountOnRange proofs). + Node::HashWithCount(..) => vec![], }; results.push((key, count)); } diff --git a/merk/benches/branch_queries.rs b/merk/benches/branch_queries.rs index 69067f501..382a671fe 100644 --- a/merk/benches/branch_queries.rs +++ b/merk/benches/branch_queries.rs @@ -233,7 +233,7 @@ fn get_key_from_node(node: &Node) -> Option> { Node::KVRefValueHash(key, ..) => Some(key.clone()), Node::KVCount(key, ..) => Some(key.clone()), Node::KVRefValueHashCount(key, ..) 
=> Some(key.clone()), - Node::Hash(_) | Node::KVHash(_) | Node::KVHashCount(..) => None, + Node::Hash(_) | Node::KVHash(_) | Node::KVHashCount(..) | Node::HashWithCount(..) => None, } } diff --git a/merk/src/merk/chunks.rs b/merk/src/merk/chunks.rs index f74fb005c..6e383ce08 100644 --- a/merk/src/merk/chunks.rs +++ b/merk/src/merk/chunks.rs @@ -487,6 +487,9 @@ mod test { Node::KVCount(..) => counts.kv += 1, Node::KVHashCount(..) => counts.kv_hash += 1, Node::KVRefValueHashCount(..) => counts.kv_ref_value_hash += 1, + // HashWithCount is hash-equivalent to Hash for the verifier; + // count it under `hash` for the test counter. + Node::HashWithCount(..) => counts.hash += 1, }; }); diff --git a/merk/src/merk/prove.rs b/merk/src/merk/prove.rs index 79c668f18..151098cf8 100644 --- a/merk/src/merk/prove.rs +++ b/merk/src/merk/prove.rs @@ -139,6 +139,51 @@ where .map_ok(|(proof, _, status, ..)| (proof, status.limit)) }) } + + /// Generate a count-only proof for an `AggregateCountOnRange` query. + /// + /// `inner_range` is the `QueryItem` wrapped by `AggregateCountOnRange` + /// (the caller is expected to have already validated and stripped the + /// wrapper at the `Query` level via + /// `Query::validate_aggregate_count_on_range`). + /// + /// The merk's `tree_type` must be one of `ProvableCountTree` or + /// `ProvableCountSumTree` (regardless of whether the merk is empty). + /// Any other tree type is rejected with `Error::InvalidProofError` + /// before any walking happens. + /// + /// On a tree-type-valid but empty Merk this returns + /// `(empty proof, count = 0)` — an empty subtree is a valid input for a + /// count query and the answer is unambiguously zero. 
+ pub fn prove_aggregate_count_on_range( + &self, + inner_range: &QueryItem, + grove_version: &GroveVersion, + ) -> CostResult<(LinkedList, u64), Error> { + let tree_type = self.tree_type; + if !matches!( + tree_type, + crate::TreeType::ProvableCountTree | crate::TreeType::ProvableCountSumTree + ) { + return Err(Error::InvalidProofError(format!( + "AggregateCountOnRange is only valid against ProvableCountTree or \ + ProvableCountSumTree, got {:?}", + tree_type + ))) + .wrap_with_cost(Default::default()); + } + self.use_tree_mut(|maybe_tree| match maybe_tree { + None => Ok((LinkedList::new(), 0u64)).wrap_with_cost(Default::default()), + Some(tree) => { + let mut ref_walker = RefWalker::new(tree, self.source()); + ref_walker.create_aggregate_count_on_range_proof( + inner_range, + tree_type, + grove_version, + ) + } + }) + } } type Proof = (LinkedList, Option); diff --git a/merk/src/proofs/branch/mod.rs b/merk/src/proofs/branch/mod.rs index 7fa4e081c..3d8f27e36 100644 --- a/merk/src/proofs/branch/mod.rs +++ b/merk/src/proofs/branch/mod.rs @@ -120,7 +120,9 @@ impl TrunkQueryResult { | Node::KVRefValueHash(key, ..) | Node::KVCount(key, ..) | Node::KVRefValueHashCount(key, ..) => Some(key.clone()), - Node::Hash(_) | Node::KVHash(_) | Node::KVHashCount(..) => None, + Node::Hash(_) | Node::KVHash(_) | Node::KVHashCount(..) | Node::HashWithCount(..) => { + None + } } } @@ -383,7 +385,9 @@ impl BranchQueryResult { | Node::KVRefValueHash(key, ..) | Node::KVCount(key, ..) | Node::KVRefValueHashCount(key, ..) => Some(key.clone()), - Node::Hash(_) | Node::KVHash(_) | Node::KVHashCount(..) => None, + Node::Hash(_) | Node::KVHash(_) | Node::KVHashCount(..) | Node::HashWithCount(..) => { + None + } } } } diff --git a/merk/src/proofs/query/aggregate_count.rs b/merk/src/proofs/query/aggregate_count.rs new file mode 100644 index 000000000..8cd493986 --- /dev/null +++ b/merk/src/proofs/query/aggregate_count.rs @@ -0,0 +1,1591 @@ +//! 
Proof generation and verification for `AggregateCountOnRange` queries. +//! +//! This module implements the count-only proof shape described in the GroveDB +//! book chapter "Aggregate Count Queries". It is intentionally **separate** +//! from `create_proof_internal`: regular proofs always descend into a queried +//! subtree, but count proofs *stop* at fully-inside subtree roots and emit a +//! single `HashWithCount` op for the entire collapsed subtree. +//! +//! The proof targets a `ProvableCountTree` or `ProvableCountSumTree` (or +//! their `NonCounted*` wrapper variants — wrappers only affect whether the +//! tree contributes to its parent's count, not its own internal count +//! mechanics). On any other tree type the entry point returns +//! `Error::InvalidProofError`. + +use std::collections::LinkedList; + +use grovedb_costs::{cost_return_on_error, CostResult, CostsExt, OperationCost}; +use grovedb_version::version::GroveVersion; + +use crate::{ + proofs::{ + query::QueryItem, + tree::{execute_with_options, Tree as ProofTree}, + Decoder, Node, Op, + }, + tree::{kv::ValueDefinedCostType, AggregateData, Fetch, RefWalker}, + CryptoHash, Error, TreeType, +}; + +/// All-zero `CryptoHash`, used in `Node::HashWithCount` for missing children. +const NULL_HASH: CryptoHash = [0u8; 32]; + +/// How a subtree's possible-key window relates to the inner range we're +/// counting over. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum SubtreeClassification { + /// Every possible key in this subtree falls **outside** the range. + Disjoint, + /// Every possible key in this subtree falls **inside** the range. + Contained, + /// The subtree straddles a range boundary (or directly contains one). + Boundary, +} + +/// Classify a subtree relative to the inner range. +/// +/// `subtree_lo_excl` and `subtree_hi_excl` are the **exclusive** bounds on +/// what keys can appear under the subtree (derived from ancestors during the +/// walk; both `None` at the root). 
The range bounds come from the inner +/// `QueryItem`'s `lower_bound` / `upper_bound`. +/// +/// The comparisons treat `subtree_hi_excl` as exclusive (subtree keys are +/// strictly < `subtree_hi_excl`) and `subtree_lo_excl` as exclusive (subtree +/// keys are strictly > `subtree_lo_excl`). For the range bounds, the +/// inclusivity flag returned by `lower_bound`/`upper_bound` is **not** +/// load-bearing for the disjoint/contained tests below — see the inline +/// proofs. +fn classify_subtree( + subtree_lo_excl: Option<&[u8]>, + subtree_hi_excl: Option<&[u8]>, + range: &QueryItem, +) -> SubtreeClassification { + let (range_lo, _range_lo_excl) = range.lower_bound(); + let (range_hi, _range_hi_incl) = range.upper_bound(); + + // Disjoint-LEFT: subtree entirely below the range. + // + // Subtree keys are < subtree_hi_excl. If subtree_hi_excl <= range_lo, + // every subtree key < subtree_hi_excl <= range_lo is also < range_lo, + // so excluded regardless of whether range_lo is inclusive or exclusive. + if let (Some(s_hi), Some(r_lo)) = (subtree_hi_excl, range_lo) + && s_hi <= r_lo + { + return SubtreeClassification::Disjoint; + } + + // Disjoint-RIGHT: subtree entirely above the range. + // + // Subtree keys are > subtree_lo_excl. If subtree_lo_excl >= range_hi, + // every subtree key > subtree_lo_excl >= range_hi is also > range_hi, + // so excluded regardless of whether range_hi is inclusive or exclusive. + if let (Some(s_lo), Some(r_hi)) = (subtree_lo_excl, range_hi) + && s_lo >= r_hi + { + return SubtreeClassification::Disjoint; + } + + // Contained: subtree (s_lo, s_hi) ⊆ range. + // + // Lower side: every subtree key > s_lo. If s_lo >= r_lo, every subtree + // key > s_lo >= r_lo, so > r_lo, satisfying both inclusive and exclusive + // r_lo. If subtree has no lower bound (s_lo = -inf) but range does, the + // subtree could include arbitrarily small keys → not contained. 
+    let lower_contained = match range_lo {
+        None => true,
+        Some(r_lo) => match subtree_lo_excl {
+            Some(s_lo) => s_lo >= r_lo,
+            None => false,
+        },
+    };
+    // Upper side: every subtree key < s_hi. If s_hi <= r_hi, every subtree
+    // key < s_hi <= r_hi, so < r_hi, satisfying both inclusive and exclusive
+    // r_hi. (We forgo the slightly tighter "s_hi <= r_hi+1" optimization for
+    // inclusive r_hi because we don't have key arithmetic.)
+    let upper_contained = match range_hi {
+        None => true,
+        Some(r_hi) => match subtree_hi_excl {
+            Some(s_hi) => s_hi <= r_hi,
+            None => false,
+        },
+    };
+
+    if lower_contained && upper_contained {
+        SubtreeClassification::Contained
+    } else {
+        SubtreeClassification::Boundary
+    }
+}
+
+/// Returns true if `tree_type` is one of the two tree types that can host an
+/// `AggregateCountOnRange` proof: `ProvableCountTree` or
+/// `ProvableCountSumTree`. `NonCounted*` wrappers are not distinct
+/// `TreeType`s at this level — they resolve to one of these two inner tree
+/// types before this check runs (the wrapper only affects whether the tree
+/// counts toward its parent, not the tree's own type — see the module docs).
+fn is_provable_count_bearing(tree_type: TreeType) -> bool {
+    matches!(
+        tree_type,
+        TreeType::ProvableCountTree | TreeType::ProvableCountSumTree
+    )
+}
+
+/// Pull the count out of a `ProvableCount` / `ProvableCountAndSum` aggregate.
+/// Returns `Err(InvalidProofError)` for any other variant — the entry point
+/// has already gated `tree_type`, so reaching the error means the tree's
+/// in-memory state disagrees with its declared type.
+fn provable_count_from_aggregate(data: AggregateData) -> Result<u64, Error> {
+    match data {
+        AggregateData::ProvableCount(c) => Ok(c),
+        AggregateData::ProvableCountAndSum(c, _) => Ok(c),
+        other => Err(Error::InvalidProofError(format!(
+            "expected ProvableCount aggregate data on a provable count tree, got {:?}",
+            other
+        ))),
+    }
+}
+
+impl<S> RefWalker<'_, S>
+where
+    S: Fetch + Sized + Clone,
+{
+    /// Generate a count-only proof for an `AggregateCountOnRange` query.
+    ///
+    /// `inner_range` is the `QueryItem` wrapped by `AggregateCountOnRange`
+    /// (already stripped at the caller). 
`tree_type` must be one of + /// `ProvableCountTree` or `ProvableCountSumTree`; any other tree type is + /// rejected with `Error::InvalidProofError` before any walking happens. + /// + /// The returned tuple is `(proof_ops, count)`: + /// - `proof_ops` is the linear stream the verifier will replay to + /// reconstruct the tree's root hash. + /// - `count` is the prover-side computed count (the verifier independently + /// recomputes it from the proof and compares against the expected root + /// hash; this value is returned as a convenience, not as ground truth). + pub fn create_aggregate_count_on_range_proof( + &mut self, + inner_range: &QueryItem, + tree_type: TreeType, + grove_version: &GroveVersion, + ) -> CostResult<(LinkedList, u64), Error> { + if !is_provable_count_bearing(tree_type) { + return Err(Error::InvalidProofError(format!( + "AggregateCountOnRange is only valid against ProvableCountTree or \ + ProvableCountSumTree, got {:?}", + tree_type + ))) + .wrap_with_cost(OperationCost::default()); + } + + let mut cost = OperationCost::default(); + let mut ops = LinkedList::new(); + let count = cost_return_on_error!( + &mut cost, + emit_count_proof(self, inner_range, None, None, &mut ops, grove_version) + ); + Ok((ops, count)).wrap_with_cost(cost) + } +} + +/// Recursive proof emitter. Always called on a non-empty subtree. +/// +/// At entry, `subtree_lo_excl` / `subtree_hi_excl` are the inherited +/// exclusive key bounds for the subtree this walker points at (both `None` +/// at the root call). +fn emit_count_proof( + walker: &mut RefWalker<'_, S>, + range: &QueryItem, + subtree_lo_excl: Option<&[u8]>, + subtree_hi_excl: Option<&[u8]>, + ops: &mut LinkedList, + grove_version: &GroveVersion, +) -> CostResult +where + S: Fetch + Sized + Clone, +{ + let mut cost = OperationCost::default(); + + // Step 1: classify the current subtree against the inner range. 
+ let class = classify_subtree(subtree_lo_excl, subtree_hi_excl, range); + + if matches!( + class, + SubtreeClassification::Disjoint | SubtreeClassification::Contained + ) { + // Whole subtree is either entirely outside or entirely inside the + // range. Either way we emit a single self-verifying + // `HashWithCount(kv_hash, left_child_hash, right_child_hash, count)` + // op for the subtree's root. + // + // Why HashWithCount even for Disjoint subtrees (rather than the + // smaller `Hash(node_hash)` that an in-range count would never + // need)? Because the parent's `own_count` is computed by the + // verifier as `parent_aggregate − left_struct − right_struct` (see + // `verify_count_shape`), so the *structural* count of every child + // — including disjoint outside subtrees — has to be + // cryptographically bound to the parent's hash chain. The only + // node type that carries a hash-bound count is `HashWithCount` + // (its four committed fields recompute `node_hash_with_count` and + // would diverge under any count tampering). Plain `Hash(node_hash)` + // carries no count, so a malicious prover could lie about the + // structural count and skew the parent's `own_count` + // derivation — leading to silent over/under-counts at boundary + // ancestors. 
+ let aggregate = match walker.tree().aggregate_data() { + Ok(a) => a, + Err(e) => { + return Err(Error::InvalidProofError(format!("aggregate_data: {}", e))) + .wrap_with_cost(cost); + } + }; + let subtree_count = match provable_count_from_aggregate(aggregate) { + Ok(c) => c, + Err(e) => return Err(e).wrap_with_cost(cost), + }; + let kv_hash = *walker.tree().kv_hash(); + let left_child_hash = walker + .tree() + .link(true) + .map(|l| *l.hash()) + .unwrap_or(NULL_HASH); + let right_child_hash = walker + .tree() + .link(false) + .map(|l| *l.hash()) + .unwrap_or(NULL_HASH); + ops.push_back(Op::Push(Node::HashWithCount( + kv_hash, + left_child_hash, + right_child_hash, + subtree_count, + ))); + // For the prover-side in-range total: Contained contributes its + // entire subtree count (which already excludes NonCounted entries + // because their stored aggregate is 0); Disjoint contributes 0. + let in_range_contribution = match class { + SubtreeClassification::Contained => subtree_count, + SubtreeClassification::Disjoint => 0, + SubtreeClassification::Boundary => unreachable!(), + }; + return Ok(in_range_contribution).wrap_with_cost(cost); + } + // class == Boundary — fall through to descent + KVDigestCount emission. + + // Step 2: snapshot what we need from the current node before walking. + // walk(true/false) takes &mut self.tree, so we must drop any existing + // borrows on walker.tree() before calling it. 
+ let node_key: Vec = walker.tree().key().to_vec(); + let node_value_hash: CryptoHash = *walker.tree().value_hash(); + let node_count: u64 = match walker + .tree() + .aggregate_data() + .map_err(|e| Error::InvalidProofError(format!("aggregate_data: {}", e))) + { + Ok(data) => match provable_count_from_aggregate(data) { + Ok(c) => c, + Err(e) => return Err(e).wrap_with_cost(cost), + }, + Err(e) => return Err(e).wrap_with_cost(cost), + }; + + // Snapshot each child link's structural aggregate count from the link + // itself (avoids loading the child for this lookup). The verifier needs + // these to compute `own_count = node_count − left_struct − right_struct` + // at this boundary node. + let left_link_aggregate: u64 = walker + .tree() + .link(true) + .map(|l| l.aggregate_data().as_count_u64()) + .unwrap_or(0); + let right_link_aggregate: u64 = walker + .tree() + .link(false) + .map(|l| l.aggregate_data().as_count_u64()) + .unwrap_or(0); + let left_link_present = walker.tree().link(true).is_some(); + let right_link_present = walker.tree().link(false).is_some(); + + let mut total: u64 = 0; + + // Step 3: handle the LEFT child. Both Disjoint and Contained require a + // one-level walk so the recursive Disjoint/Contained arm can emit a + // self-verifying `HashWithCount` (plain `Hash` is no longer used here + // — see the Disjoint branch comment above). 
+ let left_emitted = if left_link_present { + let left_lo = subtree_lo_excl; + let left_hi: Option<&[u8]> = Some(node_key.as_slice()); + let walked = cost_return_on_error!( + &mut cost, + walker.walk( + true, + None::<&fn(&[u8], &GroveVersion) -> Option>, + grove_version, + ) + ); + let mut left_walker = match walked { + Some(lw) => lw, + None => { + return Err(Error::CorruptedState( + "tree.link(true) was Some but walk(true) returned None", + )) + .wrap_with_cost(cost) + } + }; + let n = cost_return_on_error!( + &mut cost, + emit_count_proof( + &mut left_walker, + range, + left_lo, + left_hi, + ops, + grove_version, + ) + ); + total = total.saturating_add(n); + true + } else { + false + }; + + // Step 4: emit the current node as a boundary KVDigestCount + attach left + // as its left child. The node's own contribution to the in-range count + // is `own_count` (0 for `NonCounted`-wrapped, 1 for normal), derived as + // `node_count − left_struct − right_struct`. This is what makes + // NonCounted entries fall out of the count: a NonCounted leaf has + // node_count = 0 and no children, so own_count = 0. + ops.push_back(Op::Push(Node::KVDigestCount( + node_key.clone(), + node_value_hash, + node_count, + ))); + if left_emitted { + ops.push_back(Op::Parent); + } + if range.contains(&node_key) { + let own_count = node_count + .saturating_sub(left_link_aggregate) + .saturating_sub(right_link_aggregate); + total = total.saturating_add(own_count); + } + + // Step 5: handle the RIGHT child. Same descent pattern as LEFT. 
+ let right_emitted = if right_link_present { + let right_lo: Option<&[u8]> = Some(node_key.as_slice()); + let right_hi = subtree_hi_excl; + let walked = cost_return_on_error!( + &mut cost, + walker.walk( + false, + None::<&fn(&[u8], &GroveVersion) -> Option>, + grove_version, + ) + ); + let mut right_walker = match walked { + Some(rw) => rw, + None => { + return Err(Error::CorruptedState( + "tree.link(false) was Some but walk(false) returned None", + )) + .wrap_with_cost(cost) + } + }; + let n = cost_return_on_error!( + &mut cost, + emit_count_proof( + &mut right_walker, + range, + right_lo, + right_hi, + ops, + grove_version, + ) + ); + total = total.saturating_add(n); + true + } else { + false + }; + + if right_emitted { + ops.push_back(Op::Child); + } + + Ok(total).wrap_with_cost(cost) +} + +/// Verify a count-only proof for an `AggregateCountOnRange` query. +/// +/// `proof_bytes` is the encoded `Vec` produced by +/// [`Merk::prove_aggregate_count_on_range`]; `inner_range` is the same +/// `QueryItem` the prover counted over (caller-supplied — typically extracted +/// from the verifier's `PathQuery`). +/// +/// On success returns `(merk_root_hash, count)`: +/// - `merk_root_hash` is the root hash of the reconstructed merk; the +/// caller must compare it against the expected root hash to complete +/// verification. +/// - `count` is the number of keys in the inner range, computed by replaying +/// the prover's classification walk against the reconstructed proof tree. +/// +/// **Two-phase verification.** Allowlisting node types alone is unsound: +/// a malicious prover can substitute `Hash` for an in-range subtree (to +/// undercount), attach extra `KVDigestCount` children below a keyless +/// `Hash` / `HashWithCount` (to overcount, since their hash recomputation +/// ignores attached children and the root hash would still match), or send +/// a single `Push(Hash(expected_root))` for a non-empty tree (to receive a +/// count of 0 with the trusted root). 
To prevent all three, this function:
+///
+/// 1. Decodes the proof into a `ProofTree` via `execute_with_options` with
+///    the AVL balance check disabled (count proofs intentionally collapse
+///    one side to height 1) and **does not** count anything in the
+///    `visit_node` callback.
+/// 2. Walks the reconstructed tree with the same inherited exclusive
+///    subtree-key bounds the prover used (`(None, None)` at the root).
+///    At each position it calls `classify_subtree(bounds, inner_range)` and
+///    requires the proof-tree node type to match the classification:
+///    - `Disjoint` → must be a leaf `Hash(_)`. Contributes 0.
+///    - `Contained` → must be a leaf `HashWithCount(...)`. Contributes its
+///      count.
+///    - `Boundary` → must be `KVDigestCount(key, ...)` with `key` strictly
+///      inside `bounds`. Recurse left with `(lo, key)` and right with
+///      `(key, hi)`; add 1 if `inner_range.contains(key)`.
+///
+/// Counts are summed with `checked_add`; an overflow is treated as proof
+/// corruption (`u64::MAX` keys is not a real merk shape). The caller is
+/// still responsible for verifying the returned `merk_root_hash` against
+/// their trusted root.
+///
+/// **Empty merk case.** An empty merk is represented by an empty proof byte
+/// stream and yields `(NULL_HASH, 0)`. Callers chaining this in a
+/// multi-layer proof should recognize that shape explicitly.
+pub fn verify_aggregate_count_on_range_proof(
+    proof_bytes: &[u8],
+    inner_range: &QueryItem,
+) -> CostResult<(CryptoHash, u64), Error> {
+    if proof_bytes.is_empty() {
+        // Empty merk → empty proof → count = 0, hash = NULL_HASH. This
+        // matches the prover-side behavior of returning an empty op stream
+        // for an empty subtree.
+        return Ok((NULL_HASH, 0u64)).wrap_with_cost(OperationCost::default());
+    }
+
+    let mut cost = OperationCost::default();
+    let decoder = Decoder::new(proof_bytes);
+
+    // Phase 1: reconstruct the proof tree. The visit_node closure only
+    // performs a coarse allowlist; the per-position type/shape check happens
+    // in Phase 2 below. We still reject blatantly wrong node types here so
+    // execute() bails early on garbage input.
+    //
+    // NOTE(review): the generic parameters on `CostResult` below appear to
+    // have been eaten by an HTML-entity-stripping step when this patch was
+    // exported (same corruption as `Vec`/`LinkedList` elsewhere in the
+    // hunk) — presumably `CostResult<ProofTree, Error>`. Confirm against
+    // the original commit before applying this diff.
+    let tree_result: CostResult =
+        execute_with_options(decoder, false, false, |node| match node {
+            // The count proof emits only `HashWithCount` (for collapsed
+            // Disjoint or Contained subtrees) and `KVDigestCount` (for
+            // Boundary nodes). Plain `Hash(_)` is no longer used here
+            // because the structural count it would otherwise stand in
+            // for is needed by the verifier's `own_count` derivation and
+            // would not be hash-bound.
+            Node::HashWithCount(_, _, _, _) | Node::KVDigestCount(_, _, _) => Ok(()),
+            other => Err(Error::InvalidProofError(format!(
+                "unexpected node type in aggregate count proof: {}",
+                other
+            ))),
+        });
+    let tree = cost_return_on_error!(&mut cost, tree_result);
+
+    // Phase 2: shape-check + count by replaying the prover's classification
+    // walk. This binds each leaf node's type to the (subtree_bounds × range)
+    // classification, so the only valid count is the one a faithful prover
+    // would have produced for this exact range.
+    let (count, _structural) = match verify_count_shape(&tree, inner_range, None, None) {
+        Ok(pair) => pair,
+        Err(e) => return Err(e).wrap_with_cost(cost),
+    };
+
+    // The root hash is recomputed from the reconstructed tree; comparing it
+    // against a trusted root is deliberately left to the caller (see doc
+    // comment above).
+    let root_hash = tree.hash().unwrap_add_cost(&mut cost);
+    Ok((root_hash, count)).wrap_with_cost(cost)
+}
+
+/// Recursive shape-walk over the reconstructed proof tree. Returns the
+/// pair `(in_range_count, structural_count)`:
+///
+/// - `in_range_count` — number of keys in the subtree that fall inside the
+///   inner range AND have a non-zero own-count (i.e. are not
+///   `NonCounted`-wrapped). This is what bubbles up to the verifier's
+///   return value.
+/// - `structural_count` — the merk-recorded aggregate count of this subtree
+///   (counting normal entries as 1 and `NonCounted` entries as 0).
The
+/// parent uses it to compute its own `own_count` as
+/// `parent_node_count − left_struct − right_struct` (since
+/// `parent_node_count = own + left_struct + right_struct`).
+///
+/// The structural count of every child is **cryptographically bound** to
+/// the parent's hash chain because every count-bearing node in a count
+/// proof (`KVDigestCount`, `HashWithCount`) has its count fed into
+/// `node_hash_with_count` for hash recomputation. Plain `Hash(_)` would
+/// not carry a bound count and is therefore not allowed in count proofs;
+/// see the prover-side comment in `emit_count_proof` for the full
+/// justification.
+///
+/// At each node:
+///
+/// - Compute the expected classification from the inherited subtree bounds
+///   and the inner range.
+/// - Require the node's type to match the classification (and reject any
+///   children attached under a leaf-shape classification — a malicious
+///   prover could otherwise hide counted children under a `HashWithCount`
+///   leaf, since its hash recomputation ignores reconstructed children).
+/// - Recurse with tightened bounds at `Boundary` nodes, summing with
+///   `checked_add` and computing `own_count` via `checked_sub`.
+fn verify_count_shape(
+    tree: &ProofTree,
+    range: &QueryItem,
+    lo: Option<&[u8]>,
+    hi: Option<&[u8]>,
+) -> Result<(u64, u64), Error> {
+    let class = classify_subtree(lo, hi, range);
+    match class {
+        SubtreeClassification::Disjoint => match &tree.node {
+            Node::HashWithCount(_, _, _, count) => {
+                if tree.left.is_some() || tree.right.is_some() {
+                    return Err(Error::InvalidProofError(
+                        "aggregate-count proof: HashWithCount node at a Disjoint position \
+                         must be a leaf"
+                            .to_string(),
+                    ));
+                }
+                // Disjoint subtree contributes 0 to the in-range count but
+                // its full structural count to the parent's `own_count`
+                // computation.
+                Ok((0, *count))
+            }
+            other => Err(Error::InvalidProofError(format!(
+                "aggregate-count proof: expected HashWithCount at Disjoint position, got {}",
+                other
+            ))),
+        },
+        SubtreeClassification::Contained => match &tree.node {
+            Node::HashWithCount(_, _, _, count) => {
+                if tree.left.is_some() || tree.right.is_some() {
+                    return Err(Error::InvalidProofError(
+                        "aggregate-count proof: HashWithCount node at a Contained position \
+                         must be a leaf"
+                            .to_string(),
+                    ));
+                }
+                // Contained subtree's structural count (which excludes
+                // NonCounted entries because their stored aggregate is 0)
+                // is exactly its in-range count.
+                Ok((*count, *count))
+            }
+            other => Err(Error::InvalidProofError(format!(
+                "aggregate-count proof: expected HashWithCount at Contained position, got {}",
+                other
+            ))),
+        },
+        SubtreeClassification::Boundary => match &tree.node {
+            Node::KVDigestCount(key, _, aggregate) => {
+                if !key_strictly_inside(key.as_slice(), lo, hi) {
+                    return Err(Error::InvalidProofError(format!(
+                        "aggregate-count proof: KVDigestCount key {} falls outside its \
+                         inherited subtree bounds (lo={:?}, hi={:?})",
+                        hex::encode(key),
+                        lo.map(hex::encode),
+                        hi.map(hex::encode),
+                    )));
+                }
+                let key_slice = key.as_slice();
+                // Recurse with tightened exclusive bounds: the left child
+                // covers (lo, key), the right child covers (key, hi). A
+                // missing child contributes (0, 0) to both sums.
+                let (left_in, left_struct) = match &tree.left {
+                    Some(child) => verify_count_shape(&child.tree, range, lo, Some(key_slice))?,
+                    None => (0, 0),
+                };
+                let (right_in, right_struct) = match &tree.right {
+                    Some(child) => verify_count_shape(&child.tree, range, Some(key_slice), hi)?,
+                    None => (0, 0),
+                };
+                // own_count = aggregate − left_struct − right_struct.
+                // Saturating sub here would silently mask a malformed
+                // proof (children claiming more keys than the parent's
+                // aggregate), so use checked_sub and reject.
+                let own_count = aggregate
+                    .checked_sub(left_struct)
+                    .and_then(|s| s.checked_sub(right_struct))
+                    .ok_or_else(|| {
+                        Error::InvalidProofError(format!(
+                            "aggregate-count proof: child structural counts ({} + {}) exceed \
+                             parent's aggregate count ({}) at key {}",
+                            left_struct,
+                            right_struct,
+                            aggregate,
+                            hex::encode(key)
+                        ))
+                    })?;
+                let self_contribution = if range.contains(key_slice) {
+                    own_count
+                } else {
+                    0
+                };
+                let in_range = left_in
+                    .checked_add(right_in)
+                    .and_then(|s| s.checked_add(self_contribution))
+                    .ok_or_else(|| {
+                        Error::InvalidProofError(
+                            "aggregate-count proof: in-range count overflowed u64".to_string(),
+                        )
+                    })?;
+                Ok((in_range, *aggregate))
+            }
+            other => Err(Error::InvalidProofError(format!(
+                "aggregate-count proof: expected KVDigestCount at Boundary position, got {}",
+                other
+            ))),
+        },
+    }
+}
+
+/// Returns true when `key` lies strictly between the exclusive bounds
+/// `(lo, hi)`, where `None` represents `-inf` / `+inf`. Used to validate that
+/// a `Boundary` `KVDigestCount` carries a key consistent with its inherited
+/// subtree window.
+fn key_strictly_inside(key: &[u8], lo: Option<&[u8]>, hi: Option<&[u8]>) -> bool {
+    // `Option::is_none_or` requires Rust 1.82+; an absent bound (None)
+    // never constrains the key. Byte-slice `>`/`<` is lexicographic.
+    let lo_ok = lo.is_none_or(|l| key > l);
+    let hi_ok = hi.is_none_or(|h| key < h);
+    lo_ok && hi_ok
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Helpers building each `QueryItem` range shape used by the
+    // classification unit tests below.
+
+    /// Inclusive range `[lo, hi]`.
+    fn range_inclusive(lo: &[u8], hi: &[u8]) -> QueryItem {
+        QueryItem::RangeInclusive(lo.to_vec()..=hi.to_vec())
+    }
+
+    /// The unbounded full range (matches every key).
+    fn range_full() -> QueryItem {
+        QueryItem::RangeFull(std::ops::RangeFull)
+    }
+
+    /// Half-open `[lo, +inf)` — inclusive lower bound.
+    fn range_from(lo: &[u8]) -> QueryItem {
+        QueryItem::RangeFrom(lo.to_vec()..)
+    }
+
+    /// Half-open `(lo, +inf)` — exclusive lower bound.
+    fn range_after(lo: &[u8]) -> QueryItem {
+        QueryItem::RangeAfter(lo.to_vec()..)
+    }
+
+    #[test]
+    fn classify_disjoint_below() {
+        let r = range_inclusive(b"d", b"f");
+        // subtree (None, b"c") — keys < "c", entirely below ["d", "f"].
+        assert_eq!(
+            classify_subtree(None, Some(b"c"), &r),
+            SubtreeClassification::Disjoint,
+        );
+    }
+
+    // Convention reminder for the bounds below: `classify_subtree(lo, hi, r)`
+    // takes EXCLUSIVE subtree bounds, so `(None, Some(b"d"))` means
+    // "every key strictly below d".
+
+    #[test]
+    fn classify_disjoint_above() {
+        let r = range_inclusive(b"d", b"f");
+        // subtree (b"g", None) — keys > "g", entirely above ["d", "f"].
+        assert_eq!(
+            classify_subtree(Some(b"g"), None, &r),
+            SubtreeClassification::Disjoint,
+        );
+    }
+
+    #[test]
+    fn classify_disjoint_at_lower_boundary_inclusive() {
+        let r = range_inclusive(b"d", b"f");
+        // subtree (None, b"d") — keys < "d", just below the inclusive bound.
+        assert_eq!(
+            classify_subtree(None, Some(b"d"), &r),
+            SubtreeClassification::Disjoint,
+        );
+    }
+
+    #[test]
+    fn classify_disjoint_at_upper_boundary_inclusive() {
+        let r = range_inclusive(b"d", b"f");
+        // subtree (b"f", None) — keys > "f", just above the inclusive bound.
+        assert_eq!(
+            classify_subtree(Some(b"f"), None, &r),
+            SubtreeClassification::Disjoint,
+        );
+    }
+
+    #[test]
+    fn classify_contained_simple() {
+        let r = range_inclusive(b"a", b"z");
+        // subtree (b"d", b"f") — keys in ("d", "f"), all in ["a", "z"].
+        assert_eq!(
+            classify_subtree(Some(b"d"), Some(b"f"), &r),
+            SubtreeClassification::Contained,
+        );
+    }
+
+    #[test]
+    fn classify_contained_full_range_full_subtree() {
+        let r = range_full();
+        // The full range matches everything — even an unbounded subtree is
+        // contained.
+        assert_eq!(
+            classify_subtree(None, None, &r),
+            SubtreeClassification::Contained,
+        );
+    }
+
+    #[test]
+    fn classify_boundary_overlapping_lower() {
+        let r = range_inclusive(b"d", b"f");
+        // subtree (b"c", b"e") — keys in ("c", "e"), straddles the lower bound.
+        assert_eq!(
+            classify_subtree(Some(b"c"), Some(b"e"), &r),
+            SubtreeClassification::Boundary,
+        );
+    }
+
+    #[test]
+    fn classify_boundary_overlapping_upper() {
+        let r = range_inclusive(b"d", b"f");
+        // subtree (b"e", b"g") — keys in ("e", "g"), straddles the upper bound.
+        assert_eq!(
+            classify_subtree(Some(b"e"), Some(b"g"), &r),
+            SubtreeClassification::Boundary,
+        );
+    }
+
+    #[test]
+    fn classify_boundary_unbounded_below_with_bounded_range() {
+        let r = range_from(b"d");
+        // subtree (None, b"e") — could include keys < "d", so boundary.
+        assert_eq!(
+            classify_subtree(None, Some(b"e"), &r),
+            SubtreeClassification::Boundary,
+        );
+    }
+
+    #[test]
+    fn classify_contained_range_after_exclusive() {
+        let r = range_after(b"b");
+        // RangeAfter(b"b") = (b, +inf). subtree (b"b", b"e") — keys > "b" and
+        // < "e", all in (b, +inf). Contained.
+        assert_eq!(
+            classify_subtree(Some(b"b"), Some(b"e"), &r),
+            SubtreeClassification::Contained,
+        );
+    }
+
+    // ---------- end-to-end integration tests on a real merk ----------
+    //
+    // These tests build a small ProvableCountTree, generate count proofs
+    // through the merk-level API, then verify them with the count verifier.
+    // They cover the four documented categories: open-range (lower-only and
+    // upper-only) and closed-range (inclusive and after-to-inclusive). Empty
+    // tree and single-bound edge cases are also exercised.
+
+    use grovedb_costs::CostsExt as _;
+    use grovedb_version::version::GroveVersion;
+
+    use crate::{
+        proofs::{encode_into, Op as ProofOp},
+        test_utils::TempMerk,
+        tree::{Op, TreeFeatureType::ProvableCountedMerkNode},
+        Merk, TreeType,
+    };
+
+    /// Build a fresh `ProvableCountTree` populated with single-byte keys
+    /// "a".."o" (15 keys) — same shape as the running example in the book
+    /// chapter's "Closed ranges" section. Returns the merk and its current
+    /// root hash.
+ fn make_15_key_provable_count_tree(grove_version: &GroveVersion) -> (TempMerk, [u8; 32]) { + let mut merk = TempMerk::new_with_tree_type(grove_version, TreeType::ProvableCountTree); + let keys: Vec> = (b'a'..=b'o').map(|c| vec![c]).collect(); + let entries: Vec<(Vec, Op)> = keys + .iter() + .enumerate() + .map(|(i, k)| { + ( + k.clone(), + Op::Put(vec![i as u8], ProvableCountedMerkNode(1)), + ) + }) + .collect(); + merk.apply::<_, Vec<_>>(&entries, &[], None, grove_version) + .unwrap() + .expect("apply should succeed"); + merk.commit(grove_version); + let root_hash = merk.root_hash().unwrap(); + (merk, root_hash) + } + + /// Encode a `LinkedList` into the wire format that the verifier + /// consumes. + fn encode_proof(ops: &LinkedList) -> Vec { + let mut bytes = Vec::with_capacity(128); + encode_into(ops.iter(), &mut bytes); + bytes + } + + /// Round-trip helper: prove the inner range, encode the proof, verify it, + /// assert the recovered root hash matches and the recovered count matches + /// `expected_count`. + fn round_trip( + merk: &Merk>, + expected_root: [u8; 32], + inner_range: QueryItem, + expected_count: u64, + grove_version: &GroveVersion, + ) { + let (ops, prover_count) = merk + .prove_aggregate_count_on_range(&inner_range, grove_version) + .unwrap() + .expect("prove should succeed"); + assert_eq!( + prover_count, expected_count, + "prover count mismatch for range {:?}", + inner_range + ); + let bytes = encode_proof(&ops); + let (root, verifier_count) = verify_aggregate_count_on_range_proof(&bytes, &inner_range) + .unwrap() + .expect("verify should succeed"); + assert_eq!( + root, expected_root, + "verifier reconstructed wrong root for range {:?}", + inner_range + ); + assert_eq!( + verifier_count, expected_count, + "verifier count mismatch for range {:?}", + inner_range + ); + } + + #[test] + fn integration_open_range_from() { + let v = GroveVersion::latest(); + let (merk, root) = make_15_key_provable_count_tree(v); + // RangeFrom("c"..) 
→ keys c..o (13 keys). + round_trip(&merk, root, QueryItem::RangeFrom(b"c".to_vec()..), 13, v); + } + + #[test] + fn integration_open_range_after() { + let v = GroveVersion::latest(); + let (merk, root) = make_15_key_provable_count_tree(v); + // RangeAfter(("b", ..)) → keys c..o (13 keys), same set as RangeFrom("c"..) + // but proof shape differs — the boundary lands on "b" exclusive. + round_trip(&merk, root, QueryItem::RangeAfter(b"b".to_vec()..), 13, v); + } + + #[test] + fn integration_open_range_to() { + let v = GroveVersion::latest(); + let (merk, root) = make_15_key_provable_count_tree(v); + // RangeTo(..b"e") → keys a..d (4 keys, exclusive upper). + round_trip(&merk, root, QueryItem::RangeTo(..b"e".to_vec()), 4, v); + } + + #[test] + fn integration_open_range_to_inclusive() { + let v = GroveVersion::latest(); + let (merk, root) = make_15_key_provable_count_tree(v); + // RangeToInclusive(..=b"e") → keys a..e (5 keys, inclusive upper). + round_trip( + &merk, + root, + QueryItem::RangeToInclusive(..=b"e".to_vec()), + 5, + v, + ); + } + + #[test] + fn integration_closed_range_inclusive() { + let v = GroveVersion::latest(); + let (merk, root) = make_15_key_provable_count_tree(v); + // RangeInclusive("c"..="l") → 10 keys. + round_trip( + &merk, + root, + QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()), + 10, + v, + ); + } + + #[test] + fn integration_closed_range_exclusive() { + let v = GroveVersion::latest(); + let (merk, root) = make_15_key_provable_count_tree(v); + // Range("c".."l") → c..k (9 keys, exclusive upper). + round_trip( + &merk, + root, + QueryItem::Range(b"c".to_vec()..b"l".to_vec()), + 9, + v, + ); + } + + #[test] + fn integration_closed_range_after_to_inclusive() { + let v = GroveVersion::latest(); + let (merk, root) = make_15_key_provable_count_tree(v); + // RangeAfterToInclusive(("c", "l")) → keys d..l (9 keys: d..=l excluding c). 
+ round_trip( + &merk, + root, + QueryItem::RangeAfterToInclusive(b"c".to_vec()..=b"l".to_vec()), + 9, + v, + ); + } + + #[test] + fn integration_closed_range_after_to_exclusive() { + let v = GroveVersion::latest(); + let (merk, root) = make_15_key_provable_count_tree(v); + // RangeAfterTo(("c", "l")) → keys d..l (8 keys, both exclusive). + round_trip( + &merk, + root, + QueryItem::RangeAfterTo(b"c".to_vec()..b"l".to_vec()), + 8, + v, + ); + } + + #[test] + fn integration_range_below_all_keys() { + let v = GroveVersion::latest(); + let (merk, root) = make_15_key_provable_count_tree(v); + // Entire range below the smallest key — should produce count = 0 + // and a Disjoint proof at the root level. + round_trip( + &merk, + root, + QueryItem::RangeInclusive(vec![0x00]..=vec![0x10]), + 0, + v, + ); + } + + #[test] + fn integration_range_above_all_keys() { + let v = GroveVersion::latest(); + let (merk, root) = make_15_key_provable_count_tree(v); + // Entire range above the largest key. + round_trip( + &merk, + root, + QueryItem::RangeInclusive(b"z".to_vec()..=vec![0xff]), + 0, + v, + ); + } + + #[test] + fn integration_empty_merk() { + let v = GroveVersion::latest(); + let merk = TempMerk::new_with_tree_type(v, TreeType::ProvableCountTree); + let (ops, prover_count) = merk + .prove_aggregate_count_on_range(&QueryItem::Range(b"a".to_vec()..b"z".to_vec()), v) + .unwrap() + .expect("prove on empty merk should succeed"); + assert_eq!(prover_count, 0); + // Empty proof means the verifier returns NULL_HASH and count = 0. 
+ let bytes = encode_proof(&ops); + let (root, verifier_count) = verify_aggregate_count_on_range_proof( + &bytes, + &QueryItem::Range(b"a".to_vec()..b"z".to_vec()), + ) + .unwrap() + .expect("verify on empty merk should succeed"); + assert_eq!(root, NULL_HASH); + assert_eq!(verifier_count, 0); + } + + #[test] + fn integration_rejected_on_normal_tree() { + let v = GroveVersion::latest(); + let merk = TempMerk::new(v); // NormalTree + let err = merk + .prove_aggregate_count_on_range(&QueryItem::Range(b"a".to_vec()..b"z".to_vec()), v) + .unwrap(); + assert!( + err.is_err(), + "expected an InvalidProofError on NormalTree, got Ok({:?})", + err.ok().map(|(_, c)| c) + ); + } + + #[test] + fn integration_count_forgery_is_rejected() { + // Demonstrates the cryptographic binding: tamper with the count in a + // HashWithCount op and the verifier's root-hash recomputation must + // diverge from the expected root. + let v = GroveVersion::latest(); + let (merk, expected_root) = make_15_key_provable_count_tree(v); + let inner_range = QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()); + let (mut ops, _prover_count) = merk + .prove_aggregate_count_on_range(&inner_range, v) + .unwrap() + .expect("prove should succeed"); + + // Forge: bump the count on the first HashWithCount op we see. 
+ let mut tampered = false; + for op in ops.iter_mut() { + if let ProofOp::Push(Node::HashWithCount(_, _, _, count)) + | ProofOp::PushInverted(Node::HashWithCount(_, _, _, count)) = op + { + *count = count.saturating_add(1); + tampered = true; + break; + } + } + assert!( + tampered, + "test setup: expected at least one HashWithCount op" + ); + + let bytes = encode_proof(&ops); + let (root, _count) = verify_aggregate_count_on_range_proof(&bytes, &inner_range) + .unwrap() + .expect("verify should still complete (root mismatch is the caller's job)"); + assert_ne!( + root, expected_root, + "tampered count must produce a different reconstructed root hash" + ); + } + + // ---------- attack tests for the shape-walk verifier ---------- + // + // These three tests exercise attacks the old allowlist-only verifier let + // through. With the shape walk in `verify_count_shape`, each one is + // rejected before the caller's root-hash check. + + /// A malicious prover sends a single `Push(Hash(expected_root))` for a + /// non-empty tree. Without the shape check this would return + /// `(expected_root, 0)` for any range. The shape check classifies the + /// root with `(None, None)` against a bounded inner range as `Boundary`, + /// expects `KVDigestCount`, and rejects. + #[test] + fn shape_walk_rejects_single_hash_undercount() { + let v = GroveVersion::latest(); + let (merk, expected_root) = make_15_key_provable_count_tree(v); + let inner_range = QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()); + + // Forged proof: a single Hash op carrying the genuine root hash. 
+ let mut forged: LinkedList = LinkedList::new(); + forged.push_back(ProofOp::Push(Node::Hash(expected_root))); + let bytes = encode_proof(&forged); + + let result = verify_aggregate_count_on_range_proof(&bytes, &inner_range).unwrap(); + let err = result.expect_err("single-Hash forgery must be rejected"); + // keep merk alive for clarity in the test scope + let _ = merk; + // Plain `Hash` is no longer in the count-proof allowlist (it would + // carry an unbound structural count), so the rejection now lands + // in Phase 1's coarse allowlist rather than Phase 2's shape walk. + // Either error message is fine — the attack is rejected. + match err { + Error::InvalidProofError(msg) => { + assert!( + msg.contains("unexpected node type") + || msg.contains("expected KVDigestCount") + || msg.contains("Boundary"), + "unexpected message: {msg}" + ); + } + other => panic!("expected InvalidProofError, got {other:?}"), + } + } + + /// A malicious prover replaces an in-range `HashWithCount` subtree with + /// a `Hash` carrying that subtree's node_hash, undercounting by the + /// subtree's count. The hash chain still matches (same node_hash), so + /// the old allowlist verifier would have happily returned a wrong + /// count. The shape walk classifies that position as `Contained` and + /// requires `HashWithCount`, rejecting the swap. + #[test] + fn shape_walk_rejects_hash_swap_for_contained_subtree() { + let v = GroveVersion::latest(); + let (merk, _root) = make_15_key_provable_count_tree(v); + let inner_range = QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()); + let (mut ops, _) = merk + .prove_aggregate_count_on_range(&inner_range, v) + .unwrap() + .expect("prove succeeds"); + + // Swap the first HashWithCount op for a Hash op carrying the + // computed node_hash for that subtree (so the chain check still + // matches and only the shape walk can detect the attack). 
+ let mut swapped = false; + for op in ops.iter_mut() { + if let ProofOp::Push(Node::HashWithCount(kv_hash, l, r, c)) = op { + let node_hash = crate::tree::node_hash_with_count(kv_hash, l, r, *c).unwrap(); + *op = ProofOp::Push(Node::Hash(node_hash)); + swapped = true; + break; + } + } + assert!( + swapped, + "test setup: expected at least one HashWithCount op" + ); + + let bytes = encode_proof(&ops); + let result = verify_aggregate_count_on_range_proof(&bytes, &inner_range).unwrap(); + assert!( + result.is_err(), + "HashWithCount→Hash swap on a Contained subtree must be rejected by the shape walk" + ); + } + + /// A malicious prover attaches a `KVDigestCount` child under a leaf + /// `HashWithCount`. Because `Tree::hash()` for `HashWithCount` is + /// computed from the four embedded fields and ignores any reconstructed + /// children, the root hash check passes — but a naive verifier that + /// counts every visited node would credit the bogus child as +1. The + /// shape walk requires `Contained` positions to be **leaves**, so it + /// rejects the smuggled-in child. + #[test] + fn shape_walk_rejects_keyless_node_with_attached_children() { + let v = GroveVersion::latest(); + let (merk, _root) = make_15_key_provable_count_tree(v); + let inner_range = QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()); + let (mut ops, _honest_count) = merk + .prove_aggregate_count_on_range(&inner_range, v) + .unwrap() + .expect("prove succeeds"); + + // Smuggle a fake +1 child under the first HashWithCount op. After + // any HashWithCount(...), insert: Push(Hash(zero)) Parent — that + // attaches an extra hashed node as the LEFT child of the + // HashWithCount during reconstruction. Then add a fake + // Push(KVDigestCount) Child that would be picked up by an + // allowlist verifier counting visited keys. 
+ // + // Concretely we splice 4 ops right after the HashWithCount: + // Push(KVDigestCount(in_range_key, value_hash, 1)) + // Parent (attach KVDigestCount as the LEFT child of HashWithCount) + // Push(Hash([0; 32])) + // Child (attach Hash as the RIGHT child of HashWithCount) + // + // The HashWithCount's hash() ignores these children, so the root + // hash recomputation is unaffected. The shape walk catches the + // Contained-position-with-children violation. + let mut new_ops: LinkedList = LinkedList::new(); + let mut spliced = false; + for op in ops.iter() { + new_ops.push_back(op.clone()); + if !spliced && matches!(op, ProofOp::Push(Node::HashWithCount(_, _, _, _))) { + let in_range_key = b"d".to_vec(); + new_ops.push_back(ProofOp::Push(Node::KVDigestCount( + in_range_key, + [0u8; 32], + 1, + ))); + new_ops.push_back(ProofOp::Parent); + new_ops.push_back(ProofOp::Push(Node::Hash([0u8; 32]))); + new_ops.push_back(ProofOp::Child); + spliced = true; + } + } + assert!( + spliced, + "test setup: expected to splice into a HashWithCount" + ); + ops = new_ops; + + let bytes = encode_proof(&ops); + let result = verify_aggregate_count_on_range_proof(&bytes, &inner_range).unwrap(); + assert!( + result.is_err(), + "attaching children under HashWithCount must be rejected (root hash alone wouldn't catch it)" + ); + } + + /// `HashWithCount` is only safe inside the dedicated aggregate-count + /// verifier (which shape-checks the collapsed subtree). The plain + /// `Query::execute_proof` verifier must reject it on sight — otherwise + /// a malicious prover could include `HashWithCount` in a regular + /// query proof, attach fake KV children to it (whose pushes the + /// verifier would credit as query results via `execute_node`), and + /// have the parent's hash chain still verify because + /// `Tree::hash()` for `HashWithCount` ignores attached children. 
+ #[test] + fn regular_query_verifier_rejects_hash_with_count_node() { + use crate::proofs::query::QueryProofVerify; + let v = GroveVersion::latest(); + + // Build a regular merk and a regular range query against it. + let mut merk = TempMerk::new(v); + for i in 0u8..5 { + merk.apply::<_, Vec<_>>( + &[( + vec![i], + Op::Put(vec![i], crate::TreeFeatureType::BasicMerkNode), + )], + &[], + None, + v, + ) + .unwrap() + .expect("apply"); + } + merk.commit(v); + let q = crate::proofs::query::Query::new_single_query_item(QueryItem::Range( + vec![0u8]..vec![5u8], + )); + + // Generate an honest proof, then splice a `HashWithCount` push into + // it. The exact op sequence doesn't matter for what we're testing — + // we just need the regular verifier to refuse to process the proof + // because it contains a `HashWithCount`. + let (mut ops, _) = merk + .prove_unchecked_query_items(&[QueryItem::Range(vec![0u8]..vec![5u8])], None, true, v) + .unwrap() + .expect("prove"); + ops.push_front(ProofOp::Push(Node::HashWithCount( + [0u8; 32], [0u8; 32], [0u8; 32], 0, + ))); + let bytes = encode_proof(&ops); + + let result = q.execute_proof(&bytes, None, true, 0).unwrap(); + let err = result.expect_err("regular query verifier must reject HashWithCount on sight"); + let msg = format!("{}", err); + assert!( + msg.contains("HashWithCount") || msg.contains("aggregate-count"), + "expected HashWithCount-rejection message, got: {msg}" + ); + } + + // ---------- byte-mutation fuzzer ---------- + // + // Stronger forgery-resistance check than the three hand-crafted attack + // tests above: enumerate every byte of an honest proof, flip it to + // each of three different values, and assert the verifier never + // produces a "silent forgery" — i.e. an `Ok((root, count))` where + // the root **matches** the honest one but the count **differs**. + // + // Three safe outcomes per mutation: + // - **Rejection** — Phase 1 decode error, or Phase 2 shape mismatch. 
+ // - **Divergence** — `Ok((root', _))` where `root' != honest_root`, + // so any caller comparing against their trusted root catches it. + // - **Same outcome** — `Ok((honest_root, honest_count))`. This can + // happen for non-canonical re-encodings (e.g. swapping + // `Push` ↔ `PushInverted` doesn't change the reconstructed tree's + // root or the shape walk's count). Harmless: the verifier is + // deterministic on (root, count), and that pair is what the + // caller acts on. + // + // The **unsafe** outcome is `Ok((honest_root, count'))` where + // `count' != honest_count`. The hash chain binds count via + // `node_hash_with_count`, so this should be impossible — the test + // panics if it ever happens. + // + // We also assert each safe branch fires at least once as a sanity + // check that the test is actually exercising the surface. + #[test] + fn fuzz_byte_mutation_no_silent_forgery() { + let v = GroveVersion::latest(); + let (merk, honest_root) = make_15_key_provable_count_tree(v); + let inner_range = QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()); + let (ops, honest_count) = merk + .prove_aggregate_count_on_range(&inner_range, v) + .unwrap() + .expect("prove"); + let honest_bytes = encode_proof(&ops); + assert!(!honest_bytes.is_empty()); + + let mut rejected = 0usize; + let mut diverged = 0usize; + let mut same_outcome = 0usize; + let mut total = 0usize; + + // Three different mutations per byte: +1, +0x55, XOR 0xff. 
+ let deltas: [u8; 3] = [1, 0x55, 0xff]; + for byte_idx in 0..honest_bytes.len() { + for &delta in &deltas { + let mut bytes = honest_bytes.clone(); + let original = bytes[byte_idx]; + let mutated = if delta == 0xff { + original ^ 0xff + } else { + original.wrapping_add(delta) + }; + if mutated == original { + continue; // no-op, don't count + } + bytes[byte_idx] = mutated; + total += 1; + + let result = verify_aggregate_count_on_range_proof(&bytes, &inner_range).unwrap(); + match result { + Err(_) => rejected += 1, + Ok((root, count)) => { + if root == honest_root { + // Same root — the verifier MUST also produce + // the same count, otherwise we have a silent + // count-forgery: the caller would accept the + // forged count thinking it's the honest one. + assert_eq!( + count, honest_count, + "SILENT FORGERY at byte index {} (delta=0x{:02x}): \ + verifier returned the honest root but a wrong count \ + ({} != {}). The hash chain should bind count.", + byte_idx, delta, count, honest_count + ); + same_outcome += 1; + } else { + // Different root — caller's root check catches it. + diverged += 1; + } + } + } + } + } + + // Sanity: each safe branch should fire at least once on a real proof. + assert!( + rejected > 0, + "expected at least one mutation to be rejected outright" + ); + assert!( + diverged > 0, + "expected at least one mutation to diverge the root hash" + ); + // `same_outcome` may legitimately be zero on some encoders, so we + // don't require it. We just require no silent forgery occurred, + // which the inner assert_eq! guarantees. 
+        let _ = same_outcome; +        assert_eq!(rejected + diverged + same_outcome, total); +    } + +    // ---------- randomized round-trip property test ---------- +    // +    // Build merks with varying sizes and key shapes from a deterministic +    // RNG, run a bunch of randomly-chosen ranges through the prove → encode +    // → verify pipeline, and assert the verifier's count agrees with a +    // ground-truth count computed by directly intersecting the inserted +    // keys with the range. Catches silent miscounts that the fixed +    // examples above would miss (off-by-one, edge-of-tree, exact-bound +    // matches against multi-byte keys, etc.). +    #[test] +    fn fuzz_random_trees_and_ranges_round_trip() { +        // Tiny custom xorshift RNG so we don't have to add a dev-dep. +        struct XorShift(u64); +        impl XorShift { +            fn next_u64(&mut self) -> u64 { +                let mut x = self.0; +                x ^= x << 13; +                x ^= x >> 7; +                x ^= x << 17; +                self.0 = x; +                x +            } +            fn gen_range(&mut self, lo: usize, hi: usize) -> usize { +                lo + (self.next_u64() as usize) % (hi - lo) +            } +            fn gen_key(&mut self, max_len: usize) -> Vec<u8> { +                let len = 1 + self.gen_range(0, max_len); +                (0..len).map(|_| (self.next_u64() & 0xff) as u8).collect() +            } +        } + +        let v = GroveVersion::latest(); +        let mut rng = XorShift(0xDEAD_BEEF_C0FFEE); +        let trials = 16; +        for trial in 0..trials { +            let key_count = rng.gen_range(1, 64); +            let mut keys: Vec<Vec<u8>> = (0..key_count).map(|_| rng.gen_key(8)).collect(); +            keys.sort(); +            keys.dedup(); + +            let mut merk = TempMerk::new_with_tree_type(v, TreeType::ProvableCountTree); +            let entries: Vec<(Vec<u8>, Op)> = keys +                .iter() +                .map(|k| (k.clone(), Op::Put(vec![0xAB], ProvableCountedMerkNode(1)))) +                .collect(); +            merk.apply::<_, Vec<_>>(&entries, &[], None, v) +                .unwrap() +                .expect("apply"); +            merk.commit(v); +            let root = merk.root_hash().unwrap(); + +            // Try several random ranges per tree, picking shapes that +            // exercise both bounded and half-bounded variants. 
+ for sub_trial in 0..6 { + let lo = rng.gen_key(8); + let hi = rng.gen_key(8); + let (lo, hi) = if lo <= hi { (lo, hi) } else { (hi, lo) }; + + let inner_range = match sub_trial % 6 { + 0 => QueryItem::Range(lo.clone()..hi.clone()), + 1 => QueryItem::RangeInclusive(lo.clone()..=hi.clone()), + 2 => QueryItem::RangeFrom(lo.clone()..), + 3 => QueryItem::RangeAfter(lo.clone()..), + 4 => QueryItem::RangeTo(..hi.clone()), + _ => QueryItem::RangeToInclusive(..=hi.clone()), + }; + + let expected = keys + .iter() + .filter(|k| inner_range.contains(k.as_slice())) + .count() as u64; + + let (ops, prover_count) = merk + .prove_aggregate_count_on_range(&inner_range, v) + .unwrap() + .expect("prove"); + assert_eq!( + prover_count, expected, + "trial {} sub {}: prover count mismatch for range {:?}", + trial, sub_trial, inner_range + ); + let bytes = encode_proof(&ops); + let (vroot, vcount) = verify_aggregate_count_on_range_proof(&bytes, &inner_range) + .unwrap() + .expect("verify"); + assert_eq!( + vroot, root, + "trial {} sub {}: verifier root mismatch", + trial, sub_trial + ); + assert_eq!( + vcount, expected, + "trial {} sub {}: verifier count mismatch for range {:?}", + trial, sub_trial, inner_range + ); + } + } + } + + // ---------- shape-walk rejection of malformed proof shapes ---------- + // + // These tests synthesize op streams that are well-formed bytes (Phase 1 + // decode succeeds) but violate the structural invariants the shape walk + // requires (Phase 2 rejection). They exist to lock down the defensive + // error branches in `verify_count_shape` so future refactors that + // accidentally relax them are caught by the test suite. + + /// `HashWithCount` is only valid as a leaf in the proof tree. 
If the +    /// prover attaches children to a Disjoint-position `HashWithCount`, +    /// the shape walk must reject — even though the parent's hash chain +    /// (which uses `Tree::hash()` for `HashWithCount`, computed from the +    /// four embedded fields and ignoring children) would still verify. +    #[test] +    fn shape_walk_rejects_disjoint_hashwithcount_with_children() { +        let v = GroveVersion::latest(); +        let (merk, _root) = make_15_key_provable_count_tree(v); +        // RangeAfter("o") → all 15 keys are below; the entire tree is +        // Disjoint relative to the inner range, so the honest proof is a +        // single Push(HashWithCount(...)). +        let inner_range = QueryItem::RangeAfter(b"o".to_vec()..); +        let (mut ops, _) = merk +            .prove_aggregate_count_on_range(&inner_range, v) +            .unwrap() +            .expect("prove succeeds"); + +        // Splice in another HashWithCount as the child (no key, so no +        // ordering constraint at Phase 1) so we exercise Phase 2's +        // leaf-only assertion at the Disjoint position. +        let mut spliced = LinkedList::<ProofOp>::new(); +        let mut done = false; +        for op in ops.iter() { +            spliced.push_back(op.clone()); +            if !done && matches!(op, ProofOp::Push(Node::HashWithCount(_, _, _, _))) { +                spliced.push_back(ProofOp::Push(Node::HashWithCount( +                    [0u8; 32], [0u8; 32], [0u8; 32], 1, +                ))); +                spliced.push_back(ProofOp::Parent); +                done = true; +            } +        } +        assert!(done, "test setup: expected at least one HashWithCount op"); +        ops = spliced; + +        let bytes = encode_proof(&ops); +        let result = verify_aggregate_count_on_range_proof(&bytes, &inner_range).unwrap(); +        let err = result.expect_err("Disjoint HashWithCount with children must be rejected"); +        match err { +            Error::InvalidProofError(msg) => assert!( +                msg.contains("Disjoint position must be a leaf"), +                "unexpected message: {msg}" +            ), +            other => panic!("expected InvalidProofError, got {:?}", other), +        } +    } + +    /// At a Disjoint position the shape walk requires `HashWithCount` (only +    /// node type with a hash-bound count). 
A `Hash` op there would carry an + /// untrusted structural count for the parent's `own_count` derivation, + /// so it must be rejected. + #[test] + fn shape_walk_rejects_non_hashwithcount_at_disjoint() { + let v = GroveVersion::latest(); + let (merk, _root) = make_15_key_provable_count_tree(v); + let inner_range = QueryItem::RangeAfter(b"o".to_vec()..); + let (mut ops, _) = merk + .prove_aggregate_count_on_range(&inner_range, v) + .unwrap() + .expect("prove succeeds"); + + // Replace the single Disjoint HashWithCount with a plain Hash. + let mut swapped = false; + for op in ops.iter_mut() { + if let ProofOp::Push(Node::HashWithCount(kv, l, r, c)) = op { + let node_hash = crate::tree::node_hash_with_count(kv, l, r, *c).unwrap(); + *op = ProofOp::Push(Node::Hash(node_hash)); + swapped = true; + break; + } + } + assert!(swapped, "test setup: expected a HashWithCount op to swap"); + + let bytes = encode_proof(&ops); + let result = verify_aggregate_count_on_range_proof(&bytes, &inner_range).unwrap(); + // Phase 1 rejects plain Hash via the allowlist; Phase 2 would also + // reject "expected HashWithCount at Disjoint position". Either is fine. + let err = result.expect_err("plain Hash at Disjoint must be rejected"); + match err { + Error::InvalidProofError(_) => {} + other => panic!("expected InvalidProofError, got {:?}", other), + } + } + + /// At a Boundary position the shape walk requires the node's key to + /// fall strictly inside the inherited subtree bounds. A prover that + /// emits a `KVDigestCount` whose key is outside those bounds is trying + /// to confuse the recursion's bound tracking — it must be rejected. 
+ #[test] + fn shape_walk_rejects_kvdigestcount_outside_inherited_bounds() { + let v = GroveVersion::latest(); + let (merk, _root) = make_15_key_provable_count_tree(v); + let inner_range = QueryItem::RangeInclusive(b"c".to_vec()..=b"l".to_vec()); + let (mut ops, _) = merk + .prove_aggregate_count_on_range(&inner_range, v) + .unwrap() + .expect("prove succeeds"); + + // Find a Boundary KVDigestCount and rewrite its key to something + // outside the tree (way past 'z'). This will violate the inherited + // (lo, hi) bounds at the verifier's recursion frame. + let mut rewrote = false; + for op in ops.iter_mut() { + if let ProofOp::Push(Node::KVDigestCount(key, _, _)) = op { + *key = vec![0xff, 0xff]; + rewrote = true; + break; + } + } + assert!(rewrote, "test setup: expected a KVDigestCount to rewrite"); + + let bytes = encode_proof(&ops); + let result = verify_aggregate_count_on_range_proof(&bytes, &inner_range).unwrap(); + let err = result.expect_err("KVDigestCount outside bounds must be rejected"); + match err { + Error::InvalidProofError(_) => {} + other => panic!("expected InvalidProofError, got {:?}", other), + } + } +} diff --git a/merk/src/proofs/query/mod.rs b/merk/src/proofs/query/mod.rs index 22352d5ce..1fd556a2f 100644 --- a/merk/src/proofs/query/mod.rs +++ b/merk/src/proofs/query/mod.rs @@ -5,11 +5,16 @@ pub use grovedb_query::*; #[cfg(test)] mod merk_integration_tests; +#[cfg(feature = "minimal")] +pub mod aggregate_count; #[cfg(any(feature = "minimal", feature = "verify"))] mod map; #[cfg(any(feature = "minimal", feature = "verify"))] mod verify; +#[cfg(feature = "minimal")] +pub use aggregate_count::verify_aggregate_count_on_range_proof; + #[cfg(feature = "minimal")] use grovedb_costs::{cost_return_on_error, CostContext, CostResult, CostsExt, OperationCost}; #[cfg(feature = "minimal")] diff --git a/merk/src/proofs/query/verify.rs b/merk/src/proofs/query/verify.rs index 4a11b67fe..822fec0fc 100644 --- a/merk/src/proofs/query/verify.rs +++ 
b/merk/src/proofs/query/verify.rs @@ -485,6 +485,25 @@ impl QueryProofVerify for Query { ))); } } + Node::HashWithCount(..) => { + // `HashWithCount` is only safe inside the dedicated + // aggregate-count verifier, which shape-checks each + // collapsed subtree against the queried range. The plain + // query verifier does no such shape check, and + // `Tree::hash()` for a `HashWithCount` recomputes its + // hash from the embedded `(kv_hash, l, r, count)` while + // *ignoring* any reconstructed children. A malicious + // prover could therefore hang fake KV pushes under a + // `HashWithCount`, satisfy `execute_node` from those + // pushes (so they appear as query results) while still + // preserving the parent's hash chain. Fail fast here so + // the regular query path can never accept one. + return Err(Error::InvalidProofError( + "HashWithCount node is only valid in aggregate-count proofs; \ + encountered in regular query verification" + .to_string(), + )); + } } last_push = Some(node.clone()); diff --git a/merk/src/proofs/tree.rs b/merk/src/proofs/tree.rs index b733c68ef..09cffe090 100644 --- a/merk/src/proofs/tree.rs +++ b/merk/src/proofs/tree.rs @@ -128,6 +128,20 @@ impl Tree { match &self.node { Node::Hash(hash) => (*hash).wrap_with_cost(Default::default()), + // HashWithCount is self-verifying: the verifier recomputes + // node_hash_with_count(kv_hash, left_child_hash, right_child_hash, count) + // from the four committed fields. If the prover lied about `count` + // the recomputed hash diverges from the parent's expectation and + // the parent's Merkle-root check fails — so the count is bound to + // the proof, not just trusted on faith. + // + // The embedded child hashes (not the reconstructed-Tree's + // children) are what the original subtree's node_hash was computed + // from, so we use them directly here even though `self` is treated + // as a leaf in the proof Tree. 
+        Node::HashWithCount(kv_hash, left_child_hash, right_child_hash, count) => { +            node_hash_with_count(kv_hash, left_child_hash, right_child_hash, *count) +        } Node::KVHash(kv_hash) => compute_hash(self, *kv_hash), Node::KV(key, value) => kv_hash(key.as_slice(), value.as_slice()) .flat_map(|kv_hash| compute_hash(self, kv_hash)), @@ -377,8 +391,8 @@ } /// Returns the key from this tree node if it's a KV-type node with a key. -    /// Returns None for Hash, KVHash, or KVHashCount node types (which only -    /// have hashes, not keys). +    /// Returns None for Hash, KVHash, KVHashCount, or HashWithCount node +    /// types (which only have hashes, not keys). #[cfg(any(feature = "minimal", feature = "verify"))] pub fn key(&self) -> Option<&[u8]> { match &self.node { @@ -392,7 +406,9 @@ | Node::KVCount(key, ..) | Node::KVRefValueHashCount(key, ..) => Some(key.as_slice()), // These nodes don't have keys, only hashes -            Node::Hash(_) | Node::KVHash(_) | Node::KVHashCount(..) => None, +            Node::Hash(_) | Node::KVHash(_) | Node::KVHashCount(..) | Node::HashWithCount(..) => { +                None +            } } } @@ -404,6 +420,7 @@ Ok((*feature_type).into()) } Node::KVCount(_, _, count) => Ok(AggregateData::ProvableCount(*count)), +            Node::HashWithCount(.., count) => Ok(AggregateData::ProvableCount(*count)), Node::KV(..) | Node::KVValueHash(..) => Ok(AggregateData::NoAggregateData), _ => Err(Error::InvalidProofError( "Cannot extract aggregate data from this node type".to_string(), @@ -500,7 +517,36 @@ pub const MAX_PROOF_TREE_HEIGHT: usize = 92; /// /// Enforces a limit of [`MAX_PROOF_OPS`] operations to prevent /// denial-of-service from malicious proofs. -pub fn execute<I, F>(ops: I, collapse: bool, mut visit_node: F) -> CostResult<Tree, Error> +/// +/// Equivalent to [`execute_with_options(ops, collapse, true, visit_node)`] — +/// i.e. enforces the root-level AVL height-balance check after reconstruction. 
+pub fn execute<I, F>(ops: I, collapse: bool, visit_node: F) -> CostResult<Tree, Error> +where +    I: IntoIterator<Item = Result<Op, Error>>, +    F: FnMut(&Node) -> Result<(), Error>, +{ +    execute_with_options(ops, collapse, true, visit_node) +} + +#[cfg(any(feature = "minimal", feature = "verify"))] +/// Executes a proof exactly like [`execute`] but lets the caller opt out of +/// the root-level AVL balance check. +/// +/// Existing query / chunk / branch verifiers always pass `verify_avl_balance +/// = true` (via [`execute`]). The aggregate-count verifier passes `false` +/// because count proofs intentionally collapse fully-inside subtrees into a +/// single `HashWithCount` op (height = 1) while still descending the boundary +/// path on the other side, so the reconstructed tree's root will routinely +/// have child heights differing by more than one — that's expected, not +/// proof corruption. The cryptographic guarantees (hash-chain reconstruction, +/// boundary-key checks, count commitment via `node_hash_with_count`) are all +/// independent of AVL balance. +pub fn execute_with_options<I, F>( +    ops: I, +    collapse: bool, +    verify_avl_balance: bool, +    mut visit_node: F, +) -> CostResult<Tree, Error> where I: IntoIterator<Item = Result<Op, Error>>, F: FnMut(&Node) -> Result<(), Error>, @@ -687,9 +733,10 @@ where let tree = stack.pop().unwrap(); -            if tree.child_heights.0.max(tree.child_heights.1) -                - tree.child_heights.0.min(tree.child_heights.1) -                > 1 +            if verify_avl_balance +                && tree.child_heights.0.max(tree.child_heights.1) +                    - tree.child_heights.0.min(tree.child_heights.1) +                    > 1 { return Err(Error::InvalidProofError( "Expected proof to result in a valid avl tree".to_string(),