diff --git a/.bun-version b/.bun-version index 0c00f610..17e63e7a 100644 --- a/.bun-version +++ b/.bun-version @@ -1 +1 @@ -1.3.10 +1.3.11 diff --git a/.gitignore b/.gitignore index a3b3e3e6..bc998cf7 100644 --- a/.gitignore +++ b/.gitignore @@ -141,6 +141,7 @@ docs/plans/ .agents/ .augment/ .claude/ +.context/ .cursor/ .roo/ .full-review/ diff --git a/AGENTS.md b/AGENTS.md index 91ace5b0..1e0fc93f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -100,8 +100,8 @@ evaluator/ ├── offset/ // Offset resolution submodule │ ├── mod.rs // Dispatcher (resolve_offset) and re-exports │ ├── absolute.rs // OffsetError, resolve_absolute_offset -│ ├── indirect.rs // resolve_indirect_offset stub (issue #37) -│ └── relative.rs // resolve_relative_offset stub (issue #38) +│ ├── indirect.rs // resolve_indirect_offset (fully implemented, issue #37) +│ └── relative.rs // resolve_relative_offset (GNU `file` anchor semantics) └── operators/ // Operator application submodule ├── mod.rs // Dispatcher (apply_operator, apply_any_value) and re-exports ├── equality.rs // apply_equal, apply_not_equal @@ -204,7 +204,7 @@ cargo test --doc # Test documentation examples ### Currently Implemented (v0.1.0) -- **Offsets**: Absolute, from-end, and indirect specifications (relative offsets are parsed but not yet evaluated) +- **Offsets**: Absolute, from-end, indirect, and relative specifications (relative offsets `&+N`/`&-N` are evaluated using GNU `file` semantics -- the previous-match anchor) - **Types**: `byte`, `short`, `long`, `quad`, `float`, `double`, `string`, `pstring` with endianness support; unsigned variants `ubyte`, `ushort`/`ubeshort`/`uleshort`, `ulong`/`ubelong`/`ulelong`, `uquad`/`ubequad`/`ulequad`; float/double endian variants `befloat`/`lefloat`, `bedouble`/`ledouble`; 32-bit date/timestamp types `date`/`ldate`/`bedate`/`beldate`/`ledate`/`leldate`; 64-bit date/timestamp types `qdate`/`qldate`/`beqdate`/`beqldate`/`leqdate`/`leqldate`; `pstring` is a Pascal string (length-prefixed) with 
support for 1/2/4-byte length prefixes via `/B`, `/H` (2-byte BE), `/h` (2-byte LE), `/L` (4-byte BE), `/l` (4-byte LE) suffixes, and the `/J` flag (stored length includes prefix width, JPEG convention) which is combinable with width suffixes (e.g., `pstring/HJ`); date values formatted as "Www Mmm DD HH:MM:SS YYYY" matching GNU `file` output; types are signed by default (libmagic-compatible) - **Operators**: `=` (equal), `!=` (not equal), `<` (less than), `>` (greater than), `<=` (less equal), `>=` (greater equal), `&` (bitwise AND with optional mask), `^` (bitwise XOR), `~` (bitwise NOT), `x` (any value) - **Nested Rules**: Hierarchical rule evaluation with proper indentation @@ -246,7 +246,7 @@ impl BinaryRegex for regex::bytes::Regex { ### Offset Specifications - Indirect offsets are fully implemented (parsing + evaluation) with specifiers: `.b/.B` (byte), `.s/.S` (short), `.l/.L` (long), `.q/.Q` (quad); lowercase = little-endian, uppercase = big-endian (GNU `file` semantics); pointer types signed by default; adjustment after closing paren: `(base.type)+adj` -- Relative offsets are parsed into the AST but evaluation is not yet implemented (#38) +- Relative offsets are fully evaluated against the GNU `file` previous-match anchor: the engine tracks `EvaluationContext::last_match_end()`, advancing it after each successful match by the bytes consumed (variable-width types include c-string NUL terminators and pstring length prefixes). Top-level relative offsets resolve from anchor 0. Magic-file `&+N`/`&-N` *parsing* is still TODO -- relative offsets are exercised programmatically through the AST. 
### Magic File Syntax @@ -535,6 +535,7 @@ This guide ensures consistent, high-quality development practices for the libmag - In justfile recipes, never wrap `just` in `{{ mise_exec }}` -- it's redundant - Changelog: `just changelog`, `just changelog-version `, `just changelog-unreleased` - Security contact: (matches PGP key in SECURITY.md) +- `docs/solutions/` — documented solutions to past problems, organized by category (logic-errors/, integration-issues/, security-issues/, developer-experience/) with YAML frontmatter (`tags`, `severity`, `components`). Relevant when implementing or debugging in documented areas. ## Open Source Quality Standards (OSSF Best Practices) diff --git a/GOTCHAS.md b/GOTCHAS.md index b8729285..006e3a40 100644 --- a/GOTCHAS.md +++ b/GOTCHAS.md @@ -26,7 +26,7 @@ Serialization functions live in `src/parser/codegen.rs`, shared by both `build.r ### 2.1 `TypeKind` Exhaustive Matches -Adding a variant to `TypeKind` requires updating exhaustive matches in 10+ files: `ast`, `grammar`, `types`, `codegen`, `strength`, `property_tests`, `evaluator/types/mod.rs` (`read_typed_value`, `coerce_value_to_type`), `output/mod.rs` (2 length matches), `output/json.rs` (`format_value_as_hex`), and `grammar/tests.rs` (stale assertions). Note: `coerce_value_to_type` and output matches use catch-all `_ =>` so they compile without changes but may need semantic updates. +Adding a variant to `TypeKind` requires updating exhaustive matches in 10+ files: `ast`, `grammar`, `types`, `codegen`, `strength`, `property_tests`, `evaluator/types/mod.rs` (`read_typed_value`, `coerce_value_to_type`, **`bytes_consumed`** -- variable-width variants must be matched explicitly or relative-offset anchors will silently corrupt), `output/mod.rs` (2 length matches), `output/json.rs` (`format_value_as_hex`), and `grammar/tests.rs` (stale assertions). 
Note: `coerce_value_to_type`, output matches, and `bytes_consumed` use catch-all `_ =>` so they compile without changes but may need semantic updates -- `bytes_consumed` will fire a `debug_assert` in test/dev builds for unhandled variable-width variants. ### 2.2 `Operator` Exhaustive Matches @@ -71,6 +71,12 @@ The nom `tuple` combinator is deprecated. Use bare tuple syntax `(a, b, c)` dire Lowercase pointer specifiers (`.s`, `.l`, `.q`) map to **little-endian**, not native endian. Uppercase (`.S`, `.L`, `.Q`) map to big-endian. All numeric pointer types are **signed by default** (per S6.3). The adjustment is parsed **after** the closing paren: `(base.type)+adj`, not `(base.type+adj)`. +### 3.8 Relative Offsets: Global Anchor, No Save/Restore + +`OffsetSpec::Relative(N)` resolves against `EvaluationContext::last_match_end()`, which is updated after every successful match in `evaluate_rules` and is **never saved/restored across child recursion**. This is intentional and matches GNU `file`: a sibling rule sees the anchor wherever the deepest descendant of the previous sibling left it. The anchor is global/shared rather than stack-scoped, but its numeric value is not guaranteed to be non-decreasing -- a successful `Relative(-N)` rule (or any later rule that matches at a lower absolute position) can move it earlier. Do not wrap recursion in a save/restore pair "for safety" -- it would silently break sibling-after-nested chains. The recursion-depth pattern in the same loop *is* save/restore, and the asymmetry is correct. + +The load-bearing invariant is that the anchor is updated *before recursing into children* (so children and their followers see the new anchor). The current code also happens to set the anchor before `matches.push(...)`, but the push-ordering relative to `set_last_match_end` is incidental for anchor correctness -- only the ordering before the `evaluate_rules` recursion call matters. 
(Future code that reads the anchor while iterating `matches` would make this ordering load-bearing, so do not "optimize" the order without checking call sites first.) `bytes_consumed()` (in `evaluator/types/mod.rs`) is the source of truth for advance distance; for variable-width types it re-derives consumption from the buffer rather than trusting `Value::String.len()` (which can drift from the original byte length via `from_utf8_lossy`). Pascal-string consumption is also clamped against the remaining buffer to prevent attacker-controlled length prefixes from poisoning the anchor to `usize::MAX`. + ## 4. Module Visibility & Re-exports ### 4.1 Private Engine Module diff --git a/docs/solutions/developer-experience/rust-test-visibility-boundary.md b/docs/solutions/developer-experience/rust-test-visibility-boundary.md new file mode 100644 index 00000000..e970813e --- /dev/null +++ b/docs/solutions/developer-experience/rust-test-visibility-boundary.md @@ -0,0 +1,90 @@ +--- +title: Rust Test Visibility Boundary — tests/ vs src/.../tests.rs +category: developer-experience +date: 2026-04-07 +tags: [rust, testing, pub-crate, integration-tests, unit-tests, visibility] +issue: '#38' +pr: '#211' +severity: low +components: [testing] +--- + +# Rust Test Visibility Boundary — `tests/` vs `src/.../tests.rs` + +## Context + +When adding tests that need to exercise `pub(crate)` items directly — for example, injecting internal state via a crate-private setter to verify a graceful-skip contract — the test file location matters. The two test locations in a Rust crate have different visibility semantics, and this is not always obvious until you hit a compile error. + +Encountered during PR #211 (relative offset evaluation) when adding a test that needed `EvaluationContext::set_last_match_end()` — a `pub(crate)` setter — to inject a near-saturation anchor value (`usize::MAX`) and verify that subsequent `OffsetSpec::Relative` rules skip gracefully without panicking. 
The initial instinct was to put the test in the existing integration test file at `tests/relative_offset_evaluation.rs`, but that failed to compile because `tests/` compiles as an external crate. + +## Guidance + +**Tests that need `pub(crate)` items must live in `src/.../tests.rs`, not in `tests/`.** + +The two test locations in a Rust crate are: + +| Location | Compiles as | Can access `pub(crate)` items? | Can access `pub` items? | +| -------------------------------------------------------------------------- | -------------- | ------------------------------------ | ----------------------- | +| `tests/foo.rs` (integration tests) | External crate | **No** — fails with E0603/E0624 | Yes | +| `src/foo/tests.rs` (module-adjacent unit tests, gated with `#[cfg(test)]`) | Same crate | **Yes** | Yes | +| Doctests in rustdoc | External crate | **No** — same constraint as `tests/` | Yes | + +This matches the project convention already used in `libmagic-rs`: + +- `src/evaluator/tests.rs`, `src/evaluator/engine/tests.rs`, `src/evaluator/types/tests.rs` — unit tests in `#[cfg(test)] mod tests` peer files, full access to crate-private APIs +- `tests/evaluator_tests.rs`, `tests/relative_offset_evaluation.rs` — integration tests that only exercise the public API through `libmagic_rs::...` paths + +## Why This Matters + +Mixing the two up produces compile errors rather than runtime failures, so you find out quickly — but the error message (`error[E0603]: function 'set_last_match_end' is private`) doesn't immediately suggest moving the test to a different file. The instinct is often to widen visibility (change `pub(crate)` to `pub`), which defeats the point of the crate boundary and bloats the public API surface that gets locked in at semver time. + +Picking the right location is also the right answer for **doctests**: rustdoc examples compile as external crates, so `/// use crate::...` never works in doctests on published items. 
Use `/// use libmagic_rs::...` (or the actual crate name) in doctests. + +## When to Apply + +- Writing a unit test that needs to inject internal state, call a `pub(crate)` constructor, or inspect a private field → put it in `src/.../tests.rs` +- Writing an integration test that exercises the crate's public API through the same entry points an external consumer would use → put it in `tests/` +- Writing a rustdoc example for a `pub` function → use the full external path (e.g., `libmagic_rs::evaluator::evaluate_rules`), not `crate::` +- Tempted to widen `pub(crate)` to `pub` just to write a test → **stop**, move the test to `src/.../tests.rs` instead + +## Examples + +**Wrong location — compile error:** + +```rust +// tests/my_integration_test.rs +use libmagic_rs::evaluator::EvaluationContext; +use libmagic_rs::EvaluationConfig; + +#[test] +fn test_anchor_injection() { + let mut ctx = EvaluationContext::new(EvaluationConfig::default()); + ctx.set_last_match_end(usize::MAX); // error[E0624]: method is private + // ... +} +``` + +**Right location — compiles fine:** + +```rust +// src/evaluator/engine/tests.rs +use super::*; +use crate::evaluator::EvaluationContext; +use crate::EvaluationConfig; + +#[test] +fn test_evaluate_rules_anchor_near_saturation_skips_relative_child_gracefully() { + let mut ctx = EvaluationContext::new(EvaluationConfig::default()); + ctx.set_last_match_end(usize::MAX); // Fine — same crate + // ... assert evaluate_rules skips Relative rules gracefully +} +``` + +The second form compiled on the first try and now pins a safety contract that would be impossible to test from the integration layer. 
+ +## Related + +- PR #211 — `test_evaluate_rules_anchor_near_saturation_skips_relative_child_gracefully` is the concrete case that prompted this learning +- `docs/solutions/security-issues/pstring-anchor-poisoning.md` — the security fix this test regression-guards +- Project GOTCHAS.md §7.1 — "Doctest Import Paths" (use `libmagic_rs::` not `crate::`) — same root cause, different surface +- AGENTS.md §4.1 — `evaluator::engine` is private; integration tests must import the re-exported `libmagic_rs::evaluator::evaluate_rules`, not `evaluator::engine::evaluate_rules` (related but distinct: module visibility, not test file visibility) diff --git a/docs/solutions/security-issues/pstring-anchor-poisoning.md b/docs/solutions/security-issues/pstring-anchor-poisoning.md new file mode 100644 index 00000000..74089fd5 --- /dev/null +++ b/docs/solutions/security-issues/pstring-anchor-poisoning.md @@ -0,0 +1,104 @@ +--- +title: Pascal-string Anchor Poisoning via Attacker-Controlled Length Prefix +category: security-issues +date: 2026-04-07 +tags: [evaluator, relative-offsets, pstring, bounds-check, attacker-input, anchor] +issue: '#38' +pr: '#211' +severity: medium +components: [evaluator/types/mod.rs, evaluator/engine/mod.rs] +--- + +# Pascal-string Anchor Poisoning via Attacker-Controlled Length Prefix + +## Problem + +When implementing the GNU `file` "previous match" anchor for `OffsetSpec::Relative` evaluation (issue #38), the helper that advances the anchor by the bytes a successful read consumed (`bytes_consumed`) read pstring length prefixes directly without bounding them against the actual buffer. A pstring rule with a 4-byte length prefix near `u32::MAX` (e.g., `\xFF\xFF\xFF\xFF`) caused `bytes_consumed` to return ~4 GB, advancing the anchor far past `buffer.len()`. 
Every subsequent `Relative` rule then resolved to a target `>= buffer.len()` and was silently skipped via the engine's graceful-skip arm — no error, no log loud enough to surface in normal operation, just incomplete classification. + +A crafted file could deliberately trigger this on the first matching pstring rule, suppressing all following type-refinement rules and forcing the engine to report only the broad parent match (e.g., classify a malicious script as "data" instead of "shell script with dangerous interpreter"). + +Caught by the security and adversarial reviewers in `ce:review` autofix mode (PR #211, finding SEC-001 / ADV-001) before merge. + +## Symptoms + +- After a pstring rule with a large 4-byte length prefix matches, all subsequent sibling/child rules using `OffsetSpec::Relative` silently fail to match. +- The match list returned to the caller is missing entries that would have classified the file more specifically. +- No panic, no error, no test failure on benign inputs — only adversarial/fuzz inputs trigger it. +- Debug logs contain `Skipping rule '': BufferOverrun` for each suppressed rule, but the root cause (anchor saturation from a previous pstring) is invisible without correlating across log lines. + +## What Didn't Work + +- **Documenting the gap.** First instinct (for the related fixed-width-types case from the same review) was to tighten the rustdoc to scope the "infallible" claim to variable-width types only, leaving the fixed-width branch unguarded. The Copilot reviewer rejected this in the next round: "Consider adding a guard ... so the function matches its documented defensive behavior and can't advance the anchor when misused." Documentation is not a substitute for invariants — if the contract says infallible, the function should *be* infallible regardless of caller discipline. +- **Adding a `warn!` log on `saturating_add` overflow at the engine site.** Considered as a debugging aid. 
Rejected because it adds noise without preventing the underlying bug — the anchor would still be poisoned, the rules would still be skipped, the user would still see incomplete classification. Fix the cause, not the symptom. +- **Trusting `read_pstring`'s upstream bounds check.** `read_pstring` itself checks `string_end <= buffer.len()` and errors if the payload would extend beyond the buffer, so a successful read implies the actual payload fit. But `bytes_consumed` re-reads the raw length prefix from the buffer (rather than receiving the byte count from the read function) and didn't apply the same bound. The two functions had divergent contracts that the engine relied on being equivalent. + +## Solution + +Clamp the pstring payload length against the remaining buffer in `pstring_bytes_consumed`, mirroring `read_pstring`'s own bounds enforcement: + +```rust +// src/evaluator/types/mod.rs + +let payload_length = if length_includes_itself { + match stored_length.checked_sub(width) { + Some(n) => n, + None => return 0, + } +} else { + stored_length +}; + +// Clamp against remaining buffer bytes after the prefix. This defends +// against an attacker-controlled length prefix that exceeds the remaining +// buffer: read_pstring would have failed to actually read a payload that +// long, so a successful read implies the payload fit in the buffer. +// Mirroring that bound here keeps the anchor truthful. +let remaining_after_prefix = buffer.len().saturating_sub(prefix_end); +let bounded_payload = payload_length.min(remaining_after_prefix); +let actual_length = max_length.map_or(bounded_payload, |m| m.min(bounded_payload)); +width.saturating_add(actual_length) +``` + +The same review pass also extended the *fixed-width* branch of `bytes_consumed` to bounds-check itself: + +```rust +if let Some(bits) = type_kind.bit_width() { + let width = (bits as usize) / 8; + // Bounds-check the fixed-width path so a misuse cannot advance the + // anchor past the buffer end. 
The engine guarantees a successful read + // preceded the call, but the guard makes the contract self-consistent + // for any future caller. + return match offset.checked_add(width) { + Some(end) if end <= buffer.len() => width, + _ => 0, + }; +} +``` + +Regression tests pin both branches at `src/evaluator/types/tests.rs`: + +- `test_bytes_consumed_pstring_clamps_oversized_prefix_be` — `\xFF\xFF\xFF\xFF` BE prefix on a buffer with 3 payload bytes → returns `4 + 3 = 7`, not `4 + u32::MAX`. +- `test_bytes_consumed_pstring_clamps_oversized_prefix_le` — same for LE prefix. +- `test_bytes_consumed_fixed_width_returns_zero_past_end` — fixed-width type at `offset == buf.len()`, beyond, and at `usize::MAX` overflow → all return 0. + +## Why This Works + +The fix restores an invariant the engine implicitly relied on: **the bytes the anchor advances by must equal the bytes the read function actually consumed from the buffer.** `read_pstring` already enforced `string_end <= buffer.len()`, so a successful read implies the payload fit in the remaining buffer. By applying the same bound in `bytes_consumed`, the helper becomes consistent with the read function under all inputs — including adversarial ones — without needing to plumb the byte count through the read function's return value. + +The general principle: **when advancing internal state by an attacker-controlled byte count, clamp against the actual buffer reality, not the raw input.** Length prefixes, type-length-value structures, and any "this field is N bytes long" header field from untrusted input must all be bounded before being trusted. + +## Prevention + +- **Document the invariant.** `GOTCHAS.md` S3.8 now notes that "Pascal-string consumption is also clamped against the remaining buffer to prevent attacker-controlled length prefixes from poisoning the anchor to `usize::MAX`." Future contributors editing `bytes_consumed` see the constraint without needing to rediscover it from the security review. 
+- **Match read function consumption exactly.** If a helper function re-derives a value the corresponding read function already computed (here: payload length), make the helper apply *all* the same bounds checks — not a subset. Diverging contracts between two functions that the engine assumes are equivalent are a recurring class of subtle bugs. +- **Test with adversarial byte patterns.** Unit tests for variable-width type helpers should include `0xFF...FF` length prefixes, `usize::MAX` boundary cases, and `/J` flag underflow — not just typical inputs. The integration test suite should also exercise an attacker-controlled chain (e.g., a parent with an oversized prefix followed by a `Relative` child) end-to-end through `evaluate_rules` to confirm graceful skip rather than silent misclassification. +- **Treat `pub(crate)` boundaries as hints, not guarantees.** `bytes_consumed` was made `pub(crate)` in the same review pass, narrowing it to engine use only. But the visibility narrowing alone doesn't eliminate the bug — it only reduces the blast radius. Defensive bounds checking is still required because future internal callers may not respect the read-then-call invariant. +- **Keep dual-purpose helpers in sync.** When `read_pstring`/`read_string` change their bounds enforcement, `pstring_bytes_consumed`/`string_bytes_consumed` must change too. Add the file pair to `GOTCHAS.md` S2.1 (or similar) as a known coupling so refactors don't silently break the anchor. 
+ +## Related Issues + +- Issue #38 — Evaluator: implement relative offset resolution +- PR #211 — feat(evaluator): implement relative offset resolution (#38) +- Related solution: [docs/solutions/logic-errors/indirect-offset-resolution.md](../logic-errors/indirect-offset-resolution.md) — sibling work that established the offset-resolver patterns this PR followed +- Related solution: [docs/solutions/logic-errors/indirect-offset-gnu-file-semantics.md](../logic-errors/indirect-offset-gnu-file-semantics.md) — same lesson about deriving expectations from GNU `file` source rather than from running the code +- `GOTCHAS.md` S3.8 — Relative Offsets: Global Anchor, No Save/Restore (load-bearing context for any future contributor touching the evaluator's anchor) diff --git a/docs/src/architecture.md b/docs/src/architecture.md index c6b705b3..5568d7aa 100644 --- a/docs/src/architecture.md +++ b/docs/src/architecture.md @@ -164,8 +164,8 @@ The evaluator executes magic rules against file buffers to identify file types. - `offset/`: Offset resolution submodule - `mod.rs`: Dispatcher (`resolve_offset`) and re-exports - `absolute.rs`: `OffsetError`, `resolve_absolute_offset` - - `indirect.rs`: `resolve_indirect_offset` stub (issue #37) - - `relative.rs`: `resolve_relative_offset` stub (issue #38) + - `indirect.rs`: `resolve_indirect_offset` for indirect pointer-based offset resolution (issue #37, shipped) + - `relative.rs`: `resolve_relative_offset` with GNU `file` semantics (issue #38, PR #211) - `operators/`: Operator application submodule - `mod.rs`: Dispatcher (`apply_operator`) and re-exports - `equality.rs`: `apply_equal`, `apply_not_equal` @@ -185,6 +185,7 @@ The evaluator executes magic rules against file buffers to identify file types. 
- ✅ **Recursion Limiting**: Prevent stack overflow from deep nesting - ✅ **Signedness Coercion**: Automatic value coercion for signed type comparisons (e.g., `0xff` → `-1` for signed byte) - ✅ **Comparison Operators**: Full support for `<`, `>`, `<=`, `>=` with numeric and lexicographic ordering +- ✅ **Relative Offsets**: Resolution against previous-match anchor using GNU `file` semantics (issue #38, PR #211) - 📋 **Indirect Offsets**: Pointer dereferencing (planned) ### 4. I/O Module (`src/io/`) diff --git a/docs/src/compatibility.md b/docs/src/compatibility.md index 3b4c7ddd..88d7cc6a 100644 --- a/docs/src/compatibility.md +++ b/docs/src/compatibility.md @@ -74,7 +74,7 @@ $ rmagic --json example.elf | Basic patterns | ✅ | ✅ | Complete | String, numeric matching | | Hierarchical rules | ✅ | 🔄 | In Progress | Parent-child relationships | | Indirect offsets | ✅ | 📋 | Planned | Pointer dereferencing | -| Relative offsets | ✅ | 📋 | Planned | Position-relative addressing | +| Relative offsets | ✅ | ✅ | Complete | Position-relative addressing (PR #211) | | Search patterns | ✅ | 📋 | Planned | Pattern searching in ranges | | Bitwise operations | ✅ | ✅ | Complete | AND, OR operations | | String operations | ✅ | 📋 | Planned | Case-insensitive, regex | diff --git a/docs/src/evaluator.md b/docs/src/evaluator.md index 7668f707..14fe32e6 100644 --- a/docs/src/evaluator.md +++ b/docs/src/evaluator.md @@ -50,6 +50,8 @@ Maintains state during rule processing: pub struct EvaluationContext { /// Current offset position for relative calculations current_offset: usize, + /// End offset of the most recent successful match (GNU file anchor) + last_match_end: usize, /// Current recursion depth for safety limits recursion_depth: u32, /// Configuration for evaluation behavior @@ -59,14 +61,21 @@ pub struct EvaluationContext { Note: Fields are private; use accessor methods like `current_offset()`, `recursion_depth()`, and `config()`. 
-**Key Methods:** +**Public Methods:** - `new()` - Create context with default configuration - `current_offset()` / `set_current_offset()` - Track current buffer position - `recursion_depth()` - Query current recursion depth - `increment_recursion_depth()` / `decrement_recursion_depth()` - Track recursion safely - `timeout_ms()` - Query configured timeout -- `reset()` - Reset context state for reuse +- `reset()` - Reset context state for reuse (clears `current_offset`, `last_match_end`, and `recursion_depth`) + +**Internal (`pub(crate)`) — engine use only, not callable from outside the crate:** + +- `last_match_end()` - Get end offset of the most recent match (the GNU `file` anchor used for relative offset resolution) +- `set_last_match_end(pos: usize)` - Advance the previous-match anchor (called by `evaluate_rules` after each match) + +External library users should not depend on these methods. Use `evaluate_rules` (or `evaluate_rules_with_config`) with an `EvaluationContext` and let the engine manage the anchor automatically. The anchor is reset alongside the other mutable state by `EvaluationContext::reset()`. ### RuleMatch @@ -94,7 +103,7 @@ The `Value` type is from `parser::ast::Value` and represents the actual matched Handles all offset types safely: - **Absolute offsets**: Direct file positions (`0`, `0x100`) -- **Relative offsets**: Based on previous match positions (`&+4`) +- **Relative offsets**: Resolved using `last_match_end + delta` from the previous match anchor (`&+4`, `&-2`) - **From-end offsets**: Calculated from file size (`-4` from end) - **Bounds checking**: All offset calculations are validated @@ -105,6 +114,10 @@ pub fn resolve_offset( ) -> Result ``` +The evaluator uses `resolve_offset_with_context` internally to thread the previous-match anchor through relative offset resolution. `resolve_offset` (the public API) defaults the anchor to 0. 
For `OffsetSpec::Relative(N)`, this means non-negative deltas resolve like `Absolute(N)` from the start of the buffer, but negative deltas underflow the anchor and return `EvaluationError::InvalidOffset` — they are *not* interpreted like `OffsetSpec::Absolute(-N)` from the end of the buffer. Callers needing GNU `file` anchor semantics (so relative offsets resolve against actual prior matches) should use `evaluate_rules` with an `EvaluationContext`, which tracks the anchor across rules. + +Relative offsets resolve as `last_match_end + delta` with bounds and overflow checks. After each successful match, the context advances `last_match_end` by the bytes consumed by the matched type (c-string types include NUL terminators, pstring types include length prefixes). + ### Type Reading (`evaluator/types/`) Interprets bytes according to type specifications. The types module is organized into submodules for numeric, floating-point, date/timestamp, and string type handling (refactored from a single file in v0.4.2): @@ -550,6 +563,7 @@ assert_eq!(matches_j[0].message, "JPEG-style pstring with self-inclusive length" - [x] Basic evaluation engine structure - [x] Offset resolution (absolute, relative, from-end) +- [x] Relative offset support with previous-match anchor tracking (PR #211, issue #38) - [x] Type reading with endianness support (Byte, Short, Long, Quad, Float, Double, Date, QDate, String, PString with 1/2/4-byte prefixes) - [x] Operator application (Equal, NotEqual, LessThan, GreaterThan, LessEqual, GreaterEqual, BitwiseAnd, BitwiseAndMask) - [x] Hierarchical rule processing with child evaluation @@ -557,7 +571,8 @@ assert_eq!(matches_j[0].message, "JPEG-style pstring with self-inclusive length" - [x] Timeout protection - [x] Recursion depth limiting - [x] Comprehensive test coverage (150+ tests) -- [ ] Indirect offset support (pointer dereferencing) +- [x] Indirect offset support (pointer dereferencing, issue #37) +- [x] Relative offset support (GNU `file` anchor 
semantics, issue #38) - [ ] Regex type support - [ ] Performance optimizations (rule ordering, caching) diff --git a/mise.lock b/mise.lock index 501bf6fb..7bd8b150 100644 --- a/mise.lock +++ b/mise.lock @@ -1,52 +1,53 @@ # @generated - this file is auto-generated by `mise lock` https://mise.jdx.dev/dev-tools/mise-lock.html [[tools.actionlint]] -version = "1.7.11" +version = "1.7.12" backend = "aqua:rhysd/actionlint" [tools.actionlint."platforms.linux-arm64"] -checksum = "sha256:21bc0dfb57a913fe175298c2a9e906ee630f747cb66d0a934d0d4b69f4ee1235" -url = "https://github.com/rhysd/actionlint/releases/download/v1.7.11/actionlint_1.7.11_linux_arm64.tar.gz" +checksum = "sha256:325e971b6ba9bfa504672e29be93c24981eeb1c07576d730e9f7c8805afff0c6" +url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_linux_arm64.tar.gz" [tools.actionlint."platforms.linux-arm64-musl"] -checksum = "sha256:21bc0dfb57a913fe175298c2a9e906ee630f747cb66d0a934d0d4b69f4ee1235" -url = "https://github.com/rhysd/actionlint/releases/download/v1.7.11/actionlint_1.7.11_linux_arm64.tar.gz" +checksum = "sha256:325e971b6ba9bfa504672e29be93c24981eeb1c07576d730e9f7c8805afff0c6" +url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_linux_arm64.tar.gz" [tools.actionlint."platforms.linux-x64"] -checksum = "sha256:900919a84f2229bac68ca9cd4103ea297abc35e9689ebb842c6e34a3d1b01b0a" -url = "https://github.com/rhysd/actionlint/releases/download/v1.7.11/actionlint_1.7.11_linux_amd64.tar.gz" +checksum = "sha256:8aca8db96f1b94770f1b0d72b6dddcb1ebb8123cb3712530b08cc387b349a3d8" +url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_linux_amd64.tar.gz" [tools.actionlint."platforms.linux-x64-baseline"] -checksum = "sha256:900919a84f2229bac68ca9cd4103ea297abc35e9689ebb842c6e34a3d1b01b0a" -url = "https://github.com/rhysd/actionlint/releases/download/v1.7.11/actionlint_1.7.11_linux_amd64.tar.gz" +checksum = 
"sha256:8aca8db96f1b94770f1b0d72b6dddcb1ebb8123cb3712530b08cc387b349a3d8" +url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_linux_amd64.tar.gz" [tools.actionlint."platforms.linux-x64-musl"] -checksum = "sha256:900919a84f2229bac68ca9cd4103ea297abc35e9689ebb842c6e34a3d1b01b0a" -url = "https://github.com/rhysd/actionlint/releases/download/v1.7.11/actionlint_1.7.11_linux_amd64.tar.gz" +checksum = "sha256:8aca8db96f1b94770f1b0d72b6dddcb1ebb8123cb3712530b08cc387b349a3d8" +url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_linux_amd64.tar.gz" [tools.actionlint."platforms.linux-x64-musl-baseline"] -checksum = "sha256:900919a84f2229bac68ca9cd4103ea297abc35e9689ebb842c6e34a3d1b01b0a" -url = "https://github.com/rhysd/actionlint/releases/download/v1.7.11/actionlint_1.7.11_linux_amd64.tar.gz" +checksum = "sha256:8aca8db96f1b94770f1b0d72b6dddcb1ebb8123cb3712530b08cc387b349a3d8" +url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_linux_amd64.tar.gz" [tools.actionlint."platforms.macos-arm64"] -checksum = "sha256:a21ba7366d8329e7223faee0ed69eb13da27fe8acabb356bb7eb0b7f1e1cb6d8" -url = "https://github.com/rhysd/actionlint/releases/download/v1.7.11/actionlint_1.7.11_darwin_arm64.tar.gz" +checksum = "sha256:aba9ced2dee8d27fecca3dc7feb1a7f9a52caefa1eb46f3271ea66b6e0e6953f" +url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_darwin_arm64.tar.gz" +provenance = "github-attestations" [tools.actionlint."platforms.macos-x64"] -checksum = "sha256:17ffc17fed8f0258ef6ad4aed932d3272464c7ef7d64e1cb0d65aa97c9752107" -url = "https://github.com/rhysd/actionlint/releases/download/v1.7.11/actionlint_1.7.11_darwin_amd64.tar.gz" +checksum = "sha256:5b44c3bc2255115c9b69e30efc0fecdf498fdb63c5d58e17084fd5f16324c644" +url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_darwin_amd64.tar.gz" 
[tools.actionlint."platforms.macos-x64-baseline"] -checksum = "sha256:17ffc17fed8f0258ef6ad4aed932d3272464c7ef7d64e1cb0d65aa97c9752107" -url = "https://github.com/rhysd/actionlint/releases/download/v1.7.11/actionlint_1.7.11_darwin_amd64.tar.gz" +checksum = "sha256:5b44c3bc2255115c9b69e30efc0fecdf498fdb63c5d58e17084fd5f16324c644" +url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_darwin_amd64.tar.gz" [tools.actionlint."platforms.windows-x64"] -checksum = "sha256:5414b7124a91f4b5abee62e5c9d84802237734f8d15b9b7032732a32c3ebffa3" -url = "https://github.com/rhysd/actionlint/releases/download/v1.7.11/actionlint_1.7.11_windows_amd64.zip" +checksum = "sha256:6e7241b51e6817ea6a047693d8e6fed13b31819c9a0dd6c5a726e1592d22f6e9" +url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_windows_amd64.zip" [tools.actionlint."platforms.windows-x64-baseline"] -checksum = "sha256:5414b7124a91f4b5abee62e5c9d84802237734f8d15b9b7032732a32c3ebffa3" -url = "https://github.com/rhysd/actionlint/releases/download/v1.7.11/actionlint_1.7.11_windows_amd64.zip" +checksum = "sha256:6e7241b51e6817ea6a047693d8e6fed13b31819c9a0dd6c5a726e1592d22f6e9" +url = "https://github.com/rhysd/actionlint/releases/download/v1.7.12/actionlint_1.7.12_windows_amd64.zip" [[tools.bun]] version = "1.3.11" @@ -97,92 +98,92 @@ checksum = "sha256:9d0e0f923e9626f3bc6044fc32e0d3ab29039aea753f5678ef8801cf26f75 url = "https://github.com/oven-sh/bun/releases/download/bun-v1.3.11/bun-windows-x64-baseline.zip" [[tools.cargo-binstall]] -version = "1.17.8" +version = "1.17.9" backend = "aqua:cargo-bins/cargo-binstall" [tools.cargo-binstall."platforms.linux-arm64"] -checksum = "sha256:81d6245bd1a7a89e914d29af81d82280540e94927e61492a0fc359820cd97abb" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-aarch64-unknown-linux-musl.tgz" +checksum = "sha256:89df253cc00a307209daee7648272bba4d8a342fce084d5922f6b3f4e4db0e25" +url = 
"https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-aarch64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.linux-arm64-musl"] -checksum = "sha256:81d6245bd1a7a89e914d29af81d82280540e94927e61492a0fc359820cd97abb" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-aarch64-unknown-linux-musl.tgz" +checksum = "sha256:89df253cc00a307209daee7648272bba4d8a342fce084d5922f6b3f4e4db0e25" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-aarch64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.linux-x64"] -checksum = "sha256:1da1ef72448db667cc4ae6d48e37451087602c8c07dc61782a4a5e538303e015" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-unknown-linux-musl.tgz" +checksum = "sha256:bdea3fd820b118576316bf69c8240f454857287717809e6ecef010faace901ff" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-x86_64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.linux-x64-baseline"] -checksum = "sha256:1da1ef72448db667cc4ae6d48e37451087602c8c07dc61782a4a5e538303e015" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-unknown-linux-musl.tgz" +checksum = "sha256:bdea3fd820b118576316bf69c8240f454857287717809e6ecef010faace901ff" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-x86_64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.linux-x64-musl"] -checksum = "sha256:1da1ef72448db667cc4ae6d48e37451087602c8c07dc61782a4a5e538303e015" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-unknown-linux-musl.tgz" +checksum = "sha256:bdea3fd820b118576316bf69c8240f454857287717809e6ecef010faace901ff" +url = 
"https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-x86_64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.linux-x64-musl-baseline"] -checksum = "sha256:1da1ef72448db667cc4ae6d48e37451087602c8c07dc61782a4a5e538303e015" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-unknown-linux-musl.tgz" +checksum = "sha256:bdea3fd820b118576316bf69c8240f454857287717809e6ecef010faace901ff" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-x86_64-unknown-linux-musl.tgz" [tools.cargo-binstall."platforms.macos-arm64"] -checksum = "sha256:af87346fdb186f0a2333bc0a30cfddd6faa98b31145ef1bb19c284aedea65972" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-aarch64-apple-darwin.zip" +checksum = "sha256:021d537caa2071bbcd8ec50b210ea45af68a6983cca7780918b3603ef68d2585" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-aarch64-apple-darwin.zip" [tools.cargo-binstall."platforms.macos-x64"] -checksum = "sha256:db353e01b582c97382178db9b4dfe22d81109782e480a38f3db953e62f569952" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-apple-darwin.zip" +checksum = "sha256:1d4fbd74c15274a029dd92e19a0ead8e1831b6e6b60f0f5c3690ef517022c41c" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-x86_64-apple-darwin.zip" [tools.cargo-binstall."platforms.macos-x64-baseline"] -checksum = "sha256:db353e01b582c97382178db9b4dfe22d81109782e480a38f3db953e62f569952" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-apple-darwin.zip" +checksum = "sha256:1d4fbd74c15274a029dd92e19a0ead8e1831b6e6b60f0f5c3690ef517022c41c" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-x86_64-apple-darwin.zip" 
[tools.cargo-binstall."platforms.windows-x64"] -checksum = "sha256:fef07560d4e391812091bb30c6ed1bd5289f74403a0c947b47b8a8c7a597b51b" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-pc-windows-msvc.zip" +checksum = "sha256:d17b413a19592af2cf57f9f742d49bef8837099e1407d73e79de5bc7834c4fd6" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-x86_64-pc-windows-msvc.zip" [tools.cargo-binstall."platforms.windows-x64-baseline"] -checksum = "sha256:fef07560d4e391812091bb30c6ed1bd5289f74403a0c947b47b8a8c7a597b51b" -url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.8/cargo-binstall-x86_64-pc-windows-msvc.zip" +checksum = "sha256:d17b413a19592af2cf57f9f742d49bef8837099e1407d73e79de5bc7834c4fd6" +url = "https://github.com/cargo-bins/cargo-binstall/releases/download/v1.17.9/cargo-binstall-x86_64-pc-windows-msvc.zip" [[tools.cargo-insta]] -version = "1.46.3" +version = "1.47.2" backend = "aqua:mitsuhiko/insta" [tools.cargo-insta."platforms.linux-x64"] -checksum = "sha256:c738c47f8d7e834a0277dddb9410a1f7369d37818738fc6a380f22904a83f6e4" -url = "https://github.com/mitsuhiko/insta/releases/download/1.46.3/cargo-insta-x86_64-unknown-linux-musl.tar.xz" +checksum = "sha256:1c2a2e82200b430f6fa27b6d0ba0059573eae2f32b0b8aa54446184ab7b46ee7" +url = "https://github.com/mitsuhiko/insta/releases/download/1.47.2/cargo-insta-x86_64-unknown-linux-musl.tar.xz" [tools.cargo-insta."platforms.linux-x64-baseline"] -checksum = "sha256:c738c47f8d7e834a0277dddb9410a1f7369d37818738fc6a380f22904a83f6e4" -url = "https://github.com/mitsuhiko/insta/releases/download/1.46.3/cargo-insta-x86_64-unknown-linux-musl.tar.xz" +checksum = "sha256:1c2a2e82200b430f6fa27b6d0ba0059573eae2f32b0b8aa54446184ab7b46ee7" +url = "https://github.com/mitsuhiko/insta/releases/download/1.47.2/cargo-insta-x86_64-unknown-linux-musl.tar.xz" [tools.cargo-insta."platforms.linux-x64-musl"] -checksum = 
"sha256:c738c47f8d7e834a0277dddb9410a1f7369d37818738fc6a380f22904a83f6e4" -url = "https://github.com/mitsuhiko/insta/releases/download/1.46.3/cargo-insta-x86_64-unknown-linux-musl.tar.xz" +checksum = "sha256:1c2a2e82200b430f6fa27b6d0ba0059573eae2f32b0b8aa54446184ab7b46ee7" +url = "https://github.com/mitsuhiko/insta/releases/download/1.47.2/cargo-insta-x86_64-unknown-linux-musl.tar.xz" [tools.cargo-insta."platforms.linux-x64-musl-baseline"] -checksum = "sha256:c738c47f8d7e834a0277dddb9410a1f7369d37818738fc6a380f22904a83f6e4" -url = "https://github.com/mitsuhiko/insta/releases/download/1.46.3/cargo-insta-x86_64-unknown-linux-musl.tar.xz" +checksum = "sha256:1c2a2e82200b430f6fa27b6d0ba0059573eae2f32b0b8aa54446184ab7b46ee7" +url = "https://github.com/mitsuhiko/insta/releases/download/1.47.2/cargo-insta-x86_64-unknown-linux-musl.tar.xz" [tools.cargo-insta."platforms.macos-arm64"] -checksum = "sha256:1e620252db7964d876da6b4956872ad84d099ee281753ac7c850ae24413947df" -url = "https://github.com/mitsuhiko/insta/releases/download/1.46.3/cargo-insta-aarch64-apple-darwin.tar.xz" +checksum = "sha256:4876319b5201b875188351445b754db09f7674b506daa983634c95d6d44ca51e" +url = "https://github.com/mitsuhiko/insta/releases/download/1.47.2/cargo-insta-aarch64-apple-darwin.tar.xz" [tools.cargo-insta."platforms.macos-x64"] -checksum = "sha256:d55ff42a08ad0fc6deed64bb9ab700c069da9c6da40947d9b658cc33fda3dcda" -url = "https://github.com/mitsuhiko/insta/releases/download/1.46.3/cargo-insta-x86_64-apple-darwin.tar.xz" +checksum = "sha256:62efa25c4e9f8182c16ae46f58bc3e9c8fdbe74aaf0409d3ed06909a70d128a5" +url = "https://github.com/mitsuhiko/insta/releases/download/1.47.2/cargo-insta-x86_64-apple-darwin.tar.xz" [tools.cargo-insta."platforms.macos-x64-baseline"] -checksum = "sha256:d55ff42a08ad0fc6deed64bb9ab700c069da9c6da40947d9b658cc33fda3dcda" -url = "https://github.com/mitsuhiko/insta/releases/download/1.46.3/cargo-insta-x86_64-apple-darwin.tar.xz" +checksum = 
"sha256:62efa25c4e9f8182c16ae46f58bc3e9c8fdbe74aaf0409d3ed06909a70d128a5" +url = "https://github.com/mitsuhiko/insta/releases/download/1.47.2/cargo-insta-x86_64-apple-darwin.tar.xz" [tools.cargo-insta."platforms.windows-x64"] -checksum = "sha256:fa0cd6810e393392cf347decacd8a710de9ac95b6747a753f037c46b649209aa" -url = "https://github.com/mitsuhiko/insta/releases/download/1.46.3/cargo-insta-x86_64-pc-windows-msvc.zip" +checksum = "sha256:2f2ffcdda5608f78de53509bdd6a5feba185dad9490b5aad951c35bf8c37fa9e" +url = "https://github.com/mitsuhiko/insta/releases/download/1.47.2/cargo-insta-x86_64-pc-windows-msvc.zip" [tools.cargo-insta."platforms.windows-x64-baseline"] -checksum = "sha256:fa0cd6810e393392cf347decacd8a710de9ac95b6747a753f037c46b649209aa" -url = "https://github.com/mitsuhiko/insta/releases/download/1.46.3/cargo-insta-x86_64-pc-windows-msvc.zip" +checksum = "sha256:2f2ffcdda5608f78de53509bdd6a5feba185dad9490b5aad951c35bf8c37fa9e" +url = "https://github.com/mitsuhiko/insta/releases/download/1.47.2/cargo-insta-x86_64-pc-windows-msvc.zip" [[tools."cargo:cargo-audit"]] version = "0.22.1" @@ -193,7 +194,7 @@ version = "0.7.4" backend = "cargo:cargo-auditable" [[tools."cargo:cargo-cyclonedx"]] -version = "0.5.7" +version = "0.5.9" backend = "cargo:cargo-cyclonedx" [[tools."cargo:cargo-deny"]] @@ -205,7 +206,7 @@ version = "0.31.0" backend = "cargo:cargo-dist" [[tools."cargo:cargo-llvm-cov"]] -version = "0.8.4" +version = "0.8.5" backend = "cargo:cargo-llvm-cov" [[tools."cargo:cargo-machete"]] @@ -213,7 +214,7 @@ version = "0.9.1" backend = "cargo:cargo-machete" [[tools."cargo:cargo-nextest"]] -version = "0.9.129" +version = "0.9.132" backend = "cargo:cargo-nextest" [[tools."cargo:cargo-outdated"]] @@ -221,7 +222,7 @@ version = "0.17.0" backend = "cargo:cargo-outdated" [[tools."cargo:cargo-release"]] -version = "1.1.1" +version = "1.1.2" backend = "cargo:cargo-release" [[tools."cargo:git-cliff"]] @@ -261,7 +262,7 @@ version = "0.15.3" backend = "cargo:mdbook-toc" 
[[tools."cargo:release-plz"]] -version = "0.3.156" +version = "0.3.157" backend = "cargo:release-plz" [[tools.just]] @@ -353,7 +354,7 @@ checksum = "sha256:0fda7ff0a60c0250939fc25361c2d4e6e7853c31c996733fdd5a1dd760bcb url = "https://github.com/lycheeverse/lychee/releases/download/lychee-v0.23.0/lychee-x86_64-windows.exe" [[tools.markdownlint-cli2]] -version = "0.21.0" +version = "0.22.0" backend = "npm:markdownlint-cli2" [[tools."pipx:mdformat"]] @@ -367,10 +368,6 @@ uvx_args = "--with mdformat-gfm --with mdformat-config --with mdformat-footnote version = "4.5.1" backend = "pipx:pre-commit" -[[tools.prettier]] -version = "3.8.1" -backend = "npm:prettier" - [[tools.python]] version = "3.14.3" backend = "core:python" @@ -378,46 +375,57 @@ backend = "core:python" [tools.python."platforms.linux-arm64"] checksum = "sha256:53700338695e402a1a1fe22be4a41fbdacc70e22bb308a48eca8ed67cb7992be" url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz" +provenance = "github-attestations" [tools.python."platforms.linux-arm64-musl"] checksum = "sha256:53700338695e402a1a1fe22be4a41fbdacc70e22bb308a48eca8ed67cb7992be" url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz" +provenance = "github-attestations" [tools.python."platforms.linux-x64"] checksum = "sha256:d7a9f970914bb4c88756fe3bdcc186d4feb90e9500e54f1db47dae4dc9687e39" url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +provenance = "github-attestations" [tools.python."platforms.linux-x64-baseline"] checksum = "sha256:d7a9f970914bb4c88756fe3bdcc186d4feb90e9500e54f1db47dae4dc9687e39" url = 
"https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +provenance = "github-attestations" [tools.python."platforms.linux-x64-musl"] checksum = "sha256:d7a9f970914bb4c88756fe3bdcc186d4feb90e9500e54f1db47dae4dc9687e39" url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +provenance = "github-attestations" [tools.python."platforms.linux-x64-musl-baseline"] checksum = "sha256:d7a9f970914bb4c88756fe3bdcc186d4feb90e9500e54f1db47dae4dc9687e39" url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +provenance = "github-attestations" [tools.python."platforms.macos-arm64"] checksum = "sha256:c43aecde4a663aebff99b9b83da0efec506479f1c3f98331442f33d2c43501f9" url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-aarch64-apple-darwin-install_only_stripped.tar.gz" +provenance = "github-attestations" [tools.python."platforms.macos-x64"] checksum = "sha256:9ab41dbc2f100a2a45d1833b9c11165f51051c558b5213eda9a9731d5948a0c0" url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-apple-darwin-install_only_stripped.tar.gz" +provenance = "github-attestations" [tools.python."platforms.macos-x64-baseline"] checksum = "sha256:9ab41dbc2f100a2a45d1833b9c11165f51051c558b5213eda9a9731d5948a0c0" url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-apple-darwin-install_only_stripped.tar.gz" +provenance = "github-attestations" [tools.python."platforms.windows-x64"] checksum = "sha256:bbe19034b35b0267176a7442575ae7dc6343480fd4d35598cb7700173d431e09" url = 
"https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-pc-windows-msvc-install_only_stripped.tar.gz" +provenance = "github-attestations" [tools.python."platforms.windows-x64-baseline"] checksum = "sha256:bbe19034b35b0267176a7442575ae7dc6343480fd4d35598cb7700173d431e09" url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.14.3+20260324-x86_64-pc-windows-msvc-install_only_stripped.tar.gz" +provenance = "github-attestations" [[tools.rust]] version = "1.94.1" diff --git a/mise.toml b/mise.toml index 69370e54..caa4a31d 100644 --- a/mise.toml +++ b/mise.toml @@ -2,13 +2,13 @@ # Several tools are pinned to "latest" to enable the idiomatic version file support. The version is managed by a version file. [tools] rust = { version = "latest", components = "llvm-tools,cargo,rustfmt,clippy", profile = "default", targets = "aarch64-apple-darwin,aarch64-unknown-linux-gnu,aarch64-pc-windows-msvc,x86_64-apple-darwin,x86_64-unknown-linux-gnu,x86_64-unknown-linux-musl,x86_64-pc-windows-msvc" } -cargo-binstall = "latest" -cargo-insta = "1.46.3" +cargo-binstall = "1.17.9" +cargo-insta = "1.47.2" "cargo:cargo-audit" = "0.22.1" "cargo:cargo-deny" = "0.19.0" "cargo:cargo-dist" = "0.31.0" -"cargo:cargo-llvm-cov" = "0.8.4" -"cargo:cargo-nextest" = "0.9.129" +"cargo:cargo-llvm-cov" = "0.8.5" +"cargo:cargo-nextest" = "0.9.132" "cargo:mdbook" = "0.5.2" "cargo:mdbook-linkcheck" = "0.7.7" "cargo:mdbook-tabs" = "0.3.4" @@ -20,18 +20,17 @@ cargo-insta = "1.46.3" just = "latest" python = "latest" "cargo:cargo-outdated" = "0.17.0" -"cargo:cargo-release" = "1.1.1" +"cargo:cargo-release" = "1.1.2" "cargo:cargo-auditable" = "0.7.4" -"cargo:cargo-cyclonedx" = "0.5.7" +"cargo:cargo-cyclonedx" = "0.5.9" "pipx:mdformat" = { version = "1.0.0", uvx_args = "--with mdformat-gfm --with mdformat-config --with mdformat-footnote --with mdformat-front-matters --with mdformat-simple-breaks --with mdformat-web --with 
mdformat-wikilink --with mdformat-gfm-alerts --with mdformat-toc" } -prettier = "3.8.1" -actionlint = "1.7.11" +actionlint = "1.7.12" lychee = "0.23.0" -markdownlint-cli2 = "0.21.0" +markdownlint-cli2 = "0.22.0" "cargo:cargo-machete" = "0.9.1" "cargo:git-cliff" = "2.12.0" scorecard = "5.4.0" -"cargo:release-plz" = "0.3.156" +"cargo:release-plz" = "0.3.157" "pipx:pre-commit" = "latest" "bun" = "latest" shellcheck = "0.11.0" diff --git a/src/evaluator/engine/mod.rs b/src/evaluator/engine/mod.rs index ac08c98c..7e782d00 100644 --- a/src/evaluator/engine/mod.rs +++ b/src/evaluator/engine/mod.rs @@ -14,7 +14,7 @@ use crate::parser::ast::MagicRule; use crate::{EvaluationConfig, LibmagicError}; use super::{EvaluationContext, RuleMatch, offset, operators, types}; -use log::debug; +use log::{debug, warn}; /// Evaluate a single magic rule against a file buffer /// @@ -62,6 +62,32 @@ use log::debug; /// assert!(result.is_none()); // Should not match /// ``` /// +/// # Relative offset behavior +/// +/// If the rule uses [`OffsetSpec::Relative`](crate::parser::ast::OffsetSpec::Relative), +/// this function resolves it against anchor 0. There is no "previous match" +/// context when evaluating a single rule in isolation. The two cases are: +/// +/// - **Non-negative delta (`Relative(N)` for `N >= 0`):** resolves to +/// absolute offset `N`, behaving like `Absolute(N)` from the start of the +/// buffer. +/// - **Negative delta (`Relative(-N)` for `N > 0`):** underflows the +/// anchor (`0 - N`) and returns `EvaluationError::InvalidOffset`. This is +/// *not* equivalent to `Absolute(-N)`, which is interpreted as +/// "from end" of the buffer. +/// +/// Callers needing the GNU `file` anchor semantics (where relative offsets +/// resolve against the end of the most recent match) should use +/// [`evaluate_rules`] with an +/// [`EvaluationContext`](crate::evaluator::EvaluationContext), which threads +/// the anchor across the rule list. 
+/// +/// **Behavior change:** before the relative-offset feature landed in v0.5, +/// `OffsetSpec::Relative` returned `EvaluationError::UnsupportedType` here. +/// It now resolves successfully against anchor 0. Callers with existing +/// error-handling code that pattern-matched `UnsupportedType` for relative +/// offsets must remove that arm. +/// /// # Errors /// /// * `LibmagicError::EvaluationError` - If offset resolution fails, buffer access is out of bounds, @@ -69,9 +95,26 @@ use log::debug; pub fn evaluate_single_rule( rule: &MagicRule, buffer: &[u8], +) -> Result<Option<(usize, crate::parser::ast::Value)>, LibmagicError> { + // Default the relative-offset anchor to 0 -- top-level evaluation with + // no prior match resolves Relative(N) to absolute N (libmagic semantics). + evaluate_single_rule_with_anchor(rule, buffer, 0) +} + +/// Internal: evaluate a single rule against a buffer, supplying an explicit +/// anchor for relative-offset resolution. +/// +/// This is the worker behind both [`evaluate_single_rule`] (which defaults +/// the anchor to 0) and [`evaluate_rules`] (which threads the anchor from +/// `EvaluationContext::last_match_end()`). +fn evaluate_single_rule_with_anchor( + rule: &MagicRule, + buffer: &[u8], + last_match_end: usize, ) -> Result<Option<(usize, crate::parser::ast::Value)>, LibmagicError> { // Step 1: Resolve the offset specification to an absolute position - let absolute_offset = offset::resolve_offset(&rule.offset, buffer)?; + let absolute_offset = + offset::resolve_offset_with_context(&rule.offset, buffer, last_match_end)?; // Step 2: Read and interpret bytes at the resolved offset according to the rule's type let read_value = types::read_typed_value(buffer, absolute_offset, &rule.typ) @@ -115,7 +158,19 @@ pub fn evaluate_single_rule( /// /// * `rules` - The list of magic rules to evaluate /// * `buffer` - The file buffer to evaluate against -/// * `context` - Mutable evaluation context for state management +/// * `context` - Mutable evaluation context for state management.
**Callers +/// reusing a context across multiple buffers must call +/// [`EvaluationContext::reset`](crate::evaluator::EvaluationContext::reset) +/// between calls** -- the GNU `file` previous-match anchor and the +/// recursion-depth counter both advance during evaluation and would +/// otherwise leak across buffers. The same applies when this function +/// returns `Err` mid-evaluation (e.g., `LibmagicError::Timeout` or +/// `RecursionLimitExceeded`): both the anchor and (potentially) the +/// recursion depth are left in a partially-advanced state, and a retry +/// on the same context without `reset()` will resolve relative offsets +/// against the stale anchor and apply the wrong recursion budget. +/// [`evaluate_rules_with_config`] always builds a fresh context and is the +/// safer choice when context reuse isn't required. /// /// # Returns /// @@ -187,33 +242,45 @@ pub fn evaluate_rules( return Err(LibmagicError::Timeout { timeout_ms }); } - // Evaluate the current rule with graceful error handling - let match_data = match evaluate_single_rule(rule, buffer) { - Ok(data) => data, - Err( - e @ (LibmagicError::EvaluationError( - crate::error::EvaluationError::BufferOverrun { .. } - | crate::error::EvaluationError::InvalidOffset { .. } - | crate::error::EvaluationError::TypeReadError( - crate::evaluator::types::TypeReadError::BufferOverrun { .. } - | crate::evaluator::types::TypeReadError::InvalidPStringLength { .. }, - ), - ) - | LibmagicError::IoError(_)), - ) => { - // Expected data-dependent evaluation errors -- skip gracefully. - // TypeReadError::UnsupportedType is intentionally NOT caught here - // so that evaluator capability gaps propagate as errors. - debug!("Skipping rule '{}': {}", rule.message, e); - continue; - } - Err(e) => { - // Unexpected errors (InternalError, UnsupportedType, etc.) should propagate - return Err(e); - } - }; + // Evaluate the current rule with graceful error handling. 
+ // Pass the GNU `file` anchor so OffsetSpec::Relative resolves + // correctly against the previous match's end position. + let match_data = + match evaluate_single_rule_with_anchor(rule, buffer, context.last_match_end()) { + Ok(data) => data, + Err( + e @ (LibmagicError::EvaluationError( + crate::error::EvaluationError::BufferOverrun { .. } + | crate::error::EvaluationError::InvalidOffset { .. } + | crate::error::EvaluationError::TypeReadError( + crate::evaluator::types::TypeReadError::BufferOverrun { .. } + | crate::evaluator::types::TypeReadError::InvalidPStringLength { .. }, + ), + ) + | LibmagicError::IoError(_)), + ) => { + // Expected data-dependent evaluation errors -- skip gracefully. + // TypeReadError::UnsupportedType is intentionally NOT caught here + // so that evaluator capability gaps propagate as errors. + debug!("Skipping rule '{}': {}", rule.message, e); + continue; + } + Err(e) => { + // Unexpected errors (InternalError, UnsupportedType, etc.) should propagate + return Err(e); + } + }; if let Some((absolute_offset, read_value)) = match_data { + // Advance the GNU `file` previous-match anchor BEFORE recursing + // into children, so children and their descendants see the new + // anchor. The anchor is updated unconditionally to the end of + // this match -- it may move forward or backward depending on + // where successive rules match (it is *not* a high-watermark). + let consumed = types::bytes_consumed(buffer, absolute_offset, &rule.typ); + let new_anchor = absolute_offset.saturating_add(consumed); + context.set_last_match_end(new_anchor); + let match_result = RuleMatch { message: rule.message.clone(), offset: absolute_offset, @@ -265,8 +332,13 @@ pub fn evaluate_rules( // failures are caught and logged inside the recursive evaluate_rules // call (they never propagate here). This arm guards against future // changes that might alter that error-handling strategy. 
- debug!( - "Skipping child evaluation under rule '{}': {}", + // + // If this fires, the parent match is still emitted but the entire + // child subtree is silently dropped -- which means a partial, + // possibly-incorrect classification is returned to the caller. + // Logged at warn! (not debug!) so the asymmetry is visible. + warn!( + "Discarding child evaluation under rule '{}' due to unexpected error: {} -- parent match is still emitted; investigate the recursive evaluate_rules error-handling path", rule.message, e ); } diff --git a/src/evaluator/engine/tests.rs b/src/evaluator/engine/tests.rs index b3352dac..c3c57cd8 100644 --- a/src/evaluator/engine/tests.rs +++ b/src/evaluator/engine/tests.rs @@ -4,6 +4,153 @@ use super::*; use crate::parser::ast::{Endianness, OffsetSpec, Operator, TypeKind, Value}; +#[test] +fn test_evaluate_single_rule_relative_resolves_against_anchor_zero() { + // Public evaluate_single_rule has no EvaluationContext, so OffsetSpec::Relative + // resolves against an implicit anchor of 0 -- equivalent to absolute offset N. + // Pin this contract so a future refactor cannot silently regress to UnsupportedType. + let rule = MagicRule { + offset: OffsetSpec::Relative(3), + typ: TypeKind::Byte { signed: false }, + op: Operator::Equal, + value: Value::Uint(0xCC), + message: "relative-no-context".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + + // Anchor=0 + delta 3 -> reads at absolute offset 3. 
+ let buffer = &[0xAA, 0xBB, 0xDD, 0xCC, 0xEE]; + let result = evaluate_single_rule(&rule, buffer).unwrap(); + assert!( + result.is_some(), + "evaluate_single_rule with Relative(3) should resolve to absolute 3" + ); + let (offset, value) = result.unwrap(); + assert_eq!(offset, 3); + assert_eq!(value, Value::Uint(0xCC)); +} + +#[test] +fn test_evaluate_rules_anchor_near_saturation_skips_relative_child_gracefully() { + // Pin the contract that an anchor at or near `usize::MAX` does not + // panic and instead causes subsequent Relative rules to fail bounds + // checks gracefully. We can't construct a real match at usize::MAX + // (no realistic buffer is that big), so inject the saturated anchor + // directly via the pub(crate) setter and then evaluate a Relative rule. + use crate::EvaluationConfig; + use crate::evaluator::EvaluationContext; + + let buffer = [0xAA, 0xBB, 0xCC, 0xDD]; + let mut ctx = EvaluationContext::new(EvaluationConfig::default()); + ctx.set_last_match_end(usize::MAX); + + // Relative(0) -> target = usize::MAX, which is >= buffer.len() and + // returns BufferOverrun -> graceful skip in evaluate_rules. + let rule_zero = MagicRule { + offset: OffsetSpec::Relative(0), + typ: TypeKind::Byte { signed: false }, + op: Operator::Equal, + value: Value::Uint(0xAA), + message: "rel-zero-near-sat".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + let matches = evaluate_rules(&[rule_zero], &buffer, &mut ctx).unwrap(); + assert!( + matches.is_empty(), + "Relative(0) at usize::MAX anchor must skip, not match or panic" + ); + + // Relative(+1) -> checked_add_signed -> overflow -> InvalidOffset -> skip. 
+ ctx.set_last_match_end(usize::MAX); + let rule_pos = MagicRule { + offset: OffsetSpec::Relative(1), + typ: TypeKind::Byte { signed: false }, + op: Operator::Equal, + value: Value::Uint(0xAA), + message: "rel-plus-one-near-sat".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + let matches = evaluate_rules(&[rule_pos], &buffer, &mut ctx).unwrap(); + assert!( + matches.is_empty(), + "Relative(+1) at usize::MAX anchor must skip via InvalidOffset, not panic" + ); + + // Relative(-N) where N is small -> usize::MAX - N, still >= buffer.len() -> skip. + ctx.set_last_match_end(usize::MAX); + let rule_neg = MagicRule { + offset: OffsetSpec::Relative(-1), + typ: TypeKind::Byte { signed: false }, + op: Operator::Equal, + value: Value::Uint(0xAA), + message: "rel-minus-one-near-sat".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + let matches = evaluate_rules(&[rule_neg], &buffer, &mut ctx).unwrap(); + assert!( + matches.is_empty(), + "Relative(-1) at usize::MAX anchor must skip, not panic" + ); +} + +#[test] +fn test_evaluate_single_rule_relative_negative_with_zero_anchor_errors() { + // Public evaluate_single_rule uses an implicit anchor of 0. A negative + // Relative delta underflows the anchor and must return + // EvaluationError::InvalidOffset -- NOT Ok(None) (the "no match" path) + // and NOT Absolute(-N)-style from-end semantics. Pin the contract so a + // future refactor can't silently convert this to a graceful skip. 
+ use crate::LibmagicError; + use crate::error::EvaluationError; + + let rule = MagicRule { + offset: OffsetSpec::Relative(-1), + typ: TypeKind::Byte { signed: false }, + op: Operator::Equal, + value: Value::Uint(0xAA), + message: "rel-neg-top-level".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + let buffer = &[0xAA, 0xBB]; + let err = evaluate_single_rule(&rule, buffer).unwrap_err(); + assert!( + matches!( + err, + LibmagicError::EvaluationError(EvaluationError::InvalidOffset { offset: -1 }) + ), + "Relative(-1) at anchor 0 must Err(InvalidOffset), got {err:?}" + ); +} + +#[test] +fn test_evaluate_single_rule_relative_zero_resolves_to_buffer_start() { + // Relative(0) with anchor=0 resolves to absolute 0. + let rule = MagicRule { + offset: OffsetSpec::Relative(0), + typ: TypeKind::Byte { signed: false }, + op: Operator::Equal, + value: Value::Uint(0xAA), + message: "relative-zero".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + + let buffer = &[0xAA, 0xBB]; + let result = evaluate_single_rule(&rule, buffer).unwrap().unwrap(); + assert_eq!(result.0, 0); +} + #[test] fn test_evaluate_single_rule_byte_equal_match() { let rule = MagicRule { diff --git a/src/evaluator/mod.rs b/src/evaluator/mod.rs index 7f91c49b..8d57184c 100644 --- a/src/evaluator/mod.rs +++ b/src/evaluator/mod.rs @@ -40,6 +40,16 @@ pub use engine::{evaluate_rules, evaluate_rules_with_config, evaluate_single_rul pub struct EvaluationContext { /// Current offset position in the file buffer current_offset: usize, + /// End offset of the most recent successful match. + /// + /// This is the GNU `file`/libmagic anchor used to resolve relative + /// (`&+N` / `&-N`) offsets. It is updated to the end of the most + /// recently matched rule -- the value may *increase or decrease* as + /// successive rules match at different positions; it is not a + /// high-watermark. 
A fresh context starts with this set to 0, which + /// matches libmagic's behavior of resolving top-level relative offsets + /// from the file start. + last_match_end: usize, /// Current recursion depth for nested rule evaluation recursion_depth: u32, /// Configuration settings for evaluation behavior @@ -66,6 +76,7 @@ impl EvaluationContext { pub const fn new(config: EvaluationConfig) -> Self { Self { current_offset: 0, + last_match_end: 0, recursion_depth: 0, config, } @@ -90,6 +101,34 @@ impl EvaluationContext { self.current_offset = offset; } + /// Get the end offset of the most recent successful match. + /// + /// This is the GNU `file`/libmagic anchor used to resolve relative + /// (`&+N` / `&-N`) offset specifications. A fresh context returns 0, + /// which makes top-level relative offsets resolve from the file start. + /// + /// `pub(crate)` because the anchor is an internal engine detail; external + /// consumers should not couple to it. + #[must_use] + pub(crate) const fn last_match_end(&self) -> usize { + self.last_match_end + } + + /// Set the end offset of the most recent successful match. + /// + /// Called by the evaluation engine after a rule matches, to advance the + /// anchor used by subsequent relative offset resolution. The new value + /// is typically `match_offset + bytes_consumed_by_type`. + /// + /// `pub(crate)` because external callers should not be able to inject + /// arbitrary anchor state. External callers that need to clear the + /// anchor between buffer evaluations should call + /// `EvaluationContext::reset()`, which resets the anchor, current + /// offset, and recursion depth together. + pub(crate) fn set_last_match_end(&mut self, offset: usize) { + self.last_match_end = offset; + } + /// Get the current recursion depth /// /// # Returns @@ -195,6 +234,7 @@ impl EvaluationContext { /// the same configuration settings. 
pub fn reset(&mut self) { self.current_offset = 0; + self.last_match_end = 0; self.recursion_depth = 0; } } diff --git a/src/evaluator/offset/mod.rs b/src/evaluator/offset/mod.rs index b92c9a65..3da45616 100644 --- a/src/evaluator/offset/mod.rs +++ b/src/evaluator/offset/mod.rs @@ -32,11 +32,26 @@ pub(crate) fn map_offset_error(e: &OffsetError, original_offset: i64) -> Libmagi } } -/// Resolve any offset specification to an absolute position +/// Resolve any offset specification to an absolute position. /// -/// This is a higher-level function that handles all types of offset specifications. -/// Supports absolute, from-end, and indirect offsets. Relative offsets are not yet -/// implemented. +/// Convenience wrapper for callers that do not have a relative-offset anchor +/// (e.g., tests, top-level evaluation with no prior match). Internally +/// delegates with `last_match_end = 0`. For `OffsetSpec::Relative`, that +/// means non-negative deltas behave like absolute offsets from the start of +/// the buffer (`Relative(N)` for `N >= 0` resolves to absolute `N`), but +/// negative deltas underflow the anchor and return +/// `EvaluationError::InvalidOffset` -- they are *not* interpreted like +/// `OffsetSpec::Absolute(-N)` from the end of the buffer. Callers that need +/// relative offsets to anchor against actual prior matches should use +/// `evaluate_rules` and let the engine thread the anchor. +/// +/// **Behavior change:** before the relative-offset feature landed in v0.5, +/// this function returned `EvaluationError::UnsupportedType` for +/// `OffsetSpec::Relative`. It now resolves against anchor 0, which can +/// succeed (non-negative delta) or fail with `InvalidOffset` (negative +/// delta) depending on the value. Callers with existing error-handling code +/// that pattern-matched `UnsupportedType` for relative offsets must remove +/// that arm. 
/// /// # Arguments /// @@ -64,12 +79,51 @@ pub(crate) fn map_offset_error(e: &OffsetError, original_offset: i64) -> Libmagi /// /// * `LibmagicError::EvaluationError` - If offset resolution fails pub fn resolve_offset(spec: &OffsetSpec, buffer: &[u8]) -> Result { + resolve_offset_with_context(spec, buffer, 0) +} + +/// Resolve any offset specification, including relative offsets, against a +/// previous-match anchor. +/// +/// This is the full dispatcher used by the evaluation engine. It handles all +/// `OffsetSpec` variants: +/// +/// - [`OffsetSpec::Absolute`] / [`OffsetSpec::FromEnd`]: resolved against the +/// buffer (sign-aware), `last_match_end` ignored. +/// - [`OffsetSpec::Indirect`]: resolved by reading a pointer value from the +/// buffer, `last_match_end` ignored. +/// - [`OffsetSpec::Relative`]: resolved as `last_match_end + delta`, +/// bounds-checked. The anchor `0` makes top-level relative offsets resolve +/// from the file start. +/// +/// `pub(crate)` because the anchor-threading contract is internal to the +/// evaluation engine -- external callers use [`resolve_offset`] (which +/// hardcodes anchor 0) or go through `evaluate_rules`. +/// +/// # Arguments +/// +/// * `spec` - The offset specification to resolve +/// * `buffer` - The file buffer to resolve against +/// * `last_match_end` - End offset of the most recent successful match. +/// Supplied by the engine via `EvaluationContext::last_match_end()`. Pass +/// `0` if no prior match exists. +/// +/// # Errors +/// +/// * `LibmagicError::EvaluationError` - If offset resolution fails for any +/// variant. Relative-offset failures surface as `BufferOverrun` (target +/// past end of buffer) or `InvalidOffset` (arithmetic over/underflow). 
+pub(crate) fn resolve_offset_with_context( + spec: &OffsetSpec, + buffer: &[u8], + last_match_end: usize, +) -> Result { match spec { OffsetSpec::Absolute(offset) => { resolve_absolute_offset(*offset, buffer).map_err(|e| map_offset_error(&e, *offset)) } OffsetSpec::Indirect { .. } => indirect::resolve_indirect_offset(spec, buffer), - OffsetSpec::Relative(_) => relative::resolve_relative_offset(spec, buffer), + OffsetSpec::Relative(_) => relative::resolve_relative_offset(spec, buffer, last_match_end), OffsetSpec::FromEnd(offset) => { // FromEnd is handled the same as negative Absolute offsets resolve_absolute_offset(*offset, buffer).map_err(|e| map_offset_error(&e, *offset)) @@ -142,21 +196,49 @@ mod tests { } #[test] - fn test_resolve_offset_relative_not_implemented() { + fn test_resolve_offset_relative_via_context() { + // Anchor 4 + delta 3 = absolute 7, in-bounds. + let buffer = b"0123456789ABCDEF"; + let spec = OffsetSpec::Relative(3); + let resolved = resolve_offset_with_context(&spec, buffer, 4).unwrap(); + assert_eq!(resolved, 7); + } + + #[test] + fn test_resolve_offset_relative_top_level_default() { + // Calling resolve_offset (no context) should default the anchor to 0. + let buffer = b"0123456789ABCDEF"; + let spec = OffsetSpec::Relative(5); + assert_eq!(resolve_offset(&spec, buffer).unwrap(), 5); + } + + #[test] + fn test_resolve_offset_with_context_passthrough_absolute() { + // The context-aware dispatcher must not affect non-relative variants. let buffer = b"Test data"; - let spec = OffsetSpec::Relative(4); + let spec = OffsetSpec::Absolute(4); + // last_match_end is irrelevant for Absolute. 
+ assert_eq!(resolve_offset_with_context(&spec, buffer, 100).unwrap(), 4); + } - let result = resolve_offset(&spec, buffer); - assert!(result.is_err()); + #[test] + fn test_resolve_offset_with_context_passthrough_from_end() { + let buffer = b"Test data"; + let spec = OffsetSpec::FromEnd(-3); + assert_eq!(resolve_offset_with_context(&spec, buffer, 999).unwrap(), 6); + } - match result.unwrap_err() { - LibmagicError::EvaluationError(crate::error::EvaluationError::UnsupportedType { - type_name, - }) => { - assert!(type_name.contains("Relative offsets not yet implemented")); - } - _ => panic!("Expected EvaluationError with UnsupportedType"), - } + #[test] + fn test_resolve_offset_with_context_passthrough_indirect() { + // Same indirect setup as test_resolve_offset_indirect_success above. + let buffer = b"\x05TestXdata"; + let spec = OffsetSpec::Indirect { + base_offset: 0, + pointer_type: crate::parser::ast::TypeKind::Byte { signed: false }, + adjustment: 0, + endian: crate::parser::ast::Endianness::Little, + }; + assert_eq!(resolve_offset_with_context(&spec, buffer, 42).unwrap(), 5); } #[test] diff --git a/src/evaluator/offset/relative.rs b/src/evaluator/offset/relative.rs index d1647957..f192cc04 100644 --- a/src/evaluator/offset/relative.rs +++ b/src/evaluator/offset/relative.rs @@ -1,27 +1,180 @@ // Copyright (c) 2025-2026 the libmagic-rs contributors // SPDX-License-Identifier: Apache-2.0 -//! Relative offset resolution (not yet implemented) +//! Relative offset resolution. +//! +//! Relative offsets (`&+N` / `&-N` in magic-file syntax) resolve against the +//! end position of the most recent successful match -- the GNU `file` +//! "previous match" anchor. The anchor is supplied by the evaluation engine +//! via `EvaluationContext::last_match_end()` and starts at 0 for a fresh +//! evaluation, which makes top-level relative offsets resolve from the file +//! start (matching libmagic semantics). 
use crate::LibmagicError; use crate::parser::ast::OffsetSpec; -/// Resolve a relative offset specification +/// Resolve a relative offset specification against the previous-match anchor. /// -/// Relative offsets are calculated relative to the current evaluation position -/// rather than from the start of the file. +/// The result is `last_match_end + delta` (where `delta` may be negative), +/// bounds-checked against the buffer. Both arithmetic underflow/overflow and +/// out-of-buffer targets are reported as `LibmagicError::EvaluationError`. +/// +/// # Arguments +/// +/// * `spec` - Must be `OffsetSpec::Relative(delta)` (debug-asserted). +/// * `buffer` - The file buffer used for the bounds check. +/// * `last_match_end` - End offset of the most recent successful match, or +/// `0` if no prior match exists in this evaluation pass. /// /// # Errors /// -/// Currently returns `LibmagicError::EvaluationError` with `UnsupportedType` -/// as relative offset resolution is not yet implemented. -// TODO: Implement relative offset resolution (issue #38) -pub fn resolve_relative_offset(spec: &OffsetSpec, _buffer: &[u8]) -> Result { +/// * `EvaluationError::InvalidOffset` -- arithmetic over/underflow, or the +/// delta cannot be represented as `isize` on the current target. Caught +/// by the engine's graceful-skip arm, so the rule is silently dropped. +/// * `EvaluationError::BufferOverrun` -- the resolved target is at or past +/// the end of the buffer. Same graceful-skip treatment. +/// * `EvaluationError::InternalError` -- only if called with a non-`Relative` +/// spec (programming error; debug-asserts in test/dev builds). This +/// variant is intentionally NOT in the engine's graceful-skip list and +/// will terminate evaluation if it ever fires in release builds. 
+pub fn resolve_relative_offset( + spec: &OffsetSpec, + buffer: &[u8], + last_match_end: usize, +) -> Result { debug_assert!( matches!(spec, OffsetSpec::Relative(_)), "resolve_relative_offset called with non-relative spec" ); - Err(LibmagicError::EvaluationError( - crate::error::EvaluationError::unsupported_type("Relative offsets not yet implemented"), - )) + let OffsetSpec::Relative(delta) = spec else { + // Defensive: outside of debug builds, fall through with a clear error + // rather than relying on the assertion. + return Err(LibmagicError::EvaluationError( + crate::error::EvaluationError::internal_error( + "resolve_relative_offset called with non-relative spec", + ), + )); + }; + let delta = *delta; + + let delta_isize = isize::try_from(delta).map_err(|_| { + LibmagicError::EvaluationError(crate::error::EvaluationError::InvalidOffset { + offset: delta, + }) + })?; + + let target = + last_match_end + .checked_add_signed(delta_isize) + .ok_or(LibmagicError::EvaluationError( + crate::error::EvaluationError::InvalidOffset { offset: delta }, + ))?; + + if target >= buffer.len() { + return Err(LibmagicError::EvaluationError( + crate::error::EvaluationError::BufferOverrun { offset: target }, + )); + } + + Ok(target) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::error::EvaluationError; + + fn unwrap_eval_err(err: LibmagicError) -> EvaluationError { + match err { + LibmagicError::EvaluationError(e) => e, + other => panic!("expected EvaluationError, got {other:?}"), + } + } + + #[test] + fn positive_delta_resolves() { + let buf = b"0123456789ABCDEF"; + let spec = OffsetSpec::Relative(3); + assert_eq!(resolve_relative_offset(&spec, buf, 4).unwrap(), 7); + } + + #[test] + fn negative_delta_resolves() { + let buf = b"0123456789ABCDEF"; + let spec = OffsetSpec::Relative(-3); + assert_eq!(resolve_relative_offset(&spec, buf, 8).unwrap(), 5); + } + + #[test] + fn zero_delta_resolves_to_anchor() { + let buf = b"0123456789ABCDEF"; + let spec = 
OffsetSpec::Relative(0); + assert_eq!(resolve_relative_offset(&spec, buf, 4).unwrap(), 4); + } + + #[test] + fn top_level_anchor_zero_resolves_from_start() { + // last_match_end = 0 means top-level: Relative(5) -> absolute 5. + let buf = b"0123456789ABCDEF"; + let spec = OffsetSpec::Relative(5); + assert_eq!(resolve_relative_offset(&spec, buf, 0).unwrap(), 5); + } + + #[test] + fn top_level_anchor_zero_with_zero_delta_is_zero() { + let buf = b"0123456789ABCDEF"; + let spec = OffsetSpec::Relative(0); + assert_eq!(resolve_relative_offset(&spec, buf, 0).unwrap(), 0); + } + + #[test] + fn last_valid_index_resolves() { + // 16-byte buffer, last valid index is 15. + let buf = b"0123456789ABCDEF"; + let spec = OffsetSpec::Relative(0); + assert_eq!(resolve_relative_offset(&spec, buf, 15).unwrap(), 15); + } + + #[test] + fn negative_delta_underflows_to_invalid_offset() { + let buf = b"0123456789ABCDEF"; + let spec = OffsetSpec::Relative(-5); + let err = unwrap_eval_err(resolve_relative_offset(&spec, buf, 2).unwrap_err()); + assert!( + matches!(err, EvaluationError::InvalidOffset { offset: -5 }), + "got {err:?}" + ); + } + + #[test] + fn positive_delta_overflows_to_invalid_offset() { + // anchor=usize::MAX, delta=+1 -> checked_add_signed returns None. + let buf = b"0123456789ABCDEF"; + let spec = OffsetSpec::Relative(1); + let err = unwrap_eval_err(resolve_relative_offset(&spec, buf, usize::MAX).unwrap_err()); + assert!( + matches!(err, EvaluationError::InvalidOffset { offset: 1 }), + "got {err:?}" + ); + } + + #[test] + fn target_past_buffer_end_returns_buffer_overrun() { + let buf = b"0123456789ABCDEF"; // len 16 + let spec = OffsetSpec::Relative(50); + let err = unwrap_eval_err(resolve_relative_offset(&spec, buf, 10).unwrap_err()); + assert!( + matches!(err, EvaluationError::BufferOverrun { offset: 60 }), + "got {err:?}" + ); + } + + #[test] + fn target_equal_to_buffer_len_is_overrun() { + // Resolved offset == buffer.len() is out of bounds. 
+ let buf = b"0123456789ABCDEF"; // len 16 + let spec = OffsetSpec::Relative(1); + let err = unwrap_eval_err(resolve_relative_offset(&spec, buf, 15).unwrap_err()); + assert!(matches!(err, EvaluationError::BufferOverrun { offset: 16 })); + } } diff --git a/src/evaluator/tests.rs b/src/evaluator/tests.rs index c73cc658..49bb0190 100644 --- a/src/evaluator/tests.rs +++ b/src/evaluator/tests.rs @@ -140,16 +140,19 @@ fn test_evaluation_context_reset() { // Modify the context state context.set_current_offset(100); + context.set_last_match_end(75); context.increment_recursion_depth().unwrap(); context.increment_recursion_depth().unwrap(); assert_eq!(context.current_offset(), 100); + assert_eq!(context.last_match_end(), 75); assert_eq!(context.recursion_depth(), 2); // Reset should restore initial state but keep config context.reset(); assert_eq!(context.current_offset(), 0); + assert_eq!(context.last_match_end(), 0); assert_eq!(context.recursion_depth(), 0); assert_eq!( context.config().max_recursion_depth, @@ -157,6 +160,42 @@ fn test_evaluation_context_reset() { ); } +#[test] +fn test_evaluation_context_last_match_end_initial_zero() { + let config = EvaluationConfig::default(); + let context = EvaluationContext::new(config); + assert_eq!(context.last_match_end(), 0); +} + +#[test] +fn test_evaluation_context_last_match_end_set_get() { + let config = EvaluationConfig::default(); + let mut context = EvaluationContext::new(config); + + context.set_last_match_end(42); + assert_eq!(context.last_match_end(), 42); + + context.set_last_match_end(0); + assert_eq!(context.last_match_end(), 0); + + context.set_last_match_end(usize::MAX); + assert_eq!(context.last_match_end(), usize::MAX); +} + +#[test] +fn test_evaluation_context_last_match_end_clone_independence() { + let config = EvaluationConfig::default(); + let mut context = EvaluationContext::new(config); + context.set_last_match_end(100); + + let cloned = context.clone(); + assert_eq!(cloned.last_match_end(), 100); + + 
context.set_last_match_end(200); + assert_eq!(context.last_match_end(), 200); + assert_eq!(cloned.last_match_end(), 100); +} + #[test] fn test_evaluation_context_clone() { let config = EvaluationConfig { diff --git a/src/evaluator/types/mod.rs b/src/evaluator/types/mod.rs index fa3e4783..49c595e6 100644 --- a/src/evaluator/types/mod.rs +++ b/src/evaluator/types/mod.rs @@ -171,5 +171,203 @@ pub fn coerce_value_to_type(value: &Value, type_kind: &TypeKind) -> Value { } } +/// Returns the anchor-advance distance for `type_kind` at `offset`. +/// +/// This value is used by the evaluation engine to advance the GNU `file` +/// "previous match" anchor for relative offset resolution. It reflects how +/// far the anchor should move after a successful match, which may include +/// framing bytes such as c-string NUL terminators or pstring length +/// prefixes even when the underlying read helper (`read_string`, +/// `read_pstring`) does not return those bytes as part of the typed value. +/// Callers should not equate this with "bytes `read_typed_value` returned" +/// -- it is specifically the anchor-movement distance, which is a +/// superset for variable-width types. It is `pub(crate)` because no +/// external caller should depend on this anchor-advance contract -- the +/// only intended caller is `evaluate_rules` in the engine. +/// +/// The function is intentionally infallible. For unexpected inputs (offset +/// past end of buffer, malformed pstring prefix, `/J` flag underflow), it +/// returns `0` rather than panicking; the anchor then stays put and the +/// next relative offset will bounds-fail gracefully. The engine only calls +/// it after a successful read, so the defensive paths are belt-and-braces +/// for any future caller that breaks that invariant. 
+/// +/// # Semantics +/// +/// - **Fixed-width types** (Byte, Short, Long, Quad, Float, Double, Date, +/// QDate): returns `bit_width / 8` when the type's full width fits +/// inside the buffer at `offset`; returns `0` if `offset + width` would +/// exceed `buffer.len()`. This guard mirrors the variable-width path so +/// the anchor cannot advance past the end of the buffer regardless of +/// how the function is called. +/// - **C-string** (`TypeKind::String`): scans for the first NUL within a +/// window of `max_length` bytes (or to the buffer end if `max_length` is +/// `None`). When a NUL is found inside the window, returns +/// `nul_index + 1` -- the NUL byte is counted as consumed, so the next +/// relative offset reads the byte *after* the NUL. When no NUL is found +/// inside the window, returns the window size (no implicit terminator +/// byte is added). The NUL inclusion is intentional and matches GNU +/// `file` semantics: a `Relative(0)` rule following a NUL-terminated +/// string match reads the first byte after the terminator. +/// - **Pascal string** (`TypeKind::PString`): reads the length prefix (1, 2, +/// or 4 bytes, BE/LE), accounts for the `/J` flag (stored length includes +/// prefix width), caps by `max_length`, and returns `prefix_width + +/// actual_payload_bytes`. The result is also clamped against the remaining +/// buffer length so a malicious oversized length prefix cannot poison the +/// anchor. +#[must_use] +pub(crate) fn bytes_consumed(buffer: &[u8], offset: usize, type_kind: &TypeKind) -> usize { + if let Some(bits) = type_kind.bit_width() { + let width = (bits as usize) / 8; + // Bounds-check the fixed-width path so a misuse (offset past end of + // buffer, broken read-then-call invariant) cannot advance the + // anchor past the buffer end. The engine guarantees a successful + // read preceded the call, but the guard makes the contract + // self-consistent for any future caller. 
+ return match offset.checked_add(width) { + Some(end) if end <= buffer.len() => width, + _ => 0, + }; + } + + match type_kind { + TypeKind::String { max_length } => string_bytes_consumed(buffer, offset, *max_length), + TypeKind::PString { + max_length, + length_width, + length_includes_itself, + } => pstring_bytes_consumed( + buffer, + offset, + *max_length, + *length_width, + *length_includes_itself, + ), + // A new variable-width TypeKind variant was added without updating + // this match. Returning 0 here would silently corrupt the GNU `file` + // anchor for any rule using relative offsets after a match of the + // new type. The debug_assert! panics in test/dev builds so the gap + // is caught loudly during testing; release builds compile it out + // and keep the 0 fallback (graceful skip rather than panic), so + // this case is only surfaced by the assertion in non-release + // builds. + // + // GOTCHAS S2.1 lists this match in the new-TypeKind-variant + // checklist -- see that section if you are reading this comment + // because the assertion just fired. + _ => { + debug_assert!( + false, + "bytes_consumed: unhandled variable-width TypeKind variant {type_kind:?} -- update bytes_consumed and GOTCHAS S2.1" + ); + 0 + } + } +} + +/// Compute the anchor-advance distance for a successful c-string match. +/// +/// Uses the same scan logic as `read_string`: it searches from `offset` for +/// the first NUL within `max_length` bytes (or to the end of the buffer +/// when `max_length` is `None`). Unlike the `Value::String` returned by +/// `read_string` (which excludes the NUL terminator from its length), this +/// helper counts the NUL terminator as consumed when one is found, so it +/// returns `length_to_nul + 1`. When no NUL is found (truncated by buffer +/// end or `max_length`), it returns `length_read` with no implicit +/// terminator byte added. 
+/// +/// Counting the terminator is intentional for relative-offset anchoring: a +/// `Relative(0)` rule following a NUL-terminated string match resolves to +/// the byte *immediately after* the NUL terminator, not the NUL itself. +/// This matches GNU `file` semantics for chained record parsing. Do not +/// "fix" this to align with `read_string`'s byte count -- the asymmetry is +/// the point. +fn string_bytes_consumed(buffer: &[u8], offset: usize, max_length: Option) -> usize { + let Some(remaining) = buffer.get(offset..) else { + return 0; + }; + let search_len = max_length.map_or(remaining.len(), |m| m.min(remaining.len())); + let Some(window) = remaining.get(..search_len) else { + return 0; + }; + match memchr::memchr(0, window) { + Some(nul_idx) => nul_idx.saturating_add(1), + None => search_len, + } +} + +/// Compute the buffer bytes consumed by a successful pstring read. +/// +/// Mirrors `read_pstring`: reads the length prefix, applies the `/J` flag, +/// caps by `max_length`, and returns `prefix_width + payload_bytes`. Returns +/// `0` for any unexpected condition (offset past end, prefix bytes missing, +/// `/J` underflow), since the engine only calls this after a successful read. 
+fn pstring_bytes_consumed( + buffer: &[u8], + offset: usize, + max_length: Option, + length_width: crate::parser::ast::PStringLengthWidth, + length_includes_itself: bool, +) -> usize { + use crate::parser::ast::PStringLengthWidth; + let width = length_width.byte_count(); + let Some(prefix_end) = offset.checked_add(width) else { + return 0; + }; + let Some(len_bytes) = buffer.get(offset..prefix_end) else { + return 0; + }; + let stored_length = match length_width { + PStringLengthWidth::OneByte => usize::from(len_bytes[0]), + PStringLengthWidth::TwoByteBE => { + let arr: [u8; 2] = match len_bytes.try_into() { + Ok(a) => a, + Err(_) => return 0, + }; + usize::from(u16::from_be_bytes(arr)) + } + PStringLengthWidth::TwoByteLE => { + let arr: [u8; 2] = match len_bytes.try_into() { + Ok(a) => a, + Err(_) => return 0, + }; + usize::from(u16::from_le_bytes(arr)) + } + PStringLengthWidth::FourByteBE => { + let arr: [u8; 4] = match len_bytes.try_into() { + Ok(a) => a, + Err(_) => return 0, + }; + u32::from_be_bytes(arr) as usize + } + PStringLengthWidth::FourByteLE => { + let arr: [u8; 4] = match len_bytes.try_into() { + Ok(a) => a, + Err(_) => return 0, + }; + u32::from_le_bytes(arr) as usize + } + }; + + let payload_length = if length_includes_itself { + match stored_length.checked_sub(width) { + Some(n) => n, + None => return 0, + } + } else { + stored_length + }; + + // Clamp against remaining buffer bytes after the prefix. This defends + // against an attacker-controlled 4-byte length prefix near u32::MAX + // poisoning the anchor: read_pstring would have failed to actually read + // a payload that long, so a successful read implies the payload fit in + // the buffer. Mirroring that bound here keeps the anchor truthful. 
+ let remaining_after_prefix = buffer.len().saturating_sub(prefix_end); + let bounded_payload = payload_length.min(remaining_after_prefix); + let actual_length = max_length.map_or(bounded_payload, |m| m.min(bounded_payload)); + width.saturating_add(actual_length) +} + #[cfg(test)] mod tests; diff --git a/src/evaluator/types/tests.rs b/src/evaluator/types/tests.rs index 1ffc5288..82a86341 100644 --- a/src/evaluator/types/tests.rs +++ b/src/evaluator/types/tests.rs @@ -726,3 +726,306 @@ fn test_coerce_qdate_matches_read_qdate() { "Coerced value should match read_qdate output" ); } + +// ============================================================ +// bytes_consumed tests +// ============================================================ + +use crate::parser::ast::PStringLengthWidth; + +#[test] +fn test_bytes_consumed_fixed_width_types() { + // 16-byte buffer at offset 0: every fixed-width type tested below fits + // inside the bounds guard (`offset + width <= buffer.len()`). A separate + // test `test_bytes_consumed_fixed_width_returns_zero_past_end` exercises + // the guard's 0-return path at and past the boundary. 
+ let buf = &[0u8; 16]; + + let cases: &[(TypeKind, usize)] = &[ + (TypeKind::Byte { signed: false }, 1), + (TypeKind::Byte { signed: true }, 1), + ( + TypeKind::Short { + endian: Endianness::Little, + signed: false, + }, + 2, + ), + ( + TypeKind::Short { + endian: Endianness::Big, + signed: true, + }, + 2, + ), + ( + TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + 4, + ), + ( + TypeKind::Quad { + endian: Endianness::Big, + signed: false, + }, + 8, + ), + ( + TypeKind::Float { + endian: Endianness::Little, + }, + 4, + ), + ( + TypeKind::Double { + endian: Endianness::Big, + }, + 8, + ), + ( + TypeKind::Date { + endian: Endianness::Little, + utc: false, + }, + 4, + ), + ( + TypeKind::QDate { + endian: Endianness::Big, + utc: true, + }, + 8, + ), + ]; + + for (typ, expected) in cases { + let consumed = bytes_consumed(buf, 0, typ); + assert_eq!( + consumed, *expected, + "fixed-width width mismatch for {typ:?}" + ); + } +} + +#[test] +fn test_bytes_consumed_string_with_nul() { + // "MZ\0" -> matches "MZ" and consumes 3 bytes (2 + NUL). + let buf = b"MZ\x00rest"; + let typ = TypeKind::String { max_length: None }; + assert_eq!(bytes_consumed(buf, 0, &typ), 3); +} + +#[test] +fn test_bytes_consumed_string_at_offset() { + // String starting mid-buffer. + let buf = b"PREFIXabc\x00tail"; + let typ = TypeKind::String { max_length: None }; + assert_eq!(bytes_consumed(buf, 6, &typ), 4); // "abc" + NUL +} + +#[test] +fn test_bytes_consumed_string_no_nul_in_buffer() { + // No NUL terminator -- consumes to end of buffer (no extra byte for NUL). + let buf = b"NoNull"; + let typ = TypeKind::String { max_length: None }; + assert_eq!(bytes_consumed(buf, 0, &typ), 6); +} + +#[test] +fn test_bytes_consumed_string_empty() { + // Empty string at offset 0 -- just the NUL. 
+ let buf = b"\x00rest"; + let typ = TypeKind::String { max_length: None }; + assert_eq!(bytes_consumed(buf, 0, &typ), 1); +} + +#[test] +fn test_bytes_consumed_string_max_length_caps() { + // max_length = 4, NUL is at index 14 -- read stops at 4 chars, no NUL consumed. + let buf = b"VeryLongString\x00rest"; + let typ = TypeKind::String { + max_length: Some(4), + }; + assert_eq!(bytes_consumed(buf, 0, &typ), 4); +} + +#[test] +fn test_bytes_consumed_string_max_length_finds_nul() { + // max_length = 10 but NUL is at index 5 -- read stops at NUL, consumes 6. + let buf = b"Short\x00LongerSuffix"; + let typ = TypeKind::String { + max_length: Some(10), + }; + assert_eq!(bytes_consumed(buf, 0, &typ), 6); +} + +#[test] +fn test_bytes_consumed_pstring_one_byte() { + // \x05Hello -- prefix(1) + payload(5) = 6 + let buf = b"\x05Hello"; + let typ = TypeKind::PString { + max_length: None, + length_width: PStringLengthWidth::OneByte, + length_includes_itself: false, + }; + assert_eq!(bytes_consumed(buf, 0, &typ), 6); +} + +#[test] +fn test_bytes_consumed_pstring_two_byte_be() { + // \x00\x05Hello -- prefix(2) + payload(5) = 7 + let buf = b"\x00\x05Hello"; + let typ = TypeKind::PString { + max_length: None, + length_width: PStringLengthWidth::TwoByteBE, + length_includes_itself: false, + }; + assert_eq!(bytes_consumed(buf, 0, &typ), 7); +} + +#[test] +fn test_bytes_consumed_pstring_two_byte_le() { + let buf = b"\x05\x00Hello"; + let typ = TypeKind::PString { + max_length: None, + length_width: PStringLengthWidth::TwoByteLE, + length_includes_itself: false, + }; + assert_eq!(bytes_consumed(buf, 0, &typ), 7); +} + +#[test] +fn test_bytes_consumed_pstring_four_byte_be() { + let buf = b"\x00\x00\x00\x01x"; + let typ = TypeKind::PString { + max_length: None, + length_width: PStringLengthWidth::FourByteBE, + length_includes_itself: false, + }; + assert_eq!(bytes_consumed(buf, 0, &typ), 5); +} + +#[test] +fn test_bytes_consumed_pstring_j_flag() { + // /J: stored length 4 -> 4 - 1 
(prefix) = 3 bytes payload, total 4 + let buf = b"\x04abc"; + let typ = TypeKind::PString { + max_length: None, + length_width: PStringLengthWidth::OneByte, + length_includes_itself: true, + }; + assert_eq!(bytes_consumed(buf, 0, &typ), 4); +} + +#[test] +fn test_bytes_consumed_pstring_empty() { + // \x00 -- prefix says length 0, total 1 (just the prefix) + let buf = b"\x00"; + let typ = TypeKind::PString { + max_length: None, + length_width: PStringLengthWidth::OneByte, + length_includes_itself: false, + }; + assert_eq!(bytes_consumed(buf, 0, &typ), 1); +} + +#[test] +fn test_bytes_consumed_pstring_max_length_caps() { + // Stored length 10, max_length 5 -- consume prefix(1) + 5 = 6 + let buf = b"\x0aHelloWorld"; + let typ = TypeKind::PString { + max_length: Some(5), + length_width: PStringLengthWidth::OneByte, + length_includes_itself: false, + }; + assert_eq!(bytes_consumed(buf, 0, &typ), 6); +} + +#[test] +fn test_bytes_consumed_pstring_j_flag_underflow_multi_byte() { + // /J with TwoByteBE: stored length 1, prefix width 2 -> underflow -> 0. + let buf = b"\x00\x01xx"; + let typ = TypeKind::PString { + max_length: None, + length_width: PStringLengthWidth::TwoByteBE, + length_includes_itself: true, + }; + assert_eq!(bytes_consumed(buf, 0, &typ), 0); + + // /J with FourByteLE: stored length 3, prefix width 4 -> underflow -> 0. + let buf = b"\x03\x00\x00\x00xx"; + let typ = TypeKind::PString { + max_length: None, + length_width: PStringLengthWidth::FourByteLE, + length_includes_itself: true, + }; + assert_eq!(bytes_consumed(buf, 0, &typ), 0); +} + +#[test] +fn test_bytes_consumed_pstring_clamps_oversized_prefix_be() { + // FourByteBE prefix says 0xFFFFFFFF (4 GB), but the buffer only has + // 3 bytes after the prefix. bytes_consumed must clamp to the remaining + // buffer length, not advance the anchor to ~4 GB. 
+ let buf = b"\xFF\xFF\xFF\xFFabc"; + let typ = TypeKind::PString { + max_length: None, + length_width: PStringLengthWidth::FourByteBE, + length_includes_itself: false, + }; + // 4 (prefix) + min(0xFFFFFFFF, 3) = 4 + 3 = 7 + assert_eq!(bytes_consumed(buf, 0, &typ), 7); +} + +#[test] +fn test_bytes_consumed_pstring_clamps_oversized_prefix_le() { + let buf = b"\xFF\xFF\xFF\xFFhello"; + let typ = TypeKind::PString { + max_length: None, + length_width: PStringLengthWidth::FourByteLE, + length_includes_itself: false, + }; + // 4 + min(0xFFFFFFFF, 5) = 9 + assert_eq!(bytes_consumed(buf, 0, &typ), 9); +} + +#[test] +fn test_bytes_consumed_string_at_past_end_returns_zero() { + // Variable-width branch: out-of-bounds offset returns 0, which keeps + // the anchor in place. The engine guarantees this is never called for + // a successful read, but the path is exercised here for the contract. + let buf = b"abc"; + let typ = TypeKind::String { max_length: None }; + assert_eq!(bytes_consumed(buf, 10, &typ), 0); +} + +#[test] +fn test_bytes_consumed_fixed_width_returns_zero_past_end() { + // Fixed-width branch is bounds-checked: if offset + width > buffer.len() + // it returns 0, mirroring the variable-width path. The engine never + // calls bytes_consumed at an out-of-bounds offset, but the guard makes + // the contract self-consistent for any future caller. + let buf = b"abc"; + let typ = TypeKind::Byte { signed: false }; + // offset == buf.len() leaves no room for a 1-byte read. + assert_eq!(bytes_consumed(buf, 3, &typ), 0); + // Way past end. + assert_eq!(bytes_consumed(buf, 100, &typ), 0); + // Last valid index: 1-byte read fits. + assert_eq!(bytes_consumed(buf, 2, &typ), 1); + + // Multi-byte fixed-width type at the boundary. 
+ let typ_long = TypeKind::Long { + endian: Endianness::Little, + signed: false, + }; + let buf4 = b"abcd"; + // offset 0 + width 4 == buf.len() -> fits + assert_eq!(bytes_consumed(buf4, 0, &typ_long), 4); + // offset 1 + width 4 == 5 > buf.len() -> 0 + assert_eq!(bytes_consumed(buf4, 1, &typ_long), 0); + // overflow: offset = usize::MAX, width = 4 -> checked_add returns None -> 0 + assert_eq!(bytes_consumed(buf4, usize::MAX, &typ_long), 0); +} diff --git a/tessl.json b/tessl.json index 41560dab..1d5a34af 100644 --- a/tessl.json +++ b/tessl.json @@ -1,61 +1,65 @@ { - "name": "libmagic-rs", - "mode": "vendored", - "dependencies": { - "actionbook/rust-skills": { - "version": "1f4becdcb88d1cbccc1880594479f28891102843", - "source": "https://github.com/actionbook/rust-skills", - "include": { - "skills": [ - "coding-guidelines", - "domain-cli", - "m01-ownership", - "m02-resource", - "m03-mutability", - "m04-zero-cost", - "m05-type-driven", - "m06-error-handling", - "m07-concurrency", - "m09-domain", - "m10-performance", - "m11-ecosystem", - "m12-lifecycle", - "m13-domain-error", - "m14-mental-model", - "m15-anti-pattern", - "meta-cognition-parallel", - "rust-call-graph", - "rust-code-navigator", - "rust-deps-visualizer", - "rust-learner", - "rust-refactor-helper", - "rust-skill-creator", - "rust-symbol-analyzer", - "rust-trait-explorer", - "unsafe-checker" - ] - } - }, - "pantheon-ai/github-actions-generator": { - "version": "0.1.1" - }, - "pantheon-ai/mise-complete": { - "version": "0.1.1" - }, - "pantheon-ai/dockerfile-toolkit": { - "version": "0.1.0" - }, - "pantheon-ai/moscow-prioritization": { - "version": "0.1.1" - }, - "pantheon-ai/software-design-principles": { - "version": "0.1.4" - }, - "cisco/software-security": { - "version": "1.2.5" - }, - "tessl-labs/good-oss-citizen": { - "version": "1.0.1" - } + "name": "libmagic-rs", + "mode": "vendored", + "dependencies": { + "actionbook/rust-skills": { + "version": "1f4becdcb88d1cbccc1880594479f28891102843", + 
"source": "https://github.com/actionbook/rust-skills", + "include": { + "skills": [ + "coding-guidelines", + "domain-cli", + "m01-ownership", + "m02-resource", + "m03-mutability", + "m04-zero-cost", + "m05-type-driven", + "m06-error-handling", + "m07-concurrency", + "m09-domain", + "m10-performance", + "m11-ecosystem", + "m12-lifecycle", + "m13-domain-error", + "m14-mental-model", + "m15-anti-pattern", + "meta-cognition-parallel", + "rust-call-graph", + "rust-code-navigator", + "rust-deps-visualizer", + "rust-learner", + "rust-refactor-helper", + "rust-skill-creator", + "rust-symbol-analyzer", + "rust-trait-explorer", + "unsafe-checker" + ] + } + }, + "pantheon-ai/github-actions-generator": { + "version": "0.1.1" + }, + "pantheon-ai/mise-complete": { + "version": "0.1.1" + }, + "pantheon-ai/dockerfile-toolkit": { + "version": "0.1.0" + }, + "pantheon-ai/moscow-prioritization": { + "version": "0.1.1" + }, + "pantheon-ai/software-design-principles": { + "version": "0.1.4" + }, + "cisco/software-security": { + "version": "1.2.5" + }, + "tessl-labs/good-oss-citizen": { + "version": "1.0.1" + }, + "neonwatty/logo-designer-skill": { + "version": "60285dd8417d5194155d8c2e349e2f8b61ffe6ff", + "source": "https://github.com/neonwatty/logo-designer-skill" } + } } diff --git a/tests/relative_offset_evaluation.rs b/tests/relative_offset_evaluation.rs new file mode 100644 index 00000000..86d5744c --- /dev/null +++ b/tests/relative_offset_evaluation.rs @@ -0,0 +1,518 @@ +// Copyright (c) 2025-2026 the libmagic-rs contributors +// SPDX-License-Identifier: Apache-2.0 + +//! Integration tests for relative offset (`OffsetSpec::Relative`) evaluation. +//! +//! Relative offsets resolve against the end of the most recent successful +//! match (the GNU `file` "previous match" anchor). The evaluation engine +//! threads this anchor through `EvaluationContext::last_match_end()`, and +//! advances it after each successful match by the number of bytes the read +//! consumed. +//! +//! 
Magic-file syntax for `&+N`/`&-N` is not yet wired into the parser, so +//! these tests construct rules programmatically and exercise them through +//! `evaluate_rules` directly. + +use libmagic_rs::evaluator::{EvaluationContext, evaluate_rules}; +use libmagic_rs::parser::ast::PStringLengthWidth; +use libmagic_rs::{Endianness, EvaluationConfig, MagicRule, OffsetSpec, Operator, TypeKind, Value}; + +fn cfg() -> EvaluationConfig { + EvaluationConfig { + stop_at_first_match: false, + ..Default::default() + } +} + +fn child_rule(offset: OffsetSpec, typ: TypeKind, value: Value, message: &str) -> MagicRule { + MagicRule { + offset, + typ, + op: Operator::Equal, + value, + message: message.to_string(), + children: vec![], + level: 1, + strength_modifier: None, + } +} + +#[test] +fn relative_child_after_long_parent() { + // Buffer: 4-byte LE long (0x12345678) followed by another 4-byte LE long + // (0xCAFEBABE). Parent matches the first long, child uses Relative(0) + // and reads at offset 4 (= parent end). 
+ let buffer = [0x78, 0x56, 0x34, 0x12, 0xBE, 0xBA, 0xFE, 0xCA]; + + let parent = MagicRule { + offset: OffsetSpec::Absolute(0), + typ: TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + op: Operator::Equal, + value: Value::Uint(0x1234_5678), + message: "parent-long".to_string(), + children: vec![child_rule( + OffsetSpec::Relative(0), + TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + Value::Uint(0xCAFE_BABE), + "child-long", + )], + level: 0, + strength_modifier: None, + }; + + let mut ctx = EvaluationContext::new(cfg()); + let matches = evaluate_rules(&[parent], &buffer, &mut ctx).unwrap(); + assert_eq!(matches.len(), 2, "expected parent + child match"); + assert_eq!(matches[0].message, "parent-long"); + assert_eq!(matches[0].offset, 0); + assert_eq!(matches[1].message, "child-long"); + assert_eq!(matches[1].offset, 4); +} + +#[test] +fn relative_child_with_positive_delta() { + // Parent matches one byte at offset 0; child uses Relative(2) and reads + // at offset 1 (parent_end) + 2 = 3. + let buffer = [0x7F, 0xAA, 0xBB, 0x42, 0xCC]; + + let parent = MagicRule { + offset: OffsetSpec::Absolute(0), + typ: TypeKind::Byte { signed: false }, + op: Operator::Equal, + value: Value::Uint(0x7F), + message: "p".to_string(), + children: vec![child_rule( + OffsetSpec::Relative(2), + TypeKind::Byte { signed: false }, + Value::Uint(0x42), + "c", + )], + level: 0, + strength_modifier: None, + }; + + let mut ctx = EvaluationContext::new(cfg()); + let matches = evaluate_rules(&[parent], &buffer, &mut ctx).unwrap(); + assert_eq!(matches.len(), 2); + assert_eq!(matches[1].offset, 3); +} + +#[test] +fn relative_child_with_negative_delta() { + // Parent matches a 4-byte long at offset 4; child Relative(-7) reads at + // (4+4) - 7 = 1. 
+ let buffer = [0x00, 0xAA, 0x00, 0x00, 0x78, 0x56, 0x34, 0x12, 0x00]; + + let parent = MagicRule { + offset: OffsetSpec::Absolute(4), + typ: TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + op: Operator::Equal, + value: Value::Uint(0x1234_5678), + message: "p".to_string(), + children: vec![child_rule( + OffsetSpec::Relative(-7), + TypeKind::Byte { signed: false }, + Value::Uint(0xAA), + "c", + )], + level: 0, + strength_modifier: None, + }; + + let mut ctx = EvaluationContext::new(cfg()); + let matches = evaluate_rules(&[parent], &buffer, &mut ctx).unwrap(); + assert_eq!(matches.len(), 2); + assert_eq!(matches[1].offset, 1); + assert_eq!(matches[1].value, Value::Uint(0xAA)); +} + +#[test] +fn relative_chain_marches_forward() { + // Three consecutive 4-byte LE longs; root + two relative children. + let buffer = [ + 0x78, 0x56, 0x34, 0x12, // 0x12345678 + 0xBE, 0xBA, 0xFE, 0xCA, // 0xCAFEBABE + 0xEF, 0xBE, 0xAD, 0xDE, // 0xDEADBEEF + ]; + + let leaf = child_rule( + OffsetSpec::Relative(0), + TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + Value::Uint(0xDEAD_BEEF), + "leaf", + ); + let mut middle = child_rule( + OffsetSpec::Relative(0), + TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + Value::Uint(0xCAFE_BABE), + "middle", + ); + middle.children = vec![leaf]; + + let root = MagicRule { + offset: OffsetSpec::Absolute(0), + typ: TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + op: Operator::Equal, + value: Value::Uint(0x1234_5678), + message: "root".to_string(), + children: vec![middle], + level: 0, + strength_modifier: None, + }; + + let mut ctx = EvaluationContext::new(cfg()); + let matches = evaluate_rules(&[root], &buffer, &mut ctx).unwrap(); + assert_eq!(matches.len(), 3); + let offsets: Vec<usize> = matches.iter().map(|m| m.offset).collect(); + assert_eq!(offsets, vec![0, 4, 8]); +} + +#[test] +fn relative_after_string_parent_includes_nul_terminator() { + // String "MZ" at offset 0
followed by NUL (3 bytes consumed), then a + // byte the child reads via Relative(0). + let buffer = b"MZ\x00\x42rest"; + + let parent = MagicRule { + offset: OffsetSpec::Absolute(0), + typ: TypeKind::String { max_length: None }, + op: Operator::Equal, + value: Value::String("MZ".to_string()), + message: "mz".to_string(), + children: vec![child_rule( + OffsetSpec::Relative(0), + TypeKind::Byte { signed: false }, + Value::Uint(0x42), + "byte-after-mz", + )], + level: 0, + strength_modifier: None, + }; + + let mut ctx = EvaluationContext::new(cfg()); + let matches = evaluate_rules(&[parent], buffer, &mut ctx).unwrap(); + assert_eq!(matches.len(), 2, "child should match after MZ + NUL"); + assert_eq!(matches[1].offset, 3); +} + +#[test] +fn relative_after_pstring_parent_consumes_prefix_and_payload() { + // pstring(/B) at offset 0 with prefix 0x05, payload "Hello" (6 bytes + // total), then a byte at offset 6. + let buffer = b"\x05Hello\x42tail"; + + let parent = MagicRule { + offset: OffsetSpec::Absolute(0), + typ: TypeKind::PString { + max_length: None, + length_width: PStringLengthWidth::OneByte, + length_includes_itself: false, + }, + op: Operator::Equal, + value: Value::String("Hello".to_string()), + message: "pstr".to_string(), + children: vec![child_rule( + OffsetSpec::Relative(0), + TypeKind::Byte { signed: false }, + Value::Uint(0x42), + "byte-after-pstr", + )], + level: 0, + strength_modifier: None, + }; + + let mut ctx = EvaluationContext::new(cfg()); + let matches = evaluate_rules(&[parent], buffer, &mut ctx).unwrap(); + assert_eq!(matches.len(), 2); + assert_eq!(matches[1].offset, 6); +} + +#[test] +fn relative_top_level_resolves_from_zero_anchor() { + // No prior match: top-level Relative(2) -> absolute 2. 
+ let buffer = [0xAA, 0xBB, 0x42, 0xCC]; + + let rule = MagicRule { + offset: OffsetSpec::Relative(2), + typ: TypeKind::Byte { signed: false }, + op: Operator::Equal, + value: Value::Uint(0x42), + message: "top".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + + let mut ctx = EvaluationContext::new(cfg()); + let matches = evaluate_rules(&[rule], &buffer, &mut ctx).unwrap(); + assert_eq!(matches.len(), 1); + assert_eq!(matches[0].offset, 2); +} + +#[test] +fn relative_sibling_propagation_at_top_level() { + // GNU `file` semantics: anchor carries across siblings; the second + // top-level rule sees the anchor that the first rule left behind. + // First rule matches a 4-byte long at offset 0 -> anchor becomes 4. + // Second rule uses Relative(0) -> reads at offset 4. + let buffer = [0x78, 0x56, 0x34, 0x12, 0x42, 0x00, 0x00, 0x00]; + + let first = MagicRule { + offset: OffsetSpec::Absolute(0), + typ: TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + op: Operator::Equal, + value: Value::Uint(0x1234_5678), + message: "first".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + let second = MagicRule { + offset: OffsetSpec::Relative(0), + typ: TypeKind::Byte { signed: false }, + op: Operator::Equal, + value: Value::Uint(0x42), + message: "second".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + + let mut ctx = EvaluationContext::new(cfg()); + let matches = evaluate_rules(&[first, second], &buffer, &mut ctx).unwrap(); + assert_eq!(matches.len(), 2); + assert_eq!(matches[0].offset, 0); + assert_eq!(matches[1].offset, 4); +} + +#[test] +fn relative_out_of_bounds_skips_child_gracefully() { + // Parent matches; child uses Relative(50) which lands past the buffer. + // Engine should skip the child and continue without panicking.
+ let buffer = [0x7F, 0xAA, 0xBB]; + + let parent = MagicRule { + offset: OffsetSpec::Absolute(0), + typ: TypeKind::Byte { signed: false }, + op: Operator::Equal, + value: Value::Uint(0x7F), + message: "p".to_string(), + children: vec![child_rule( + OffsetSpec::Relative(50), + TypeKind::Byte { signed: false }, + Value::Uint(0x00), + "c", + )], + level: 0, + strength_modifier: None, + }; + + let mut ctx = EvaluationContext::new(cfg()); + let matches = evaluate_rules(&[parent], &buffer, &mut ctx).unwrap(); + assert_eq!(matches.len(), 1, "only the parent should match"); + assert_eq!(matches[0].message, "p"); +} + +#[test] +fn relative_anchor_can_decrease_when_later_sibling_matches_at_lower_position() { + // GNU `file` semantics: the anchor reflects the END of the most recent + // match -- not a high-watermark. If a later sibling matches at a lower + // absolute position, the anchor moves backwards. This test pins the + // documented "may increase or decrease" behavior so a future + // optimization that adds a max() guard fails loudly. + // + // Layout: 16 bytes. Rule A matches a 4-byte LE long at offset 8. + // After A, anchor = 12. Rule B matches a single byte at offset 2 + // (Absolute(2)). After B, anchor = 3. Rule C uses Relative(0) and + // must read at offset 3, NOT offset 12. 
+ let buffer = [ + 0x00, 0x00, 0xAA, 0x99, 0x00, 0x00, 0x00, 0x00, // bytes 0-7 + 0x78, 0x56, 0x34, 0x12, 0x00, 0x00, 0x00, 0x00, // bytes 8-15 + ]; + + let rule_a = MagicRule { + offset: OffsetSpec::Absolute(8), + typ: TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + op: Operator::Equal, + value: Value::Uint(0x1234_5678), + message: "rule-a-high".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + let rule_b = MagicRule { + offset: OffsetSpec::Absolute(2), + typ: TypeKind::Byte { signed: false }, + op: Operator::Equal, + value: Value::Uint(0xAA), + message: "rule-b-low".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + let rule_c = MagicRule { + offset: OffsetSpec::Relative(0), + typ: TypeKind::Byte { signed: false }, + op: Operator::Equal, + value: Value::Uint(0x99), + message: "rule-c-relative".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + + let mut ctx = EvaluationContext::new(cfg()); + let matches = evaluate_rules(&[rule_a, rule_b, rule_c], &buffer, &mut ctx).unwrap(); + assert_eq!(matches.len(), 3, "all three rules should match"); + assert_eq!(matches[0].message, "rule-a-high"); + assert_eq!(matches[0].offset, 8); + assert_eq!(matches[1].message, "rule-b-low"); + assert_eq!(matches[1].offset, 2); + assert_eq!( + matches[2].offset, 3, + "rule C must read at offset 3 (rule B's end), proving the anchor moved backwards from 12 -> 3" + ); +} + +#[test] +fn relative_anchor_persists_across_non_matching_intermediate_sibling() { + // First top-level rule matches a 4-byte LE long -> anchor advances to 4. + // Second top-level rule does NOT match (wrong expected value) -> anchor + // stays at 4. + // Third top-level rule uses Relative(0) -> reads at offset 4. 
+ let buffer = [0x78, 0x56, 0x34, 0x12, 0x42, 0x00, 0x00, 0x00]; + + let first = MagicRule { + offset: OffsetSpec::Absolute(0), + typ: TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + op: Operator::Equal, + value: Value::Uint(0x1234_5678), + message: "first".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + let middle_no_match = MagicRule { + offset: OffsetSpec::Absolute(4), + typ: TypeKind::Byte { signed: false }, + op: Operator::Equal, + value: Value::Uint(0xDE), // does not match (real byte is 0x42) + message: "middle-skip".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + let third = MagicRule { + offset: OffsetSpec::Relative(0), + typ: TypeKind::Byte { signed: false }, + op: Operator::Equal, + value: Value::Uint(0x42), + message: "third".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + + let mut ctx = EvaluationContext::new(cfg()); + let matches = evaluate_rules(&[first, middle_no_match, third], &buffer, &mut ctx).unwrap(); + assert_eq!(matches.len(), 2, "first + third match, middle skipped"); + assert_eq!(matches[0].message, "first"); + assert_eq!(matches[1].message, "third"); + assert_eq!(matches[1].offset, 4); +} + +#[test] +fn relative_anchor_resets_between_evaluations_via_reset() { + // Evaluate against a first buffer, advancing the anchor. Reset the + // context. Evaluate against a second buffer with a Relative(0) rule; + // the anchor must start at 0, not the leaked value from the first run. 
+ let buffer_a = [0x78, 0x56, 0x34, 0x12]; + let buffer_b = [0x42, 0xAA, 0xBB, 0xCC]; + + let pass_one = MagicRule { + offset: OffsetSpec::Absolute(0), + typ: TypeKind::Long { + endian: Endianness::Little, + signed: false, + }, + op: Operator::Equal, + value: Value::Uint(0x1234_5678), + message: "pass-one".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + let pass_two = MagicRule { + offset: OffsetSpec::Relative(0), + typ: TypeKind::Byte { signed: false }, + op: Operator::Equal, + value: Value::Uint(0x42), + message: "pass-two".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + + let mut ctx = EvaluationContext::new(cfg()); + let _ = evaluate_rules(&[pass_one], &buffer_a, &mut ctx).unwrap(); + ctx.reset(); + let matches = evaluate_rules(&[pass_two], &buffer_b, &mut ctx).unwrap(); + assert_eq!(matches.len(), 1); + assert_eq!( + matches[0].offset, 0, + "Relative(0) should resolve to 0 after reset" + ); +} + +#[test] +fn relative_underflow_skips_child_gracefully() { + // Anchor=1 (after parent byte), child Relative(-100) underflows. + let buffer = [0x7F, 0xAA]; + + let parent = MagicRule { + offset: OffsetSpec::Absolute(0), + typ: TypeKind::Byte { signed: false }, + op: Operator::Equal, + value: Value::Uint(0x7F), + message: "p".to_string(), + children: vec![child_rule( + OffsetSpec::Relative(-100), + TypeKind::Byte { signed: false }, + Value::Uint(0x00), + "c", + )], + level: 0, + strength_modifier: None, + }; + + let mut ctx = EvaluationContext::new(cfg()); + let matches = evaluate_rules(&[parent], &buffer, &mut ctx).unwrap(); + assert_eq!(matches.len(), 1); +}