diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 5f6b2bd2..5a8b72fe 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -5,15 +5,21 @@ updates: schedule: interval: "weekly" rebase-strategy: "disabled" + commit-message: + prefix: "chore(deps)" - package-ecosystem: "github-actions" directory: "/" schedule: interval: "weekly" rebase-strategy: "disabled" + commit-message: + prefix: "chore(deps)" - package-ecosystem: "devcontainers" directory: "/" schedule: interval: "weekly" rebase-strategy: "disabled" + commit-message: + prefix: "chore(deps)" diff --git a/.mergify.yml b/.mergify.yml index bdd69f71..2492d85b 100644 --- a/.mergify.yml +++ b/.mergify.yml @@ -11,47 +11,69 @@ queue_rules: - check-success = coverage pull_request_rules: - # Tier 1: Maintainer PRs -- queue when maintainer adds 'lgtm' label - - name: Queue maintainer PRs with lgtm label + # Tier 1: Trusted bot PRs -- auto-approve and queue immediately + - name: Auto-approve and queue dependabot PRs conditions: - base = main - - "author=@maintainers" - - label = lgtm + - author = dependabot[bot] + - -draft - label != do-not-merge + # release.yml is autogenerated by cargo-dist -- dependabot updates to + # pinned actions in it will break the release pipeline. Dependabot has no + # way to ignore specific workflow files, so we block it here instead. + - -files~=\.github/workflows/release\.yml actions: + review: + type: APPROVE + message: Automatically approved by Mergify + queue: + name: default + + - name: Auto-approve and queue dosu PRs + conditions: + - base = main + - author = dosubot[bot] + - -draft + - label != do-not-merge + actions: + review: + type: APPROVE + message: Automatically approved by Mergify queue: name: default - # Tier 2: Trusted bot PRs -- auto-queue when checks pass - name: Auto-queue release-plz PRs conditions: - base = main - "head ~= ^release-plz-" + - -draft - label != do-not-merge actions: queue: name: default - - name: Auto-approve and queue dependabot PRs + # Tier 2: Maintainer PRs -- queue when maintainer self-labels 'lgtm' + # (no approval required; solves the sole-maintainer self-merge problem) + - name: Queue maintainer PRs with lgtm label conditions: - base = main - - author = dependabot[bot] + - "author=@maintainers" + - -draft + - label = lgtm - label != do-not-merge - - -files~=\.github/workflows/release\.yml actions: - review: - type: APPROVE - message: Automatically approved by Mergify queue: name: default - # Tier 3: All other PRs (external contributors, copilot) -- require maintainer approval + # Tier 3: External contributor PRs -- require maintainer approval - name: Queue external PRs when approved by maintainer conditions: - base = main - "-author=@maintainers" - author != dependabot[bot] + - author != dosubot[bot] - "-head ~= ^release-plz-" + - -draft - "approved-reviews-by=@maintainers" - label != do-not-merge actions: @@ -69,6 +91,13 @@ pull_request_rules: update: {} merge_protections: + - name: Enforce conventional commit + description: Make sure that we follow https://www.conventionalcommits.org/en/v1.0.0/ + if: + - base = main + success_conditions: + - "title ~= ^(fix|feat|docs|style|refactor|perf|test|build|ci|chore|revert)(?:\\\\(.+\\\\))?!?:" + - name: CI must pass description: >- All CI checks must pass. This protection prevents manual merges diff --git a/AGENTS.md b/AGENTS.md index 06f8289f..b6c411f1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -65,7 +65,7 @@ Target File → Memory Mapper → File Buffer // Core data structures in lib.rs pub struct MagicRule { /* ... */ } pub enum TypeKind { - Byte, + Byte { signed: bool }, Short { endian: Endianness, signed: bool }, Long { endian: Endianness, signed: bool }, String { max_length: Option }, @@ -138,10 +138,13 @@ pub fn evaluate_magic_rules( - `src/error.rs` is shared with `build.rs` -- cannot reference lib-only types like `crate::io::IoError` - `FileError(String)` wraps structured I/O errors as strings to work around the build.rs constraint +- `build.rs` and `src/build_helpers.rs` have duplicate `serialize_*` functions -- both must be updated when adding enum variants - Use `ParseError::IoError` for I/O errors in parser code, not `ParseError::invalid_syntax` - Use `LibmagicError::ConfigError` for config validation, not `ParseError::invalid_syntax` - Clippy pedantic lints are active (e.g., prefer `trailing_zeros()` over bitwise masks) - All public enum variants need `# Examples` rustdoc sections +- Comparison operators share a `compare_values() -> Option` helper in `operators.rs` -- new comparison logic goes there, not in individual `apply_*` functions +- libmagic types are signed by default (`byte`, `short`, `long`); unsigned variants use `u` prefix (`ubyte`, `ushort`, `ulong`, etc.) ### Naming Conventions @@ -180,20 +183,20 @@ cargo test --doc # Test documentation examples - **Property Tests**: Use `proptest` for fuzzing magic rule evaluation - **Benchmarks**: Critical path performance tests with `criterion` - **Coverage**: Target >85% with `cargo llvm-cov` +- **Test style**: Prefer table-driven tests over one-assertion-per-function tests; consolidate related cases into a single test with descriptive failure messages ## Magic File Compatibility ### Currently Implemented (v0.1.0) - **Offsets**: Absolute and from-end specifications (indirect and relative are parsed but not yet evaluated) -- **Types**: `byte`, `short`, `long`, `string` with endianness support -- **Operators**: `=` (equal), `!=` (not equal), `&` (bitwise AND with optional mask) +- **Types**: `byte`, `short`, `long`, `string` with endianness support; unsigned variants `ubyte`, `ushort`/`ubeshort`/`uleshort`, `ulong`/`ubelong`/`ulelong`; types are signed by default (libmagic-compatible) +- **Operators**: `=` (equal), `!=` (not equal), `<` (less than), `>` (greater than), `<=` (less equal), `>=` (greater equal), `&` (bitwise AND with optional mask) - **Nested Rules**: Hierarchical rule evaluation with proper indentation - **String Matching**: Exact string matching with null-termination ### Planned Features (v1.0+) -- Comparison operators: `>`, `<`, `>=`, `<=` - Bitwise XOR operator: `^` - Regex type: Pattern matching with binary-safe regex support - Additional types: 64-bit integers, floats, doubles, dates @@ -226,7 +229,6 @@ impl BinaryRegex for regex::bytes::Regex { ### Operators -- No comparison operators (`>`, `<`, `>=`, `<=`) - No XOR operator (`^`) - No negation operator (`~`) - BitwiseAnd supports mask values but not all libmagic mask syntax @@ -316,13 +318,16 @@ sample.bin: ELF 64-bit LSB executable, x86-64, version 1 (SYSV) ### Adding New Operators -> **Note:** Currently implemented operators are `Equal`, `NotEqual`, and `BitwiseAnd` (with `BitwiseAndMask`). Comparison operators (`>`, `<`) and XOR (`^`) are planned for future releases. +> **Note:** Currently implemented operators are `Equal`, `NotEqual`, `LessThan`, `GreaterThan`, `LessEqual`, `GreaterEqual`, and `BitwiseAnd` (with `BitwiseAndMask`). XOR (`^`) is planned for future releases. 1. Extend `Operator` enum in `src/parser/ast.rs` 2. Add parsing logic in `src/parser/grammar.rs` 3. Implement operator logic in `src/evaluator/operators.rs` -4. Add tests for the new operator -5. Update documentation +4. Update `serialize_operator()` in both `src/build_helpers.rs` AND `build.rs` (they have duplicate match statements) +5. Update strength calculation match in `src/evaluator/strength.rs` +6. Update `arb_operator()` in `tests/property_tests.rs` +7. Add tests for the new operator +8. Update documentation ### Performance Optimization diff --git a/build.rs b/build.rs index 11e91f0b..cd7b935a 100644 --- a/build.rs +++ b/build.rs @@ -269,7 +269,7 @@ fn serialize_offset_spec(offset: &OffsetSpec) -> String { fn serialize_type_kind(typ: &TypeKind) -> String { match typ { - TypeKind::Byte => "TypeKind::Byte".to_string(), + TypeKind::Byte { signed } => format!("TypeKind::Byte {{ signed: {signed} }}"), TypeKind::Short { endian, signed } => format!( "TypeKind::Short {{ endian: {}, signed: {} }}", serialize_endianness(*endian), @@ -293,6 +293,10 @@ fn serialize_operator(op: &Operator) -> String { match op { Operator::Equal => "Operator::Equal".to_string(), Operator::NotEqual => "Operator::NotEqual".to_string(), + Operator::LessThan => "Operator::LessThan".to_string(), + Operator::GreaterThan => "Operator::GreaterThan".to_string(), + Operator::LessEqual => "Operator::LessEqual".to_string(), + Operator::GreaterEqual => "Operator::GreaterEqual".to_string(), Operator::BitwiseAnd => "Operator::BitwiseAnd".to_string(), Operator::BitwiseAndMask(mask) => format!("Operator::BitwiseAndMask({mask})"), } @@ -301,7 +305,14 @@ fn serialize_operator(op: &Operator) -> String { fn serialize_value(value: &Value) -> String { match value { Value::Uint(number) => format!("Value::Uint({})", format_number(*number)), - Value::Int(number) => format!("Value::Int({})", format_number(*number as u64)), + Value::Int(number) => { + if *number < 0 { + let abs = number.unsigned_abs(); + format!("Value::Int(-{})", format_number(abs)) + } else { + format!("Value::Int({})", format_number(*number as u64)) + } + } Value::Bytes(bytes) => format!("Value::Bytes({})", format_byte_vec(bytes)), Value::String(text) => format!( "Value::String(String::from({}))", diff --git a/docs/API_REFERENCE.md b/docs/API_REFERENCE.md index 48fbbc87..a2ffe62e 100644 --- a/docs/API_REFERENCE.md +++ b/docs/API_REFERENCE.md @@ -298,7 +298,7 @@ use libmagic_rs::TypeKind; | Variant | Description | |---------|-------------| -| `Byte` | Single byte | +| `Byte { signed }` | Single byte with explicit signedness | | `Short { endian, signed }` | 16-bit integer | | `Long { endian, signed }` | 32-bit integer | | `String { max_length }` | String data | @@ -313,10 +313,14 @@ use libmagic_rs::Operator; | Variant | Description | |---------|-------------| -| `Equal` | Equality comparison | -| `NotEqual` | Inequality comparison | -| `BitwiseAnd` | Bitwise AND | -| `BitwiseAndMask(u64)` | Bitwise AND with mask | +| `Equal` | Equality comparison (`=` or `==`) | +| `NotEqual` | Inequality comparison (`!=` or `<>`) | +| `LessThan` | Less than comparison (`<`) | +| `GreaterThan` | Greater than comparison (`>`) | +| `LessEqual` | Less than or equal comparison (`<=`) | +| `GreaterEqual` | Greater than or equal comparison (`>=`) | +| `BitwiseAnd` | Bitwise AND (`&`) | +| `BitwiseAndMask(u64)` | Bitwise AND with mask value | #### Value diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index f2758431..c02b5df2 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -82,7 +82,7 @@ libmagic-rs/ │ │ │ ├── parser/ # Magic file parsing │ │ ├── mod.rs # Parser interface, file loading -│ │ ├── ast.rs # AST definitions (MagicRule, TypeKind, etc.) +│ │ ├── ast.rs # AST definitions (MagicRule, TypeKind::Byte { signed: bool }, etc.) │ │ └── grammar.rs # nom-based parsing combinators │ │ │ ├── evaluator/ # Rule evaluation engine @@ -272,6 +272,9 @@ pub struct MagicRule { - Child rules are evaluated only if parent matches - Deeper matches = higher confidence +**Operator Support:** +- Supports comparison operators (`<`, `>`, `<=`, `>=`) in addition to equality (`=`, `!=`) and bitwise operators (`&`) + ### EvaluationContext Tracks state during rule evaluation. @@ -472,8 +475,19 @@ The evaluation hot path is optimized for: 1. Add variant to `Operator` enum (`ast.rs`) 2. Add parsing logic (`grammar.rs`) 3. Add comparison logic (`operators.rs`) -4. Add tests -5. Update documentation +4. Add serialization for build-time (`build.rs` and `build_helpers.rs`) +5. Add tests +6. Update documentation + +**Implemented Operators:** +- `Equal` (`=`, `==`) +- `NotEqual` (`!=`, `<>`) +- `LessThan` (`<`) +- `GreaterThan` (`>`) +- `LessEqual` (`<=`) +- `GreaterEqual` (`>=`) +- `BitwiseAnd` (`&`) +- `BitwiseAndMask` (`&` with mask) ### Adding Output Formats diff --git a/docs/MAGIC_FORMAT.md b/docs/MAGIC_FORMAT.md index 1d12fb97..f1332a8f 100644 --- a/docs/MAGIC_FORMAT.md +++ b/docs/MAGIC_FORMAT.md @@ -192,8 +192,10 @@ Example: |----------|-------------|---------| | `=` | Equal (default) | `0 long =0xcafebabe` | | `!` | Not equal | `4 byte !0` | -| `>` | Greater than | `8 long >1000` | | `<` | Less than | `8 long <100` | +| `>` | Greater than | `8 long >1000` | +| `<=` | Less than or equal | `8 long <=100` | +| `>=` | Greater than or equal | `8 long >=1000` | | `&` | Bitwise AND | `4 byte &0x80` | | `^` | Bitwise XOR | `4 byte ^0xff` | @@ -469,7 +471,7 @@ Consider: - Indirect offsets (basic) - Byte, short, long types - String type -- Equal, not-equal operators +- Comparison operators (`=`, `!`, `<`, `>`, `<=`, `>=`) - Bitwise AND operator - Nested rules - Comments @@ -484,6 +486,7 @@ Consider: ### Recently Added +- **Comparison operators**: Full support for `<`, `>`, `<=`, `>=` operators - **Strength modifiers**: The `!:strength` directive for adjusting rule priority --- diff --git a/docs/src/api-reference.md b/docs/src/api-reference.md index 63ba8531..3ee6d559 100644 --- a/docs/src/api-reference.md +++ b/docs/src/api-reference.md @@ -223,7 +223,7 @@ use libmagic_rs::TypeKind; | Variant | Description | |---------|-------------| -| `Byte` | Single byte | +| `Byte { signed }` | Single byte with explicit signedness | | `Short { endian, signed }` | 16-bit integer | | `Long { endian, signed }` | 32-bit integer | | `String { max_length }` | String data | @@ -238,8 +238,12 @@ use libmagic_rs::Operator; | Variant | Description | |---------|-------------| -| `Equal` | Equality comparison | -| `NotEqual` | Inequality comparison | +| `Equal` | Equality comparison (`=`) | +| `NotEqual` | Inequality comparison (`!=`) | +| `LessThan` | Less than comparison (`<`) | +| `GreaterThan` | Greater than comparison (`>`) | +| `LessEqual` | Less than or equal comparison (`<=`) | +| `GreaterEqual` | Greater than or equal comparison (`>=`) | | `BitwiseAnd` | Bitwise AND | | `BitwiseAndMask(u64)` | Bitwise AND with mask | diff --git a/docs/src/ast-structures.md b/docs/src/ast-structures.md index 044597dd..eab4b483 100644 --- a/docs/src/ast-structures.md +++ b/docs/src/ast-structures.md @@ -56,14 +56,14 @@ Magic rules can contain child rules that are evaluated when the parent matches: ```rust let parent_rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: false }, op: Operator::Equal, value: Value::Uint(0x7f), message: "ELF".to_string(), children: vec![ MagicRule { offset: OffsetSpec::Absolute(4), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: false }, op: Operator::Equal, value: Value::Uint(1), message: "32-bit".to_string(), @@ -72,7 +72,7 @@ let parent_rule = MagicRule { }, MagicRule { offset: OffsetSpec::Absolute(4), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: false }, op: Operator::Equal, value: Value::Uint(2), message: "64-bit".to_string(), @@ -169,7 +169,7 @@ The `TypeKind` enum specifies how to interpret bytes at the given offset: ```rust pub enum TypeKind { /// Single byte (8-bit) - Byte, + Byte { signed: bool }, /// 16-bit integer Short { endian: Endianness, signed: bool }, @@ -185,8 +185,11 @@ pub enum TypeKind { **Examples:** ```rust -// Single byte -let byte_type = TypeKind::Byte; +// Single unsigned byte +let byte_type = TypeKind::Byte { signed: false }; + +// Single signed byte +let signed_byte_type = TypeKind::Byte { signed: true }; // 16-bit little-endian unsigned integer let short_le = TypeKind::Short { @@ -218,13 +221,18 @@ pub enum Endianness { ## Operator Types -The `Operator` enum defines comparison operations: +The `Operator` enum defines comparison and bitwise operations: ```rust pub enum Operator { - Equal, // == - NotEqual, // != - BitwiseAnd, // & (bitwise AND for pattern matching) + Equal, // == (equality comparison) + NotEqual, // != (inequality comparison) + LessThan, // < (less-than comparison) + GreaterThan, // > (greater-than comparison) + LessEqual, // <= (less-than-or-equal comparison) + GreaterEqual, // >= (greater-than-or-equal comparison) + BitwiseAnd, // & (bitwise AND for pattern matching) + BitwiseAndMask(u64), // & (bitwise AND with mask value) } ``` @@ -237,8 +245,23 @@ let equal_op = Operator::Equal; // Not equal let not_equal_op = Operator::NotEqual; +// Less than comparison +let less_op = Operator::LessThan; + +// Greater than comparison +let greater_op = Operator::GreaterThan; + +// Less than or equal +let less_equal_op = Operator::LessEqual; + +// Greater than or equal +let greater_equal_op = Operator::GreaterEqual; + // Bitwise AND (useful for flag checking) let bitwise_op = Operator::BitwiseAnd; + +// Bitwise AND with mask +let bitwise_mask_op = Operator::BitwiseAndMask(0xFF00); ``` ## Value Types @@ -313,7 +336,7 @@ let elf_rules = vec![ children: vec![ MagicRule { offset: OffsetSpec::Absolute(4), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: false }, op: Operator::Equal, value: Value::Uint(1), message: "32-bit".to_string(), @@ -322,7 +345,7 @@ let elf_rules = vec![ }, MagicRule { offset: OffsetSpec::Absolute(4), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: false }, op: Operator::Equal, value: Value::Uint(2), message: "64-bit".to_string(), @@ -374,8 +397,8 @@ let script_rule = MagicRule { ### Type Selection -1. **Use `Byte`** for single-byte values and flags -2. **Use `Short/Long`** with explicit endianness for multi-byte integers +1. **Use `Byte { signed }`** for single-byte values and flags, specifying signedness +2. **Use `Short/Long`** with explicit endianness and signedness for multi-byte integers 3. **Use `String`** with length limits for text patterns 4. **Use `Bytes`** for exact binary sequences diff --git a/docs/src/evaluator.md b/docs/src/evaluator.md index 9cfb7576..7d63e10e 100644 --- a/docs/src/evaluator.md +++ b/docs/src/evaluator.md @@ -43,9 +43,11 @@ Note: Fields are private; use accessor methods like `current_offset()`, `recursi **Key Methods:** - `new()` - Create context with default configuration -- `with_config()` - Create context with custom configuration -- `check_timeout()` - Verify evaluation hasn't exceeded time limit -- `increment_depth()` / `decrement_depth()` - Track recursion safely +- `current_offset()` / `set_current_offset()` - Track current buffer position +- `recursion_depth()` - Query current recursion depth +- `increment_recursion_depth()` / `decrement_recursion_depth()` - Track recursion safely +- `timeout_ms()` - Query configured timeout +- `reset()` - Reset context state for reuse ### MatchResult (`evaluator/mod.rs`) @@ -61,6 +63,8 @@ pub struct MatchResult { pub level: u32, /// The matched value (parsed according to rule type) pub value: Value, + /// Confidence score (0.0 to 1.0) based on rule hierarchy depth + pub confidence: f64, } ``` @@ -79,8 +83,7 @@ Handles all offset types safely: pub fn resolve_offset( spec: &OffsetSpec, buffer: &[u8], - context: &EvaluationContext, -) -> Result +) -> Result ``` ### Type Reading (`evaluator/types.rs`) @@ -94,11 +97,11 @@ Interprets bytes according to type specifications: - **Bounds checking**: Prevents buffer overruns ```rust -pub fn read_type_value( +pub fn read_typed_value( buffer: &[u8], offset: usize, type_kind: &TypeKind, -) -> Result +) -> Result ``` ### Operator Application (`evaluator/operators.rs`) @@ -107,14 +110,20 @@ Applies comparison operations: - **Equal** (`=`, `==`): Exact value matching - **NotEqual** (`!=`, `<>`): Non-matching values +- **LessThan** (`<`): Less-than comparison (numeric or lexicographic) +- **GreaterThan** (`>`): Greater-than comparison (numeric or lexicographic) +- **LessEqual** (`<=`): Less-than-or-equal comparison (numeric or lexicographic) +- **GreaterEqual** (`>=`): Greater-than-or-equal comparison (numeric or lexicographic) - **BitwiseAnd** (`&`): Pattern matching for flags - **BitwiseAndMask**: AND with mask then compare +Comparison operators support numeric comparisons across different integer types using `i128` coercion for cross-type compatibility. + ```rust pub fn apply_operator( - op: &Operator, - actual: &TypeValue, - expected: &Value, + operator: &Operator, + left: &Value, + right: &Value, ) -> bool ``` @@ -204,7 +213,7 @@ The evaluator uses graceful degradation: - **Invalid offsets**: Skip rule, continue with others - **Type mismatches**: Skip rule, continue with others -- **Timeout exceeded**: Return partial results collected so far +- **Timeout exceeded**: Return error (partial results are not preserved) - **Recursion limit**: Stop descent, continue siblings ```rust @@ -237,25 +246,25 @@ let result = evaluate_rules_with_config(&rules, buffer, &config)?; ### Primary Functions ```rust -/// Evaluate rules with default configuration +/// Evaluate rules with context for recursion tracking pub fn evaluate_rules( rules: &[MagicRule], buffer: &[u8], -) -> Result, EvaluationError>; + context: &mut EvaluationContext, +) -> Result, LibmagicError>; -/// Evaluate rules with custom configuration +/// Evaluate rules with custom configuration (creates context internally) pub fn evaluate_rules_with_config( rules: &[MagicRule], buffer: &[u8], config: &EvaluationConfig, -) -> Result, EvaluationError>; +) -> Result, LibmagicError>; /// Evaluate a single rule (used internally and for testing) pub fn evaluate_single_rule( rule: &MagicRule, buffer: &[u8], - context: &mut EvaluationContext, -) -> Result, EvaluationError>; +) -> Result, LibmagicError>; ``` ### Usage Example @@ -288,7 +297,7 @@ for m in matches { - [x] Basic evaluation engine structure - [x] Offset resolution (absolute, relative, from-end) - [x] Type reading with endianness support (Byte, Short, Long, String) -- [x] Operator application (Equal, NotEqual, BitwiseAnd) +- [x] Operator application (Equal, NotEqual, LessThan, GreaterThan, LessEqual, GreaterEqual, BitwiseAnd, BitwiseAndMask) - [x] Hierarchical rule processing with child evaluation - [x] Error handling with graceful degradation - [x] Timeout protection diff --git a/docs/src/magic-format.md b/docs/src/magic-format.md index 4a3ab4a7..233bb9c1 100644 --- a/docs/src/magic-format.md +++ b/docs/src/magic-format.md @@ -149,7 +149,9 @@ String escape sequences: - `\t` - tab - `\\` - backslash -### String Flags +### String Flags (Not Yet Implemented) + +> **Note:** String flags are documented for libmagic compatibility reference but are not yet implemented in libmagic-rs. | Flag | Description | |------|-------------| @@ -169,11 +171,13 @@ Example: | Operator | Description | Example | |----------|-------------|---------| | `=` | Equal (default) | `0 long =0xcafebabe` | -| `!` | Not equal | `4 byte !0` | +| `!=` | Not equal | `4 byte !=0` | | `>` | Greater than | `8 long >1000` | | `<` | Less than | `8 long <100` | +| `>=` | Greater than or equal | `8 long >=1000` | +| `<=` | Less than or equal | `8 long <=100` | | `&` | Bitwise AND | `4 byte &0x80` | -| `^` | Bitwise XOR | `4 byte ^0xff` | +| `^` | Bitwise XOR (not yet implemented) | `4 byte ^0xff` | ### Bitwise AND with Mask @@ -437,7 +441,7 @@ Consider: - Indirect offsets (basic) - Byte, short, long types - String type -- Equal, not-equal operators +- Comparison operators (equal, not-equal, less-than, greater-than, less-equal, greater-equal) - Bitwise AND operator - Nested rules - Comments diff --git a/docs/src/parser.md b/docs/src/parser.md index 503bd467..df60c628 100644 --- a/docs/src/parser.md +++ b/docs/src/parser.md @@ -70,6 +70,12 @@ parse_operator("==") // Ok(("", Operator::Equal)) parse_operator("!=") // Ok(("", Operator::NotEqual)) parse_operator("<>") // Ok(("", Operator::NotEqual)) +// Comparison operators +parse_operator("<") // Ok(("", Operator::LessThan)) +parse_operator(">") // Ok(("", Operator::GreaterThan)) +parse_operator("<=") // Ok(("", Operator::LessEqual)) +parse_operator(">=") // Ok(("", Operator::GreaterEqual)) + // Bitwise operators parse_operator("&") // Ok(("", Operator::BitwiseAnd)) ``` @@ -80,6 +86,7 @@ parse_operator("&") // Ok(("", Operator::BitwiseAnd)) - ✅ Precedence handling (longer operators matched first) - ✅ Whitespace tolerance - ✅ Invalid operator rejection with clear errors +- ✅ Eight comparison and bitwise operators supported ### Value Parsing (`parse_value`) @@ -180,7 +187,7 @@ fn test_parse_number_edge_cases() { ## Complete Magic File Parsing -The parser now provides complete magic file parsing through the `parse_text_magic_file()` function: +The parser provides complete magic file parsing through the `parse_text_magic_file()` function: ```rust use libmagic_rs::parser::parse_text_magic_file; @@ -197,6 +204,8 @@ assert_eq!(rules.len(), 1); // One root rule assert_eq!(rules[0].children.len(), 2); // Two child rules ``` +The parser distinguishes between signed and unsigned type variants (e.g., `byte` vs `ubyte`, `leshort` vs `uleshort`), mapping them to the `signed` field in `TypeKind::Byte { signed: bool }` and similar type variants. Unprefixed types default to signed in accordance with libmagic conventions. + ### Format Detection The parser automatically detects magic file formats: diff --git a/src/build_helpers.rs b/src/build_helpers.rs index 504e14e3..882755f7 100644 --- a/src/build_helpers.rs +++ b/src/build_helpers.rs @@ -211,7 +211,7 @@ fn serialize_offset_spec(offset: &OffsetSpec) -> String { fn serialize_type_kind(typ: &TypeKind) -> String { match typ { - TypeKind::Byte => "TypeKind::Byte".to_string(), + TypeKind::Byte { signed } => format!("TypeKind::Byte {{ signed: {signed} }}"), TypeKind::Short { endian, signed } => format!( "TypeKind::Short {{ endian: {}, signed: {} }}", serialize_endianness(*endian), @@ -235,6 +235,10 @@ fn serialize_operator(op: &Operator) -> String { match op { Operator::Equal => "Operator::Equal".to_string(), Operator::NotEqual => "Operator::NotEqual".to_string(), + Operator::LessThan => "Operator::LessThan".to_string(), + Operator::GreaterThan => "Operator::GreaterThan".to_string(), + Operator::LessEqual => "Operator::LessEqual".to_string(), + Operator::GreaterEqual => "Operator::GreaterEqual".to_string(), Operator::BitwiseAnd => "Operator::BitwiseAnd".to_string(), Operator::BitwiseAndMask(mask) => format!("Operator::BitwiseAndMask({mask})"), } @@ -437,9 +441,16 @@ mod tests { #[test] fn test_serialize_type_kind_byte() { - let typ = TypeKind::Byte; - let serialized = serialize_type_kind(&typ); - assert_eq!(serialized, "TypeKind::Byte"); + let signed = TypeKind::Byte { signed: true }; + assert_eq!( + serialize_type_kind(&signed), + "TypeKind::Byte { signed: true }" + ); + let unsigned = TypeKind::Byte { signed: false }; + assert_eq!( + serialize_type_kind(&unsigned), + "TypeKind::Byte { signed: false }" + ); } #[test] @@ -486,6 +497,22 @@ mod tests { serialize_operator(&Operator::NotEqual), "Operator::NotEqual" ); + assert_eq!( + serialize_operator(&Operator::LessThan), + "Operator::LessThan" + ); + assert_eq!( + serialize_operator(&Operator::GreaterThan), + "Operator::GreaterThan" + ); + assert_eq!( + serialize_operator(&Operator::LessEqual), + "Operator::LessEqual" + ); + assert_eq!( + serialize_operator(&Operator::GreaterEqual), + "Operator::GreaterEqual" + ); assert_eq!( serialize_operator(&Operator::BitwiseAnd), "Operator::BitwiseAnd" @@ -592,7 +619,7 @@ mod tests { fn test_generate_builtin_rules_single_rule() { let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7F), message: "test".to_string(), @@ -604,7 +631,7 @@ mod tests { let generated = generate_builtin_rules(&[rule]); assert!(generated.contains("OffsetSpec::Absolute(0)")); - assert!(generated.contains("TypeKind::Byte")); + assert!(generated.contains("TypeKind::Byte { signed: true }")); assert!(generated.contains("Operator::Equal")); assert!(generated.contains("Value::Uint(127)")); assert!(generated.contains("test")); @@ -621,7 +648,7 @@ mod tests { fn test_serialize_children_with_nested_rule() { let child = MagicRule { offset: OffsetSpec::Absolute(4), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(1), message: "child".to_string(), @@ -691,7 +718,7 @@ mod tests { assert!(result.is_ok()); let generated = result.unwrap(); assert!(generated.contains("OffsetSpec::Absolute(0)")); - assert!(generated.contains("TypeKind::Byte")); + assert!(generated.contains("TypeKind::Byte { signed: true }")); assert!(generated.contains("Value::Uint(127)")); assert!(generated.contains("ELF executable")); } diff --git a/src/builtin_rules.magic b/src/builtin_rules.magic index 9acdc277..28791fc1 100644 --- a/src/builtin_rules.magic +++ b/src/builtin_rules.magic @@ -7,7 +7,8 @@ # Format: offset type value message # Hierarchical rules use > prefix for nesting # -# Supported types: byte, short, long, leshort, beshort, lelong, belong, string +# Supported types: byte, ubyte, short, ushort, long, ulong, +# leshort, uleshort, beshort, ubeshort, lelong, ulelong, belong, ubelong, string # Escape sequences: octal escapes in quoted strings (e.g., "\177ELF") # ============================================================ @@ -44,10 +45,10 @@ # ============================================================ # JPEG Images (magic: 0xff 0xd8) -0 beshort 0xffd8 JPEG image data +0 ubeshort 0xffd8 JPEG image data # PNG Images (magic: 0x89 'P' 'N' 'G') -0 belong 0x89504e47 PNG image data +0 ubelong 0x89504e47 PNG image data # GIF Images 0 string "GIF8" GIF image data diff --git a/src/evaluator/mod.rs b/src/evaluator/mod.rs index d74c2c1a..516b481a 100644 --- a/src/evaluator/mod.rs +++ b/src/evaluator/mod.rs @@ -273,7 +273,7 @@ impl MatchResult { /// // Create a rule to check for ELF magic bytes at offset 0 /// let rule = MagicRule { /// offset: OffsetSpec::Absolute(0), -/// typ: TypeKind::Byte, +/// typ: TypeKind::Byte { signed: true }, /// op: Operator::Equal, /// value: Value::Uint(0x7f), /// message: "ELF magic".to_string(), @@ -306,9 +306,12 @@ pub fn evaluate_single_rule( let read_value = types::read_typed_value(buffer, absolute_offset, &rule.typ) .map_err(|e| LibmagicError::EvaluationError(e.into()))?; - // Step 3: Apply the operator to compare the read value with the expected value + // Step 3: Coerce the rule's expected value to match the type's signedness/width + let expected_value = types::coerce_value_to_type(&rule.value, &rule.typ); + + // Step 4: Apply the operator to compare the read value with the expected value Ok( - operators::apply_operator(&rule.op, &read_value, &rule.value) + operators::apply_operator(&rule.op, &read_value, &expected_value) .then_some((absolute_offset, read_value)), ) } @@ -352,14 +355,14 @@ pub fn evaluate_single_rule( /// // Create a hierarchical rule set for ELF files /// let parent_rule = MagicRule { /// offset: OffsetSpec::Absolute(0), -/// typ: TypeKind::Byte, +/// typ: TypeKind::Byte { signed: true }, /// op: Operator::Equal, /// value: Value::Uint(0x7f), /// message: "ELF".to_string(), /// children: vec![ /// MagicRule { /// offset: OffsetSpec::Absolute(4), -/// typ: TypeKind::Byte, +/// typ: TypeKind::Byte { signed: true }, /// op: Operator::Equal, /// value: Value::Uint(2), /// message: "64-bit".to_string(), @@ -410,10 +413,21 @@ pub fn evaluate_rules( // Evaluate the current rule with graceful error handling let match_data = match evaluate_single_rule(rule, buffer) { Ok(data) => data, - Err(_e) => { - // Skip rules with evaluation errors (graceful degradation) + Err( + LibmagicError::EvaluationError( + crate::error::EvaluationError::BufferOverrun { .. } + | crate::error::EvaluationError::InvalidOffset { .. } + | crate::error::EvaluationError::TypeReadError(_), + ) + | LibmagicError::IoError(_), + ) => { + // Expected evaluation errors for individual rules -- skip gracefully continue; } + Err(e) => { + // Unexpected errors (InternalError, UnsupportedType, etc.) should propagate + return Err(e); + } }; if let Some((absolute_offset, read_value)) = match_data { @@ -454,8 +468,20 @@ pub fn evaluate_rules( }, )); } - Err(_e) => { - // Non-critical child evaluation errors are skipped (graceful degradation) + Err( + LibmagicError::EvaluationError( + crate::error::EvaluationError::BufferOverrun { .. } + | crate::error::EvaluationError::InvalidOffset { .. } + | crate::error::EvaluationError::TypeReadError(_), + ) + | LibmagicError::IoError(_), + ) => { + // Expected child evaluation errors -- skip gracefully + } + Err(e) => { + // Unexpected errors in children should propagate + context.decrement_recursion_depth()?; + return Err(e); } } @@ -498,7 +524,7 @@ pub fn evaluate_rules( /// /// let rule = MagicRule { /// offset: OffsetSpec::Absolute(0), -/// typ: TypeKind::Byte, +/// typ: TypeKind::Byte { signed: true }, /// op: Operator::Equal, /// value: Value::Uint(0x7f), /// message: "ELF magic".to_string(), @@ -538,7 +564,7 @@ mod tests { fn test_evaluate_single_rule_byte_equal_match() { let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "ELF magic".to_string(), @@ -556,7 +582,7 @@ mod tests { fn test_evaluate_single_rule_byte_equal_no_match() { let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "ELF magic".to_string(), @@ -574,7 +600,7 @@ mod tests { fn test_evaluate_single_rule_byte_not_equal_match() { let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::NotEqual, value: Value::Uint(0x00), message: "Non-zero byte".to_string(), @@ -592,7 +618,7 @@ mod tests { fn test_evaluate_single_rule_byte_not_equal_no_match() { let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::NotEqual, value: Value::Uint(0x7f), message: "Not ELF magic".to_string(), @@ -610,7 +636,7 @@ mod tests { fn test_evaluate_single_rule_byte_bitwise_and_match() { let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::BitwiseAnd, value: Value::Uint(0x80), // Check if high bit is set message: "High bit set".to_string(), @@ -628,7 +654,7 @@ mod tests { fn test_evaluate_single_rule_byte_bitwise_and_no_match() { let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::BitwiseAnd, value: Value::Uint(0x80), // Check if high bit is set message: "High bit set".to_string(), @@ -814,7 +840,7 @@ mod tests { fn test_evaluate_single_rule_different_offsets() { let rule = MagicRule { offset: OffsetSpec::Absolute(2), // Read from offset 2 - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x4c), message: "ELF class byte".to_string(), @@ -832,7 +858,7 @@ mod tests { fn test_evaluate_single_rule_negative_offset() { let rule = MagicRule { offset: OffsetSpec::Absolute(-1), // Last byte - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x46), message: "Last byte".to_string(), @@ -850,7 +876,7 @@ mod tests { fn test_evaluate_single_rule_from_end_offset() { let rule = MagicRule { offset: OffsetSpec::FromEnd(-2), // Second to last byte - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x4c), message: "Second to last byte".to_string(), @@ -868,7 +894,7 @@ mod tests { fn test_evaluate_single_rule_offset_out_of_bounds() { let rule = MagicRule { offset: OffsetSpec::Absolute(10), // Beyond buffer - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x00), message: "Out of bounds".to_string(), @@ -952,7 +978,7 @@ mod tests { fn test_evaluate_single_rule_empty_buffer() { let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x00), message: "Empty buffer".to_string(), @@ -1018,7 +1044,7 @@ fn test_evaluate_single_rule_cross_type_comparison() { // Test that cross-type integer comparisons use coercion (Uint(42) == Int(42)) let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Int(42), // Int value vs Uint from byte read message: "Cross-type comparison".to_string(), @@ -1128,7 +1154,7 @@ fn test_evaluate_single_rule_all_operators() { // Test Equal operator let equal_rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x42), message: "Equal test".to_string(), @@ -1141,7 +1167,7 @@ fn test_evaluate_single_rule_all_operators() { // Test NotEqual operator let not_equal_rule = MagicRule { offset: OffsetSpec::Absolute(1), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::NotEqual, value: Value::Uint(0x42), message: "NotEqual test".to_string(), @@ -1158,7 +1184,7 @@ fn test_evaluate_single_rule_all_operators() { // Test BitwiseAnd operator let bitwise_and_rule = MagicRule { offset: OffsetSpec::Absolute(3), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::BitwiseAnd, value: Value::Uint(0x80), message: "BitwiseAnd test".to_string(), @@ -1173,6 +1199,158 @@ fn test_evaluate_single_rule_all_operators() { ); // 0x80 & 0x80 = 0x80 } +#[test] +fn test_evaluate_single_rule_comparison_operators() { + let buffer = &[0x42, 0x00, 0xff, 0x80]; + + // LessThan: byte at offset 1 is 0x00, 0x00 < 0x42 = true + let less_than_rule = MagicRule { + offset: OffsetSpec::Absolute(1), + typ: TypeKind::Byte { signed: false }, + op: Operator::LessThan, + value: Value::Uint(0x42), + message: "LessThan test".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + assert!( + evaluate_single_rule(&less_than_rule, buffer) + .unwrap() + .is_some() + ); + + // GreaterThan: byte at offset 2 is 0xff, 0xff > 0x42 = true + let greater_than_rule = MagicRule { + offset: OffsetSpec::Absolute(2), + typ: TypeKind::Byte { signed: false }, + op: Operator::GreaterThan, + value: Value::Uint(0x42), + message: "GreaterThan test".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + assert!( + evaluate_single_rule(&greater_than_rule, buffer) + .unwrap() + .is_some() + ); + + // LessEqual: byte at offset 0 is 0x42, 0x42 <= 0x42 = true + let less_equal_rule = MagicRule { + offset: OffsetSpec::Absolute(0), + typ: TypeKind::Byte { signed: false }, + op: Operator::LessEqual, + value: Value::Uint(0x42), + message: "LessEqual test".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + assert!( + evaluate_single_rule(&less_equal_rule, buffer) + .unwrap() + .is_some() + ); + + // GreaterEqual: byte at offset 0 is 0x42, 0x42 >= 0x42 = true + let greater_equal_rule = MagicRule { + offset: OffsetSpec::Absolute(0), + typ: TypeKind::Byte { signed: false }, + op: Operator::GreaterEqual, + value: Value::Uint(0x42), + message: "GreaterEqual test".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + assert!( + evaluate_single_rule(&greater_equal_rule, buffer) + .unwrap() + .is_some() + ); +} + +#[test] +fn test_evaluate_comparison_with_signed_byte() { + // 0x80 = -128 as signed byte, 128 as unsigned byte + let buffer = &[0x80]; + + // Signed byte: reads as Int(-128), which IS less than Uint(0) + let signed_rule = MagicRule { + offset: OffsetSpec::Absolute(0), + typ: TypeKind::Byte { signed: true }, + op: Operator::LessThan, + value: Value::Uint(0), + message: "signed less".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + assert!( + evaluate_single_rule(&signed_rule, buffer) + .unwrap() + .is_some() + ); + + // Unsigned byte: reads as Uint(128), which is NOT less than Uint(0) + let unsigned_rule = MagicRule { + offset: OffsetSpec::Absolute(0), + typ: TypeKind::Byte { signed: false }, + op: Operator::LessThan, + value: Value::Uint(0), + message: "unsigned less".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + assert!( + evaluate_single_rule(&unsigned_rule, buffer) + .unwrap() + .is_none() + ); +} + +#[test] +fn test_evaluate_comparison_operators_negative_cases() { + let buffer = &[0x42]; // 66 + + let cases: Vec<(Operator, u64, bool)> = vec![ + // LessThan: 66 < 66 = false, 66 < 67 = true + (Operator::LessThan, 66, false), + (Operator::LessThan, 67, true), + // GreaterThan: 66 > 66 = false, 66 > 65 = true + (Operator::GreaterThan, 66, false), + (Operator::GreaterThan, 65, true), + // LessEqual: 66 <= 65 = false, 66 <= 66 = true + (Operator::LessEqual, 65, false), + (Operator::LessEqual, 66, true), + // GreaterEqual: 66 >= 67 = false, 66 >= 66 = true + (Operator::GreaterEqual, 67, false), + (Operator::GreaterEqual, 66, true), + ]; + + for (op, value, expected) in cases { + let rule = MagicRule { + offset: OffsetSpec::Absolute(0), + typ: TypeKind::Byte { signed: false }, + op: op.clone(), + value: Value::Uint(value), + message: "test".to_string(), + children: vec![], + level: 0, + strength_modifier: None, + }; + let result = evaluate_single_rule(&rule, buffer).unwrap(); + assert_eq!( + result.is_some(), + expected, + "{op:?} with value {value}: expected {expected}" + ); + } +} + #[test] fn test_evaluate_single_rule_edge_case_values() { // Test with maximum values @@ -1216,10 +1394,10 @@ fn test_evaluate_single_rule_edge_case_values() { #[test] fn test_evaluate_single_rule_various_buffer_sizes() { - // Test with single byte buffer + // Test with single byte buffer (unsigned for values > 127) let single_byte_rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: false }, op: Operator::Equal, value: Value::Uint(0xaa), message: "Single byte".to_string(), @@ -1237,7 +1415,7 @@ fn test_evaluate_single_rule_various_buffer_sizes() { let large_buffer: Vec = (0..1024).map(|i| (i % 256) as u8).collect(); let large_rule = MagicRule { offset: OffsetSpec::Absolute(1000), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: false }, op: Operator::Equal, value: Value::Uint((1000 % 256) as u64), message: "Large buffer".to_string(), @@ -1637,7 +1815,7 @@ fn test_evaluate_rules_empty_list() { fn test_evaluate_rules_single_matching_rule() { let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "ELF magic".to_string(), @@ -1656,14 +1834,15 @@ fn test_evaluate_rules_single_matching_rule() { assert_eq!(matches[0].message, "ELF magic"); assert_eq!(matches[0].offset, 0); assert_eq!(matches[0].level, 0); - assert_eq!(matches[0].value, Value::Uint(0x7f)); + // Signed byte read: 0x7f -> Value::Int(127) + assert_eq!(matches[0].value, Value::Int(0x7f)); } #[test] fn test_evaluate_rules_single_non_matching_rule() { let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x50), // ZIP magic, not ELF message: "ZIP magic".to_string(), @@ -1685,7 +1864,7 @@ fn test_evaluate_rules_single_non_matching_rule() { fn test_evaluate_rules_multiple_rules_stop_at_first() { let rule1 = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "First match".to_string(), @@ -1696,7 +1875,7 @@ fn test_evaluate_rules_multiple_rules_stop_at_first() { let rule2 = MagicRule { offset: OffsetSpec::Absolute(1), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x45), message: "Second match".to_string(), @@ -1722,7 +1901,7 @@ fn test_evaluate_rules_multiple_rules_stop_at_first() { fn test_evaluate_rules_multiple_rules_find_all() { let rule1 = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "First match".to_string(), @@ -1733,7 +1912,7 @@ fn test_evaluate_rules_multiple_rules_find_all() { let rule2 = MagicRule { offset: OffsetSpec::Absolute(1), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x45), message: "Second match".to_string(), @@ -1760,7 +1939,7 @@ fn test_evaluate_rules_multiple_rules_find_all() { fn test_evaluate_rules_hierarchical_parent_child() { let child_rule = MagicRule { offset: OffsetSpec::Absolute(4), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x02), // ELF class 64-bit message: "64-bit".to_string(), @@ -1771,7 +1950,7 @@ fn test_evaluate_rules_hierarchical_parent_child() { let parent_rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "ELF".to_string(), @@ -1797,7 +1976,7 @@ fn test_evaluate_rules_hierarchical_parent_child() { fn test_evaluate_rules_hierarchical_parent_no_match() { let child_rule = MagicRule { offset: OffsetSpec::Absolute(4), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x02), message: "64-bit".to_string(), @@ -1808,7 +1987,7 @@ fn test_evaluate_rules_hierarchical_parent_no_match() { let parent_rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x50), // ZIP magic, not ELF message: "ZIP".to_string(), @@ -1830,7 +2009,7 @@ fn test_evaluate_rules_hierarchical_parent_no_match() { fn test_evaluate_rules_hierarchical_parent_match_child_no_match() { let child_rule = MagicRule { offset: OffsetSpec::Absolute(4), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x01), // ELF class 32-bit, but buffer has 64-bit message: "32-bit".to_string(), @@ -1841,7 +2020,7 @@ fn test_evaluate_rules_hierarchical_parent_match_child_no_match() { let parent_rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "ELF".to_string(), @@ -1865,7 +2044,7 @@ fn test_evaluate_rules_hierarchical_parent_match_child_no_match() { fn test_evaluate_rules_deep_hierarchy() { let grandchild_rule = MagicRule { offset: OffsetSpec::Absolute(5), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x01), // Little endian message: "little-endian".to_string(), @@ -1876,7 +2055,7 @@ fn test_evaluate_rules_deep_hierarchy() { let child_rule = MagicRule { offset: OffsetSpec::Absolute(4), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x02), // 64-bit message: "64-bit".to_string(), @@ -1887,7 +2066,7 @@ fn test_evaluate_rules_deep_hierarchy() { let parent_rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "ELF".to_string(), @@ -1915,7 +2094,7 @@ fn test_evaluate_rules_deep_hierarchy() { fn test_evaluate_rules_multiple_children() { let child1 = MagicRule { offset: OffsetSpec::Absolute(4), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x02), message: "64-bit".to_string(), @@ -1926,7 +2105,7 @@ fn test_evaluate_rules_multiple_children() { let child2 = MagicRule { offset: OffsetSpec::Absolute(5), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x01), message: "little-endian".to_string(), @@ -1937,7 +2116,7 @@ fn test_evaluate_rules_multiple_children() { let parent_rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "ELF".to_string(), @@ -1966,7 +2145,7 @@ fn test_evaluate_rules_recursion_depth_limit() { // Create a deeply nested rule structure that exceeds the limit let mut current_rule = MagicRule { offset: OffsetSpec::Absolute(10), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x00), message: "Deep level".to_string(), @@ -1979,7 +2158,7 @@ fn test_evaluate_rules_recursion_depth_limit() { for i in (0u32..10u32).rev() { current_rule = MagicRule { offset: OffsetSpec::Absolute(i64::from(i)), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(u64::from(i)), message: format!("Level {i}"), @@ -2013,7 +2192,7 @@ fn test_evaluate_rules_recursion_depth_limit() { fn test_evaluate_rules_with_config_convenience() { let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "ELF magic".to_string(), @@ -2035,7 +2214,7 @@ fn test_evaluate_rules_with_config_convenience() { fn test_evaluate_rules_timeout() { let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "ELF magic".to_string(), @@ -2065,7 +2244,7 @@ fn test_evaluate_rules_timeout() { fn test_evaluate_rules_empty_buffer() { let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "Should not match".to_string(), @@ -2091,7 +2270,7 @@ fn test_evaluate_rules_empty_buffer() { fn test_evaluate_rules_mixed_matching_non_matching() { let rule1 = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "Matches".to_string(), @@ -2102,7 +2281,7 @@ fn test_evaluate_rules_mixed_matching_non_matching() { let rule2 = MagicRule { offset: OffsetSpec::Absolute(1), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x99), // Doesn't match message: "Doesn't match".to_string(), @@ -2113,7 +2292,7 @@ fn test_evaluate_rules_mixed_matching_non_matching() { let rule3 = MagicRule { offset: OffsetSpec::Absolute(2), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x4c), message: "Also matches".to_string(), @@ -2140,7 +2319,7 @@ fn test_evaluate_rules_mixed_matching_non_matching() { fn test_evaluate_rules_context_state_preservation() { let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "ELF magic".to_string(), @@ -2215,7 +2394,7 @@ fn test_error_recovery_skip_problematic_rules() { // Valid rule that should match MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "Valid rule".to_string(), @@ -2226,7 +2405,7 @@ fn test_error_recovery_skip_problematic_rules() { // Invalid rule with out-of-bounds offset MagicRule { offset: OffsetSpec::Absolute(100), // Beyond buffer - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x00), message: "Invalid rule".to_string(), @@ -2237,7 +2416,7 @@ fn test_error_recovery_skip_problematic_rules() { // Another valid rule that should match MagicRule { offset: OffsetSpec::Absolute(1), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x45), message: "Another valid rule".to_string(), @@ -2271,7 +2450,7 @@ fn test_error_recovery_child_rule_failures() { // Test that parent evaluation continues when child rules fail let rules = vec![MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "Parent rule".to_string(), @@ -2279,7 +2458,7 @@ fn test_error_recovery_child_rule_failures() { // Valid child rule MagicRule { offset: OffsetSpec::Absolute(1), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x45), message: "Valid child".to_string(), @@ -2290,7 +2469,7 @@ fn test_error_recovery_child_rule_failures() { // Invalid child rule MagicRule { offset: OffsetSpec::Absolute(100), // Beyond buffer - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x00), message: "Invalid child".to_string(), @@ -2323,7 +2502,7 @@ fn test_error_recovery_mixed_rule_types() { // Valid byte rule MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "Valid byte".to_string(), @@ -2386,7 +2565,7 @@ fn test_error_recovery_all_rules_fail() { // Out of bounds offset MagicRule { offset: OffsetSpec::Absolute(100), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x00), message: "Out of bounds".to_string(), @@ -2424,7 +2603,7 @@ fn test_error_recovery_timeout_propagation() { // Test that timeout errors are properly propagated (not gracefully handled) let rules = vec![MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "Test rule".to_string(), @@ -2463,13 +2642,13 @@ fn test_error_recovery_recursion_limit_propagation() { // Test that recursion limit errors are properly propagated let rules = vec![MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "Parent".to_string(), children: vec![MagicRule { offset: OffsetSpec::Absolute(1), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x45), message: "Child".to_string(), @@ -2512,7 +2691,7 @@ fn test_error_recovery_preserves_context_state() { // Valid rule MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "Valid rule".to_string(), @@ -2523,7 +2702,7 @@ fn test_error_recovery_preserves_context_state() { // Invalid rule MagicRule { offset: OffsetSpec::Absolute(100), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x00), message: "Invalid rule".to_string(), @@ -2555,7 +2734,7 @@ fn test_debug_error_recovery() { // Simple test to debug error recovery let rule = MagicRule { offset: OffsetSpec::Absolute(100), // Beyond buffer - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x00), message: "Out of bounds rule".to_string(), @@ -2586,7 +2765,7 @@ fn test_debug_mixed_rules() { // Valid rule that should match MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "Valid rule".to_string(), @@ -2597,7 +2776,7 @@ fn test_debug_mixed_rules() { // Invalid rule with out-of-bounds offset MagicRule { offset: OffsetSpec::Absolute(100), // Beyond buffer - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x00), message: "Invalid rule".to_string(), @@ -2608,7 +2787,7 @@ fn test_debug_mixed_rules() { // Another valid rule that should match MagicRule { offset: OffsetSpec::Absolute(1), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x45), message: "Another valid rule".to_string(), diff --git a/src/evaluator/offset.rs b/src/evaluator/offset.rs index 7a919029..43451532 100644 --- a/src/evaluator/offset.rs +++ b/src/evaluator/offset.rs @@ -332,7 +332,7 @@ mod tests { let buffer = b"Test data"; let spec = OffsetSpec::Indirect { base_offset: 0, - pointer_type: crate::parser::ast::TypeKind::Byte, + pointer_type: crate::parser::ast::TypeKind::Byte { signed: true }, adjustment: 0, endian: crate::parser::ast::Endianness::Little, }; diff --git a/src/evaluator/operators.rs b/src/evaluator/operators.rs index acd6e387..e3d77c24 100644 --- a/src/evaluator/operators.rs +++ b/src/evaluator/operators.rs @@ -7,12 +7,15 @@ //! to values during magic rule evaluation. It handles type-safe comparisons //! between different Value variants. +use std::cmp::Ordering; + use crate::parser::ast::{Operator, Value}; /// Apply equality comparison between two values /// /// Compares two `Value` instances for equality, handling proper type matching. -/// Values of different types are considered unequal. +/// Cross-type integer comparisons (`Uint` vs `Int`) are supported via `i128` +/// coercion. Incompatible types (e.g., string vs integer) are considered unequal. /// /// # Arguments /// @@ -21,7 +24,7 @@ use crate::parser::ast::{Operator, Value}; /// /// # Returns /// -/// `true` if the values are equal and of the same type, `false` otherwise +/// `true` if the values are equal (including cross-type integer coercion), `false` otherwise /// /// # Examples /// @@ -46,26 +49,7 @@ use crate::parser::ast::{Operator, Value}; /// ``` #[must_use] pub fn apply_equal(left: &Value, right: &Value) -> bool { - match (left, right) { - // Unsigned integer comparison - (Value::Uint(a), Value::Uint(b)) => a == b, - - // Signed integer comparison - (Value::Int(a), Value::Int(b)) => a == b, - - // Byte sequence comparison - (Value::Bytes(a), Value::Bytes(b)) => a == b, - - // String comparison - (Value::String(a), Value::String(b)) => a == b, - - // Cross-type integer coercion (safe via i128 to avoid overflow) - (Value::Uint(a), Value::Int(b)) => i128::from(*a) == i128::from(*b), - (Value::Int(a), Value::Uint(b)) => i128::from(*a) == i128::from(*b), - - // Different non-integer types are never equal - _ => false, - } + compare_values(left, right) == Some(Ordering::Equal) } /// Apply inequality comparison between two values @@ -165,6 +149,111 @@ pub fn apply_bitwise_and(left: &Value, right: &Value) -> bool { } } +/// Compare two values and return their ordering, if comparable +/// +/// Returns `Some(Ordering)` for same-type comparisons (integers, strings, bytes) +/// and cross-type integer comparisons (via `i128` coercion). Returns `None` for +/// incomparable type combinations. +/// +/// # Examples +/// +/// ``` +/// use std::cmp::Ordering; +/// use libmagic_rs::parser::ast::Value; +/// use libmagic_rs::evaluator::operators::compare_values; +/// +/// assert_eq!(compare_values(&Value::Uint(5), &Value::Uint(10)), Some(Ordering::Less)); +/// assert_eq!(compare_values(&Value::Int(-1), &Value::Uint(0)), Some(Ordering::Less)); +/// assert_eq!(compare_values(&Value::Uint(42), &Value::Int(42)), Some(Ordering::Equal)); +/// assert_eq!(compare_values(&Value::Uint(1), &Value::String("1".to_string())), None); +/// ``` +#[must_use] +pub fn compare_values(left: &Value, right: &Value) -> Option { + match (left, right) { + (Value::Uint(a), Value::Uint(b)) => Some(a.cmp(b)), + (Value::Int(a), Value::Int(b)) => Some(a.cmp(b)), + (Value::Uint(a), Value::Int(b)) => Some(i128::from(*a).cmp(&i128::from(*b))), + (Value::Int(a), Value::Uint(b)) => Some(i128::from(*a).cmp(&i128::from(*b))), + (Value::String(a), Value::String(b)) => Some(a.cmp(b)), + (Value::Bytes(a), Value::Bytes(b)) => Some(a.cmp(b)), + _ => None, + } +} + +/// Apply less-than comparison between two values +/// +/// # Examples +/// +/// ``` +/// use libmagic_rs::parser::ast::Value; +/// use libmagic_rs::evaluator::operators::apply_less_than; +/// +/// assert!(apply_less_than(&Value::Uint(5), &Value::Uint(10))); +/// assert!(!apply_less_than(&Value::Uint(10), &Value::Uint(10))); +/// assert!(apply_less_than(&Value::Int(-1), &Value::Uint(0))); +/// ``` +#[must_use] +pub fn apply_less_than(left: &Value, right: &Value) -> bool { + compare_values(left, right) == Some(Ordering::Less) +} + +/// Apply greater-than comparison between two values +/// +/// # Examples +/// +/// ``` +/// use libmagic_rs::parser::ast::Value; +/// use libmagic_rs::evaluator::operators::apply_greater_than; +/// +/// assert!(apply_greater_than(&Value::Uint(10), &Value::Uint(5))); +/// assert!(!apply_greater_than(&Value::Uint(10), &Value::Uint(10))); +/// assert!(apply_greater_than(&Value::Uint(0), &Value::Int(-1))); +/// ``` +#[must_use] +pub fn apply_greater_than(left: &Value, right: &Value) -> bool { + compare_values(left, right) == Some(Ordering::Greater) +} + +/// Apply less-than-or-equal comparison between two values +/// +/// # Examples +/// +/// ``` +/// use libmagic_rs::parser::ast::Value; +/// use libmagic_rs::evaluator::operators::apply_less_equal; +/// +/// assert!(apply_less_equal(&Value::Uint(10), &Value::Uint(10))); +/// assert!(apply_less_equal(&Value::Uint(5), &Value::Uint(10))); +/// assert!(!apply_less_equal(&Value::Uint(10), &Value::Uint(5))); +/// ``` +#[must_use] +pub fn apply_less_equal(left: &Value, right: &Value) -> bool { + matches!( + compare_values(left, right), + Some(Ordering::Less | Ordering::Equal) + ) +} + +/// Apply greater-than-or-equal comparison between two values +/// +/// # Examples +/// +/// ``` +/// use libmagic_rs::parser::ast::Value; +/// use libmagic_rs::evaluator::operators::apply_greater_equal; +/// +/// assert!(apply_greater_equal(&Value::Uint(10), &Value::Uint(10))); +/// assert!(apply_greater_equal(&Value::Uint(10), &Value::Uint(5))); +/// assert!(!apply_greater_equal(&Value::Uint(5), &Value::Uint(10))); +/// ``` +#[must_use] +pub fn apply_greater_equal(left: &Value, right: &Value) -> bool { + matches!( + compare_values(left, right), + Some(Ordering::Greater | Ordering::Equal) + ) +} + /// Apply operator to two values using the specified operator type /// /// This is the main operator application interface that dispatches to the appropriate @@ -173,7 +262,8 @@ pub fn apply_bitwise_and(left: &Value, right: &Value) -> bool { /// /// # Arguments /// -/// * `operator` - The operator to apply (`Equal`, `NotEqual`, or `BitwiseAnd`) +/// * `operator` - The operator to apply (`Equal`, `NotEqual`, `LessThan`, +/// `GreaterThan`, `LessEqual`, `GreaterEqual`, `BitwiseAnd`, or `BitwiseAndMask`) /// * `left` - The left-hand side value (typically from file data) /// * `right` - The right-hand side value (typically from magic rule) /// @@ -201,6 +291,34 @@ pub fn apply_bitwise_and(left: &Value, right: &Value) -> bool { /// &Value::Uint(24) /// )); /// +/// // Less-than comparison +/// assert!(apply_operator( +/// &Operator::LessThan, +/// &Value::Uint(5), +/// &Value::Uint(10) +/// )); +/// +/// // Greater-than comparison +/// assert!(apply_operator( +/// &Operator::GreaterThan, +/// &Value::Uint(10), +/// &Value::Uint(5) +/// )); +/// +/// // Less-than-or-equal comparison +/// assert!(apply_operator( +/// &Operator::LessEqual, +/// &Value::Uint(10), +/// &Value::Uint(10) +/// )); +/// +/// // Greater-than-or-equal comparison +/// assert!(apply_operator( +/// &Operator::GreaterEqual, +/// &Value::Uint(10), +/// &Value::Uint(10) +/// )); +/// /// // Bitwise AND operation /// assert!(apply_operator( /// &Operator::BitwiseAnd, @@ -220,6 +338,10 @@ pub fn apply_operator(operator: &Operator, left: &Value, right: &Value) -> bool match operator { Operator::Equal => apply_equal(left, right), Operator::NotEqual => apply_not_equal(left, right), + Operator::LessThan => apply_less_than(left, right), + Operator::GreaterThan => apply_greater_than(left, right), + Operator::LessEqual => apply_less_equal(left, right), + Operator::GreaterEqual => apply_greater_equal(left, right), Operator::BitwiseAnd => apply_bitwise_and(left, right), Operator::BitwiseAndMask(mask) => { // Apply mask to left value, then compare with right @@ -1569,6 +1691,10 @@ mod tests { let operators = [ Operator::Equal, Operator::NotEqual, + Operator::LessThan, + Operator::GreaterThan, + Operator::LessEqual, + Operator::GreaterEqual, Operator::BitwiseAnd, Operator::BitwiseAndMask(0xFF), ]; @@ -1590,6 +1716,10 @@ mod tests { let expected = match operator { Operator::Equal => apply_equal(left, right), Operator::NotEqual => apply_not_equal(left, right), + Operator::LessThan => apply_less_than(left, right), + Operator::GreaterThan => apply_greater_than(left, right), + Operator::LessEqual => apply_less_equal(left, right), + Operator::GreaterEqual => apply_greater_equal(left, right), Operator::BitwiseAnd => apply_bitwise_and(left, right), Operator::BitwiseAndMask(mask) => { // Apply mask to left value, then compare with right @@ -1617,4 +1747,120 @@ mod tests { } } } + + // ---- compare_values + comparison operator tests ---- + + #[test] + fn test_compare_values_ordering() { + use std::cmp::Ordering::*; + + // Same-type integer comparisons + assert_eq!( + compare_values(&Value::Uint(5), &Value::Uint(10)), + Some(Less) + ); + assert_eq!( + compare_values(&Value::Uint(10), &Value::Uint(10)), + Some(Equal) + ); + assert_eq!( + compare_values(&Value::Uint(10), &Value::Uint(5)), + Some(Greater) + ); + assert_eq!( + compare_values(&Value::Int(-10), &Value::Int(-5)), + Some(Less) + ); + assert_eq!( + compare_values(&Value::Int(i64::MIN), &Value::Int(0)), + Some(Less) + ); + + // Cross-type integer comparisons via i128 + assert_eq!(compare_values(&Value::Int(-1), &Value::Uint(0)), Some(Less)); + assert_eq!( + compare_values(&Value::Uint(42), &Value::Int(42)), + Some(Equal) + ); + assert_eq!( + compare_values(&Value::Uint(u64::MAX), &Value::Int(-1)), + Some(Greater) + ); + + // String comparisons + assert_eq!( + compare_values(&Value::String("abc".into()), &Value::String("abd".into())), + Some(Less) + ); + assert_eq!( + compare_values(&Value::String("abc".into()), &Value::String("abc".into())), + Some(Equal) + ); + + // Bytes comparisons (lexicographic, including different lengths) + assert_eq!( + compare_values(&Value::Bytes(vec![1]), &Value::Bytes(vec![2])), + Some(Less) + ); + assert_eq!( + compare_values(&Value::Bytes(vec![1]), &Value::Bytes(vec![1])), + Some(Equal) + ); + assert_eq!( + compare_values(&Value::Bytes(vec![1]), &Value::Bytes(vec![1, 2])), + Some(Less) + ); + assert_eq!( + compare_values(&Value::Bytes(vec![]), &Value::Bytes(vec![1])), + Some(Less) + ); + + // Incomparable types return None + assert_eq!( + compare_values(&Value::Uint(1), &Value::String("1".into())), + None + ); + assert_eq!(compare_values(&Value::Int(1), &Value::Bytes(vec![1])), None); + } + + #[test] + fn test_comparison_operators_consistency() { + // Verify all four comparison functions agree with compare_values + let pairs = vec![ + (Value::Uint(5), Value::Uint(10)), + (Value::Uint(10), Value::Uint(10)), + (Value::Uint(10), Value::Uint(5)), + (Value::Int(-10), Value::Int(-5)), + (Value::Int(-1), Value::Uint(0)), + (Value::Uint(u64::MAX), Value::Int(-1)), + (Value::String("abc".into()), Value::String("abd".into())), + (Value::Bytes(vec![1, 2]), Value::Bytes(vec![1, 3])), + (Value::Bytes(vec![1]), Value::Bytes(vec![1, 2])), + (Value::Uint(1), Value::String("1".into())), // incomparable + ]; + + for (left, right) in &pairs { + let ord = compare_values(left, right); + assert_eq!( + apply_less_than(left, right), + ord == Some(Ordering::Less), + "< for {left:?}, {right:?}" + ); + assert_eq!( + apply_greater_than(left, right), + ord == Some(Ordering::Greater), + "> for {left:?}, {right:?}" + ); + assert_eq!( + apply_less_equal(left, right), + matches!(ord, Some(Ordering::Less | Ordering::Equal)), + "<= for {left:?}, {right:?}" + ); + assert_eq!( + apply_greater_equal(left, right), + matches!(ord, Some(Ordering::Greater | Ordering::Equal)), + ">= for {left:?}, {right:?}" + ); + } + } } diff --git a/src/evaluator/strength.rs b/src/evaluator/strength.rs index 0469eb76..109540b2 100644 --- a/src/evaluator/strength.rs +++ b/src/evaluator/strength.rs @@ -82,7 +82,7 @@ pub fn calculate_default_strength(rule: &MagicRule) -> i32 { // 16-bit integers are moderately specific TypeKind::Short { .. } => 10, // Single bytes are least specific - TypeKind::Byte => 5, + TypeKind::Byte { .. } => 5, }; // Operator contribution: equality is most specific @@ -91,6 +91,11 @@ pub fn calculate_default_strength(rule: &MagicRule) -> i32 { Operator::Equal => 10, // Inequality is somewhat specific Operator::NotEqual => 5, + // Comparison operators are moderately specific + Operator::LessThan + | Operator::GreaterThan + | Operator::LessEqual + | Operator::GreaterEqual => 6, // Bitwise AND with mask is moderately specific Operator::BitwiseAndMask(_) => 7, // Plain bitwise AND is least specific @@ -207,7 +212,7 @@ pub fn apply_strength_modifier(base_strength: i32, modifier: &StrengthModifier) /// /// let rule = MagicRule { /// offset: OffsetSpec::Absolute(0), -/// typ: TypeKind::Byte, +/// typ: TypeKind::Byte { signed: true }, /// op: Operator::Equal, /// value: Value::Uint(0x7f), /// message: "ELF magic".to_string(), @@ -250,7 +255,7 @@ pub fn calculate_rule_strength(rule: &MagicRule) -> i32 { /// let mut rules = vec![ /// MagicRule { /// offset: OffsetSpec::Absolute(0), -/// typ: TypeKind::Byte, +/// typ: TypeKind::Byte { signed: true }, /// op: Operator::Equal, /// value: Value::Uint(0x7f), /// message: "byte rule".to_string(), @@ -307,7 +312,7 @@ pub fn sort_rules_by_strength(rules: &mut [MagicRule]) { /// let rules = vec![ /// MagicRule { /// offset: OffsetSpec::Absolute(0), -/// typ: TypeKind::Byte, +/// typ: TypeKind::Byte { signed: true }, /// op: Operator::Equal, /// value: Value::Uint(0), /// message: "byte rule".to_string(), @@ -362,7 +367,7 @@ mod tests { #[test] fn test_strength_type_byte() { let rule = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::Equal, OffsetSpec::Absolute(0), Value::Uint(0), @@ -435,7 +440,7 @@ mod tests { #[test] fn test_strength_operator_not_equal() { let rule = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::NotEqual, OffsetSpec::Absolute(0), Value::Uint(0), @@ -448,7 +453,7 @@ mod tests { #[test] fn test_strength_operator_bitwise_and() { let rule = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::BitwiseAnd, OffsetSpec::Absolute(0), Value::Uint(0), @@ -461,7 +466,7 @@ mod tests { #[test] fn test_strength_operator_bitwise_and_mask() { let rule = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::BitwiseAndMask(0xFF), OffsetSpec::Absolute(0), Value::Uint(0), @@ -471,10 +476,31 @@ mod tests { assert_eq!(strength, 22); } + #[test] + fn test_strength_comparison_operators() { + let operators = [ + Operator::LessThan, + Operator::GreaterThan, + Operator::LessEqual, + Operator::GreaterEqual, + ]; + for op in operators { + let rule = make_rule( + TypeKind::Byte { signed: true }, + op.clone(), + OffsetSpec::Absolute(0), + Value::Uint(0), + ); + let strength = calculate_default_strength(&rule); + // Byte: 5, Comparison: 6, Absolute: 10, Numeric: 0 = 21 + assert_eq!(strength, 21, "Failed for operator: {op:?}"); + } + } + #[test] fn test_strength_offset_indirect() { let rule = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::Equal, OffsetSpec::Indirect { base_offset: 0, @@ -495,7 +521,7 @@ mod tests { #[test] fn test_strength_offset_relative() { let rule = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::Equal, OffsetSpec::Relative(4), Value::Uint(0), @@ -508,7 +534,7 @@ mod tests { #[test] fn test_strength_offset_from_end() { let rule = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::Equal, OffsetSpec::FromEnd(-4), Value::Uint(0), @@ -521,7 +547,7 @@ mod tests { #[test] fn test_strength_value_bytes() { let rule = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::Equal, OffsetSpec::Absolute(0), Value::Bytes(vec![0x7f, 0x45, 0x4c, 0x46]), @@ -643,7 +669,7 @@ mod tests { #[test] fn test_rule_strength_without_modifier() { let rule = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::Equal, OffsetSpec::Absolute(0), Value::Uint(0), @@ -655,7 +681,7 @@ mod tests { #[test] fn test_rule_strength_with_add_modifier() { let mut rule = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::Equal, OffsetSpec::Absolute(0), Value::Uint(0), @@ -668,7 +694,7 @@ mod tests { #[test] fn test_rule_strength_with_multiply_modifier() { let mut rule = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::Equal, OffsetSpec::Absolute(0), Value::Uint(0), @@ -681,7 +707,7 @@ mod tests { #[test] fn test_rule_strength_with_set_modifier() { let mut rule = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::Equal, OffsetSpec::Absolute(0), Value::Uint(0), @@ -700,7 +726,7 @@ mod tests { let mut rules = vec![ { let mut r = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::Equal, OffsetSpec::Absolute(0), Value::Uint(0), @@ -744,7 +770,7 @@ mod tests { }, { let mut r = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::Equal, OffsetSpec::Absolute(0), Value::Uint(0), @@ -773,7 +799,7 @@ mod tests { #[test] fn test_sort_rules_single() { let mut rules = vec![make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::Equal, OffsetSpec::Absolute(0), Value::Uint(0), @@ -787,7 +813,7 @@ mod tests { let rules = vec![ { let mut r = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::Equal, OffsetSpec::Absolute(0), Value::Uint(0), @@ -830,7 +856,7 @@ mod tests { Value::String("AB".to_string()), ); let byte_rule = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::Equal, OffsetSpec::Absolute(0), Value::Uint(0x7f), @@ -849,13 +875,13 @@ mod tests { #[test] fn test_strength_comparison_absolute_vs_relative_offset() { let absolute_rule = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::Equal, OffsetSpec::Absolute(0), Value::Uint(0x7f), ); let relative_rule = make_rule( - TypeKind::Byte, + TypeKind::Byte { signed: true }, Operator::Equal, OffsetSpec::Relative(4), Value::Uint(0x7f), diff --git a/src/evaluator/types.rs b/src/evaluator/types.rs index 671875f7..b0b3a790 100644 --- a/src/evaluator/types.rs +++ b/src/evaluator/types.rs @@ -40,10 +40,12 @@ pub enum TypeReadError { /// /// * `buffer` - The byte buffer to read from /// * `offset` - The offset position to read the byte from +/// * `signed` - Whether to interpret the byte as signed (`i8`) or unsigned (`u8`) /// /// # Returns /// -/// Returns `Ok(Value::Uint(byte_value))` if the read is successful, or +/// Returns `Ok(Value::Uint(byte_value))` for unsigned reads or +/// `Ok(Value::Int(byte_value))` for signed reads if the read is successful, or /// `Err(TypeReadError::BufferOverrun)` if the offset is beyond the buffer bounds. /// /// # Security @@ -59,25 +61,33 @@ pub enum TypeReadError { /// use libmagic_rs::evaluator::types::read_byte; /// use libmagic_rs::parser::ast::Value; /// -/// let buffer = &[0x7f, 0x45, 0x4c, 0x46]; // ELF magic bytes +/// let buffer = &[0x7f, 0x80, 0x4c, 0x46]; // example bytes /// -/// // Read first byte (0x7f) -/// let result = read_byte(buffer, 0).unwrap(); -/// assert_eq!(result, Value::Uint(0x7f)); +/// // Read unsigned byte (0x80 = 128) +/// let result = read_byte(buffer, 1, false).unwrap(); +/// assert_eq!(result, Value::Uint(0x80)); /// -/// // Read last byte (0x46) -/// let result = read_byte(buffer, 3).unwrap(); -/// assert_eq!(result, Value::Uint(0x46)); +/// // Read signed byte (0x80 = -128) +/// let result = read_byte(buffer, 1, true).unwrap(); +/// assert_eq!(result, Value::Int(-128)); /// ``` /// /// # Errors /// /// Returns `TypeReadError::BufferOverrun` if the offset is greater than or equal to /// the buffer length. -pub fn read_byte(buffer: &[u8], offset: usize) -> Result { +pub fn read_byte(buffer: &[u8], offset: usize, signed: bool) -> Result { buffer .get(offset) - .map(|&byte| Value::Uint(u64::from(byte))) + .map(|&byte| { + if signed { + // Wrapping is intentional: e.g., 0x80 -> -128 as i8 + #[allow(clippy::cast_possible_wrap)] + Value::Int(i64::from(byte as i8)) + } else { + Value::Uint(u64::from(byte)) + } + }) .ok_or(TypeReadError::BufferOverrun { offset, buffer_len: buffer.len(), @@ -270,8 +280,7 @@ pub fn read_long( /// /// # Errors /// -/// Returns `TypeReadError::BufferOverrun` if the offset is beyond the buffer bounds, -/// or if no null terminator is found within the available buffer space when no `max_length` is specified. +/// Returns `TypeReadError::BufferOverrun` if the offset is greater than or equal to the buffer length. pub fn read_string( buffer: &[u8], offset: usize, @@ -288,7 +297,7 @@ pub fn read_string( // Get the slice starting from offset let remaining_buffer = &buffer[offset..]; - // Determine the actual length to read (uses SIMD-accelerated memchr for null scan) + // Determine the actual length to read (uses memchr for efficient null byte scanning) let read_length = if let Some(max_len) = max_length { // Find null terminator within max_length, or use max_length if no null found let search_len = std::cmp::min(max_len, remaining_buffer.len()); @@ -330,8 +339,8 @@ pub fn read_string( /// /// let buffer = &[0x7f, 0x45, 0x4c, 0x46, 0x34, 0x12]; /// -/// // Read a byte -/// let byte_result = read_typed_value(buffer, 0, &TypeKind::Byte).unwrap(); +/// // Read an unsigned byte +/// let byte_result = read_typed_value(buffer, 0, &TypeKind::Byte { signed: false }).unwrap(); /// assert_eq!(byte_result, Value::Uint(0x7f)); /// /// // Read a little-endian short @@ -353,123 +362,122 @@ pub fn read_typed_value( type_kind: &TypeKind, ) -> Result { match type_kind { - TypeKind::Byte => read_byte(buffer, offset), + TypeKind::Byte { signed } => read_byte(buffer, offset, *signed), TypeKind::Short { endian, signed } => read_short(buffer, offset, *endian, *signed), TypeKind::Long { endian, signed } => read_long(buffer, offset, *endian, *signed), TypeKind::String { max_length } => read_string(buffer, offset, *max_length), } } +/// Coerce a rule's expected value to match the type's signedness and width. +/// +/// In libmagic, comparison values like `0xff` in `0 byte =0xff` are interpreted +/// at the type's bit width. For a signed byte, `0xff` means `-1` (the signed +/// interpretation of that bit pattern). This function performs that coercion so +/// that comparisons work correctly regardless of how the value was parsed. +/// +/// Only affects `Value::Uint` values paired with signed types whose values exceed +/// the signed range. All other combinations pass through unchanged. +/// +/// # Examples +/// +/// ``` +/// use libmagic_rs::evaluator::types::coerce_value_to_type; +/// use libmagic_rs::parser::ast::{TypeKind, Value}; +/// +/// // 0xff for signed byte -> -1 +/// let coerced = coerce_value_to_type(&Value::Uint(0xff), &TypeKind::Byte { signed: true }); +/// assert_eq!(coerced, Value::Int(-1)); +/// +/// // 0x7f for signed byte -> unchanged (fits in signed range) +/// let coerced = coerce_value_to_type(&Value::Uint(0x7f), &TypeKind::Byte { signed: true }); +/// assert_eq!(coerced, Value::Uint(0x7f)); +/// +/// // Unsigned types pass through unchanged +/// let coerced = coerce_value_to_type(&Value::Uint(0xff), &TypeKind::Byte { signed: false }); +/// assert_eq!(coerced, Value::Uint(0xff)); +/// ``` +#[must_use] +pub fn coerce_value_to_type(value: &Value, type_kind: &TypeKind) -> Value { + match (value, type_kind) { + (Value::Uint(v), TypeKind::Byte { signed: true }) if *v > i8::MAX as u64 => + { + #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] + Value::Int(i64::from(*v as u8 as i8)) + } + (Value::Uint(v), TypeKind::Short { signed: true, .. }) if *v > i16::MAX as u64 => + { + #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] + Value::Int(i64::from(*v as u16 as i16)) + } + (Value::Uint(v), TypeKind::Long { signed: true, .. }) if *v > i32::MAX as u64 => + { + #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)] + Value::Int(i64::from(*v as u32 as i32)) + } + _ => value.clone(), + } +} + #[cfg(test)] mod tests { use super::*; #[test] - fn test_read_byte_success() { - let buffer = &[0x7f, 0x45, 0x4c, 0x46]; - - // Test reading each byte - assert_eq!(read_byte(buffer, 0).unwrap(), Value::Uint(0x7f)); - assert_eq!(read_byte(buffer, 1).unwrap(), Value::Uint(0x45)); - assert_eq!(read_byte(buffer, 2).unwrap(), Value::Uint(0x4c)); - assert_eq!(read_byte(buffer, 3).unwrap(), Value::Uint(0x46)); - } - - #[test] - fn test_read_byte_zero_value() { - let buffer = &[0x00, 0xff]; - - // Test reading zero byte - assert_eq!(read_byte(buffer, 0).unwrap(), Value::Uint(0)); - // Test reading max byte value - assert_eq!(read_byte(buffer, 1).unwrap(), Value::Uint(255)); + fn test_read_byte_values() { + // All 256 unsigned values + let buffer: Vec = (0..=255).collect(); + for (i, &byte) in buffer.iter().enumerate() { + assert_eq!( + read_byte(&buffer, i, false).unwrap(), + Value::Uint(u64::from(byte)) + ); + } } #[test] - fn test_read_byte_single_byte_buffer() { - let buffer = &[0x42]; - - // Should succeed for offset 0 - assert_eq!(read_byte(buffer, 0).unwrap(), Value::Uint(0x42)); - - // Should fail for offset 1 - let result = read_byte(buffer, 1); - assert!(result.is_err()); + fn test_read_byte_out_of_bounds() { + // Empty buffer assert_eq!( - result.unwrap_err(), + read_byte(&[], 0, false).unwrap_err(), TypeReadError::BufferOverrun { - offset: 1, - buffer_len: 1 + offset: 0, + buffer_len: 0 } ); - } - - #[test] - fn test_read_byte_empty_buffer() { - let buffer = &[]; - - // Should fail for any offset - let result = read_byte(buffer, 0); - assert!(result.is_err()); + // Just past end assert_eq!( - result.unwrap_err(), + read_byte(&[0x42], 1, false).unwrap_err(), TypeReadError::BufferOverrun { - offset: 0, - buffer_len: 0 + offset: 1, + buffer_len: 1 } ); - } - - #[test] - fn test_read_byte_out_of_bounds() { - let buffer = &[0x01, 0x02, 0x03]; - - // Test various out-of-bounds offsets - let test_cases = [3, 4, 10, 100, usize::MAX]; - - for offset in test_cases { - let result = read_byte(buffer, offset); - assert!(result.is_err()); - assert_eq!( - result.unwrap_err(), - TypeReadError::BufferOverrun { - offset, - buffer_len: 3 - } - ); - } - } - - #[test] - fn test_read_byte_large_buffer() { - // Test with a larger buffer to ensure no performance issues - let buffer: Vec = (0..=255).collect(); - - // Test reading from various positions - assert_eq!(read_byte(&buffer, 0).unwrap(), Value::Uint(0)); - assert_eq!(read_byte(&buffer, 127).unwrap(), Value::Uint(127)); - assert_eq!(read_byte(&buffer, 255).unwrap(), Value::Uint(255)); - - // Test out of bounds - let result = read_byte(&buffer, 256); - assert!(result.is_err()); + // Way past end assert_eq!( - result.unwrap_err(), + read_byte(&[1, 2, 3], 100, false).unwrap_err(), TypeReadError::BufferOverrun { - offset: 256, - buffer_len: 256 + offset: 100, + buffer_len: 3 } ); } #[test] - fn test_read_byte_all_byte_values() { - // Test that all possible byte values are correctly converted to u64 - let buffer: Vec = (0..=255).collect(); - - for (i, &expected_byte) in buffer.iter().enumerate() { - let result = read_byte(&buffer, i).unwrap(); - assert_eq!(result, Value::Uint(u64::from(expected_byte))); + fn test_read_byte_signedness() { + let cases: Vec<(u8, bool, Value)> = vec![ + (0x00, false, Value::Uint(0)), + (0x7f, false, Value::Uint(127)), + (0x80, false, Value::Uint(128)), + (0xff, false, Value::Uint(255)), + (0x00, true, Value::Int(0)), + (0x7f, true, Value::Int(127)), + (0x80, true, Value::Int(-128)), + (0xff, true, Value::Int(-1)), + ]; + for (byte, signed, expected) in cases { + let result = read_byte(&[byte], 0, signed).unwrap(); + assert_eq!(result, expected, "byte=0x{byte:02x}, signed={signed}"); } } @@ -479,68 +487,9 @@ mod tests { offset: 10, buffer_len: 5, }; - - let error_string = format!("{error}"); - assert!(error_string.contains("Buffer overrun")); - assert!(error_string.contains("offset 10")); - assert!(error_string.contains("buffer length is 5")); - } - - #[test] - fn test_type_read_error_debug() { - let error = TypeReadError::BufferOverrun { - offset: 42, - buffer_len: 20, - }; - - let debug_string = format!("{error:?}"); - assert!(debug_string.contains("BufferOverrun")); - assert!(debug_string.contains("offset: 42")); - assert!(debug_string.contains("buffer_len: 20")); - } - - #[test] - fn test_type_read_error_equality() { - let error1 = TypeReadError::BufferOverrun { - offset: 5, - buffer_len: 3, - }; - let error2 = TypeReadError::BufferOverrun { - offset: 5, - buffer_len: 3, - }; - let error3 = TypeReadError::BufferOverrun { - offset: 6, - buffer_len: 3, - }; - - assert_eq!(error1, error2); - assert_ne!(error1, error3); - } - - #[test] - fn test_read_byte_boundary_conditions() { - let buffer = &[0xaa, 0xbb, 0xcc]; - - // Test reading at exact boundary (last valid index) - assert_eq!(read_byte(buffer, 2).unwrap(), Value::Uint(0xcc)); - - // Test reading just past boundary - let result = read_byte(buffer, 3); - assert!(result.is_err()); - } - - #[test] - fn test_read_byte_return_type() { - let buffer = &[0x80]; // Test with high bit set - - let result = read_byte(buffer, 0).unwrap(); - - // Verify it returns Value::Uint, not Value::Int - match result { - Value::Uint(val) => assert_eq!(val, 0x80), - _ => panic!("Expected Value::Uint variant"), - } + let msg = format!("{error}"); + assert!(msg.contains("offset 10")); + assert!(msg.contains("buffer length is 5")); } // Tests for read_short function @@ -855,8 +804,8 @@ mod tests { let buffer = &[0x34, 0x12, 0x78, 0x56, 0xbc, 0x9a, 0xde, 0xf0]; // Read as individual bytes - let byte0 = read_byte(buffer, 0).unwrap(); - let byte1 = read_byte(buffer, 1).unwrap(); + let byte0 = read_byte(buffer, 0, false).unwrap(); + let byte1 = read_byte(buffer, 1, false).unwrap(); // Read as short let short = read_short(buffer, 0, Endianness::Little, false).unwrap(); @@ -913,7 +862,7 @@ mod tests { #[test] fn test_read_typed_value_byte() { let buffer = &[0x7f, 0x45, 0x4c, 0x46]; - let type_kind = TypeKind::Byte; + let type_kind = TypeKind::Byte { signed: false }; let result = read_typed_value(buffer, 0, &type_kind).unwrap(); assert_eq!(result, Value::Uint(0x7f)); @@ -1368,7 +1317,7 @@ fn test_read_typed_value_all_supported_types() { // Test all supported TypeKind variants let test_cases = vec![ - (TypeKind::Byte, 0, Value::Uint(0x7f)), + (TypeKind::Byte { signed: false }, 0, Value::Uint(0x7f)), ( TypeKind::Short { endian: Endianness::Little, @@ -1451,8 +1400,8 @@ fn test_read_typed_value_consistency_with_direct_calls() { let buffer = &[0x34, 0x12, 0x78, 0x56, 0xbc, 0x9a, 0xde, 0xf0]; // Test that read_typed_value gives same results as direct function calls - let byte_type = TypeKind::Byte; - let direct_byte = read_byte(buffer, 0).unwrap(); + let byte_type = TypeKind::Byte { signed: false }; + let direct_byte = read_byte(buffer, 0, false).unwrap(); let typed_byte = read_typed_value(buffer, 0, &byte_type).unwrap(); assert_eq!(direct_byte, typed_byte); @@ -1479,7 +1428,7 @@ fn test_read_typed_value_empty_buffer() { // All types should fail on empty buffer let types = vec![ - TypeKind::Byte, + TypeKind::Byte { signed: false }, TypeKind::Short { endian: Endianness::Little, signed: false, @@ -1502,3 +1451,167 @@ fn test_read_typed_value_empty_buffer() { } } } + +#[test] +#[allow(clippy::too_many_lines)] +fn test_coerce_value_to_type() { + let cases = [ + // Signed byte: values above i8::MAX get coerced + ( + Value::Uint(0xff), + TypeKind::Byte { signed: true }, + Value::Int(-1), + ), + ( + Value::Uint(0x80), + TypeKind::Byte { signed: true }, + Value::Int(-128), + ), + ( + Value::Uint(0xfe), + TypeKind::Byte { signed: true }, + Value::Int(-2), + ), + // Signed byte: values in signed range pass through + ( + Value::Uint(0x7f), + TypeKind::Byte { signed: true }, + Value::Uint(0x7f), + ), + ( + Value::Uint(0), + TypeKind::Byte { signed: true }, + Value::Uint(0), + ), + ( + Value::Uint(1), + TypeKind::Byte { signed: true }, + Value::Uint(1), + ), + // Unsigned byte: all values pass through + ( + Value::Uint(0xff), + TypeKind::Byte { signed: false }, + Value::Uint(0xff), + ), + ( + Value::Uint(0x80), + TypeKind::Byte { signed: false }, + Value::Uint(0x80), + ), + // Signed short: values above i16::MAX get coerced + ( + Value::Uint(0xffff), + TypeKind::Short { + endian: Endianness::Native, + signed: true, + }, + Value::Int(-1), + ), + ( + Value::Uint(0x8000), + TypeKind::Short { + endian: Endianness::Native, + signed: true, + }, + Value::Int(-32768), + ), + ( + Value::Uint(0xffd8), + TypeKind::Short { + endian: Endianness::Big, + signed: true, + }, + Value::Int(-40), + ), + // Signed short: values in signed range pass through + ( + Value::Uint(0x7fff), + TypeKind::Short { + endian: Endianness::Native, + signed: true, + }, + Value::Uint(0x7fff), + ), + // Unsigned short: all values pass through + ( + Value::Uint(0xffff), + TypeKind::Short { + endian: Endianness::Native, + signed: false, + }, + Value::Uint(0xffff), + ), + // Signed long: values above i32::MAX get coerced + ( + Value::Uint(0xffff_ffff), + TypeKind::Long { + endian: Endianness::Native, + signed: true, + }, + Value::Int(-1), + ), + ( + Value::Uint(0x8000_0000), + TypeKind::Long { + endian: Endianness::Native, + signed: true, + }, + Value::Int(-2_147_483_648), + ), + ( + Value::Uint(0x8950_4e47), + TypeKind::Long { + endian: Endianness::Big, + signed: true, + }, + Value::Int(-1_991_225_785), + ), + // Signed long: values in signed range pass through + ( + Value::Uint(0x7fff_ffff), + TypeKind::Long { + endian: Endianness::Native, + signed: true, + }, + Value::Uint(0x7fff_ffff), + ), + // Unsigned long: all values pass through + ( + Value::Uint(0xffff_ffff), + TypeKind::Long { + endian: Endianness::Native, + signed: false, + }, + Value::Uint(0xffff_ffff), + ), + // Non-Uint values pass through unchanged + ( + Value::Int(-1), + TypeKind::Byte { signed: true }, + Value::Int(-1), + ), + ( + Value::Int(42), + TypeKind::Long { + endian: Endianness::Native, + signed: true, + }, + Value::Int(42), + ), + // String type: values pass through + ( + Value::Uint(0xff), + TypeKind::String { max_length: None }, + Value::Uint(0xff), + ), + ]; + + for (i, (input, type_kind, expected)) in cases.iter().enumerate() { + let result = coerce_value_to_type(input, type_kind); + assert_eq!( + result, *expected, + "Case {i}: coerce({input:?}, {type_kind:?})" + ); + } +} diff --git a/src/parser/ast.rs b/src/parser/ast.rs index 9036e606..02e742f6 100644 --- a/src/parser/ast.rs +++ b/src/parser/ast.rs @@ -81,7 +81,10 @@ pub enum OffsetSpec { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub enum TypeKind { /// Single byte - Byte, + Byte { + /// Whether value is signed + signed: bool, + }, /// 16-bit integer Short { /// Byte order @@ -106,13 +109,93 @@ pub enum TypeKind { /// Comparison and bitwise operators #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub enum Operator { - /// Equality comparison + /// Equality comparison (`=` or `==`) + /// + /// # Examples + /// + /// ``` + /// use libmagic_rs::parser::ast::Operator; + /// + /// let op = Operator::Equal; + /// assert_eq!(op, Operator::Equal); + /// ``` Equal, - /// Inequality comparison + /// Inequality comparison (`!=` or `<>`) + /// + /// # Examples + /// + /// ``` + /// use libmagic_rs::parser::ast::Operator; + /// + /// let op = Operator::NotEqual; + /// assert_eq!(op, Operator::NotEqual); + /// ``` NotEqual, - /// Bitwise AND operation (without mask) + /// Less-than comparison (`<`) + /// + /// # Examples + /// + /// ``` + /// use libmagic_rs::parser::ast::Operator; + /// + /// let op = Operator::LessThan; + /// assert_eq!(op, Operator::LessThan); + /// ``` + LessThan, + /// Greater-than comparison (`>`) + /// + /// # Examples + /// + /// ``` + /// use libmagic_rs::parser::ast::Operator; + /// + /// let op = Operator::GreaterThan; + /// assert_eq!(op, Operator::GreaterThan); + /// ``` + GreaterThan, + /// Less-than-or-equal comparison (`<=`) + /// + /// # Examples + /// + /// ``` + /// use libmagic_rs::parser::ast::Operator; + /// + /// let op = Operator::LessEqual; + /// assert_eq!(op, Operator::LessEqual); + /// ``` + LessEqual, + /// Greater-than-or-equal comparison (`>=`) + /// + /// # Examples + /// + /// ``` + /// use libmagic_rs::parser::ast::Operator; + /// + /// let op = Operator::GreaterEqual; + /// assert_eq!(op, Operator::GreaterEqual); + /// ``` + GreaterEqual, + /// Bitwise AND operation without mask (`&`) + /// + /// # Examples + /// + /// ``` + /// use libmagic_rs::parser::ast::Operator; + /// + /// let op = Operator::BitwiseAnd; + /// assert_eq!(op, Operator::BitwiseAnd); + /// ``` BitwiseAnd, - /// Bitwise AND operation with mask value + /// Bitwise AND operation with mask value (`&` with a mask operand) + /// + /// # Examples + /// + /// ``` + /// use libmagic_rs::parser::ast::Operator; + /// + /// let op = Operator::BitwiseAndMask(0xFF00); + /// assert_eq!(op, Operator::BitwiseAndMask(0xFF00)); + /// ``` BitwiseAndMask(u64), } @@ -319,7 +402,7 @@ mod tests { OffsetSpec::Absolute(-100), OffsetSpec::Indirect { base_offset: 0x20, - pointer_type: TypeKind::Byte, + pointer_type: TypeKind::Byte { signed: true }, adjustment: 0, endian: Endianness::Little, }, @@ -517,8 +600,8 @@ mod tests { // TypeKind tests #[test] fn test_type_kind_byte() { - let byte_type = TypeKind::Byte; - assert_eq!(byte_type, TypeKind::Byte); + let byte_type = TypeKind::Byte { signed: true }; + assert_eq!(byte_type, TypeKind::Byte { signed: true }); } #[test] @@ -566,7 +649,7 @@ mod tests { #[test] fn test_type_kind_serialization() { let types = vec![ - TypeKind::Byte, + TypeKind::Byte { signed: true }, TypeKind::Short { endian: Endianness::Little, signed: false, @@ -622,7 +705,7 @@ mod tests { fn test_magic_rule_creation() { let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "ELF magic".to_string(), @@ -640,7 +723,7 @@ mod tests { fn test_magic_rule_with_children() { let child_rule = MagicRule { offset: OffsetSpec::Absolute(4), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(1), message: "32-bit".to_string(), @@ -764,7 +847,7 @@ mod tests { fn test_magic_rule_with_strength_modifier() { let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "ELF magic".to_string(), @@ -786,7 +869,7 @@ mod tests { fn test_magic_rule_without_strength_modifier() { let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "ELF magic".to_string(), diff --git a/src/parser/grammar.rs b/src/parser/grammar.rs index 69c7567c..b36d98f0 100644 --- a/src/parser/grammar.rs +++ b/src/parser/grammar.rs @@ -141,6 +141,10 @@ pub fn parse_offset(input: &str) -> IResult<&str, OffsetSpec> { /// Supports both symbolic and text representations of operators: /// - `=` or `==` for equality /// - `!=` or `<>` for inequality +/// - `<` for less-than +/// - `>` for greater-than +/// - `<=` for less-than-or-equal +/// - `>=` for greater-than-or-equal /// - `&` for bitwise AND /// /// # Examples @@ -153,6 +157,10 @@ pub fn parse_offset(input: &str) -> IResult<&str, OffsetSpec> { /// assert_eq!(parse_operator("=="), Ok(("", Operator::Equal))); /// assert_eq!(parse_operator("!="), Ok(("", Operator::NotEqual))); /// assert_eq!(parse_operator("<>"), Ok(("", Operator::NotEqual))); +/// assert_eq!(parse_operator("<"), Ok(("", Operator::LessThan))); +/// assert_eq!(parse_operator(">"), Ok(("", Operator::GreaterThan))); +/// assert_eq!(parse_operator("<="), Ok(("", Operator::LessEqual))); +/// assert_eq!(parse_operator(">="), Ok(("", Operator::GreaterEqual))); /// assert_eq!(parse_operator("&"), Ok(("", Operator::BitwiseAnd))); /// ``` /// @@ -188,6 +196,16 @@ pub fn parse_operator(input: &str) -> IResult<&str, Operator> { return Ok((remaining, Operator::NotEqual)); } + if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("<=")(input) { + let (remaining, _) = multispace0(remaining)?; + return Ok((remaining, Operator::LessEqual)); + } + + if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>(">=")(input) { + let (remaining, _) = multispace0(remaining)?; + return Ok((remaining, Operator::GreaterEqual)); + } + if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("=")(input) { // Check that we don't have another '=' following (to reject "==") if remaining.starts_with('=') { @@ -212,6 +230,16 @@ pub fn parse_operator(input: &str) -> IResult<&str, Operator> { return Ok((remaining, Operator::BitwiseAnd)); } + if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>("<")(input) { + let (remaining, _) = multispace0(remaining)?; + return Ok((remaining, Operator::LessThan)); + } + + if let Ok((remaining, _)) = tag::<&str, &str, nom::error::Error<&str>>(">")(input) { + let (remaining, _) = multispace0(remaining)?; + return Ok((remaining, Operator::GreaterThan)); + } + // If no operator matches, return an error Err(nom::Err::Error(nom::error::Error::new( input, @@ -382,7 +410,7 @@ fn parse_escape_sequence(input: &str) -> IResult<&str, char> { '"' => '"', '\'' => '\'', '0' => '\0', - _ => escaped_char, // Fallback for other characters + _ => unreachable!("one_of constrains input to known escape characters"), }; Ok((input, result_char)) @@ -837,12 +865,26 @@ mod tests { Ok(("extra", Operator::NotEqual)) ); - // Test that "<>" is parsed correctly + // Test that "<>" is parsed correctly, not as "<" followed by ">" assert_eq!(parse_operator("<>"), Ok(("", Operator::NotEqual))); assert_eq!( parse_operator("<> extra"), Ok(("extra", Operator::NotEqual)) ); + + // Test that "<=" is parsed as LessEqual, not "<" followed by "=" + assert_eq!(parse_operator("<="), Ok(("", Operator::LessEqual))); + assert_eq!( + parse_operator("<= extra"), + Ok(("extra", Operator::LessEqual)) + ); + + // Test that ">=" is parsed as GreaterEqual, not ">" followed by "=" + assert_eq!(parse_operator(">="), Ok(("", Operator::GreaterEqual))); + assert_eq!( + parse_operator(">= extra"), + Ok(("extra", Operator::GreaterEqual)) + ); } #[test] @@ -851,8 +893,6 @@ mod tests { assert!(parse_operator("").is_err()); assert!(parse_operator("abc").is_err()); assert!(parse_operator("123").is_err()); - assert!(parse_operator(">").is_err()); - assert!(parse_operator("<").is_err()); assert!(parse_operator("!").is_err()); assert!(parse_operator("===").is_err()); // Too many equals assert!(parse_operator("&&").is_err()); // Double ampersand not supported @@ -909,6 +949,10 @@ mod tests { ("==", Operator::Equal), ("!=", Operator::NotEqual), ("<>", Operator::NotEqual), + ("<", Operator::LessThan), + (">", Operator::GreaterThan), + ("<=", Operator::LessEqual), + (">=", Operator::GreaterEqual), ("&", Operator::BitwiseAnd), ]; @@ -921,6 +965,80 @@ mod tests { } } + #[test] + fn test_parse_operator_less_than() { + // Basic less-than + assert_eq!(parse_operator("<"), Ok(("", Operator::LessThan))); + + // With whitespace + assert_eq!(parse_operator(" < "), Ok(("", Operator::LessThan))); + assert_eq!(parse_operator(" < "), Ok(("", Operator::LessThan))); + assert_eq!(parse_operator("\t<\t"), Ok(("", Operator::LessThan))); + + // With remaining input + assert_eq!(parse_operator("< 42"), Ok(("42", Operator::LessThan))); + } + + #[test] + fn test_parse_operator_greater_than() { + // Basic greater-than + assert_eq!(parse_operator(">"), Ok(("", Operator::GreaterThan))); + + // With whitespace + assert_eq!(parse_operator(" > "), Ok(("", Operator::GreaterThan))); + assert_eq!(parse_operator(" > "), Ok(("", Operator::GreaterThan))); + assert_eq!(parse_operator("\t>\t"), Ok(("", Operator::GreaterThan))); + + // With remaining input + assert_eq!(parse_operator("> 42"), Ok(("42", Operator::GreaterThan))); + } + + #[test] + fn test_parse_operator_less_equal() { + // Basic less-or-equal + assert_eq!(parse_operator("<="), Ok(("", Operator::LessEqual))); + + // With whitespace + assert_eq!(parse_operator(" <= "), Ok(("", Operator::LessEqual))); + assert_eq!(parse_operator(" <= "), Ok(("", Operator::LessEqual))); + assert_eq!(parse_operator("\t<=\t"), Ok(("", Operator::LessEqual))); + + // With remaining input + assert_eq!(parse_operator("<= 42"), Ok(("42", Operator::LessEqual))); + } + + #[test] + fn test_parse_operator_greater_equal() { + // Basic greater-or-equal + assert_eq!(parse_operator(">="), Ok(("", Operator::GreaterEqual))); + + // With whitespace + assert_eq!(parse_operator(" >= "), Ok(("", Operator::GreaterEqual))); + assert_eq!(parse_operator(" >= "), Ok(("", Operator::GreaterEqual))); + assert_eq!(parse_operator("\t>=\t"), Ok(("", Operator::GreaterEqual))); + + // With remaining input + assert_eq!(parse_operator(">= 42"), Ok(("42", Operator::GreaterEqual))); + } + + #[test] + fn test_parse_operator_comparison_disambiguation() { + // <> still parses as NotEqual + assert_eq!(parse_operator("<>"), Ok(("", Operator::NotEqual))); + + // <= parses as LessEqual, not LessThan with "=" remaining + assert_eq!(parse_operator("<="), Ok(("", Operator::LessEqual))); + + // >= parses as GreaterEqual, not GreaterThan with "=" remaining + assert_eq!(parse_operator(">="), Ok(("", Operator::GreaterEqual))); + + // "< >" (with space) parses as LessThan with "> " remaining + assert_eq!(parse_operator("< >"), Ok((">", Operator::LessThan))); + + // "> =" (with space) parses as GreaterThan with "= " remaining + assert_eq!(parse_operator("> ="), Ok(("=", Operator::GreaterThan))); + } + // Value parsing tests #[test] fn test_parse_hex_bytes_with_backslash_x() { @@ -1419,8 +1537,8 @@ mod tests { /// use libmagic_rs::parser::grammar::parse_type; /// use libmagic_rs::parser::ast::{TypeKind, Endianness}; /// -/// assert_eq!(parse_type("byte"), Ok(("", TypeKind::Byte))); -/// assert_eq!(parse_type("leshort"), Ok(("", TypeKind::Short { endian: Endianness::Little, signed: false }))); +/// assert_eq!(parse_type("byte"), Ok(("", TypeKind::Byte { signed: true }))); +/// assert_eq!(parse_type("leshort"), Ok(("", TypeKind::Short { endian: Endianness::Little, signed: true }))); /// assert_eq!(parse_type("string"), Ok(("", TypeKind::String { max_length: None }))); /// ``` /// Parse a type specification with optional attached operator @@ -1432,6 +1550,15 @@ pub fn parse_type_and_operator(input: &str) -> IResult<&str, (TypeKind, Option IResult<&str, (TypeKind, Option TypeKind::Byte, + "byte" => TypeKind::Byte { signed: true }, + "ubyte" => TypeKind::Byte { signed: false }, "short" => TypeKind::Short { + endian: Endianness::Native, + signed: true, + }, + "ushort" => TypeKind::Short { endian: Endianness::Native, signed: false, }, "leshort" => TypeKind::Short { + endian: Endianness::Little, + signed: true, + }, + "uleshort" => TypeKind::Short { endian: Endianness::Little, signed: false, }, "beshort" => TypeKind::Short { + endian: Endianness::Big, + signed: true, + }, + "ubeshort" => TypeKind::Short { endian: Endianness::Big, signed: false, }, "long" => TypeKind::Long { + endian: Endianness::Native, + signed: true, + }, + "ulong" => TypeKind::Long { endian: Endianness::Native, signed: false, }, "lelong" => TypeKind::Long { + endian: Endianness::Little, + signed: true, + }, + "ulelong" => TypeKind::Long { endian: Endianness::Little, signed: false, }, "belong" => TypeKind::Long { + endian: Endianness::Big, + signed: true, + }, + "ubelong" => TypeKind::Long { endian: Endianness::Big, signed: false, }, @@ -1809,14 +1961,17 @@ pub fn has_continuation(input: &str) -> bool { #[test] fn test_parse_type_basic() { - assert_eq!(parse_type("byte"), Ok(("", TypeKind::Byte))); + assert_eq!( + parse_type("byte"), + Ok(("", TypeKind::Byte { signed: true })) + ); assert_eq!( parse_type("short"), Ok(( "", TypeKind::Short { endian: Endianness::Native, - signed: false + signed: true } )) ); @@ -1826,7 +1981,7 @@ fn test_parse_type_basic() { "", TypeKind::Long { endian: Endianness::Native, - signed: false + signed: true } )) ); @@ -1844,7 +1999,7 @@ fn test_parse_type_endianness() { "", TypeKind::Short { endian: Endianness::Little, - signed: false + signed: true } )) ); @@ -1854,7 +2009,7 @@ fn test_parse_type_endianness() { "", TypeKind::Short { endian: Endianness::Big, - signed: false + signed: true } )) ); @@ -1864,7 +2019,7 @@ fn test_parse_type_endianness() { "", TypeKind::Long { endian: Endianness::Little, - signed: false + signed: true } )) ); @@ -1874,7 +2029,7 @@ fn test_parse_type_endianness() { "", TypeKind::Long { endian: Endianness::Big, - signed: false + signed: true } )) ); @@ -1882,7 +2037,10 @@ fn test_parse_type_endianness() { #[test] fn test_parse_type_with_whitespace() { - assert_eq!(parse_type(" byte "), Ok(("", TypeKind::Byte))); + assert_eq!( + parse_type(" byte "), + Ok(("", TypeKind::Byte { signed: true })) + ); assert_eq!( parse_type("\tstring\t"), Ok(("", TypeKind::String { max_length: None })) @@ -1893,7 +2051,7 @@ fn test_parse_type_with_whitespace() { "", TypeKind::Long { endian: Endianness::Little, - signed: false + signed: true } )) ); @@ -1901,7 +2059,10 @@ fn test_parse_type_with_whitespace() { #[test] fn test_parse_type_with_remaining_input() { - assert_eq!(parse_type("byte ="), Ok(("=", TypeKind::Byte))); + assert_eq!( + parse_type("byte ="), + Ok(("=", TypeKind::Byte { signed: true })) + ); assert_eq!( parse_type("string \\x7f"), Ok(("\\x7f", TypeKind::String { max_length: None })) @@ -1916,6 +2077,123 @@ fn test_parse_type_invalid() { assert!(parse_type("float").is_err()); } +#[test] +fn test_parse_type_unsigned_variants() { + assert_eq!( + parse_type("ubyte"), + Ok(("", TypeKind::Byte { signed: false })) + ); + assert_eq!( + parse_type("ushort"), + Ok(( + "", + TypeKind::Short { + endian: Endianness::Native, + signed: false, + } + )) + ); + assert_eq!( + parse_type("ubeshort"), + Ok(( + "", + TypeKind::Short { + endian: Endianness::Big, + signed: false, + } + )) + ); + assert_eq!( + parse_type("uleshort"), + Ok(( + "", + TypeKind::Short { + endian: Endianness::Little, + signed: false, + } + )) + ); + assert_eq!( + parse_type("ulong"), + Ok(( + "", + TypeKind::Long { + endian: Endianness::Native, + signed: false, + } + )) + ); + assert_eq!( + parse_type("ubelong"), + Ok(( + "", + TypeKind::Long { + endian: Endianness::Big, + signed: false, + } + )) + ); + assert_eq!( + parse_type("ulelong"), + Ok(( + "", + TypeKind::Long { + endian: Endianness::Little, + signed: false, + } + )) + ); +} + +#[test] +fn test_parse_type_signed_defaults() { + // In libmagic, unprefixed types are signed by default + assert_eq!( + parse_type("byte"), + Ok(("", TypeKind::Byte { signed: true })) + ); + assert_eq!( + parse_type("short"), + Ok(( + "", + TypeKind::Short { + endian: Endianness::Native, + signed: true, + } + )) + ); + assert_eq!( + parse_type("long"), + Ok(( + "", + TypeKind::Long { + endian: Endianness::Native, + signed: true, + } + )) + ); + assert_eq!( + parse_type("beshort"), + Ok(( + "", + TypeKind::Short { + endian: Endianness::Big, + signed: true, + } + )) + ); + assert_eq!( + parse_type("belong"), + Ok(( + "", + TypeKind::Long { + endian: Endianness::Big, + signed: true, + } + )) + ); +} + #[test] fn test_parse_rule_offset_absolute() { assert_eq!( @@ -2041,7 +2319,7 @@ fn test_parse_magic_rule_child() { assert_eq!(remaining, ""); assert_eq!(rule.level, 1); assert_eq!(rule.offset, OffsetSpec::Absolute(4)); - assert_eq!(rule.typ, TypeKind::Byte); + assert_eq!(rule.typ, TypeKind::Byte { signed: true }); assert_eq!(rule.op, Operator::Equal); assert_eq!(rule.value, Value::Uint(1)); assert_eq!(rule.message, "32-bit"); @@ -2059,7 +2337,7 @@ fn test_parse_magic_rule_with_operator() { rule.typ, TypeKind::Long { endian: Endianness::Little, - signed: false + signed: true } ); assert_eq!(rule.op, Operator::BitwiseAndMask(0xf000_0000)); @@ -2075,7 +2353,7 @@ fn test_parse_magic_rule_no_message() { assert_eq!(remaining, ""); assert_eq!(rule.level, 0); assert_eq!(rule.offset, OffsetSpec::Absolute(0)); - assert_eq!(rule.typ, TypeKind::Byte); + assert_eq!(rule.typ, TypeKind::Byte { signed: true }); assert_eq!(rule.op, Operator::Equal); assert_eq!(rule.value, Value::Uint(0x7f)); assert_eq!(rule.message, ""); @@ -2093,7 +2371,7 @@ fn test_parse_magic_rule_nested() { rule.typ, TypeKind::Short { endian: Endianness::Little, - signed: false + signed: true } ); assert_eq!(rule.op, Operator::Equal); @@ -2109,7 +2387,7 @@ fn test_parse_magic_rule_with_whitespace() { assert_eq!(remaining, ""); assert_eq!(rule.level, 1); assert_eq!(rule.offset, OffsetSpec::Absolute(4)); - assert_eq!(rule.typ, TypeKind::Byte); + assert_eq!(rule.typ, TypeKind::Byte { signed: true }); assert_eq!(rule.op, Operator::Equal); assert_eq!(rule.value, Value::Uint(1)); assert_eq!(rule.message, "32-bit"); @@ -2141,7 +2419,7 @@ fn test_parse_magic_rule_hex_offset() { rule.typ, TypeKind::Long { endian: Endianness::Big, - signed: false + signed: true } ); assert_eq!(rule.op, Operator::Equal); @@ -2157,7 +2435,7 @@ fn test_parse_magic_rule_negative_offset() { assert_eq!(remaining, ""); assert_eq!(rule.level, 0); assert_eq!(rule.offset, OffsetSpec::Absolute(-4)); - assert_eq!(rule.typ, TypeKind::Byte); + assert_eq!(rule.typ, TypeKind::Byte { signed: true }); assert_eq!(rule.op, Operator::Equal); assert_eq!(rule.value, Value::Uint(0)); assert_eq!(rule.message, "End marker"); @@ -2251,7 +2529,13 @@ fn test_parse_magic_rule_real_world_examples() { fn test_parse_magic_rule_edge_cases() { // Test various edge cases let edge_cases = [ - ("0 byte 0", 0, TypeKind::Byte, Value::Uint(0), ""), + ( + "0 byte 0", + 0, + TypeKind::Byte { signed: true }, + Value::Uint(0), + "", + ), ( ">>>16 string \"\" Empty string", 3, @@ -2264,7 +2548,7 @@ fn test_parse_magic_rule_edge_cases() { 0, TypeKind::Long { endian: Endianness::Little, - signed: false, + signed: true, }, Value::Uint(0xFFFF_FFFF), "Max value", diff --git a/src/parser/preprocessing.rs b/src/parser/preprocessing.rs index ef081586..8ac2e09a 100644 --- a/src/parser/preprocessing.rs +++ b/src/parser/preprocessing.rs @@ -225,7 +225,7 @@ mod tests { let line = li(1, "0 byte 1 ELF"); let rule = parse_magic_rule_line(&line).unwrap(); assert_eq!(rule.level, 0); - assert!(matches!(rule.typ, TypeKind::Byte)); + assert!(matches!(rule.typ, TypeKind::Byte { .. })); } #[test] diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index b318a08a..bb0ac9c2 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -260,6 +260,96 @@ fn test_custom_rules_no_match_fallback() { assert_eq!(result.description, "data"); } +// ============================================================ +// Comparison Operators End-to-End +// ============================================================ + +#[test] +fn test_comparison_operators_in_magic_rules() { + let temp_dir = TempDir::new().unwrap(); + let magic_path = temp_dir.path().join("compare.magic"); + + // Version detection: match magic, then use comparison on version byte + let mut f = fs::File::create(&magic_path).unwrap(); + writeln!(f, "0 belong 0x7f454c46 ELF").unwrap(); + writeln!(f, ">4 ubyte >1 64-bit").unwrap(); + writeln!(f, ">4 ubyte <=1 32-bit").unwrap(); + + let db = MagicDatabase::load_from_file(&magic_path).unwrap(); + + // class byte = 2, which is >1 -> "64-bit" + let result = db.evaluate_buffer(b"\x7fELF\x02").unwrap(); + assert!( + result.description.contains("64-bit"), + "Expected 64-bit for class=2, got: {}", + result.description + ); + + // class byte = 1, which is <=1 -> "32-bit" + let result = db.evaluate_buffer(b"\x7fELF\x01").unwrap(); + assert!( + result.description.contains("32-bit"), + "Expected 32-bit for class=1, got: {}", + result.description + ); +} + +#[test] +fn test_less_than_greater_than_operators() { + let temp_dir = TempDir::new().unwrap(); + let magic_path = temp_dir.path().join("range.magic"); + + let mut f = fs::File::create(&magic_path).unwrap(); + writeln!(f, "0 ubyte <0x10 Low byte value").unwrap(); + writeln!(f, "0 ubyte >=0x80 High byte value").unwrap(); + + let db = MagicDatabase::load_from_file(&magic_path).unwrap(); + + // 0x05 < 0x10 -> "Low byte value" + let result = db.evaluate_buffer(b"\x05rest").unwrap(); + assert!( + result.description.contains("Low byte value"), + "Expected low byte match, got: {}", + result.description + ); + + // 0xFF >= 0x80 -> "High byte value" + let result = db.evaluate_buffer(b"\xFFrest").unwrap(); + assert!( + result.description.contains("High byte value"), + "Expected high byte match, got: {}", + result.description + ); +} + +#[test] +fn test_signed_byte_comparison_integration() { + let temp_dir = TempDir::new().unwrap(); + let magic_path = temp_dir.path().join("signed.magic"); + + let mut f = fs::File::create(&magic_path).unwrap(); + // byte (signed default) with comparison + writeln!(f, "0 byte >0 Positive first byte").unwrap(); + + let db = MagicDatabase::load_from_file(&magic_path).unwrap(); + + // 0x7f = 127 as signed, which is > 0 + let result = db.evaluate_buffer(b"\x7f").unwrap(); + assert!( + result.description.contains("Positive first byte"), + "Expected match for 0x7f (signed 127 > 0), got: {}", + result.description + ); + + // 0x80 = -128 as signed, which is NOT > 0 + let result = db.evaluate_buffer(b"\x80").unwrap(); + assert!( + !result.description.contains("Positive first byte"), + "Expected no match for 0x80 (signed -128 is not > 0), got: {}", + result.description + ); +} + // ============================================================ // Load from Directory End-to-End // ============================================================ @@ -268,9 +358,9 @@ fn test_custom_rules_no_match_fallback() { fn test_load_directory_of_magic_files() { let temp_dir = TempDir::new().unwrap(); - // Create multiple magic files using belong for binary magic numbers + // Create multiple magic files using ubelong for binary magic numbers let mut f1 = fs::File::create(temp_dir.path().join("images.magic")).unwrap(); - writeln!(f1, "0 belong 0x89504e47 PNG image").unwrap(); + writeln!(f1, "0 ubelong 0x89504e47 PNG image").unwrap(); let mut f2 = fs::File::create(temp_dir.path().join("docs.magic")).unwrap(); writeln!(f2, "0 string \"%PDF-\" PDF document").unwrap(); diff --git a/tests/property_tests.rs b/tests/property_tests.rs index 1c0e9819..52e05402 100644 --- a/tests/property_tests.rs +++ b/tests/property_tests.rs @@ -27,7 +27,7 @@ fn arb_offset_spec() -> impl Strategy { /// Generate a valid TypeKind for testing fn arb_type_kind() -> impl Strategy { prop_oneof![ - Just(TypeKind::Byte), + any::().prop_map(|signed| TypeKind::Byte { signed }), (any::(), any::()).prop_map(|(is_big, signed)| { TypeKind::Short { endian: if is_big { @@ -59,6 +59,10 @@ fn arb_operator() -> impl Strategy { prop_oneof![ Just(Operator::Equal), Just(Operator::NotEqual), + Just(Operator::LessThan), + Just(Operator::GreaterThan), + Just(Operator::LessEqual), + Just(Operator::GreaterEqual), Just(Operator::BitwiseAnd), (0u64..=255u64).prop_map(Operator::BitwiseAndMask), ]