From 23be7d1ebb58742264510f50b2d1e7ae35f833b1 Mon Sep 17 00:00:00 2001 From: "dosubot[bot]" <131922026+dosubot[bot]@users.noreply.github.com> Date: Sun, 1 Mar 2026 07:48:40 +0000 Subject: [PATCH] docs: Dosu updates for PR #104 --- docs/src/architecture.md | 43 +++++++++++++++++++++++++++++--- docs/src/testing-guidelines.md | 2 +- docs/src/testing.md | 45 ++++++++++++++++++++++++++++++---- 3 files changed, 80 insertions(+), 10 deletions(-) diff --git a/docs/src/architecture.md b/docs/src/architecture.md index 2433c38b..65e5d7e2 100644 --- a/docs/src/architecture.md +++ b/docs/src/architecture.md @@ -63,7 +63,7 @@ The parser is responsible for converting magic files (text-based DSL) into an Ab - ✅ **Number parsing**: Decimal and hexadecimal with overflow protection - ✅ **Offset parsing**: Absolute offsets with comprehensive validation -- ✅ **Operator parsing**: Equality, inequality, and bitwise AND operators +- ✅ **Operator parsing**: Equality (`=`, `==`), inequality (`!=`, `<>`), comparison (`<`, `>`, `<=`, `>=`), and bitwise AND (`&`) operators - ✅ **Value parsing**: Strings, numbers, and hex byte sequences with escape sequences - ✅ **Error handling**: Comprehensive nom error handling with meaningful messages - ✅ **Rule parsing**: Complete rule parsing via `parse_magic_rule()` @@ -88,6 +88,24 @@ pub struct MagicRule { pub children: Vec<MagicRule>, // Nested rules pub level: u32, // Indentation level } + +pub enum TypeKind { + Byte { signed: bool }, // Single byte with explicit signedness + Short { endian: Endianness, signed: bool }, + Long { endian: Endianness, signed: bool }, + String { max_length: Option<usize> }, +} + +pub enum Operator { + Equal, // = or == + NotEqual, // != or <> + LessThan, // < + GreaterThan, // > + LessEqual, // <= + GreaterEqual, // >= + BitwiseAnd, // & + BitwiseAndMask(u64), // & with mask +} ``` **Design Principles:** @@ -96,6 +114,7 @@ pub struct MagicRule { - **Serializable**: Full serde support for caching - **Self-contained**: No external 
dependencies in AST nodes - **Type-safe**: Rust's type system prevents invalid rule combinations +- **Explicit signedness**: `TypeKind::Byte` and integer types distinguish signed from unsigned interpretations ### 3. Evaluator Module (`src/evaluator/`) @@ -105,7 +124,7 @@ The evaluator executes magic rules against file buffers to identify file types. - `mod.rs`: Main evaluation engine with `EvaluationContext` and `MatchResult` - `offset.rs`: Offset resolution (absolute, relative, from-end) -- `types.rs`: Type interpretation with endianness handling +- `types.rs`: Type interpretation with endianness handling and signedness coercion - `operators.rs`: Comparison and bitwise operations **Implemented Features:** @@ -117,6 +136,8 @@ The evaluator executes magic rules against file buffers to identify file types. - ✅ **Graceful Degradation**: Skip problematic rules, continue evaluation - ✅ **Timeout Protection**: Configurable time limits - ✅ **Recursion Limiting**: Prevent stack overflow from deep nesting +- ✅ **Signedness Coercion**: Automatic value coercion for signed type comparisons (e.g., `0xff` → `-1` for signed byte) +- ✅ **Comparison Operators**: Full support for `<`, `>`, `<=`, `>=` with numeric and lexicographic ordering - 📋 **Indirect Offsets**: Pointer dereferencing (planned) ### 4. I/O Module (`src/io/`) @@ -217,8 +238,8 @@ Magic rules form a tree structure where: ```mermaid flowchart TD R[Root Rule
e.g., "0 string PK"] - R -->|match| C1[Child Rule 1
e.g., ">4 byte 0x14"] - R -->|match| C2[Child Rule 2
e.g., ">4 byte 0x06"] + R -->|match| C1[Child Rule 1
e.g., ">4 ubyte 0x14"] + R -->|match| C2[Child Rule 2
e.g., ">4 ubyte 0x06"] C1 -->|match| G1[Grandchild
ZIP archive v2.0] C2 -->|match| G2[Grandchild
ZIP archive v1.0] @@ -229,6 +250,20 @@ flowchart TD style G2 fill:#c8e6c9 ``` +**Operator Support:** + +The evaluator supports all comparison and bitwise operators: + +- **Equality**: `=` or `==` (exact match) +- **Inequality**: `!=` or `<>` (not equal) +- **Less-than**: `<` (numeric or lexicographic) +- **Greater-than**: `>` (numeric or lexicographic) +- **Less-equal**: `<=` (numeric or lexicographic) +- **Greater-equal**: `>=` (numeric or lexicographic) +- **Bitwise AND**: `&` (bit pattern matching) + +Comparison operators support both numeric comparisons (with automatic type coercion between signed and unsigned integers via `i128`) and lexicographic comparisons for strings and byte sequences. + ### Memory-Safe Buffer Access All buffer operations use safe Rust patterns: diff --git a/docs/src/testing-guidelines.md b/docs/src/testing-guidelines.md index 1226b418..63640b90 100644 --- a/docs/src/testing-guidelines.md +++ b/docs/src/testing-guidelines.md @@ -116,7 +116,7 @@ fn test_magic_rule_evaluation_with_matching_bytes() { // Arrange let rule = MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: false }, op: Operator::Equal, value: Value::Uint(0x7f), message: "ELF magic".to_string(), diff --git a/docs/src/testing.md b/docs/src/testing.md index d3bd3d8e..8f38007a 100644 --- a/docs/src/testing.md +++ b/docs/src/testing.md @@ -62,16 +62,16 @@ test result: ok. 
98 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out **TypeKind Tests:** -- `test_type_kind_byte` - Single byte type handling +- `test_type_kind_byte` - Single byte type handling with signedness - `test_type_kind_short` - 16-bit integer types with endianness - `test_type_kind_long` - 32-bit integer types with endianness - `test_type_kind_string` - String types with length limits -- `test_type_kind_serialization` - All type serialization +- `test_type_kind_serialization` - All type serialization including signed/unsigned variants **Operator Tests:** -- `test_operator_variants` - All operator types -- `test_operator_serialization` - Operator serialization +- `test_operator_variants` - All operator types (Equal, NotEqual, LessThan, GreaterThan, LessEqual, GreaterEqual, BitwiseAnd, BitwiseAndMask) +- `test_operator_serialization` - Operator serialization including comparison operators **MagicRule Tests:** @@ -104,6 +104,7 @@ test result: ok. 98 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out - `test_parse_operator_equality` - Equality operators (= and ==) - `test_parse_operator_inequality` - Inequality operators (!= and \<>) +- `test_parse_operator_comparison` - Comparison operators (\<, >, \<=, >=) - `test_parse_operator_bitwise_and` - Bitwise AND operator (&) - `test_parse_operator_with_remaining_input` - Partial parsing - `test_parse_operator_precedence` - Operator precedence handling @@ -300,6 +301,40 @@ fn test_parser_error_conditions() { } ``` +**Testing Signed vs Unsigned Byte Behavior:** + +```rust +#[test] +fn test_signed_unsigned_byte_handling() { + // Test signed byte interpretation + let signed_rule = MagicRule { + offset: OffsetSpec::Absolute(0), + typ: TypeKind::Byte { signed: true }, + op: Operator::GreaterThan, + value: Value::Int(0), + message: "Positive signed byte".to_string(), + children: vec![], + level: 0, + }; + + // 0x7f = 127 as signed (positive) + // 0x80 = -128 as signed (negative) + + // Test unsigned byte interpretation + let 
unsigned_rule = MagicRule { + offset: OffsetSpec::Absolute(0), + typ: TypeKind::Byte { signed: false }, + op: Operator::GreaterThan, + value: Value::Uint(127), + message: "Large unsigned byte".to_string(), + children: vec![], + level: 0, + }; + + // 0x80 (128) is > 127 when interpreted as unsigned; 0x7f (127) is not +} +``` + ### Test Data Management **Test Fixtures:** @@ -313,7 +348,7 @@ const PDF_MAGIC: &str = "%PDF-"; fn create_test_rule() -> MagicRule { MagicRule { offset: OffsetSpec::Absolute(0), - typ: TypeKind::Byte, + typ: TypeKind::Byte { signed: true }, op: Operator::Equal, value: Value::Uint(0x7f), message: "Test rule".to_string(),