diff --git a/docs/API_REFERENCE.md b/docs/API_REFERENCE.md index 3af1ca3f..ee8cfc89 100644 --- a/docs/API_REFERENCE.md +++ b/docs/API_REFERENCE.md @@ -301,8 +301,22 @@ use libmagic_rs::TypeKind; | `Byte { signed }` | Single byte with explicit signedness (changed in v0.2.0) | | `Short { endian, signed }` | 16-bit integer | | `Long { endian, signed }` | 32-bit integer | +| `Quad { endian, signed }` | 64-bit integer | | `String { max_length }` | String data | +##### 64-bit Integer Types + +The `Quad` variant supports six endian-signedness combinations: + +| Type Specifier | Endianness | Signedness | Description | +|----------------|------------|------------|-------------| +| `quad` | Native | Signed | Native-endian signed 64-bit integer | +| `uquad` | Native | Unsigned | Native-endian unsigned 64-bit integer | +| `lequad` | Little | Signed | Little-endian signed 64-bit integer | +| `ulequad` | Little | Unsigned | Little-endian unsigned 64-bit integer | +| `bequad` | Big | Signed | Big-endian signed 64-bit integer | +| `ubequad` | Big | Unsigned | Big-endian unsigned 64-bit integer | + **Version Note:** In v0.2.0, the `Byte` variant changed from a unit variant to a struct variant with a `signed` field. #### Operator diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index c02b5df2..0355499b 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -267,6 +267,13 @@ pub struct MagicRule { } ``` +**TypeKind Variants:** +- `Byte { signed: bool }` - 8-bit integer +- `Short { endian: Endianness, signed: bool }` - 16-bit integer +- `Long { endian: Endianness, signed: bool }` - 32-bit integer +- `Quad { endian: Endianness, signed: bool }` - 64-bit integer +- `String { max_length: Option<usize> }` - Null-terminated string + **Hierarchical Structure:** - Top-level rules (level 0) are entry points - Child rules are evaluated only if parent matches @@ -467,8 +474,18 @@ The evaluation hot path is optimized for: 1. Add variant to `TypeKind` enum (`ast.rs`) 2. 
Add parsing logic (`grammar.rs`) 3. Add reading logic (`types.rs`) -4. Add tests -5. Update documentation +4. Add serialization support (`build_helpers.rs`) +5. Add tests +6. Update documentation + +**Example: Quad Type Implementation** + +The `Quad` type (64-bit integer) demonstrates the type system extension pattern. The implementation includes: +- `TypeKind::Quad { endian: Endianness, signed: bool }` variant in the AST +- `read_quad()` function for safe buffer access with bounds checking +- Parsing support for `quad`, `uquad`, `lequad`, `ulequad`, `bequad`, `ubequad` type names +- Strength calculation (specificity score of 16, highest among numeric types) +- Serialization for build-time rule compilation ### Adding New Operators diff --git a/docs/MAGIC_FORMAT.md b/docs/MAGIC_FORMAT.md index f1332a8f..b0904c40 100644 --- a/docs/MAGIC_FORMAT.md +++ b/docs/MAGIC_FORMAT.md @@ -119,6 +119,7 @@ Types for indirect offsets: - `.b` - byte (1 byte) - `.s` - short (2 bytes) - `.l` - long (4 bytes) +- `.q` - quad (8 bytes) ### Relative Offset @@ -146,12 +147,22 @@ The `&` prefix indicates relative offset. 
| `long` | 4 bytes | native | | `lelong` | 4 bytes | little-endian | | `belong` | 4 bytes | big-endian | +| `quad` | 8 bytes | native | +| `lequad` | 8 bytes | little-endian | +| `bequad` | 8 bytes | big-endian | + +All integer types have unsigned variants prefixed with `u`: +- `ubyte`, `ushort`, `uleshort`, `ubeshort` +- `ulong`, `ulelong`, `ubelong` +- `uquad`, `ulequad`, `ubequad` Examples: ``` 0 byte 0x7f (byte match) 0 leshort 0x5a4d DOS MZ signature 0 belong 0xcafebabe Java class file +0 lequad 0x1234567890abcdef (64-bit little-endian) +8 uquad >0x8000000000000000 (unsigned 64-bit check) ``` ### String Type @@ -469,7 +480,7 @@ Consider: - Absolute offsets - Relative offsets - Indirect offsets (basic) -- Byte, short, long types +- Byte, short, long, quad types (8-bit, 16-bit, 32-bit, 64-bit integers) - String type - Comparison operators (`=`, `!`, `<`, `>`, `<=`, `>=`) - Bitwise AND operator @@ -481,6 +492,7 @@ Consider: - Regex patterns - Date/time types - Float types +- 128-bit integer types - Use/name directives - Default rules @@ -488,6 +500,7 @@ Consider: - **Comparison operators**: Full support for `<`, `>`, `<=`, `>=` operators - **Strength modifiers**: The `!:strength` directive for adjusting rule priority +- **64-bit integers**: `quad` type family (`quad`, `uquad`, `lequad`, `ulequad`, `bequad`, `ubequad`) --- diff --git a/docs/src/architecture.md b/docs/src/architecture.md index 65e5d7e2..8896d909 100644 --- a/docs/src/architecture.md +++ b/docs/src/architecture.md @@ -93,6 +93,7 @@ pub enum TypeKind { Byte { signed: bool }, // Single byte with explicit signedness Short { endian: Endianness, signed: bool }, Long { endian: Endianness, signed: bool }, + Quad { endian: Endianness, signed: bool }, String { max_length: Option<usize> }, } @@ -114,7 +115,7 @@ pub enum Operator { - **Serializable**: Full serde support for caching - **Self-contained**: No external dependencies in AST nodes - **Type-safe**: Rust's type system prevents invalid rule combinations -- 
**Explicit signedness**: `TypeKind::Byte` and integer types distinguish signed from unsigned interpretations +- **Explicit signedness**: `TypeKind::Byte` and integer types (Short, Long, Quad) distinguish signed from unsigned interpretations ### 3. Evaluator Module (`src/evaluator/`) diff --git a/docs/src/ast-structures.md b/docs/src/ast-structures.md index f821b183..1e465c9b 100644 --- a/docs/src/ast-structures.md +++ b/docs/src/ast-structures.md @@ -179,6 +179,9 @@ pub enum TypeKind { /// 32-bit integer Long { endian: Endianness, signed: bool }, + /// 64-bit integer + Quad { endian: Endianness, signed: bool }, + /// String data String { max_length: Option<usize> }, } @@ -205,6 +208,18 @@ let long_be = TypeKind::Long { signed: true }; +// 64-bit little-endian unsigned integer +let quad_le = TypeKind::Quad { + endian: Endianness::Little, + signed: false +}; + +// 64-bit big-endian signed integer +let quad_be = TypeKind::Quad { + endian: Endianness::Big, + signed: true +}; + // Null-terminated string, max 256 bytes let string_type = TypeKind::String { max_length: Some(256) @@ -402,7 +417,7 @@ let script_rule = MagicRule { ### Type Selection 1. **Use `Byte { signed }`** for single-byte values and flags, specifying signedness -2. **Use `Short/Long`** with explicit endianness and signedness for multi-byte integers +2. **Use `Short/Long/Quad`** with explicit endianness and signedness for multi-byte integers 3. **Use `String`** with length limits for text patterns 4. **Use `Bytes`** for exact binary sequences