diff --git a/.claude/instincts/ast-derive-pattern.md b/.claude/instincts/ast-derive-pattern.md
new file mode 100644
index 00000000..2f80f6a6
--- /dev/null
+++ b/.claude/instincts/ast-derive-pattern.md
@@ -0,0 +1,45 @@
+---
+id: libmagic-rs-ast-derives
+trigger: "when creating new AST types or data structures"
+confidence: 0.9
+domain: rust-types
+source: local-repo-analysis
+---
+
+# AST Type Derive Pattern
+
+## Action
+
+When creating new data structures (especially AST nodes):
+
+1. Derive: `Debug, Clone, Serialize, Deserialize, PartialEq, Eq`
+2. Add rustdoc with `# Examples` section
+3. Use `#[non_exhaustive]` on public enums
+4. Document each field/variant with `///` comments
+5. Import `serde::{Serialize, Deserialize}` (not `serde_derive`)
+
+```rust
+use serde::{Deserialize, Serialize};
+
+/// Description of the type
+///
+/// # Examples
+///
+/// ```
+/// use libmagic_rs::parser::ast::MyType;
+/// let val = MyType::Variant(42);
+/// ```
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+#[non_exhaustive]
+pub enum MyType {
+    /// Description of variant
+    Variant(i64),
+}
+```
+
+## Evidence
+
+- All types in `src/parser/ast.rs` follow this exact pattern
+- 6 enum types and 1 struct type all derive the same 6 traits
+- Every public type has rustdoc examples
+- Every variant has `///` doc comments
diff --git a/.claude/instincts/build-script-testing.md b/.claude/instincts/build-script-testing.md
new file mode 100644
index 00000000..86102896
--- /dev/null
+++ b/.claude/instincts/build-script-testing.md
@@ -0,0 +1,26 @@
+---
+id: libmagic-rs-build-testing
+trigger: "when modifying build.rs or build-time logic"
+confidence: 0.85
+domain: rust-build
+source: local-repo-analysis
+---
+
+# Build Script Testing Pattern
+
+## Action
+
+Build scripts (`build.rs`) cannot import the crate being built. To test build logic:
+
+1. Extract build logic into `src/build_helpers.rs` with `#[cfg(any(test, doc))]`
+2. Keep `build.rs` minimal - it should only call functions from the helper module
+3. Write unit tests in `build_helpers.rs` to verify all code paths
+4. Test error cases (invalid magic files, malformed input)
+
+This ensures build-time failures produce clear error messages and are properly tested.
+
+## Evidence
+
+- `src/build_helpers.rs` exists with testable parsing and code generation
+- `build.rs` delegates to functions from `build_helpers`
+- Pattern documented in AGENTS.md as a project convention
diff --git a/.claude/instincts/co-change-awareness.md b/.claude/instincts/co-change-awareness.md
new file mode 100644
index 00000000..d26ed27e
--- /dev/null
+++ b/.claude/instincts/co-change-awareness.md
@@ -0,0 +1,29 @@
+---
+id: libmagic-rs-co-change
+trigger: "when modifying core source files"
+confidence: 0.85
+domain: architecture
+source: local-repo-analysis
+---
+
+# Co-Change Awareness
+
+## Action
+
+When modifying these files, check if related files also need updates:
+
+| If you change... | Also check... |
+|-------------------|--------------|
+| `src/lib.rs` | `src/main.rs`, `src/parser/ast.rs`, tests |
+| `src/evaluator/mod.rs` | `src/lib.rs`, `src/main.rs`, tests |
+| `src/parser/ast.rs` | `src/lib.rs`, `src/parser/grammar.rs`, `src/evaluator/types.rs` |
+| `src/main.rs` | `tests/cli_integration_tests.rs` |
+| `Cargo.toml` | `src/lib.rs`, `src/main.rs` |
+| `src/parser/grammar.rs` | `src/parser/mod.rs`, `tests/parser_integration_tests.rs` |
+
+## Evidence
+
+- Analyzed 34 commits for file co-change frequency
+- `src/lib.rs` + `src/main.rs` changed together 8 times (100% of main.rs changes)
+- `src/evaluator/mod.rs` + `src/lib.rs` changed together 8 times
+- API types are re-exported through `src/lib.rs`, making it a hub file
diff --git a/.claude/instincts/conventional-commits.md b/.claude/instincts/conventional-commits.md
new file mode 100644
index 00000000..0b50d333
--- /dev/null
+++ b/.claude/instincts/conventional-commits.md
@@ -0,0 +1,39 @@
+---
+id: libmagic-rs-commits
+trigger: "when writing a commit message"
+confidence: 0.8
+domain: git
+source: local-repo-analysis
+---
+
+# Conventional Commits Format
+
+## Action
+
+Prefix commit messages with type:
+
+- `feat:` - New features (most common)
+- `fix:` - Bug fixes
+- `chore:` - Maintenance (deps, config)
+- `docs:` - Documentation changes
+- `test:` - Test additions/changes
+- `refactor:` - Code restructuring
+- `perf:` - Performance improvements
+- `ci:` - CI/CD changes
+
+Optional scope: `chore(deps):`, `chore(ci):`
+
+PR references: Include `(#N)` suffix when applicable.
+
+Examples from this repo:
+- `feat: implement comprehensive test infrastructure`
+- `feat: evaluation enhancements with confidence, MIME, tags, metadata (#29)`
+- `chore(deps): update dependencies in mise.toml for improved tooling`
+- `feat: built-in rules build time compilation fallback (#28)`
+
+## Evidence
+
+- Analyzed 34 commits
+- 77% follow conventional commit format
+- `feat:` is the most common type (30% of commits)
+- PR numbers consistently included as `(#N)` suffix
diff --git a/.claude/instincts/error-handling-pattern.md b/.claude/instincts/error-handling-pattern.md
new file mode 100644
index 00000000..9b111a7d
--- /dev/null
+++ b/.claude/instincts/error-handling-pattern.md
@@ -0,0 +1,42 @@
+---
+id: libmagic-rs-error-handling
+trigger: "when adding new error types or handling errors"
+confidence: 0.9
+domain: rust-errors
+source: local-repo-analysis
+---
+
+# Error Handling with thiserror + Constructor Methods
+
+## Action
+
+When creating or extending error types:
+
+1. Use `thiserror::Error` derive macro
+2. Include contextual fields (line numbers, offsets, names)
+3. Add named constructor methods for each variant
+4. Mark constructors with `#[must_use]`
+5. Use `impl Into<String>` for string parameters
+6. Write unit tests that verify Display output
+
+```rust
+#[derive(Debug, thiserror::Error)]
+pub enum MyError {
+    #[error("Something failed at {location}: {reason}")]
+    SomethingFailed { location: usize, reason: String },
+}
+
+impl MyError {
+    #[must_use]
+    pub fn something_failed(location: usize, reason: impl Into<String>) -> Self {
+        Self::SomethingFailed { location, reason: reason.into() }
+    }
+}
+```
+
+## Evidence
+
+- `src/error.rs` defines 3 error enums with 20+ variants, all following this pattern
+- Every variant has a corresponding constructor method
+- All constructors use `impl Into<String>` and `#[must_use]`
+- Comprehensive tests verify Display formatting for each variant
diff --git a/.claude/instincts/just-task-runner.md b/.claude/instincts/just-task-runner.md
new file mode 100644
index 00000000..b91c4143
--- /dev/null
+++ b/.claude/instincts/just-task-runner.md
@@ -0,0 +1,35 @@
+---
+id: libmagic-rs-just-tasks
+trigger: "when running project commands or CI checks"
+confidence: 0.9
+domain: tooling
+source: local-repo-analysis
+---
+
+# Use just for Task Running
+
+## Action
+
+Use `just` (not raw cargo) for project tasks. Key commands:
+
+| Command | Purpose |
+|---------|---------|
+| `just ci-check` | Full CI parity check (run before committing) |
+| `just test` | Run tests with nextest |
+| `just lint-rust` | Clippy with `-D warnings` |
+| `just fmt` | Format Rust code |
+| `just coverage` | Generate LCOV coverage report |
+| `just coverage-report` | HTML coverage with browser open |
+| `just bench` | Run all benchmarks |
+| `just audit` | Security audit |
+| `just docs` | Build and serve documentation |
+| `just test-compatibility` | Test against original libmagic test suite |
+
+All commands use `mise exec --` prefix for tool version management.
+
+## Evidence
+
+- `justfile` has 50+ recipes organized into sections
+- All CI workflows use `just` commands
+- `mise.toml` manages tool versions (Rust, pre-commit, mdbook, etc.)
+- Cross-platform support with `[unix]`/`[windows]` annotations
diff --git a/.claude/instincts/strict-clippy-compliance.md b/.claude/instincts/strict-clippy-compliance.md
new file mode 100644
index 00000000..c84c8710
--- /dev/null
+++ b/.claude/instincts/strict-clippy-compliance.md
@@ -0,0 +1,30 @@
+---
+id: libmagic-rs-strict-clippy
+trigger: "when writing or modifying Rust code in this project"
+confidence: 0.95
+domain: rust-linting
+source: local-repo-analysis
+---
+
+# Strict Clippy Compliance
+
+## Action
+
+This project enforces extremely strict clippy settings. When writing code:
+
+- Never use `.unwrap()` - use `?`, `.ok()`, or match instead (clippy `unwrap_used = "deny"`)
+- Never use `panic!()` in library code (clippy `panic = "deny"`)
+- Never use `unsafe` code (workspace `unsafe_code = "forbid"`)
+- Never use direct indexing on slices/buffers - use `.get()` (clippy `indexing_slicing = "warn"`)
+- Never use `&str[n..]` - use `strip_prefix()`/`strip_suffix()` (clippy `string_slice = "warn"`)
+- Mark all constructors/getters with `#[must_use]`
+- Avoid `as` casts where possible (clippy `as_conversions = "warn"`)
+- No `dbg!()` macros (clippy `dbg_macro = "warn"`)
+- No `todo!()` macros (clippy `todo = "warn"`)
+
+## Evidence
+
+- Cargo.toml contains 80+ clippy lint configurations
+- `unsafe_code = "forbid"` and `unwrap_used = "deny"` are workspace-level
+- All 34 analyzed commits maintain zero-warning compliance
+- CI enforces `cargo clippy -- -D warnings`
diff --git a/.claude/instincts/testing-conventions.md b/.claude/instincts/testing-conventions.md
new file mode 100644
index 00000000..8ea04678
--- /dev/null
+++ b/.claude/instincts/testing-conventions.md
@@ -0,0 +1,39 @@
+---
+id: libmagic-rs-testing
+trigger: "when writing tests or adding new functionality"
+confidence: 0.9
+domain: testing
+source: local-repo-analysis
+---
+
+# Testing Conventions
+
+## Action
+
+Follow these testing patterns:
+
+1. **Unit tests**: Place in `#[cfg(test)] mod tests` within each source file
+2. **Integration tests**: Add to `tests/` directory with `_tests.rs` suffix
+3. **CLI tests**: Use `insta` snapshots in `tests/cli_integration_tests.rs`
+4. **Property tests**: Add to `tests/property_tests.rs` using `proptest`
+5. **Benchmarks**: Add to `benches/` using `criterion` with `harness = false`
+
+Run tests with:
+```bash
+cargo nextest run --workspace --no-capture   # Standard
+just ci-check                                 # Full CI parity
+just coverage                                 # With coverage
+```
+
+Test naming: `test_<module>_<behavior>` (e.g., `test_parse_error_display`)
+
+Coverage target: >85% with `cargo llvm-cov`
+
+## Evidence
+
+- 8 test files in `tests/` directory
+- 3 benchmark files in `benches/`
+- Every source file has inline `#[cfg(test)]` module
+- `insta` used for snapshot testing CLI output
+- `proptest` used for property-based testing
+- `criterion` used for benchmarks (not built-in bench)
diff --git a/.claude/skills/SKILL.md b/.claude/skills/SKILL.md
new file mode 100644
index 00000000..0a44ff34
--- /dev/null
+++ b/.claude/skills/SKILL.md
@@ -0,0 +1,190 @@
+---
+name: libmagic-rs-patterns
+description: Coding patterns extracted from libmagic-rs repository
+version: 1.0.0
+source: local-git-analysis
+analyzed_commits: 34
+---
+
+# libmagic-rs Development Patterns
+
+## Commit Conventions
+
+This project uses **conventional commits** (77% of commits follow the pattern):
+
+- `feat:` - New features (most common, ~30%)
+- `chore:` / `chore(deps):` / `chore(ci):` - Maintenance tasks
+- `fix:` - Bug fixes
+- `refactor:` - Code restructuring
+- `docs:` - Documentation updates
+- `test:` - Test additions/changes
+
+PR references use `(#N)` suffix format.
+
+## Architecture
+
+### Module Structure
+
+```
+src/
+  lib.rs             # Public API (MagicDatabase), re-exports
+  main.rs            # CLI binary (rmagic), clap-based
+  error.rs           # Error types (LibmagicError, ParseError, EvaluationError)
+  mime.rs            # MIME type detection
+  tags.rs            # Tag-based classification
+  build_helpers.rs   # Testable build script logic
+  builtin_rules.rs   # Built-in magic rules (compiled at build time)
+  builtin_rules.magic # Magic rule definitions
+  parser/
+    mod.rs           # Parser public interface
+    ast.rs           # AST node definitions (OffsetSpec, TypeKind, Operator, etc.)
+    grammar.rs       # nom-based parser combinators
+  evaluator/
+    mod.rs           # Main evaluation engine
+    offset.rs        # Offset resolution (absolute, indirect, relative)
+    operators.rs     # Comparison and bitwise operations
+    types.rs         # Type interpretation with endianness
+    strength.rs      # Rule strength calculation
+  io/
+    mod.rs           # Memory-mapped file I/O
+  output/
+    mod.rs           # Output formatting interface
+    json.rs          # JSON output format
+    text.rs          # Text output format
+```
+
+### Pipeline Pattern
+
+```
+Magic File -> Parser -> AST -> Evaluator -> Match Results -> Output Formatter
+                                  ^
+Target File -> Memory Mapper -> File Buffer
+```
+
+### Core Types
+
+- `MagicDatabase` - Main entry point for loading rules and evaluating files
+- `MagicRule` - A single magic rule with offset, type, operator, value, description
+- `OffsetSpec` - Absolute, Indirect, Relative, or FromEnd offsets
+- `TypeKind` - Byte, Short, Long, String, Regex with endianness
+- `Operator` - Equal, NotEqual, Greater, Less, BitwiseAnd, Xor
+- `LibmagicError` / `ParseError` / `EvaluationError` - Error hierarchy
+
+## Co-Change Patterns (Files That Change Together)
+
+| File Group | Frequency | Reason |
+|------------|-----------|--------|
+| `src/lib.rs` + `src/main.rs` | 8x | API changes require CLI updates |
+| `src/evaluator/mod.rs` + `src/lib.rs` | 8x | Evaluator changes exposed through lib API |
+| `Cargo.toml` + `src/lib.rs` | 6x | Dependency changes affect library code |
+| `src/lib.rs` + `src/parser/ast.rs` | 5x | AST changes re-exported through lib |
+| `src/main.rs` + `tests/cli_integration_tests.rs` | 4x | CLI changes require test updates |
+
+## Clippy Configuration
+
+Extremely strict clippy setup in `Cargo.toml`:
+
+- `unsafe_code = "forbid"` - No unsafe code allowed
+- `warnings = "deny"` - Zero warnings policy
+- `unwrap_used = "deny"` - Never use `.unwrap()` in production
+- `panic = "deny"` - Never panic in production
+- `pedantic`, `nursery`, `cargo` lint groups enabled
+- Security-focused lints: `indexing_slicing`, `arithmetic_side_effects`, `string_slice`
+
+## Error Handling Pattern
+
+Use `thiserror` with constructor methods:
+
+```rust
+#[derive(Debug, thiserror::Error)]
+pub enum ParseError {
+    #[error("Invalid syntax at line {line}: {message}")]
+    InvalidSyntax { line: usize, message: String },
+}
+
+impl ParseError {
+    #[must_use]
+    pub fn invalid_syntax(line: usize, message: impl Into<String>) -> Self {
+        Self::InvalidSyntax { line, message: message.into() }
+    }
+}
+```
+
+- Every error variant has a named constructor method
+- Constructors use `impl Into<String>` for ergonomic creation
+- All constructors marked `#[must_use]`
+- Error messages include contextual info (line numbers, offsets)
+
+## Testing Patterns
+
+### Test Organization
+
+- **Unit tests**: `#[cfg(test)] mod tests` in each source file
+- **Integration tests**: `tests/*.rs` directory
+- **Compatibility tests**: `tests/compatibility_tests.rs` against real libmagic test suite
+- **Property tests**: `tests/property_tests.rs` using `proptest`
+- **Benchmarks**: `benches/*.rs` using `criterion`
+- **Snapshots**: `insta` for CLI output snapshots
+
+### Test File Naming
+
+- CLI tests: `tests/cli_integration_tests.rs`
+- JSON output: `tests/json_integration_test.rs`
+- Parser: `tests/parser_integration_tests.rs`
+- Properties: `tests/property_tests.rs`
+
+### Commands
+
+```bash
+cargo nextest run --workspace --no-capture    # Standard test run
+just ci-check                                  # Full CI parity check
+just coverage                                  # Generate coverage report
+just test-compatibility                        # Test against original libmagic
+```
+
+## Build Script Pattern
+
+Build logic extracted into testable library module:
+
+```rust
+// src/build_helpers.rs - testable parsing and code generation
+#[cfg(any(test, doc))]
+pub mod build_helpers { ... }
+
+// build.rs - minimal, delegates to build_helpers
+fn main() { ... }
+```
+
+## Tooling
+
+- **mise**: Tool version manager (replaces asdf/rtx)
+- **just**: Task runner (justfile)
+- **cargo-nextest**: Fast test runner
+- **cargo-llvm-cov**: Code coverage
+- **insta**: Snapshot testing
+- **criterion**: Benchmarking
+- **pre-commit**: Git hooks
+- **actionlint**: GitHub Actions validation
+- **mdbook**: Documentation site
+- **prettier**: JSON/YAML formatting
+- **mdformat**: Markdown formatting
+- **cargo-dist**: Binary distribution
+
+## Rust Edition & Style
+
+- **Edition**: 2024
+- **MSRV**: 1.85
+- **rustfmt**: `style_edition = "2024"`
+- All derive macros: `Debug, Clone, Serialize, Deserialize, PartialEq, Eq`
+- Public API types derive `Serialize` + `Deserialize`
+- Doc comments on all public items with examples
+- `#[non_exhaustive]` on public enums
+
+## Safety Rules
+
+1. Use `.get()` for buffer access, never direct indexing
+2. Use `strip_prefix()`/`strip_suffix()` instead of `&str[n..]`
+3. No `unwrap()` or `panic!()` in library code
+4. Bounds checking on all byte reads
+5. Configurable timeouts for evaluation
+6. No non-ASCII literals in code/comments
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
new file mode 100644
index 00000000..a40a31f2
--- /dev/null
+++ b/.github/workflows/benchmarks.yml
@@ -0,0 +1,124 @@
+name: Benchmarks
+
+on:
+    # Run on schedule (weekly on Sunday at 3 AM UTC)
+    schedule:
+        - cron: "0 3 * * 0"
+    # Run on PRs that modify performance-critical code
+    pull_request:
+        branches: [main]
+        paths:
+            - "src/evaluator/**"
+            - "src/parser/**"
+            - "src/io/**"
+            - "benches/**"
+            - "Cargo.toml"
+    # Allow manual triggering
+    workflow_dispatch:
+
+env:
+    CARGO_TERM_COLOR: always
+
+jobs:
+    benchmark:
+        name: Run Benchmarks
+        runs-on: ubuntu-latest
+
+        steps:
+            - uses: actions/checkout@v6
+
+            - uses: jdx/mise-action@v3
+              with:
+                  install: true
+                  cache: true
+                  github_token: ${{ secrets.GITHUB_TOKEN }}
+
+            - name: Cache cargo registry
+              uses: actions/cache@v4
+              with:
+                  path: |
+                      ~/.cargo/registry
+                      ~/.cargo/git
+                      target
+                  key: ${{ runner.os }}-cargo-bench-${{ hashFiles('**/Cargo.lock') }}
+                  restore-keys: |
+                      ${{ runner.os }}-cargo-bench-
+                      ${{ runner.os }}-cargo-
+
+            - name: Run benchmarks
+              run: cargo bench --bench parser_bench --bench evaluation_bench --bench io_bench
+
+            - name: Upload benchmark results
+              uses: actions/upload-artifact@v4
+              with:
+                  name: benchmark-results
+                  path: target/criterion/
+                  retention-days: 30
+
+    # Compare benchmarks on PRs
+    benchmark-compare:
+        name: Compare Benchmarks
+        runs-on: ubuntu-latest
+        if: github.event_name == 'pull_request'
+
+        steps:
+            - uses: actions/checkout@v6
+              with:
+                  fetch-depth: 0
+
+            - uses: jdx/mise-action@v3
+              with:
+                  install: true
+                  cache: true
+                  github_token: ${{ secrets.GITHUB_TOKEN }}
+
+            - name: Cache cargo registry
+              uses: actions/cache@v4
+              with:
+                  path: |
+                      ~/.cargo/registry
+                      ~/.cargo/git
+                      target
+                  key: ${{ runner.os }}-cargo-bench-${{ hashFiles('**/Cargo.lock') }}
+                  restore-keys: |
+                      ${{ runner.os }}-cargo-bench-
+                      ${{ runner.os }}-cargo-
+
+            - name: Install critcmp
+              run: cargo install critcmp
+
+            - name: Run benchmarks on PR branch
+              run: |
+                  cargo bench --bench parser_bench --bench evaluation_bench --bench io_bench -- --save-baseline pr
+
+            - name: Checkout main branch
+              run: git checkout origin/main
+
+            - name: Run benchmarks on main branch
+              continue-on-error: true
+              id: main-bench
+              run: |
+                  cargo bench --bench parser_bench --bench evaluation_bench --bench io_bench -- --save-baseline main
+
+            - name: Compare benchmarks
+              if: steps.main-bench.outcome == 'success'
+              run: |
+                  {
+                    echo "## Benchmark Comparison"
+                    echo ""
+                    echo "Comparing PR branch against main:"
+                    echo ""
+                    echo '```'
+                    critcmp main pr || true
+                    echo '```'
+                  } >> "$GITHUB_STEP_SUMMARY"
+
+            - name: Skip comparison (no main benchmarks)
+              if: steps.main-bench.outcome != 'success'
+              run: |
+                  {
+                    echo "## Benchmark Comparison"
+                    echo ""
+                    echo "Skipped: main branch does not have matching benchmark targets."
+                    echo "Comparison will be available once benchmarks are merged to main."
+                  } >> "$GITHUB_STEP_SUMMARY"
diff --git a/.github/workflows/compatibility.yml b/.github/workflows/compatibility.yml
index f37f6d76..25b94b83 100644
--- a/.github/workflows/compatibility.yml
+++ b/.github/workflows/compatibility.yml
@@ -8,19 +8,24 @@ on:
     schedule:
         # Run daily at 2 AM UTC to catch any regressions
         - cron: "0 2 * * *"
+    workflow_dispatch:
+
+env:
+    CARGO_TERM_COLOR: always
 
 jobs:
     compatibility-tests:
-        name: Compatibility Tests
-        if: false
+        name: Compatibility Tests (${{ matrix.os }})
         runs-on: ${{ matrix.os }}
 
         strategy:
+            fail-fast: false
             matrix:
                 os: [ubuntu-latest, windows-latest, macos-latest]
 
         steps:
             - uses: actions/checkout@v6
+
             - uses: jdx/mise-action@v3
               with:
                   install: true
@@ -34,4 +39,14 @@ jobs:
               run: cargo build --release
 
             - name: Run compatibility tests
-              run: cargo test test_compatibility_with_original_libmagic -- --ignored
+              run: cargo test test_compatibility_with_original_libmagic -- --ignored --nocapture
+
+            - name: Upload test results on failure
+              if: failure()
+              uses: actions/upload-artifact@v4
+              with:
+                  name: compatibility-test-results-${{ matrix.os }}
+                  path: |
+                      target/nextest/
+                      target/tmp/
+                  retention-days: 7
diff --git a/.gitignore b/.gitignore
index 8e68824b..8525a105 100644
--- a/.gitignore
+++ b/.gitignore
@@ -132,3 +132,4 @@ megalinter-reports/
 
 # megalinter crud
 undefined/
+.claude.local.md
diff --git a/Cargo.toml b/Cargo.toml
index b0ea55ec..e49e4a85 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -28,9 +28,6 @@ exclude = [
     "AGENTS.md",
 ]
 
-[package.metadata.cargo-machete]
-ignored = ["byteorder"] # Ignoring it since we're going to use it later.
-
 # Workspace lint configuration
 [workspace.lints.rust]
 # Security: Forbid unsafe code globally
@@ -148,8 +145,9 @@ path = "src/main.rs"
 [dependencies]
 byteorder = "1.5.0"
 cfg-if = "1.0.4"
-clap = { version = "4.5.54", features = ["derive"] }
+clap = { version = "4.5.57", features = ["derive"] }
 clap-stdin = "0.8.0"
+memchr = "2.7.6"
 memmap2 = "0.9.9"
 nom = "8.0.0"
 serde = { version = "1.0.228", features = ["derive"] }
@@ -162,13 +160,28 @@ serde = { version = "1.0.228", features = ["derive"] }
 thiserror = "2.0.18"
 
 [dev-dependencies]
-criterion = "0.8.1"
-insta = { version = "1.46.1", features = ["json"] }
+criterion = "0.8.2"
+insta = { version = "1.46.3", features = ["json"] }
 nix = { version = "0.31.1", features = ["fs"] }
-proptest = "1.9.0"
-regex = "1.12.2"
-temp-env = "0.3.6"
-tempfile = "3.24.0"
+proptest = "1.10.0"
+regex = "1.12.3"
+tempfile = "3.25.0"
+
+[[bench]]
+name = "parser_bench"
+harness = false
+
+[[bench]]
+name = "evaluation_bench"
+harness = false
+
+[[bench]]
+name = "io_bench"
+harness = false
+
+[profile.release]
+lto = "thin"
+codegen-units = 1
 
 # The profile that 'dist' will build with
 [profile.dist]
diff --git a/benches/evaluation_bench.rs b/benches/evaluation_bench.rs
new file mode 100644
index 00000000..6e0e6b58
--- /dev/null
+++ b/benches/evaluation_bench.rs
@@ -0,0 +1,207 @@
+//! Evaluation benchmarks for libmagic-rs
+//!
+//! Benchmarks rule evaluation performance against various file types:
+//! - ELF binary detection
+//! - ZIP archive detection
+//! - PDF document detection
+//! - Unknown data handling
+
+use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
+use libmagic_rs::{EvaluationConfig, MagicDatabase};
+use std::hint::black_box;
+
+/// Create a minimal ELF 64-bit header for testing
+fn create_elf64_header() -> Vec<u8> {
+    let mut header = vec![0u8; 64];
+
+    // ELF magic number
+    header[0] = 0x7f;
+    header[1] = b'E';
+    header[2] = b'L';
+    header[3] = b'F';
+
+    // 64-bit
+    header[4] = 2;
+
+    // Little endian
+    header[5] = 1;
+
+    // ELF version
+    header[6] = 1;
+
+    // System V ABI
+    header[7] = 0;
+
+    // Type: executable
+    header[16] = 2;
+
+    // Machine: x86-64
+    header[18] = 0x3e;
+
+    // ELF version
+    header[20] = 1;
+
+    header
+}
+
+/// Create a minimal ZIP header for testing
+fn create_zip_header() -> Vec<u8> {
+    vec![
+        0x50, 0x4b, 0x03, 0x04, // PK signature
+        0x14, 0x00, // Version needed
+        0x00, 0x00, // General purpose flags
+        0x08, 0x00, // Compression method (deflate)
+        0x00, 0x00, // Last mod time
+        0x00, 0x00, // Last mod date
+        0x00, 0x00, 0x00, 0x00, // CRC-32
+        0x00, 0x00, 0x00, 0x00, // Compressed size
+        0x00, 0x00, 0x00, 0x00, // Uncompressed size
+        0x04, 0x00, // Filename length
+        0x00, 0x00, // Extra field length
+        b't', b'e', b's', b't', // Filename
+    ]
+}
+
+/// Create a minimal PDF header for testing
+fn create_pdf_header() -> Vec<u8> {
+    b"%PDF-1.7\n%\xe2\xe3\xcf\xd3\n".to_vec()
+}
+
+/// Create random unknown data for testing fallback behavior
+fn create_unknown_data(size: usize) -> Vec<u8> {
+    (0..size).map(|i| (i % 256) as u8).collect()
+}
+
+/// Benchmark evaluation of different file types
+fn bench_file_type_detection(c: &mut Criterion) {
+    let db = MagicDatabase::with_builtin_rules().expect("should load");
+
+    let mut group = c.benchmark_group("file_type_detection");
+
+    // ELF detection
+    let elf_data = create_elf64_header();
+    group.throughput(Throughput::Bytes(elf_data.len() as u64));
+    group.bench_function("detect_elf", |b| {
+        b.iter(|| {
+            let result = db
+                .evaluate_buffer(black_box(&elf_data))
+                .expect("should evaluate");
+            black_box(result)
+        })
+    });
+
+    // ZIP detection
+    let zip_data = create_zip_header();
+    group.throughput(Throughput::Bytes(zip_data.len() as u64));
+    group.bench_function("detect_zip", |b| {
+        b.iter(|| {
+            let result = db
+                .evaluate_buffer(black_box(&zip_data))
+                .expect("should evaluate");
+            black_box(result)
+        })
+    });
+
+    // PDF detection
+    let pdf_data = create_pdf_header();
+    group.throughput(Throughput::Bytes(pdf_data.len() as u64));
+    group.bench_function("detect_pdf", |b| {
+        b.iter(|| {
+            let result = db
+                .evaluate_buffer(black_box(&pdf_data))
+                .expect("should evaluate");
+            black_box(result)
+        })
+    });
+
+    // Unknown data (fallback to "data")
+    let unknown_data = create_unknown_data(64);
+    group.throughput(Throughput::Bytes(unknown_data.len() as u64));
+    group.bench_function("detect_unknown", |b| {
+        b.iter(|| {
+            let result = db
+                .evaluate_buffer(black_box(&unknown_data))
+                .expect("should evaluate");
+            black_box(result)
+        })
+    });
+
+    group.finish();
+}
+
+/// Benchmark evaluation with different buffer sizes
+fn bench_buffer_sizes(c: &mut Criterion) {
+    let db = MagicDatabase::with_builtin_rules().expect("should load");
+
+    let mut group = c.benchmark_group("buffer_sizes");
+
+    // Test with various buffer sizes
+    for size in [64, 256, 1024, 4096, 16384, 65536] {
+        let data = create_unknown_data(size);
+        group.throughput(Throughput::Bytes(size as u64));
+        group.bench_with_input(BenchmarkId::new("unknown", size), &data, |b, data| {
+            b.iter(|| {
+                let result = db
+                    .evaluate_buffer(black_box(data))
+                    .expect("should evaluate");
+                black_box(result)
+            })
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmark evaluation with different configurations
+fn bench_evaluation_configs(c: &mut Criterion) {
+    let elf_data = create_elf64_header();
+
+    let mut group = c.benchmark_group("evaluation_configs");
+    group.throughput(Throughput::Bytes(elf_data.len() as u64));
+
+    // Default configuration
+    group.bench_function("config_default", |b| {
+        let db = MagicDatabase::with_builtin_rules_and_config(EvaluationConfig::default())
+            .expect("should load");
+        b.iter(|| {
+            let result = db
+                .evaluate_buffer(black_box(&elf_data))
+                .expect("should evaluate");
+            black_box(result)
+        })
+    });
+
+    // Performance configuration
+    group.bench_function("config_performance", |b| {
+        let db = MagicDatabase::with_builtin_rules_and_config(EvaluationConfig::performance())
+            .expect("should load");
+        b.iter(|| {
+            let result = db
+                .evaluate_buffer(black_box(&elf_data))
+                .expect("should evaluate");
+            black_box(result)
+        })
+    });
+
+    // Comprehensive configuration
+    group.bench_function("config_comprehensive", |b| {
+        let db = MagicDatabase::with_builtin_rules_and_config(EvaluationConfig::comprehensive())
+            .expect("should load");
+        b.iter(|| {
+            let result = db
+                .evaluate_buffer(black_box(&elf_data))
+                .expect("should evaluate");
+            black_box(result)
+        })
+    });
+
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_file_type_detection,
+    bench_buffer_sizes,
+    bench_evaluation_configs
+);
+criterion_main!(benches);
diff --git a/benches/io_bench.rs b/benches/io_bench.rs
new file mode 100644
index 00000000..81f2b9aa
--- /dev/null
+++ b/benches/io_bench.rs
@@ -0,0 +1,131 @@
+//! I/O benchmarks for libmagic-rs
+//!
+//! Benchmarks file I/O operations including:
+//! - FileBuffer creation
+//! - Memory-mapped file access
+//! - Buffer slice operations
+
+use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
+use libmagic_rs::io::FileBuffer;
+use std::hint::black_box;
+use std::io::Write;
+use tempfile::NamedTempFile;
+
+/// Create a temporary file with random data
+fn create_temp_file(size: usize) -> NamedTempFile {
+    let mut file = NamedTempFile::new().expect("create temp file");
+    let data: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
+    file.write_all(&data).expect("write data");
+    file.flush().expect("flush");
+    file
+}
+
+/// Benchmark FileBuffer creation from files of various sizes
+fn bench_file_buffer_creation(c: &mut Criterion) {
+    let mut group = c.benchmark_group("file_buffer_creation");
+
+    for size in [64, 1024, 4096, 65536, 262144, 1048576] {
+        let temp_file = create_temp_file(size);
+        let path = temp_file.path();
+
+        group.throughput(Throughput::Bytes(size as u64));
+        group.bench_with_input(BenchmarkId::new("mmap", size), &path, |b, path| {
+            b.iter(|| {
+                let buffer = FileBuffer::new(black_box(path)).expect("should create");
+                black_box(buffer)
+            })
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmark slice access patterns on FileBuffer
+fn bench_buffer_access(c: &mut Criterion) {
+    let temp_file = create_temp_file(1_048_576); // 1MB file
+    let path = temp_file.path();
+    let buffer = FileBuffer::new(path).expect("should create");
+    let slice = buffer.as_slice();
+
+    let mut group = c.benchmark_group("buffer_access");
+
+    // Sequential read
+    group.bench_function("sequential_read_1mb", |b| {
+        b.iter(|| {
+            let mut sum: u64 = 0;
+            for &byte in slice {
+                sum = sum.wrapping_add(u64::from(byte));
+            }
+            black_box(sum)
+        })
+    });
+
+    // Random access pattern (simulating rule evaluation)
+    group.bench_function("random_access_pattern", |b| {
+        let offsets = [0, 4, 16, 64, 256, 1024, 4096, 65536, 262144];
+        b.iter(|| {
+            let mut sum: u64 = 0;
+            for &offset in &offsets {
+                if offset < slice.len() {
+                    sum = sum.wrapping_add(u64::from(slice[offset]));
+                }
+            }
+            black_box(sum)
+        })
+    });
+
+    // Slice extraction (common in rule evaluation)
+    group.bench_function("slice_extraction", |b| {
+        b.iter(|| {
+            let slices = [
+                slice.get(0..4),
+                slice.get(4..8),
+                slice.get(16..32),
+                slice.get(64..128),
+            ];
+            black_box(slices)
+        })
+    });
+
+    group.finish();
+}
+
+/// Benchmark small file handling (common case)
+fn bench_small_files(c: &mut Criterion) {
+    let mut group = c.benchmark_group("small_files");
+
+    // Common small file sizes
+    for size in [8, 16, 32, 64, 128, 256, 512] {
+        let temp_file = create_temp_file(size);
+        let path = temp_file.path();
+
+        group.throughput(Throughput::Bytes(size as u64));
+        group.bench_with_input(
+            BenchmarkId::new("create_and_access", size),
+            &path,
+            |b, path| {
+                b.iter(|| {
+                    let buffer = FileBuffer::new(black_box(path)).expect("should create");
+                    let slice = buffer.as_slice();
+                    // Simulate typical magic number check
+                    let result = if slice.len() >= 4 {
+                        (slice.get(0..4).map(<[u8]>::to_vec), slice.len())
+                    } else {
+                        (None, slice.len())
+                    };
+                    black_box(result)
+                })
+            },
+        );
+    }
+
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_file_buffer_creation,
+    bench_buffer_access,
+    bench_small_files
+);
+criterion_main!(benches);
diff --git a/benches/parser_bench.rs b/benches/parser_bench.rs
new file mode 100644
index 00000000..3a19359d
--- /dev/null
+++ b/benches/parser_bench.rs
@@ -0,0 +1,108 @@
+//! Parser benchmarks for libmagic-rs
+//!
+//! Benchmarks magic file parsing performance including:
+//! - Single line parsing
+//! - Multi-line rule parsing
+//! - Built-in rules loading
+//! - Directory loading
+
+use criterion::{Criterion, Throughput, criterion_group, criterion_main};
+use libmagic_rs::MagicDatabase;
+use std::hint::black_box;
+
+/// Benchmark loading built-in magic rules
+fn bench_builtin_rules_loading(c: &mut Criterion) {
+    c.bench_function("load_builtin_rules", |b| {
+        b.iter(|| {
+            let db = MagicDatabase::with_builtin_rules().expect("should load");
+            black_box(db)
+        })
+    });
+}
+
+/// Benchmark loading magic file from disk
+fn bench_magic_file_loading(c: &mut Criterion) {
+    let magic_path = "src/builtin_rules.magic";
+
+    // Skip if file doesn't exist
+    if !std::path::Path::new(magic_path).exists() {
+        return;
+    }
+
+    let mut group = c.benchmark_group("magic_file_loading");
+
+    // Measure throughput based on file size
+    let file_size = std::fs::metadata(magic_path).map(|m| m.len()).unwrap_or(0);
+    group.throughput(Throughput::Bytes(file_size));
+
+    group.bench_function("load_builtin_magic_file", |b| {
+        b.iter(|| {
+            let db = MagicDatabase::load_from_file(black_box(magic_path)).expect("should load");
+            black_box(db)
+        })
+    });
+
+    group.finish();
+}
+
+/// Benchmark parsing individual magic rule patterns
+fn bench_rule_parsing(c: &mut Criterion) {
+    use std::io::Write;
+    use tempfile::NamedTempFile;
+
+    let mut group = c.benchmark_group("rule_parsing");
+
+    // Simple string match rule
+    let simple_rule = "0 string \"test\" Test file\n";
+    group.bench_function("parse_simple_string_rule", |b| {
+        let mut temp = NamedTempFile::new().expect("temp file");
+        temp.write_all(simple_rule.as_bytes()).expect("write temp");
+        let path = temp.path().to_owned();
+
+        b.iter(|| {
+            let db = MagicDatabase::load_from_file(black_box(&path)).expect("should load");
+            black_box(db)
+        })
+    });
+
+    // Numeric rule with endianness
+    let numeric_rule = "0 belong 0x7f454c46 ELF executable\n";
+    group.bench_function("parse_numeric_rule", |b| {
+        let mut temp = NamedTempFile::new().expect("temp file");
+        temp.write_all(numeric_rule.as_bytes()).expect("write temp");
+        let path = temp.path().to_owned();
+
+        b.iter(|| {
+            let db = MagicDatabase::load_from_file(black_box(&path)).expect("should load");
+            black_box(db)
+        })
+    });
+
+    // Complex nested rule
+    let nested_rule = r#"0 string \x7fELF ELF
+>4 byte 1 32-bit
+>4 byte 2 64-bit
+>5 byte 1 LSB
+>5 byte 2 MSB
+"#;
+    group.bench_function("parse_nested_rules", |b| {
+        let mut temp = NamedTempFile::new().expect("temp file");
+        temp.write_all(nested_rule.as_bytes()).expect("write temp");
+        let path = temp.path().to_owned();
+
+        b.iter(|| {
+            let db = MagicDatabase::load_from_file(black_box(&path)).expect("should load");
+            black_box(db)
+        })
+    });
+
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_builtin_rules_loading,
+    bench_magic_file_loading,
+    bench_rule_parsing
+);
+criterion_main!(benches);
diff --git a/docs/src/evaluator.md b/docs/src/evaluator.md
index b2aec50b..9cfb7576 100644
--- a/docs/src/evaluator.md
+++ b/docs/src/evaluator.md
@@ -229,7 +229,7 @@ let config = EvaluationConfig {
     ..Default::default()
 };
 
-let result = evaluate_rules_with_config(&rules, buffer, config)?;
+let result = evaluate_rules_with_config(&rules, buffer, &config)?;
 ```
 
 ## API Reference
@@ -247,7 +247,7 @@ pub fn evaluate_rules(
 pub fn evaluate_rules_with_config(
     rules: &[MagicRule],
     buffer: &[u8],
-    config: EvaluationConfig,
+    config: &EvaluationConfig,
 ) -> Result<Vec<MatchResult>, EvaluationError>;
 
 /// Evaluate a single rule (used internally and for testing)
diff --git a/docs/src/getting-started.md b/docs/src/getting-started.md
index 37f4264b..d7aa2925 100644
--- a/docs/src/getting-started.md
+++ b/docs/src/getting-started.md
@@ -256,7 +256,7 @@ let rule = MagicRule {
 // Evaluate against a buffer
 let buffer = &[0x7f, 0x45, 0x4c, 0x46]; // ELF magic bytes
 let config = EvaluationConfig::default();
-let matches = evaluate_rules_with_config(&[rule], buffer, config)?;
+let matches = evaluate_rules_with_config(&[rule], buffer, &config)?;
 
 assert_eq!(matches.len(), 1);
 assert_eq!(matches[0].message, "ELF magic");
diff --git a/docs/src/testing.md b/docs/src/testing.md
index 642fa8bf..d3bd3d8e 100644
--- a/docs/src/testing.md
+++ b/docs/src/testing.md
@@ -195,44 +195,55 @@ fn test_hierarchical_rules() {
 }
 ```
 
-### Property Tests (Planned)
+### Property Tests
 
-Using `proptest` for fuzz-like testing:
+Property-based testing using `proptest` is implemented in `tests/property_tests.rs`:
+
+```bash
+# Run property tests
+cargo test --test property_tests
+
+# Run with more test cases
+PROPTEST_CASES=1000 cargo test --test property_tests
+```
+
+The property tests verify:
+
+- **Serialization roundtrips**: AST types serialize and deserialize correctly
+- **Evaluation safety**: Evaluation never panics on arbitrary input
+- **Configuration validation**: Invalid configurations are rejected
+- **Known pattern detection**: ELF, ZIP, PDF patterns are correctly detected
+
+Example property test:
 
 ```rust
 use proptest::prelude::*;
 
 proptest! {
     #[test]
-    fn test_number_parsing_roundtrip(n in any::<i64>()) {
-        let s = n.to_string();
-        let (remaining, parsed) = parse_number(&s).unwrap();
-        assert_eq!(remaining, "");
-        assert_eq!(parsed, n);
-    }
-
-    #[test]
-    fn test_offset_parsing_never_panics(s in ".*") {
-        // Should never panic, even on invalid input
-        let _ = parse_offset(&s);
+    fn prop_evaluation_never_panics(buffer in prop::collection::vec(any::<u8>(), 0..1024)) {
+        let db = MagicDatabase::with_builtin_rules().expect("should load");
+        // Should not panic regardless of buffer contents
+        let result = db.evaluate_buffer(&buffer);
+        prop_assert!(result.is_ok());
     }
 }
 ```
 
-### Compatibility Tests (Planned)
+### Compatibility Tests
 
-Validate against GNU `file` command:
+Compatibility tests validate against GNU `file` command using the canonical test suite from the file project. Test data is located in `third_party/tests/`.
 
-```rust
-#[test]
-fn test_elf_detection_compatibility() {
-    let gnu_result = run_gnu_file("third_party/tests/elf64.testfile");
-    let our_result = evaluate_file("third_party/tests/elf64.testfile");
+```bash
+# Run compatibility tests (requires test files)
+cargo test test_compatibility_with_original_libmagic -- --ignored
 
-    assert_eq!(extract_file_type(&gnu_result), our_result.description);
-}
+# Or use the just recipe
+just test-compatibility
 ```
 
+The compatibility workflow runs automatically in CI on pushes to main/develop.
+
 ## Test Utilities and Helpers
 
 ### Common Test Patterns
@@ -507,21 +518,44 @@ cargo insta accept
 3. **Test Cross-Platform**: Verify tests pass on both Windows and Unix
 4. **Keep Snapshots Small**: Use focused tests for specific CLI features
 
-## Future Testing Plans
+## Benchmarks
 
-### Integration Testing
+Performance benchmarks are implemented using Criterion in the `benches/` directory:
 
-- **Complete Workflow Tests**: End-to-end magic file parsing and evaluation
-- **File Format Tests**: Comprehensive testing against known file formats
-- **Error Recovery Tests**: Graceful handling of malformed inputs
+```bash
+# Run all benchmarks
+cargo bench
+
+# Run specific benchmark group
+cargo bench parser
+cargo bench evaluation
+cargo bench io
+
+# Generate HTML benchmark report
+cargo bench -- --noplot
+```
 
-### Compatibility Testing
+### Available Benchmarks
 
-- **GNU file Compatibility**: Validate results against original implementation
-- **Magic File Compatibility**: Test with real-world magic databases
-- **Performance Parity**: Ensure comparable performance to libmagic
+| Benchmark | Description |
+|-----------|-------------|
+| `parser_bench` | Magic file parsing performance |
+| `evaluation_bench` | Rule evaluation against various file types |
+| `io_bench` | Memory-mapped I/O operations |
+
+### Benchmark CI
+
+Benchmarks run automatically:
+
+- **Weekly**: Scheduled runs on Sunday at 3 AM UTC
+- **On PR**: When performance-critical code changes (src/evaluator, src/parser, src/io, benches)
+- **Manual**: Via workflow_dispatch
+
+The CI compares PR benchmarks against the main branch and reports regressions.
+
+## Future Testing Plans
 
-### Fuzzing Integration
+### Fuzzing Integration (Phase 2)
 
 - **Parser Fuzzing**: Use cargo-fuzz for parser robustness
 - **Evaluator Fuzzing**: Test evaluation engine with malformed files
diff --git a/justfile b/justfile
index d251ee81..ec45e614 100644
--- a/justfile
+++ b/justfile
@@ -1,8 +1,8 @@
 # Cross-platform justfile using OS annotations
 # Windows uses PowerShell, Unix uses bash
 
-set shell := ["bash", "-cu"]
-set windows-shell := ["powershell", "-NoProfile", "-Command"]
+set windows-shell := ["powershell.exe", "-c"]
+set shell := ["bash", "-c"]
 set dotenv-load := true
 set ignore-comments := true
 
@@ -90,7 +90,7 @@ lint: lint-rust lint-actions lint-docs lint-justfile
 
 # Individual lint recipes
 lint-actions:
-    @{{ mise_exec }} actionlint .github/workflows/audit.yml .github/workflows/ci.yml .github/workflows/codeql.yml .github/workflows/compatibility.yml .github/workflows/copilot-setup-steps.yml .github/workflows/docs.yml .github/workflows/release.yml .github/workflows/security.yml
+    @{{ mise_exec }} actionlint .github/workflows/audit.yml .github/workflows/benchmarks.yml .github/workflows/ci.yml .github/workflows/codeql.yml .github/workflows/compatibility.yml .github/workflows/copilot-setup-steps.yml .github/workflows/docs.yml .github/workflows/release.yml .github/workflows/security.yml
 
 lint-docs:
     @{{ mise_exec }} markdownlint-cli2 docs/**/*.md README.md
@@ -197,6 +197,30 @@ coverage:
 coverage-check:
     @{{ mise_exec }} just _coverage --fail-under-lines 9.7
 
+# Generate HTML coverage report for local viewing
+[unix]
+coverage-report:
+    #!/usr/bin/env bash
+    set -euo pipefail
+    rm -rf target/llvm-cov-target
+    RUSTFLAGS="--cfg coverage" {{ mise_exec }} cargo llvm-cov --workspace --html --open
+
+[windows]
+coverage-report:
+    $env:RUSTFLAGS = "--cfg coverage"; {{ mise_exec }} cargo llvm-cov --workspace --html --open
+
+# Show coverage summary by file
+[unix]
+coverage-summary:
+    #!/usr/bin/env bash
+    set -euo pipefail
+    rm -rf target/llvm-cov-target
+    RUSTFLAGS="--cfg coverage" {{ mise_exec }} cargo llvm-cov --workspace
+
+[windows]
+coverage-summary:
+    $env:RUSTFLAGS = "--cfg coverage"; {{ mise_exec }} cargo llvm-cov --workspace
+
 # Full local CI parity check
 ci-check: pre-commit-run fmt-check lint-rust lint-rust-min test-ci build-release audit coverage-check dist-plan
 
diff --git a/mise.toml b/mise.toml
index 6b351feb..e082f365 100644
--- a/mise.toml
+++ b/mise.toml
@@ -23,6 +23,8 @@ rust                        = { version = "1.91.0", components = "llvm-tools,car
 "cargo:cargo-cyclonedx"     = "0.5.7"
 "pipx:mdformat"             = { version = "0.7.21", uvx_args = "--with mdformat-gfm --with mdformat-frontmatter --with mdformat-footnote --with mdformat-simple-breaks --with mdformat-gfm-alerts --with mdformat-toc --with mdformat-wikilink --with mdformat-tables" }
 prettier                    = "3.8.1"
-actionlint = "1.7.10"
-lychee = "0.22.0"
-markdownlint-cli2 = "0.20.0"
+actionlint                  = "1.7.10"
+lychee                      = "0.22.0"
+markdownlint-cli2           = "0.20.0"
+pre-commit                  = "latest"
+"cargo:cargo-machete" = "0.9.1"
diff --git a/src/error.rs b/src/error.rs
index 180ee1a8..5d1bdee8 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -27,6 +27,41 @@ pub enum LibmagicError {
         /// The timeout duration in milliseconds
         timeout_ms: u64,
     },
+
+    /// Invalid configuration parameter.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use libmagic_rs::LibmagicError;
+    ///
+    /// let error = LibmagicError::ConfigError {
+    ///     reason: "invalid timeout value".to_string(),
+    /// };
+    /// assert!(matches!(error, LibmagicError::ConfigError { .. }));
+    /// ```
+    #[error("Configuration error: {reason}")]
+    ConfigError {
+        /// Description of the configuration issue
+        reason: String,
+    },
+
+    /// File I/O error with structured context (path, operation).
+    ///
+    /// Unlike `IoError` which wraps a generic `std::io::Error`, this variant
+    /// preserves the structured error information from file I/O operations
+    /// (e.g., file path, operation type).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use libmagic_rs::LibmagicError;
+    ///
+    /// let error = LibmagicError::FileError("failed to read /path/to/file".to_string());
+    /// assert!(matches!(error, LibmagicError::FileError(_)));
+    /// ```
+    #[error("File error: {0}")]
+    FileError(String),
 }
 
 /// Errors that can occur during magic file parsing.
diff --git a/src/evaluator/mod.rs b/src/evaluator/mod.rs
index 1f1c2ccd..56fd9f67 100644
--- a/src/evaluator/mod.rs
+++ b/src/evaluator/mod.rs
@@ -6,9 +6,6 @@
 use crate::parser::ast::MagicRule;
 use crate::{EvaluationConfig, LibmagicError};
 use serde::{Deserialize, Serialize};
-use std::sync::{Arc, mpsc};
-use std::thread;
-use std::time::Duration;
 
 #[cfg(test)]
 use crate::parser::ast::{Endianness, OffsetSpec, Operator, TypeKind, Value};
@@ -260,8 +257,9 @@ impl MatchResult {
 ///
 /// # Returns
 ///
-/// Returns `Ok(true)` if the rule matches, `Ok(false)` if it doesn't match,
-/// or `Err(LibmagicError)` if evaluation fails due to buffer access issues or other errors.
+/// Returns `Ok(Some((offset, value)))` if the rule matches (with the resolved offset and
+/// read value), `Ok(None)` if it doesn't match, or `Err(LibmagicError)` if evaluation
+/// fails due to buffer access issues or other errors.
 ///
 /// # Examples
 ///
@@ -283,18 +281,21 @@ impl MatchResult {
 ///
 /// let elf_buffer = &[0x7f, 0x45, 0x4c, 0x46]; // ELF magic bytes
 /// let result = evaluate_single_rule(&rule, elf_buffer).unwrap();
-/// assert!(result); // Should match
+/// assert!(result.is_some()); // Should match
 ///
 /// let non_elf_buffer = &[0x50, 0x4b, 0x03, 0x04]; // ZIP magic bytes
 /// let result = evaluate_single_rule(&rule, non_elf_buffer).unwrap();
-/// assert!(!result); // Should not match
+/// assert!(result.is_none()); // Should not match
 /// ```
 ///
 /// # Errors
 ///
 /// * `LibmagicError::EvaluationError` - If offset resolution fails, buffer access is out of bounds,
 ///   or type interpretation fails
-pub fn evaluate_single_rule(rule: &MagicRule, buffer: &[u8]) -> Result<bool, LibmagicError> {
+pub fn evaluate_single_rule(
+    rule: &MagicRule,
+    buffer: &[u8],
+) -> Result<Option<(usize, crate::parser::ast::Value)>, LibmagicError> {
     // Step 1: Resolve the offset specification to an absolute position
     let absolute_offset = offset::resolve_offset(&rule.offset, buffer)?;
 
@@ -303,29 +304,10 @@ pub fn evaluate_single_rule(rule: &MagicRule, buffer: &[u8]) -> Result<bool, Lib
         .map_err(|e| LibmagicError::EvaluationError(e.into()))?;
 
     // Step 3: Apply the operator to compare the read value with the expected value
-    let matches = operators::apply_operator(&rule.op, &read_value, &rule.value);
-
-    Ok(matches)
-}
-
-/// Helper function to determine if an error is a buffer overrun
-///
-/// Buffer overruns are expected when evaluating rules against short buffers, and should
-/// be silently skipped rather than logged as warnings.
-fn is_buffer_overrun_error(error: &LibmagicError) -> bool {
-    match error {
-        LibmagicError::EvaluationError(eval_error) => match eval_error {
-            crate::error::EvaluationError::BufferOverrun { .. } => true,
-            crate::error::EvaluationError::TypeReadError(type_error) => {
-                matches!(
-                    type_error,
-                    crate::evaluator::types::TypeReadError::BufferOverrun { .. }
-                )
-            }
-            _ => false,
-        },
-        _ => false,
-    }
+    Ok(
+        operators::apply_operator(&rule.op, &read_value, &rule.value)
+            .then_some((absolute_offset, read_value)),
+    )
 }
 
 /// Evaluate a list of magic rules against a file buffer with hierarchical processing
@@ -407,65 +389,31 @@ pub fn evaluate_rules(
     buffer: &[u8],
     context: &mut EvaluationContext,
 ) -> Result<Vec<MatchResult>, LibmagicError> {
-    let mut matches = Vec::with_capacity(rules.len());
+    let mut matches = Vec::with_capacity(8);
     let start_time = std::time::Instant::now();
+    let mut rule_count = 0u32;
 
     for rule in rules {
-        // Check timeout if configured
-        if let Some(timeout_ms) = context.timeout_ms() {
-            if start_time.elapsed().as_millis() > u128::from(timeout_ms) {
-                return Err(LibmagicError::Timeout { timeout_ms });
+        // Check timeout periodically (every 16 rules) to reduce syscall overhead
+        rule_count = rule_count.wrapping_add(1);
+        if rule_count.trailing_zeros() >= 4 {
+            if let Some(timeout_ms) = context.timeout_ms() {
+                if start_time.elapsed().as_millis() > u128::from(timeout_ms) {
+                    return Err(LibmagicError::Timeout { timeout_ms });
+                }
             }
         }
 
         // Evaluate the current rule with graceful error handling
-        let rule_matches = match evaluate_single_rule(rule, buffer) {
-            Ok(matches) => matches,
-            Err(e) => {
-                // Skip buffer overruns silently (expected for short buffers)
-                // Log other errors and continue with next rule (graceful degradation)
-                if !is_buffer_overrun_error(&e) {
-                    eprintln!(
-                        "Warning: Skipping rule '{}' due to error: {}",
-                        rule.message, e
-                    );
-                }
+        let match_data = match evaluate_single_rule(rule, buffer) {
+            Ok(data) => data,
+            Err(_e) => {
+                // Skip rules with evaluation errors (graceful degradation)
                 continue;
             }
         };
 
-        if rule_matches {
-            // Create match result for this rule with graceful error handling
-            let absolute_offset = match offset::resolve_offset(&rule.offset, buffer) {
-                Ok(offset) => offset,
-                Err(e) => {
-                    // Skip buffer overruns silently (expected for short buffers)
-                    if !is_buffer_overrun_error(&e) {
-                        eprintln!(
-                            "Warning: Skipping rule '{}' due to offset resolution error: {}",
-                            rule.message, e
-                        );
-                    }
-                    continue;
-                }
-            };
-
-            let read_value = match types::read_typed_value(buffer, absolute_offset, &rule.typ) {
-                Ok(value) => value,
-                Err(e) => {
-                    // Skip buffer overruns silently (expected for short buffers)
-                    let eval_error: crate::error::EvaluationError = e.into();
-                    let lib_error: LibmagicError = eval_error.into();
-                    if !is_buffer_overrun_error(&lib_error) {
-                        eprintln!(
-                            "Warning: Skipping rule '{}' due to type reading error: {}",
-                            rule.message, lib_error
-                        );
-                    }
-                    continue;
-                }
-            };
-
+        if let Some((absolute_offset, read_value)) = match_data {
             let match_result = MatchResult {
                 message: rule.message.clone(),
                 offset: absolute_offset,
@@ -503,15 +451,8 @@ pub fn evaluate_rules(
                             },
                         ));
                     }
-                    Err(e) => {
-                        // Skip buffer overruns silently (expected for short buffers)
-                        // Other errors in child evaluation are logged but don't stop parent evaluation
-                        if !is_buffer_overrun_error(&e) {
-                            eprintln!(
-                                "Warning: Error evaluating children of rule '{}': {}",
-                                rule.message, e
-                            );
-                        }
+                    Err(_e) => {
+                        // Non-critical child evaluation errors are skipped (graceful degradation)
                     }
                 }
 
@@ -567,7 +508,7 @@ pub fn evaluate_rules(
 /// let buffer = &[0x7f, 0x45, 0x4c, 0x46];
 /// let config = EvaluationConfig::default();
 ///
-/// let matches = evaluate_rules_with_config(&rules, buffer, config).unwrap();
+/// let matches = evaluate_rules_with_config(&rules, buffer, &config).unwrap();
 /// assert_eq!(matches.len(), 1);
 /// assert_eq!(matches[0].message, "ELF magic");
 /// ```
@@ -579,52 +520,10 @@ pub fn evaluate_rules(
 pub fn evaluate_rules_with_config(
     rules: &[MagicRule],
     buffer: &[u8],
-    config: EvaluationConfig,
+    config: &EvaluationConfig,
 ) -> Result<Vec<MatchResult>, LibmagicError> {
-    // If no timeout is configured, evaluate normally
-    let Some(timeout_ms) = config.timeout_ms else {
-        let mut context = EvaluationContext::new(config);
-        return evaluate_rules(rules, buffer, &mut context);
-    };
-
-    // With timeout: spawn evaluation in a thread and wait with timeout
-    // Use Arc to share data without cloning the potentially large rules/buffer
-    let rules_arc = Arc::new(rules.to_vec());
-    let buffer_arc = Arc::new(buffer.to_vec());
-    let config_clone = config.clone();
-
-    let (tx, rx) = mpsc::channel();
-
-    // Clone Arcs for the thread (cheap reference count increment)
-    let rules_thread = Arc::clone(&rules_arc);
-    let buffer_thread = Arc::clone(&buffer_arc);
-
-    // Spawn evaluation in separate thread
-    // Note: The thread will run to completion even if we return early on timeout.
-    // True cancellation would require cooperative cancellation (checking a flag
-    // periodically during evaluation) or running in a separate process.
-    // For most use cases, the thread will complete quickly or the process will
-    // exit, cleaning up the thread automatically.
-    thread::spawn(move || {
-        let mut context = EvaluationContext::new(config_clone);
-        let result = evaluate_rules(&rules_thread, &buffer_thread, &mut context);
-        // Send result; ignore error if receiver was dropped (timeout occurred)
-        let _ = tx.send(result);
-    });
-
-    // Wait for result with timeout
-    match rx.recv_timeout(Duration::from_millis(timeout_ms)) {
-        Ok(result) => result,
-        Err(mpsc::RecvTimeoutError::Timeout) => Err(LibmagicError::Timeout { timeout_ms }),
-        Err(mpsc::RecvTimeoutError::Disconnected) => {
-            // Thread panicked or dropped sender
-            Err(LibmagicError::EvaluationError(
-                crate::error::EvaluationError::internal_error(
-                    "Evaluation thread terminated unexpectedly",
-                ),
-            ))
-        }
-    }
+    let mut context = EvaluationContext::new(config.clone());
+    evaluate_rules(rules, buffer, &mut context)
 }
 
 #[cfg(test)]
@@ -647,7 +546,7 @@ mod tests {
 
         let buffer = &[0x7f, 0x45, 0x4c, 0x46]; // ELF magic bytes
         let result = evaluate_single_rule(&rule, buffer).unwrap();
-        assert!(result);
+        assert!(result.is_some());
     }
 
     #[test]
@@ -665,7 +564,7 @@ mod tests {
 
         let buffer = &[0x50, 0x4b, 0x03, 0x04]; // ZIP magic bytes
         let result = evaluate_single_rule(&rule, buffer).unwrap();
-        assert!(!result);
+        assert!(result.is_none());
     }
 
     #[test]
@@ -683,7 +582,7 @@ mod tests {
 
         let buffer = &[0x7f, 0x45, 0x4c, 0x46];
         let result = evaluate_single_rule(&rule, buffer).unwrap();
-        assert!(result); // 0x7f != 0x00
+        assert!(result.is_some()); // 0x7f != 0x00
     }
 
     #[test]
@@ -701,7 +600,7 @@ mod tests {
 
         let buffer = &[0x7f, 0x45, 0x4c, 0x46];
         let result = evaluate_single_rule(&rule, buffer).unwrap();
-        assert!(!result); // 0x7f == 0x7f, so NotEqual is false
+        assert!(result.is_none()); // 0x7f == 0x7f, so NotEqual is false
     }
 
     #[test]
@@ -719,7 +618,7 @@ mod tests {
 
         let buffer = &[0xff, 0x45, 0x4c, 0x46]; // 0xff has high bit set
         let result = evaluate_single_rule(&rule, buffer).unwrap();
-        assert!(result); // 0xff & 0x80 = 0x80 (non-zero)
+        assert!(result.is_some()); // 0xff & 0x80 = 0x80 (non-zero)
     }
 
     #[test]
@@ -737,7 +636,7 @@ mod tests {
 
         let buffer = &[0x7f, 0x45, 0x4c, 0x46]; // 0x7f has high bit clear
         let result = evaluate_single_rule(&rule, buffer).unwrap();
-        assert!(!result); // 0x7f & 0x80 = 0x00 (zero)
+        assert!(result.is_none()); // 0x7f & 0x80 = 0x00 (zero)
     }
 
     #[test]
@@ -758,7 +657,7 @@ mod tests {
 
         let buffer = &[0x34, 0x12, 0x56, 0x78]; // 0x1234 in little-endian
         let result = evaluate_single_rule(&rule, buffer).unwrap();
-        assert!(result);
+        assert!(result.is_some());
     }
 
     #[test]
@@ -779,7 +678,7 @@ mod tests {
 
         let buffer = &[0x12, 0x34, 0x56, 0x78]; // 0x1234 in big-endian
         let result = evaluate_single_rule(&rule, buffer).unwrap();
-        assert!(result);
+        assert!(result.is_some());
     }
 
     #[test]
@@ -800,7 +699,7 @@ mod tests {
 
         let buffer = &[0xff, 0x7f, 0x00, 0x00]; // 0x7fff in little-endian
         let result = evaluate_single_rule(&rule, buffer).unwrap();
-        assert!(result);
+        assert!(result.is_some());
     }
 
     #[test]
@@ -821,7 +720,7 @@ mod tests {
 
         let buffer = &[0xff, 0xff, 0x00, 0x00]; // 0xffff in little-endian
         let result = evaluate_single_rule(&rule, buffer).unwrap();
-        assert!(result);
+        assert!(result.is_some());
     }
 
     #[test]
@@ -842,7 +741,7 @@ mod tests {
 
         let buffer = &[0x78, 0x56, 0x34, 0x12, 0x00]; // 0x12345678 in little-endian
         let result = evaluate_single_rule(&rule, buffer).unwrap();
-        assert!(result);
+        assert!(result.is_some());
     }
 
     #[test]
@@ -863,7 +762,7 @@ mod tests {
 
         let buffer = &[0x12, 0x34, 0x56, 0x78, 0x00]; // 0x12345678 in big-endian
         let result = evaluate_single_rule(&rule, buffer).unwrap();
-        assert!(result);
+        assert!(result.is_some());
     }
 
     #[test]
@@ -884,7 +783,7 @@ mod tests {
 
         let buffer = &[0xff, 0xff, 0xff, 0x7f, 0x00]; // 0x7fffffff in little-endian
         let result = evaluate_single_rule(&rule, buffer).unwrap();
-        assert!(result);
+        assert!(result.is_some());
     }
 
     #[test]
@@ -905,7 +804,7 @@ mod tests {
 
         let buffer = &[0xff, 0xff, 0xff, 0xff, 0x00]; // 0xffffffff in little-endian
         let result = evaluate_single_rule(&rule, buffer).unwrap();
-        assert!(result);
+        assert!(result.is_some());
     }
 
     #[test]
@@ -923,7 +822,7 @@ mod tests {
 
         let buffer = &[0x7f, 0x45, 0x4c, 0x46]; // ELF magic bytes
         let result = evaluate_single_rule(&rule, buffer).unwrap();
-        assert!(result); // buffer[2] == 0x4c
+        assert!(result.is_some()); // buffer[2] == 0x4c
     }
 
     #[test]
@@ -941,7 +840,7 @@ mod tests {
 
         let buffer = &[0x7f, 0x45, 0x4c, 0x46]; // ELF magic bytes
         let result = evaluate_single_rule(&rule, buffer).unwrap();
-        assert!(result); // Last byte is 0x46
+        assert!(result.is_some()); // Last byte is 0x46
     }
 
     #[test]
@@ -959,7 +858,7 @@ mod tests {
 
         let buffer = &[0x7f, 0x45, 0x4c, 0x46]; // ELF magic bytes
         let result = evaluate_single_rule(&rule, buffer).unwrap();
-        assert!(result); // buffer[2] == 0x4c (second to last)
+        assert!(result.is_some()); // buffer[2] == 0x4c (second to last)
     }
 
     #[test]
@@ -1090,7 +989,7 @@ mod tests {
         let result = evaluate_single_rule(&rule, buffer);
         assert!(result.is_ok());
         let matches = result.unwrap();
-        assert!(matches); // Should match
+        assert!(matches.is_some()); // Should match
 
         // Test non-matching string
         let rule_no_match = MagicRule {
@@ -1107,13 +1006,13 @@ mod tests {
         let result = evaluate_single_rule(&rule_no_match, buffer);
         assert!(result.is_ok());
         let matches = result.unwrap();
-        assert!(!matches); // Should not match
+        assert!(matches.is_none()); // Should not match
     }
 }
 
 #[test]
 fn test_evaluate_single_rule_cross_type_comparison() {
-    // Test that cross-type comparisons work correctly (should not match)
+    // Test that cross-type integer comparisons use coercion (Uint(42) == Int(42))
     let rule = MagicRule {
         offset: OffsetSpec::Absolute(0),
         typ: TypeKind::Byte,
@@ -1127,7 +1026,7 @@ fn test_evaluate_single_rule_cross_type_comparison() {
 
     let buffer = &[42]; // Byte value 42
     let result = evaluate_single_rule(&rule, buffer).unwrap();
-    assert!(!result); // Should not match due to type mismatch (Uint vs Int)
+    assert!(result.is_some()); // Should match via cross-type integer coercion
 }
 
 #[test]
@@ -1148,7 +1047,7 @@ fn test_evaluate_single_rule_bitwise_and_with_shorts() {
 
     let buffer = &[0x34, 0x12]; // 0x1234 in little-endian
     let result = evaluate_single_rule(&rule, buffer).unwrap();
-    assert!(result); // 0x1234 & 0xff00 = 0x1200 (non-zero)
+    assert!(result.is_some()); // 0x1234 & 0xff00 = 0x1200 (non-zero)
 }
 
 #[test]
@@ -1169,7 +1068,7 @@ fn test_evaluate_single_rule_bitwise_and_with_longs() {
 
     let buffer = &[0x12, 0x34, 0x56, 0x78]; // 0x12345678 in big-endian
     let result = evaluate_single_rule(&rule, buffer).unwrap();
-    assert!(result); // 0x12345678 & 0xffff0000 = 0x12340000 (non-zero)
+    assert!(result.is_some()); // 0x12345678 & 0xffff0000 = 0x12340000 (non-zero)
 }
 
 #[test]
@@ -1191,11 +1090,11 @@ fn test_evaluate_single_rule_comprehensive_elf_check() {
 
     let elf_buffer = &[0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01]; // ELF64 header start
     let result = evaluate_single_rule(&rule, elf_buffer).unwrap();
-    assert!(result);
+    assert!(result.is_some());
 
     let non_elf_buffer = &[0x50, 0x4b, 0x03, 0x04, 0x14, 0x00]; // ZIP header
     let result = evaluate_single_rule(&rule, non_elf_buffer).unwrap();
-    assert!(!result);
+    assert!(result.is_none());
 }
 
 #[test]
@@ -1216,7 +1115,7 @@ fn test_evaluate_single_rule_native_endianness() {
 
     let buffer = &[0x01, 0x02]; // Non-zero bytes
     let result = evaluate_single_rule(&rule, buffer).unwrap();
-    assert!(result); // Should be non-zero regardless of endianness
+    assert!(result.is_some()); // Should be non-zero regardless of endianness
 }
 
 #[test]
@@ -1234,7 +1133,7 @@ fn test_evaluate_single_rule_all_operators() {
         level: 0,
         strength_modifier: None,
     };
-    assert!(evaluate_single_rule(&equal_rule, buffer).unwrap());
+    assert!(evaluate_single_rule(&equal_rule, buffer).unwrap().is_some());
 
     // Test NotEqual operator
     let not_equal_rule = MagicRule {
@@ -1247,7 +1146,11 @@ fn test_evaluate_single_rule_all_operators() {
         level: 0,
         strength_modifier: None,
     };
-    assert!(evaluate_single_rule(&not_equal_rule, buffer).unwrap()); // 0x00 != 0x42
+    assert!(
+        evaluate_single_rule(&not_equal_rule, buffer)
+            .unwrap()
+            .is_some()
+    ); // 0x00 != 0x42
 
     // Test BitwiseAnd operator
     let bitwise_and_rule = MagicRule {
@@ -1260,7 +1163,11 @@ fn test_evaluate_single_rule_all_operators() {
         level: 0,
         strength_modifier: None,
     };
-    assert!(evaluate_single_rule(&bitwise_and_rule, buffer).unwrap()); // 0x80 & 0x80 = 0x80
+    assert!(
+        evaluate_single_rule(&bitwise_and_rule, buffer)
+            .unwrap()
+            .is_some()
+    ); // 0x80 & 0x80 = 0x80
 }
 
 #[test]
@@ -1282,7 +1189,7 @@ fn test_evaluate_single_rule_edge_case_values() {
 
     let max_buffer = &[0xff, 0xff, 0xff, 0xff];
     let result = evaluate_single_rule(&max_uint_rule, max_buffer).unwrap();
-    assert!(result);
+    assert!(result.is_some());
 
     // Test with minimum signed value
     let min_int_rule = MagicRule {
@@ -1301,7 +1208,7 @@ fn test_evaluate_single_rule_edge_case_values() {
 
     let min_buffer = &[0x00, 0x00, 0x00, 0x80]; // 0x80000000 in little-endian
     let result = evaluate_single_rule(&min_int_rule, min_buffer).unwrap();
-    assert!(result);
+    assert!(result.is_some());
 }
 
 #[test]
@@ -1320,7 +1227,7 @@ fn test_evaluate_single_rule_various_buffer_sizes() {
 
     let single_buffer = &[0xaa];
     let result = evaluate_single_rule(&single_byte_rule, single_buffer).unwrap();
-    assert!(result);
+    assert!(result.is_some());
 
     // Test with large buffer
     #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
@@ -1337,7 +1244,7 @@ fn test_evaluate_single_rule_various_buffer_sizes() {
     };
 
     let result = evaluate_single_rule(&large_rule, &large_buffer).unwrap();
-    assert!(result);
+    assert!(result.is_some());
 }
 
 // Tests for EvaluationContext
@@ -2116,7 +2023,7 @@ fn test_evaluate_rules_with_config_convenience() {
     let buffer = &[0x7f, 0x45, 0x4c, 0x46];
     let config = EvaluationConfig::default();
 
-    let matches = evaluate_rules_with_config(&rules, buffer, config).unwrap();
+    let matches = evaluate_rules_with_config(&rules, buffer, &config).unwrap();
     assert_eq!(matches.len(), 1);
     assert_eq!(matches[0].message, "ELF magic");
 }
diff --git a/src/evaluator/operators.rs b/src/evaluator/operators.rs
index 93b5a8c6..e307dcc1 100644
--- a/src/evaluator/operators.rs
+++ b/src/evaluator/operators.rs
@@ -32,8 +32,8 @@ use crate::parser::ast::{Operator, Value};
 /// // Same type, different value
 /// assert!(!apply_equal(&Value::Uint(42), &Value::Uint(24)));
 ///
-/// // Different types, same numeric value
-/// assert!(!apply_equal(&Value::Uint(42), &Value::Int(42)));
+/// // Cross-type integer coercion
+/// assert!(apply_equal(&Value::Uint(42), &Value::Int(42)));
 ///
 /// // String comparison
 /// assert!(apply_equal(
@@ -56,7 +56,11 @@ pub fn apply_equal(left: &Value, right: &Value) -> bool {
         // String comparison
         (Value::String(a), Value::String(b)) => a == b,
 
-        // Different types are never equal
+        // Cross-type integer coercion (safe via i128 to avoid overflow)
+        (Value::Uint(a), Value::Int(b)) => i128::from(*a) == i128::from(*b),
+        (Value::Int(a), Value::Uint(b)) => i128::from(*a) == i128::from(*b),
+
+        // Different non-integer types are never equal
         _ => false,
     }
 }
@@ -88,8 +92,8 @@ pub fn apply_equal(left: &Value, right: &Value) -> bool {
 /// // Same type, same value
 /// assert!(!apply_not_equal(&Value::Uint(42), &Value::Uint(42)));
 ///
-/// // Different types (always not equal)
-/// assert!(apply_not_equal(&Value::Uint(42), &Value::Int(42)));
+/// // Cross-type integers with same numeric value (equal via coercion)
+/// assert!(!apply_not_equal(&Value::Uint(42), &Value::Int(42)));
 ///
 /// // String comparison
 /// assert!(apply_not_equal(
@@ -201,8 +205,8 @@ pub fn apply_bitwise_and(left: &Value, right: &Value) -> bool {
 ///     &Value::Uint(0x0F)
 /// ));
 ///
-/// // Cross-type comparisons
-/// assert!(!apply_operator(
+/// // Cross-type integer coercion
+/// assert!(apply_operator(
 ///     &Operator::Equal,
 ///     &Value::Uint(42),
 ///     &Value::Int(42)
@@ -219,13 +223,9 @@ pub fn apply_operator(operator: &Operator, left: &Value, right: &Value) -> bool
             let masked_left = match left {
                 Value::Uint(val) => Value::Uint(val & mask),
                 Value::Int(val) => {
-                    // Convert u64 mask to i64 safely
-                    let i64_mask = if i64::try_from(*mask).is_ok() {
-                        i64::try_from(*mask).unwrap_or(0)
-                    } else {
-                        // For values > i64::MAX, use bitwise representation
-                        i64::from_ne_bytes(mask.to_ne_bytes())
-                    };
+                    // Convert u64 mask to i64, using bitwise representation for values > i64::MAX
+                    let i64_mask = i64::try_from(*mask)
+                        .unwrap_or_else(|_| i64::from_ne_bytes(mask.to_ne_bytes()));
                     Value::Int(val & i64_mask)
                 }
                 _ => return false, // Can't apply bitwise operations to non-numeric values
@@ -398,12 +398,23 @@ mod tests {
     // Cross-type comparison tests (should all return false)
     #[test]
     fn test_apply_equal_uint_vs_int() {
+        // Same numeric value across types should match
         let left = Value::Uint(42);
         let right = Value::Int(42);
-        assert!(!apply_equal(&left, &right));
+        assert!(apply_equal(&left, &right));
 
         let left = Value::Uint(0);
         let right = Value::Int(0);
+        assert!(apply_equal(&left, &right));
+
+        // Negative Int cannot equal Uint
+        let left = Value::Uint(42);
+        let right = Value::Int(-42);
+        assert!(!apply_equal(&left, &right));
+
+        // Large Uint that doesn't fit in i64 cannot equal Int
+        let left = Value::Uint(u64::MAX);
+        let right = Value::Int(-1);
         assert!(!apply_equal(&left, &right));
     }
 
@@ -451,14 +462,23 @@ mod tests {
             Value::String("42".to_string()),
         ];
 
-        // Test that no cross-type comparisons return true
+        // Test cross-type comparisons
         for (i, left) in values.iter().enumerate() {
             for (j, right) in values.iter().enumerate() {
                 if i != j {
-                    assert!(
-                        !apply_equal(left, right),
-                        "Cross-type comparison should be false: {left:?} vs {right:?}"
-                    );
+                    let result = apply_equal(left, right);
+                    // Uint(42) and Int(42) should be equal (cross-type coercion)
+                    if (i <= 1) && (j <= 1) {
+                        assert!(
+                            result,
+                            "Integer cross-type comparison should be true: {left:?} vs {right:?}"
+                        );
+                    } else {
+                        assert!(
+                            !result,
+                            "Non-integer cross-type comparison should be false: {left:?} vs {right:?}"
+                        );
+                    }
                 }
             }
         }
@@ -528,6 +548,12 @@ mod tests {
         assert!(apply_equal(&max_signed, &max_signed));
         assert!(apply_equal(&min_int, &min_int));
 
+        // Cross-type edge cases
+        // u64::MAX != -1 in i64 (different mathematical values)
+        assert!(!apply_equal(&max_unsigned, &Value::Int(-1)));
+        // i64::MAX can be represented as u64, so should match
+        assert!(apply_equal(&Value::Uint(i64::MAX as u64), &max_signed));
+
         // Test with empty collections
         let empty_bytes = Value::Bytes(vec![]);
         let empty_string = Value::String(String::new());
@@ -709,12 +735,18 @@ mod tests {
     // Cross-type comparison tests for not_equal (should all return true)
     #[test]
     fn test_apply_not_equal_uint_vs_int() {
+        // Same numeric value across types should be equal (not not-equal)
         let left = Value::Uint(42);
         let right = Value::Int(42);
-        assert!(apply_not_equal(&left, &right));
+        assert!(!apply_not_equal(&left, &right));
 
         let left = Value::Uint(0);
         let right = Value::Int(0);
+        assert!(!apply_not_equal(&left, &right));
+
+        // Different numeric values should be not-equal
+        let left = Value::Uint(42);
+        let right = Value::Int(-42);
         assert!(apply_not_equal(&left, &right));
     }
 
@@ -762,14 +794,23 @@ mod tests {
             Value::String("42".to_string()),
         ];
 
-        // Test that all cross-type comparisons return true for not_equal
+        // Test cross-type comparisons for not_equal
         for (i, left) in values.iter().enumerate() {
             for (j, right) in values.iter().enumerate() {
                 if i != j {
-                    assert!(
-                        apply_not_equal(left, right),
-                        "Cross-type comparison should be true for not_equal: {left:?} vs {right:?}"
-                    );
+                    let result = apply_not_equal(left, right);
+                    // Uint(42) and Int(42) should be equal, so not_equal is false
+                    if (i <= 1) && (j <= 1) {
+                        assert!(
+                            !result,
+                            "Integer cross-type not_equal should be false: {left:?} vs {right:?}"
+                        );
+                    } else {
+                        assert!(
+                            result,
+                            "Non-integer cross-type not_equal should be true: {left:?} vs {right:?}"
+                        );
+                    }
                 }
             }
         }
@@ -1152,8 +1193,8 @@ mod tests {
             &Value::String("world".to_string())
         ));
 
-        // Cross-type should be false
-        assert!(!apply_operator(
+        // Cross-type integer coercion
+        assert!(apply_operator(
             &Operator::Equal,
             &Value::Uint(42),
             &Value::Int(42)
@@ -1186,8 +1227,8 @@ mod tests {
             &Value::String("world".to_string())
         ));
 
-        // Cross-type should be true (not equal)
-        assert!(apply_operator(
+        // Cross-type integer coercion: same value, so not-equal is false
+        assert!(!apply_operator(
             &Operator::NotEqual,
             &Value::Uint(42),
             &Value::Int(42)
@@ -1302,20 +1343,19 @@ mod tests {
                 Value::String("world".to_string()),
             ),
             (Value::Bytes(vec![1, 2, 3]), Value::Bytes(vec![4, 5, 6])),
-            // Different types
-            (Value::Uint(42), Value::Int(42)),
+            // Different types (non-coercible)
             (Value::Uint(42), Value::String("42".to_string())),
             (Value::Int(42), Value::Bytes(vec![42])),
         ];
 
         for (left, right) in test_cases {
-            // Equal should always be false for different values
+            // Equal should always be false for truly different values
             assert!(
                 !apply_operator(&Operator::Equal, &left, &right),
                 "Equal should be false for different values: {left:?} == {right:?}"
             );
 
-            // NotEqual should always be true for different values
+            // NotEqual should always be true for truly different values
             assert!(
                 apply_operator(&Operator::NotEqual, &left, &right),
                 "NotEqual should be true for different values: {left:?} != {right:?}"
@@ -1514,11 +1554,11 @@ mod tests {
             &zero_signed,
             &Value::Int(0xFF)
         ));
-        assert!(apply_operator(
+        assert!(!apply_operator(
             &Operator::NotEqual,
             &zero_uint,
             &zero_signed
-        )); // Different types
+        )); // Cross-type integer coercion: 0 == 0
     }
 
     #[test]
diff --git a/src/evaluator/types.rs b/src/evaluator/types.rs
index c18cd087..448e2752 100644
--- a/src/evaluator/types.rs
+++ b/src/evaluator/types.rs
@@ -72,18 +72,9 @@ pub enum TypeReadError {
 /// Returns `TypeReadError::BufferOverrun` if the offset is greater than or equal to
 /// the buffer length.
 pub fn read_byte(buffer: &[u8], offset: usize) -> Result<Value, TypeReadError> {
-    // Additional security check: ensure offset doesn't cause integer overflow
-    if offset >= buffer.len() {
-        return Err(TypeReadError::BufferOverrun {
-            offset,
-            buffer_len: buffer.len(),
-        });
-    }
-
     buffer
         .get(offset)
-        .copied()
-        .map(|byte| Value::Uint(u64::from(byte)))
+        .map(|&byte| Value::Uint(u64::from(byte)))
         .ok_or(TypeReadError::BufferOverrun {
             offset,
             buffer_len: buffer.len(),
@@ -294,20 +285,14 @@ pub fn read_string(
     // Get the slice starting from offset
     let remaining_buffer = &buffer[offset..];
 
-    // Determine the actual length to read
+    // Determine the actual length to read (uses SIMD-accelerated memchr for null scan)
     let read_length = if let Some(max_len) = max_length {
         // Find null terminator within max_length, or use max_length if no null found
         let search_len = std::cmp::min(max_len, remaining_buffer.len());
-        remaining_buffer[..search_len]
-            .iter()
-            .position(|&b| b == 0)
-            .unwrap_or(search_len)
+        memchr::memchr(0, &remaining_buffer[..search_len]).unwrap_or(search_len)
     } else {
         // Find null terminator in entire remaining buffer
-        remaining_buffer
-            .iter()
-            .position(|&b| b == 0)
-            .unwrap_or(remaining_buffer.len())
+        memchr::memchr(0, remaining_buffer).unwrap_or(remaining_buffer.len())
     };
 
     // Extract the string bytes (excluding null terminator)
diff --git a/src/lib.rs b/src/lib.rs
index b56e4675..5e77f34a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -133,10 +133,10 @@ pub use error::{EvaluationError, LibmagicError, ParseError};
 /// Result type for library operations
 pub type Result<T> = std::result::Result<T, LibmagicError>;
 
-// Implement From<IoError> for LibmagicError
 impl From<crate::io::IoError> for LibmagicError {
     fn from(err: crate::io::IoError) -> Self {
-        LibmagicError::IoError(std::io::Error::other(err.to_string()))
+        // Preserve the structured error message (includes path and operation context)
+        LibmagicError::FileError(err.to_string())
     }
 }
 
@@ -338,19 +338,17 @@ impl EvaluationConfig {
         const MAX_SAFE_RECURSION_DEPTH: u32 = 1000;
 
         if self.max_recursion_depth == 0 {
-            return Err(LibmagicError::ParseError(ParseError::invalid_syntax(
-                0,
-                "max_recursion_depth must be greater than 0",
-            )));
+            return Err(LibmagicError::ConfigError {
+                reason: "max_recursion_depth must be greater than 0".to_string(),
+            });
         }
 
         if self.max_recursion_depth > MAX_SAFE_RECURSION_DEPTH {
-            return Err(LibmagicError::ParseError(ParseError::invalid_syntax(
-                0,
-                format!(
+            return Err(LibmagicError::ConfigError {
+                reason: format!(
                     "max_recursion_depth must not exceed {MAX_SAFE_RECURSION_DEPTH} to prevent stack overflow"
                 ),
-            )));
+            });
         }
 
         Ok(())
@@ -361,19 +359,17 @@ impl EvaluationConfig {
         const MAX_SAFE_STRING_LENGTH: usize = 1_048_576; // 1MB
 
         if self.max_string_length == 0 {
-            return Err(LibmagicError::ParseError(ParseError::invalid_syntax(
-                0,
-                "max_string_length must be greater than 0",
-            )));
+            return Err(LibmagicError::ConfigError {
+                reason: "max_string_length must be greater than 0".to_string(),
+            });
         }
 
         if self.max_string_length > MAX_SAFE_STRING_LENGTH {
-            return Err(LibmagicError::ParseError(ParseError::invalid_syntax(
-                0,
-                format!(
+            return Err(LibmagicError::ConfigError {
+                reason: format!(
                     "max_string_length must not exceed {MAX_SAFE_STRING_LENGTH} bytes to prevent memory exhaustion"
                 ),
-            )));
+            });
         }
 
         Ok(())
@@ -385,19 +381,17 @@ impl EvaluationConfig {
 
         if let Some(timeout) = self.timeout_ms {
             if timeout == 0 {
-                return Err(LibmagicError::ParseError(ParseError::invalid_syntax(
-                    0,
-                    "timeout_ms must be greater than 0 if specified",
-                )));
+                return Err(LibmagicError::ConfigError {
+                    reason: "timeout_ms must be greater than 0 if specified".to_string(),
+                });
             }
 
             if timeout > MAX_SAFE_TIMEOUT_MS {
-                return Err(LibmagicError::ParseError(ParseError::invalid_syntax(
-                    0,
-                    format!(
+                return Err(LibmagicError::ConfigError {
+                    reason: format!(
                         "timeout_ms must not exceed {MAX_SAFE_TIMEOUT_MS} (5 minutes) to prevent denial of service"
                     ),
-                )));
+                });
             }
         }
 
@@ -412,12 +406,11 @@ impl EvaluationConfig {
         if self.max_recursion_depth > HIGH_RECURSION_THRESHOLD
             && self.max_string_length > LARGE_STRING_THRESHOLD
         {
-            return Err(LibmagicError::ParseError(ParseError::invalid_syntax(
-                0,
-                format!(
+            return Err(LibmagicError::ConfigError {
+                reason: format!(
                     "High recursion depth (>{HIGH_RECURSION_THRESHOLD}) combined with large string length (>{LARGE_STRING_THRESHOLD}) may cause resource exhaustion"
                 ),
-            )));
+            });
         }
 
         Ok(())
@@ -432,6 +425,8 @@ pub struct MagicDatabase {
     /// Optional path to the source magic file or directory from which rules were loaded.
     /// This is used for debugging and logging purposes.
     source_path: Option<PathBuf>,
+    /// Cached MIME type mapper to avoid rebuilding the lookup table on every evaluation
+    mime_mapper: mime::MimeMapper,
 }
 
 impl MagicDatabase {
@@ -492,6 +487,7 @@ impl MagicDatabase {
             rules: crate::builtin_rules::get_builtin_rules(),
             config,
             source_path: None,
+            mime_mapper: mime::MimeMapper::new(),
         })
     }
 
@@ -537,6 +533,7 @@ impl MagicDatabase {
             rules,
             config,
             source_path: Some(path.as_ref().to_path_buf()),
+            mime_mapper: mime::MimeMapper::new(),
         })
     }
 
@@ -564,7 +561,6 @@ impl MagicDatabase {
     pub fn evaluate_file<P: AsRef<Path>>(&self, path: P) -> Result<EvaluationResult> {
         use crate::evaluator::evaluate_rules_with_config;
         use crate::io::FileBuffer;
-        use crate::mime::MimeMapper;
         use std::fs;
         use std::time::Instant;
 
@@ -588,56 +584,14 @@ impl MagicDatabase {
         let file_buffer = FileBuffer::new(path)?;
         let buffer = file_buffer.as_slice();
 
-        // If we have no rules, return "data" as fallback
-        if self.rules.is_empty() {
-            return Ok(EvaluationResult {
-                description: "data".to_string(),
-                mime_type: None,
-                confidence: 0.0,
-                matches: vec![],
-                metadata: EvaluationMetadata {
-                    file_size,
-                    evaluation_time_ms: start_time.elapsed().as_secs_f64() * 1000.0,
-                    rules_evaluated: 0,
-                    magic_file: self.source_path.clone(),
-                    timed_out: false,
-                },
-            });
-        }
-
-        // Evaluate rules against the file buffer
-        let matches = evaluate_rules_with_config(&self.rules, buffer, self.config.clone())?;
-
-        // Build the result
-        let (description, confidence) = if matches.is_empty() {
-            ("data".to_string(), 0.0)
+        // Evaluate rules against the file buffer (build_result handles empty rules/matches)
+        let matches = if self.rules.is_empty() {
+            vec![]
         } else {
-            (
-                Self::concatenate_messages(&matches),
-                matches.first().map_or(0.0, |m| m.confidence),
-            )
-        };
-
-        // Get MIME type if enabled
-        let mime_type = if self.config.enable_mime_types {
-            MimeMapper::new().get_mime_type(&description)
-        } else {
-            None
+            evaluate_rules_with_config(&self.rules, buffer, &self.config)?
         };
 
-        Ok(EvaluationResult {
-            description,
-            mime_type,
-            confidence,
-            matches,
-            metadata: EvaluationMetadata {
-                file_size,
-                evaluation_time_ms: start_time.elapsed().as_secs_f64() * 1000.0,
-                rules_evaluated: self.rules.len(),
-                magic_file: self.source_path.clone(),
-                timed_out: false,
-            },
-        })
+        Ok(self.build_result(matches, file_size, start_time))
     }
 
     /// Evaluate magic rules against an in-memory buffer
@@ -676,29 +630,28 @@ impl MagicDatabase {
         start_time: std::time::Instant,
     ) -> Result<EvaluationResult> {
         use crate::evaluator::evaluate_rules_with_config;
-        use crate::mime::MimeMapper;
 
         let file_size = buffer.len() as u64;
 
-        if self.rules.is_empty() {
-            return Ok(EvaluationResult {
-                description: "data".to_string(),
-                mime_type: None,
-                confidence: 0.0,
-                matches: vec![],
-                metadata: EvaluationMetadata {
-                    file_size,
-                    evaluation_time_ms: start_time.elapsed().as_secs_f64() * 1000.0,
-                    rules_evaluated: 0,
-                    magic_file: self.source_path.clone(),
-                    timed_out: false,
-                },
-            });
-        }
+        let matches = if self.rules.is_empty() {
+            vec![]
+        } else {
+            evaluate_rules_with_config(&self.rules, buffer, &self.config)?
+        };
 
-        let matches = evaluate_rules_with_config(&self.rules, buffer, self.config.clone())?;
+        Ok(self.build_result(matches, file_size, start_time))
+    }
 
-        // Build the result
+    /// Build an `EvaluationResult` from match results, file size, and start time.
+    ///
+    /// This is shared between `evaluate_file` and `evaluate_buffer_internal` to
+    /// avoid duplicating the result-construction logic.
+    fn build_result(
+        &self,
+        matches: Vec<evaluator::MatchResult>,
+        file_size: u64,
+        start_time: std::time::Instant,
+    ) -> EvaluationResult {
         let (description, confidence) = if matches.is_empty() {
             ("data".to_string(), 0.0)
         } else {
@@ -708,14 +661,13 @@ impl MagicDatabase {
             )
         };
 
-        // Get MIME type if enabled
         let mime_type = if self.config.enable_mime_types {
-            MimeMapper::new().get_mime_type(&description)
+            self.mime_mapper.get_mime_type(&description)
         } else {
             None
         };
 
-        Ok(EvaluationResult {
+        EvaluationResult {
             description,
             mime_type,
             confidence,
@@ -727,7 +679,7 @@ impl MagicDatabase {
                 magic_file: self.source_path.clone(),
                 timed_out: false,
             },
-        })
+        }
     }
 
     /// Concatenate match messages following libmagic behavior
@@ -735,7 +687,8 @@ impl MagicDatabase {
     /// Messages are joined with spaces, except when a message starts with
     /// backspace character (\\b) which suppresses the space.
     fn concatenate_messages(matches: &[evaluator::MatchResult]) -> String {
-        let mut result = String::new();
+        let capacity: usize = matches.iter().map(|m| m.message.len() + 1).sum();
+        let mut result = String::with_capacity(capacity);
         for m in matches {
             if let Some(rest) = m.message.strip_prefix('\u{0008}') {
                 // Backspace suppresses the space and the character itself
@@ -952,10 +905,10 @@ mod tests {
         assert!(result.is_err());
 
         match result.unwrap_err() {
-            LibmagicError::ParseError(ParseError::InvalidSyntax { message, .. }) => {
+            LibmagicError::ConfigError { reason: message } => {
                 assert!(message.contains("max_recursion_depth must be greater than 0"));
             }
-            _ => panic!("Expected ParseError with InvalidSyntax"),
+            _ => panic!("Expected ConfigError"),
         }
     }
 
@@ -970,10 +923,10 @@ mod tests {
         assert!(result.is_err());
 
         match result.unwrap_err() {
-            LibmagicError::ParseError(ParseError::InvalidSyntax { message, .. }) => {
+            LibmagicError::ConfigError { reason: message } => {
                 assert!(message.contains("max_recursion_depth must not exceed 1000"));
             }
-            _ => panic!("Expected ParseError with InvalidSyntax"),
+            _ => panic!("Expected ConfigError"),
         }
     }
 
@@ -988,10 +941,10 @@ mod tests {
         assert!(result.is_err());
 
         match result.unwrap_err() {
-            LibmagicError::ParseError(ParseError::InvalidSyntax { message, .. }) => {
+            LibmagicError::ConfigError { reason: message } => {
                 assert!(message.contains("max_string_length must be greater than 0"));
             }
-            _ => panic!("Expected ParseError with InvalidSyntax"),
+            _ => panic!("Expected ConfigError"),
         }
     }
 
@@ -1006,11 +959,11 @@ mod tests {
         assert!(result.is_err());
 
         match result.unwrap_err() {
-            LibmagicError::ParseError(ParseError::InvalidSyntax { message, .. }) => {
+            LibmagicError::ConfigError { reason: message } => {
                 assert!(message.contains("max_string_length must not exceed"));
                 assert!(message.contains("bytes to prevent memory exhaustion"));
             }
-            _ => panic!("Expected ParseError with InvalidSyntax"),
+            _ => panic!("Expected ConfigError"),
         }
     }
 
@@ -1025,10 +978,10 @@ mod tests {
         assert!(result.is_err());
 
         match result.unwrap_err() {
-            LibmagicError::ParseError(ParseError::InvalidSyntax { message, .. }) => {
+            LibmagicError::ConfigError { reason: message } => {
                 assert!(message.contains("timeout_ms must be greater than 0 if specified"));
             }
-            _ => panic!("Expected ParseError with InvalidSyntax"),
+            _ => panic!("Expected ConfigError"),
         }
     }
 
@@ -1043,10 +996,10 @@ mod tests {
         assert!(result.is_err());
 
         match result.unwrap_err() {
-            LibmagicError::ParseError(ParseError::InvalidSyntax { message, .. }) => {
+            LibmagicError::ConfigError { reason: message } => {
                 assert!(message.contains("timeout_ms must not exceed 300000"));
             }
-            _ => panic!("Expected ParseError with InvalidSyntax"),
+            _ => panic!("Expected ConfigError"),
         }
     }
 
diff --git a/src/main.rs b/src/main.rs
index 8b75b363..ed10a902 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -5,11 +5,8 @@
 
 use clap::Parser;
 use clap_stdin::FileOrStdin;
-use libmagic_rs::output::MatchResult;
 use libmagic_rs::output::json::{format_json_line_output, format_json_output};
-use libmagic_rs::parser::ast::Value;
 use libmagic_rs::parser::{MagicFileFormat, detect_format};
-use libmagic_rs::tags::TagExtractor;
 use libmagic_rs::{LibmagicError, MagicDatabase};
 use std::fs;
 use std::path::{Path, PathBuf};
@@ -263,6 +260,14 @@ fn handle_error(error: LibmagicError) -> i32 {
         LibmagicError::ParseError(ref parse_err) => handle_parse_error_new(parse_err),
         LibmagicError::EvaluationError(ref eval_err) => handle_evaluation_error_new(eval_err),
         LibmagicError::Timeout { timeout_ms } => handle_timeout_error(timeout_ms),
+        LibmagicError::ConfigError { ref reason } => {
+            eprintln!("Configuration error: {reason}");
+            1
+        }
+        LibmagicError::FileError(ref msg) => {
+            eprintln!("File error: {msg}");
+            3
+        }
     }
 }
 
@@ -377,72 +382,15 @@ fn output_result(
 
     match args.output_format() {
         OutputFormat::Json => {
-            // Extract tags from the description
-            let tag_extractor = TagExtractor::new();
-            let tags = tag_extractor.extract_tags(&result.description);
-
-            // Convert evaluator::MatchResult to output::MatchResult
-            let match_results: Vec<MatchResult> = if result.matches.is_empty() {
-                vec![]
-            } else {
-                result
-                    .matches
-                    .iter()
-                    .map(|m| {
-                        // Build rule_path from match messages
-                        let rule_path =
-                            tag_extractor.extract_rule_path(std::iter::once(m.message.as_str()));
-
-                        // Convert confidence from 0.0-1.0 to 0-100 scale
-                        let confidence_score = (m.confidence * 100.0).min(100.0) as u8;
-
-                        // Estimate length from value type
-                        let length = match &m.value {
-                            Value::Bytes(b) => b.len(),
-                            Value::String(s) => s.len(),
-                            _ => 4, // Default for numeric types
-                        };
-
-                        MatchResult::with_metadata(
-                            m.message.clone(),
-                            m.offset,
-                            length,
-                            m.value.clone(),
-                            rule_path,
-                            confidence_score,
-                            result.mime_type.clone(),
-                        )
-                    })
-                    .collect()
-            };
-
-            // Add tags to the first match if present
-            let match_results = if !match_results.is_empty() && !tags.is_empty() {
-                let mut results = match_results;
-                // Tags are extracted from description, associate with primary result
-                results[0] = MatchResult::with_metadata(
-                    results[0].message.clone(),
-                    results[0].offset,
-                    results[0].length,
-                    results[0].value.clone(),
-                    if results[0].rule_path.is_empty() {
-                        tags
-                    } else {
-                        results[0].rule_path.clone()
-                    },
-                    results[0].confidence,
-                    results[0].mime_type.clone(),
-                );
-                results
-            } else {
-                match_results
-            };
+            // Convert library result to output format (handles match conversion + tag enrichment)
+            let output_result =
+                libmagic_rs::output::EvaluationResult::from_library_result(result, file_path);
 
             // Use JSON Lines format for multiple files, pretty JSON for single file
             let json_result = if is_multiple_files {
-                format_json_line_output(file_path, &match_results)
+                format_json_line_output(file_path, &output_result.matches)
             } else {
-                format_json_output(&match_results)
+                format_json_output(&output_result.matches)
             };
 
             match json_result {
diff --git a/src/output/mod.rs b/src/output/mod.rs
index 48518131..161a0612 100644
--- a/src/output/mod.rs
+++ b/src/output/mod.rs
@@ -12,8 +12,16 @@ pub mod text;
 use serde::{Deserialize, Serialize};
 use std::path::PathBuf;
 
+use std::sync::LazyLock;
+
 use crate::parser::ast::Value;
 
+/// Shared `TagExtractor` instance, initialized once on first use.
+/// Avoids allocating the 16-keyword `HashSet` on every call to
+/// `from_evaluator_match` or `from_library_result`.
+static DEFAULT_TAG_EXTRACTOR: LazyLock<crate::tags::TagExtractor> =
+    LazyLock::new(crate::tags::TagExtractor::new);
+
 /// Result of a single magic rule match
 ///
 /// Contains all information about a successful rule match, including the matched
@@ -245,6 +253,44 @@ impl MatchResult {
         }
     }
 
+    /// Convert from an evaluator `MatchResult` to an output `MatchResult`
+    ///
+    /// This adapts the internal evaluation result format to the richer output format
+    /// used for JSON and structured output. It extracts rule paths from match messages
+    /// and converts confidence from 0.0-1.0 to 0-100 scale.
+    ///
+    /// # Arguments
+    ///
+    /// * `m` - The evaluator match result to convert
+    /// * `mime_type` - Optional MIME type to associate with this match
+    #[must_use]
+    pub fn from_evaluator_match(
+        m: &crate::evaluator::MatchResult,
+        mime_type: Option<&str>,
+    ) -> Self {
+        let rule_path =
+            DEFAULT_TAG_EXTRACTOR.extract_rule_path(std::iter::once(m.message.as_str()));
+
+        #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
+        let confidence = (m.confidence * 100.0).min(100.0) as u8;
+
+        let length = match &m.value {
+            Value::Bytes(b) => b.len(),
+            Value::String(s) => s.len(),
+            Value::Uint(_) | Value::Int(_) => 4,
+        };
+
+        Self::with_metadata(
+            m.message.clone(),
+            m.offset,
+            length,
+            m.value.clone(),
+            rule_path,
+            confidence,
+            mime_type.map(String::from),
+        )
+    }
+
     /// Set the confidence score for this match
     ///
     /// The confidence score is automatically clamped to the range 0-100.
@@ -269,12 +315,6 @@ impl MatchResult {
     /// assert_eq!(result.confidence, 100);
     /// ```
     pub fn set_confidence(&mut self, confidence: u8) {
-        // Only warn in debug builds to avoid performance impact
-        #[cfg(debug_assertions)]
-        if confidence > 100 {
-            eprintln!("Warning: Confidence score {confidence} clamped to 100");
-        }
-
         self.confidence = confidence.min(100);
     }
 
@@ -366,6 +406,51 @@ impl EvaluationResult {
         }
     }
 
+    /// Convert from a library `EvaluationResult` to an output `EvaluationResult`
+    ///
+    /// This adapts the library's evaluation result into the output format used for
+    /// JSON and structured output. Converts all matches and metadata, and enriches
+    /// the first match's rule path with tags extracted from the overall description.
+    ///
+    /// # Arguments
+    ///
+    /// * `result` - The library evaluation result to convert
+    /// * `filename` - Path to the file that was evaluated
+    #[must_use]
+    pub fn from_library_result(
+        result: &crate::EvaluationResult,
+        filename: &std::path::Path,
+    ) -> Self {
+        let mut output_matches: Vec<MatchResult> = result
+            .matches
+            .iter()
+            .map(|m| MatchResult::from_evaluator_match(m, result.mime_type.as_deref()))
+            .collect();
+
+        // Enrich the first match with tags from the overall description
+        if let Some(first) = output_matches.first_mut() {
+            if first.rule_path.is_empty() {
+                first.rule_path = DEFAULT_TAG_EXTRACTOR.extract_tags(&result.description);
+            }
+        }
+
+        #[allow(clippy::cast_possible_truncation)]
+        let rules_evaluated = result.metadata.rules_evaluated as u32;
+        #[allow(clippy::cast_possible_truncation)]
+        let rules_matched = output_matches.len() as u32;
+
+        Self::new(
+            filename.to_path_buf(),
+            output_matches,
+            EvaluationMetadata::new(
+                result.metadata.file_size,
+                result.metadata.evaluation_time_ms,
+                rules_evaluated,
+                rules_matched,
+            ),
+        )
+    }
+
     /// Create an evaluation result with an error
     ///
     /// # Arguments
diff --git a/src/parser/format.rs b/src/parser/format.rs
new file mode 100644
index 00000000..a813d946
--- /dev/null
+++ b/src/parser/format.rs
@@ -0,0 +1,228 @@
+//! Format detection for magic files.
+//!
+//! Detects whether a path points to a text magic file, a directory of magic files,
+//! or a binary compiled magic file (.mgc format).
+
+use crate::error::ParseError;
+use std::io::Read;
+use std::path::Path;
+
+/// Represents the format of a magic file or directory
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum MagicFileFormat {
+    /// Text-based magic file (human-readable)
+    Text,
+    /// Directory containing multiple magic files (Magdir pattern)
+    Directory,
+    /// Binary compiled magic file (.mgc format)
+    Binary,
+}
+
+/// Detect the format of a magic file or directory
+///
+/// This function examines the filesystem metadata and file contents to determine
+/// whether the path points to a text magic file, a directory, or a binary .mgc file.
+///
+/// # Detection Logic
+///
+/// 1. Check if path is a directory -> `MagicFileFormat::Directory`
+/// 2. Read first 4 bytes and check for binary magic number `0xF11E041C` -> `MagicFileFormat::Binary`
+/// 3. Otherwise -> `MagicFileFormat::Text`
+///
+/// # Arguments
+///
+/// * `path` - Path to the magic file or directory to detect
+///
+/// # Errors
+///
+/// Returns `ParseError::IoError` if the path doesn't exist or cannot be read.
+///
+/// # Notes
+///
+/// This function only detects the format and returns it. It does not validate whether
+/// the format is supported by the parser. Higher-level code should check the returned
+/// format and decide how to handle unsupported formats (e.g., binary .mgc files).
+///
+/// # Examples
+///
+/// ```rust,no_run
+/// use libmagic_rs::parser::detect_format;
+/// use std::path::Path;
+///
+/// let format = detect_format(Path::new("/usr/share/file/magic"))?;
+/// # Ok::<(), libmagic_rs::ParseError>(())
+/// ```
+pub fn detect_format(path: &Path) -> Result<MagicFileFormat, ParseError> {
+    // Check if path exists and is accessible
+    let metadata = std::fs::metadata(path)?;
+
+    // Check if it's a directory
+    if metadata.is_dir() {
+        return Ok(MagicFileFormat::Directory);
+    }
+
+    // Read first 4 bytes to check for binary magic number
+    let mut file = std::fs::File::open(path)?;
+
+    let mut magic_bytes = [0u8; 4];
+
+    match file.read_exact(&mut magic_bytes) {
+        Ok(()) => {
+            // Check for binary magic number 0xF11E041C in little-endian format
+            let magic_number = u32::from_le_bytes(magic_bytes);
+            if magic_number == 0xF11E_041C {
+                return Ok(MagicFileFormat::Binary);
+            }
+            // Not a binary magic file, assume text
+            Ok(MagicFileFormat::Text)
+        }
+        Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
+            // File is too small to be a binary magic file, assume text
+            Ok(MagicFileFormat::Text)
+        }
+        Err(e) => Err(ParseError::IoError(e)),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs;
+    use std::io::Write;
+
+    #[test]
+    fn test_detect_format_text_file() {
+        let temp_dir = std::env::temp_dir();
+        let text_file = temp_dir.join("test_text_magic.txt");
+        fs::write(&text_file, "# Magic file\n0 string test Test").unwrap();
+
+        let format = detect_format(&text_file).unwrap();
+        assert_eq!(format, MagicFileFormat::Text);
+
+        fs::remove_file(&text_file).unwrap();
+    }
+
+    #[test]
+    fn test_detect_format_directory() {
+        let temp_dir = std::env::temp_dir().join("test_magic_dir");
+        fs::create_dir_all(&temp_dir).unwrap();
+
+        let format = detect_format(&temp_dir).unwrap();
+        assert_eq!(format, MagicFileFormat::Directory);
+
+        fs::remove_dir_all(&temp_dir).unwrap();
+    }
+
+    #[test]
+    fn test_detect_format_binary_mgc() {
+        let temp_dir = std::env::temp_dir();
+        let binary_file = temp_dir.join("test_binary.mgc");
+
+        // Write binary magic number 0xF11E041C in little-endian
+        let mut file = fs::File::create(&binary_file).unwrap();
+        file.write_all(&[0x1C, 0x04, 0x1E, 0xF1]).unwrap();
+        file.write_all(b"additional binary data").unwrap();
+
+        let result = detect_format(&binary_file);
+        assert!(result.is_ok());
+
+        match result.unwrap() {
+            MagicFileFormat::Binary => {
+                // Expected result
+            }
+            other => panic!("Expected Binary format, got {other:?}"),
+        }
+
+        fs::remove_file(&binary_file).unwrap();
+    }
+
+    #[test]
+    fn test_detect_format_nonexistent_path() {
+        let nonexistent = std::env::temp_dir().join("nonexistent_magic_file.txt");
+
+        let result = detect_format(&nonexistent);
+        assert!(result.is_err());
+
+        match result.unwrap_err() {
+            ParseError::IoError(e) => {
+                assert_eq!(e.kind(), std::io::ErrorKind::NotFound);
+            }
+            other => panic!("Expected IoError, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn test_detect_format_empty_file() {
+        let temp_dir = std::env::temp_dir();
+        let empty_file = temp_dir.join("test_empty_magic.txt");
+        fs::write(&empty_file, "").unwrap();
+
+        // Empty files should be detected as text (too small for binary magic)
+        let format = detect_format(&empty_file).unwrap();
+        assert_eq!(format, MagicFileFormat::Text);
+
+        fs::remove_file(&empty_file).unwrap();
+    }
+
+    #[test]
+    fn test_detect_format_small_file() {
+        let temp_dir = std::env::temp_dir();
+        let small_file = temp_dir.join("test_small_magic.txt");
+        fs::write(&small_file, "ab").unwrap(); // Only 2 bytes
+
+        // Small files should be detected as text
+        let format = detect_format(&small_file).unwrap();
+        assert_eq!(format, MagicFileFormat::Text);
+
+        fs::remove_file(&small_file).unwrap();
+    }
+
+    #[test]
+    fn test_detect_format_text_with_binary_content() {
+        let temp_dir = std::env::temp_dir();
+        let binary_text_file = temp_dir.join("test_binary_text.txt");
+
+        // Write binary data that's NOT the magic number
+        let mut file = fs::File::create(&binary_text_file).unwrap();
+        file.write_all(&[0xFF, 0xFE, 0xFD, 0xFC]).unwrap();
+        file.write_all(b"some text").unwrap();
+
+        // Should be detected as text (wrong magic number)
+        let format = detect_format(&binary_text_file).unwrap();
+        assert_eq!(format, MagicFileFormat::Text);
+
+        fs::remove_file(&binary_text_file).unwrap();
+    }
+
+    #[test]
+    fn test_magic_file_format_enum_equality() {
+        assert_eq!(MagicFileFormat::Text, MagicFileFormat::Text);
+        assert_eq!(MagicFileFormat::Directory, MagicFileFormat::Directory);
+        assert_eq!(MagicFileFormat::Binary, MagicFileFormat::Binary);
+
+        assert_ne!(MagicFileFormat::Text, MagicFileFormat::Directory);
+        assert_ne!(MagicFileFormat::Text, MagicFileFormat::Binary);
+        assert_ne!(MagicFileFormat::Directory, MagicFileFormat::Binary);
+    }
+
+    #[test]
+    fn test_magic_file_format_debug() {
+        let text_format = MagicFileFormat::Text;
+        let debug_str = format!("{text_format:?}");
+        assert!(debug_str.contains("Text"));
+    }
+
+    #[test]
+    fn test_magic_file_format_clone() {
+        let original = MagicFileFormat::Directory;
+        let cloned = original;
+        assert_eq!(original, cloned);
+    }
+
+    #[test]
+    fn test_magic_file_format_copy() {
+        let original = MagicFileFormat::Binary;
+        let copied = original; // Copy trait allows this
+        assert_eq!(original, copied);
+    }
+}
diff --git a/src/parser/hierarchy.rs b/src/parser/hierarchy.rs
new file mode 100644
index 00000000..4a9496dd
--- /dev/null
+++ b/src/parser/hierarchy.rs
@@ -0,0 +1,227 @@
+//! Rule hierarchy building for magic file parsing.
+//!
+//! Constructs parent-child relationships between magic rules based on
+//! indentation levels, implementing a stack-based algorithm for hierarchy construction.
+
+use super::preprocessing::{LineInfo, parse_magic_rule_line};
+use crate::error::ParseError;
+use crate::parser::ast::MagicRule;
+
+/// Builds a hierarchical structure from a flat list of parsed magic rules.
+///
+/// This function establishes parent-child relationships based on indentation levels.
+/// Rules at deeper indentation levels become children of the most recent rule at a
+/// shallower level. This implements a stack-based algorithm for hierarchy construction.
+///
+/// # Arguments
+///
+/// * `lines` - A vector of preprocessed `LineInfo` structs
+///
+/// # Returns
+///
+/// `Result<Vec<MagicRule>, ParseError>` - Root-level rules with children attached
+///
+/// # Behavior
+///
+/// - Rules with `level=0` are root rules
+/// - Rules with `level=1` become children of the most recent `level=0` rule
+/// - Rules with `level=2` become children of the most recent `level=1` rule
+/// - When indentation decreases, the stack is unwound and completed rules are attached
+/// - Orphaned child rules (starting with '>' but with no preceding parent) are
+///   added to the root list with their hierarchy level preserved
+///
+/// # Errors
+///
+/// Returns an error if:
+/// - Any line contains invalid magic rule syntax
+/// - Rule parsing fails (propagated from `parse_magic_rule_line`)
+pub(crate) fn build_rule_hierarchy(lines: Vec<LineInfo>) -> Result<Vec<MagicRule>, ParseError> {
+    /// Helper to pop a rule from the stack and attach it to its parent or roots
+    fn pop_and_attach(stack: &mut Vec<MagicRule>, roots: &mut Vec<MagicRule>) {
+        if let Some(completed) = stack.pop() {
+            if let Some(parent) = stack.last_mut() {
+                parent.children.push(completed);
+            } else {
+                roots.push(completed);
+            }
+        }
+    }
+
+    let mut stack: Vec<MagicRule> = Vec::new();
+    let mut roots: Vec<MagicRule> = Vec::new();
+    let mut pending_strength: Option<crate::parser::ast::StrengthModifier> = None;
+
+    for line in lines {
+        if line.is_comment {
+            continue;
+        }
+
+        // Handle strength directive: store modifier for next rule
+        if line.strength_modifier.is_some() {
+            pending_strength = line.strength_modifier;
+            continue;
+        }
+
+        let mut rule = parse_magic_rule_line(&line)?;
+
+        // Apply pending strength modifier to this rule
+        if pending_strength.is_some() {
+            rule.strength_modifier = pending_strength.take();
+        }
+
+        // Unwind stack until we find a parent with lower level
+        while stack.last().is_some_and(|top| top.level >= rule.level) {
+            pop_and_attach(&mut stack, &mut roots);
+        }
+
+        stack.push(rule);
+    }
+
+    // Unwind remaining stack
+    while !stack.is_empty() {
+        pop_and_attach(&mut stack, &mut roots);
+    }
+
+    Ok(roots)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::parser::preprocessing::LineInfo;
+
+    fn li(line_number: usize, content: &str) -> LineInfo {
+        LineInfo {
+            content: content.to_string(),
+            line_number,
+            is_comment: false,
+            strength_modifier: None,
+        }
+    }
+
+    // ============================================================
+    // Tests for build_rule_hierarchy (10+ test cases)
+    // ============================================================
+
+    #[test]
+    fn test_build_rule_hierarchy_single_root() {
+        let lines = vec![li(1, "0 string \\x7fELF ELF executable")];
+        let roots = build_rule_hierarchy(lines).unwrap();
+        assert_eq!(roots.len(), 1);
+        assert_eq!(roots[0].level, 0);
+    }
+
+    #[test]
+    fn test_build_rule_hierarchy_root_with_one_child() {
+        let lines = vec![
+            li(1, "0 string \\x7fELF ELF executable"),
+            li(2, ">4 byte 1 32-bit"),
+        ];
+        let roots = build_rule_hierarchy(lines).unwrap();
+        assert_eq!(roots.len(), 1);
+        assert_eq!(roots[0].children.len(), 1);
+    }
+
+    #[test]
+    fn test_build_rule_hierarchy_root_with_multiple_children() {
+        let lines = vec![
+            li(1, "0 string \\x7fELF ELF executable"),
+            li(2, ">4 byte 1 32-bit"),
+            li(3, ">4 byte 2 64-bit"),
+        ];
+        let roots = build_rule_hierarchy(lines).unwrap();
+        assert_eq!(roots.len(), 1);
+        assert_eq!(roots[0].children.len(), 2);
+    }
+
+    #[test]
+    fn test_build_rule_hierarchy_nested_three_levels() {
+        let lines = vec![
+            li(1, "0 string \\x7fELF ELF executable"),
+            li(2, ">4 byte 1 class"),
+            li(3, ">>5 byte 1 subtype"),
+        ];
+        let roots = build_rule_hierarchy(lines).unwrap();
+        assert_eq!(roots[0].children.len(), 1);
+        assert_eq!(roots[0].children[0].children.len(), 1);
+        assert_eq!(roots[0].children[0].children[0].level, 2);
+    }
+
+    #[test]
+    fn test_build_rule_hierarchy_multiple_roots() {
+        let lines = vec![
+            li(1, r#"0 string "ELF" "ELF executable""#),
+            li(2, r#"0 string "%PDF" "PDF document""#),
+        ];
+        let roots = build_rule_hierarchy(lines).unwrap();
+        assert_eq!(roots.len(), 2);
+    }
+
+    #[test]
+    fn test_build_rule_hierarchy_sibling_rules() {
+        let lines = vec![
+            li(1, "0 byte 1 Root"),
+            li(2, ">4 byte 1 Child1"),
+            li(3, ">4 byte 2 Child2"),
+            li(4, "0 byte 2 Root2"),
+        ];
+        let roots = build_rule_hierarchy(lines).unwrap();
+        assert_eq!(roots.len(), 2);
+        assert_eq!(roots[0].children.len(), 2);
+    }
+
+    #[test]
+    fn test_build_rule_hierarchy_deep_nesting() {
+        let lines = vec![
+            li(1, "0 byte 1 L0"),
+            li(2, ">4 byte 1 L1"),
+            li(3, ">>5 byte 2 L2"),
+            li(4, ">>>6 byte 3 L3"),
+            li(5, ">>>>7 byte 4 L4"),
+        ];
+        let roots = build_rule_hierarchy(lines).unwrap();
+        assert_eq!(roots.len(), 1);
+        assert_eq!(
+            roots[0].children[0].children[0].children[0].children.len(),
+            1
+        );
+    }
+
+    #[test]
+    fn test_build_rule_hierarchy_return_to_root_level() {
+        let lines = vec![
+            li(1, "0 byte 1 Root1"),
+            li(2, ">4 byte 1 Child"),
+            li(3, "0 byte 2 Root2"),
+        ];
+        let roots = build_rule_hierarchy(lines).unwrap();
+        assert_eq!(roots.len(), 2);
+        assert_eq!(roots[0].children.len(), 1);
+        assert_eq!(roots[1].children.len(), 0);
+    }
+
+    #[test]
+    fn test_build_rule_hierarchy_orphaned_child() {
+        let lines = vec![li(1, ">4 byte 1 Orphaned child")];
+        let roots = build_rule_hierarchy(lines).unwrap();
+        assert_eq!(roots.len(), 1);
+        assert_eq!(roots[0].level, 1);
+    }
+
+    #[test]
+    fn test_build_rule_hierarchy_complex_structure() {
+        let lines = vec![
+            li(1, "0 byte 1 Root1"),
+            li(2, ">4 byte 1 C1"),
+            li(3, ">4 byte 2 C2"),
+            li(4, ">>6 byte 3 GC1"),
+            li(5, "0 byte 2 Root2"),
+            li(6, ">4 byte 4 C3"),
+        ];
+        let roots = build_rule_hierarchy(lines).unwrap();
+        assert_eq!(roots.len(), 2);
+        assert_eq!(roots[0].children.len(), 2);
+        assert_eq!(roots[0].children[1].children.len(), 1);
+        assert_eq!(roots[1].children.len(), 1);
+    }
+}
diff --git a/src/parser/loader.rs b/src/parser/loader.rs
new file mode 100644
index 00000000..22db03b6
--- /dev/null
+++ b/src/parser/loader.rs
@@ -0,0 +1,586 @@
+//! File and directory loading for magic files.
+//!
+//! Provides functions for loading magic rules from individual files and
+//! directories, with automatic format detection and error handling.
+
+use crate::error::ParseError;
+use crate::parser::ast::MagicRule;
+use std::path::{Path, PathBuf};
+
+use super::format::{MagicFileFormat, detect_format};
+
+/// Loads and parses all magic files from a directory, merging them into a single rule set.
+///
+/// This function reads all regular files in the specified directory, parses each as a magic file,
+/// and combines the resulting rules into a single `Vec<MagicRule>`. Files are processed in
+/// alphabetical order by filename to ensure deterministic results.
+///
+/// # Error Handling Strategy
+///
+/// This function distinguishes between critical and non-critical errors:
+///
+/// - **Critical errors** (I/O failures, directory access issues, encoding problems):
+///   These cause immediate failure and return a `ParseError`. The function stops processing
+///   and propagates the error to the caller.
+///
+/// - **Non-critical errors** (individual file parse failures):
+///   These are logged to stderr with a warning message and the file is skipped. Processing
+///   continues with remaining files.
+///
+/// # Behavior
+///
+/// - Subdirectories are skipped (not recursively processed)
+/// - Symbolic links are skipped
+/// - Empty directories return an empty rules vector
+/// - Files are processed in alphabetical order by filename
+/// - All successfully parsed rules are merged in order
+///
+/// # Examples
+///
+/// Loading a directory of magic files:
+///
+/// ```rust,no_run
+/// use libmagic_rs::parser::load_magic_directory;
+/// use std::path::Path;
+///
+/// let rules = load_magic_directory(Path::new("/usr/share/file/magic.d"))?;
+/// println!("Loaded {} rules from directory", rules.len());
+/// # Ok::<(), libmagic_rs::ParseError>(())
+/// ```
+///
+/// Creating a Magdir-style directory structure:
+///
+/// ```rust,no_run
+/// use libmagic_rs::parser::load_magic_directory;
+/// use std::path::Path;
+///
+/// // Directory structure:
+/// // magic.d/
+/// //   ├── 01-elf
+/// //   ├── 02-archive
+/// //   └── 03-text
+///
+/// let rules = load_magic_directory(Path::new("./magic.d"))?;
+/// // Rules from all three files are merged in alphabetical order
+/// # Ok::<(), libmagic_rs::ParseError>(())
+/// ```
+///
+/// # Errors
+///
+/// Returns `ParseError` if:
+/// - The directory does not exist or cannot be accessed
+/// - Directory entries cannot be read
+/// - A file cannot be read due to I/O errors
+/// - A file contains invalid UTF-8 encoding
+///
+/// # Panics
+///
+/// This function does not panic under normal operation.
+#[allow(clippy::print_stderr)]
+pub fn load_magic_directory(dir_path: &Path) -> Result<Vec<MagicRule>, ParseError> {
+    use std::fs;
+
+    // Read directory entries
+    let entries = fs::read_dir(dir_path).map_err(|e| {
+        ParseError::invalid_syntax(
+            0,
+            format!("Failed to read directory '{}': {}", dir_path.display(), e),
+        )
+    })?;
+
+    // Collect and sort entries by filename for deterministic ordering
+    let mut file_paths: Vec<std::path::PathBuf> = Vec::new();
+    for entry in entries {
+        let entry = entry.map_err(|e| {
+            ParseError::invalid_syntax(
+                0,
+                format!(
+                    "Failed to read directory entry in '{}': {}",
+                    dir_path.display(),
+                    e
+                ),
+            )
+        })?;
+
+        let path = entry.path();
+        let file_type = entry.file_type().map_err(|e| {
+            ParseError::invalid_syntax(
+                0,
+                format!("Failed to read file type for '{}': {}", path.display(), e),
+            )
+        })?;
+
+        // Only process regular files, skip directories and symlinks
+        if file_type.is_file() && !file_type.is_symlink() {
+            file_paths.push(path);
+        }
+    }
+
+    // Sort by filename for deterministic ordering
+    file_paths.sort_by_key(|path| path.file_name().map(std::ffi::OsStr::to_os_string));
+
+    // Accumulate rules from all files
+    let mut all_rules = Vec::new();
+    let mut parse_failures: Vec<(PathBuf, ParseError)> = Vec::new();
+    let file_count = file_paths.len();
+
+    for path in file_paths {
+        // Read file contents
+        let contents = match fs::read_to_string(&path) {
+            Ok(contents) => contents,
+            Err(e) => {
+                // I/O errors are critical
+                return Err(ParseError::invalid_syntax(
+                    0,
+                    format!("Failed to read file '{}': {}", path.display(), e),
+                ));
+            }
+        };
+
+        // Parse the file
+        match super::parse_text_magic_file(&contents) {
+            Ok(rules) => {
+                // Successfully parsed - merge rules
+                all_rules.extend(rules);
+            }
+            Err(e) => {
+                // Track parse failures for reporting
+                parse_failures.push((path, e));
+            }
+        }
+    }
+
+    // If all files failed to parse, return an error
+    if all_rules.is_empty() && !parse_failures.is_empty() {
+        use std::fmt::Write;
+
+        let failure_details: Vec<String> = parse_failures
+            .iter()
+            .take(3) // Limit to first 3 failures for brevity
+            .map(|(path, e)| format!("  - {}: {}", path.display(), e))
+            .collect();
+
+        let mut message = format!("All {file_count} magic file(s) in directory failed to parse");
+        if !failure_details.is_empty() {
+            message.push_str(":\n");
+            message.push_str(&failure_details.join("\n"));
+            if parse_failures.len() > 3 {
+                let _ = write!(message, "\n  ... and {} more", parse_failures.len() - 3);
+            }
+        }
+
+        return Err(ParseError::invalid_syntax(0, message));
+    }
+
+    // Log warnings for partial failures (some files parsed, some failed)
+    // Note: Using eprintln for now; consider a logging framework in the future
+    #[allow(clippy::print_stderr)]
+    for (path, e) in &parse_failures {
+        eprintln!("Warning: Failed to parse '{}': {}", path.display(), e);
+    }
+
+    Ok(all_rules)
+}
+
+/// Loads magic rules from a file or directory, automatically detecting the format.
+///
+/// This is the unified entry point for loading magic rules from the filesystem. It
+/// automatically detects whether the path points to a text magic file, a directory
+/// containing magic files, or a binary compiled magic file, and dispatches to the
+/// appropriate handler.
+///
+/// # Format Detection and Handling
+///
+/// The function uses [`detect_format()`] to determine the file type and handles each
+/// format as follows:
+///
+/// - **Text format**: Reads the file contents and parses using [`parse_text_magic_file()`]
+/// - **Directory format**: Loads all magic files from the directory using [`load_magic_directory()`]
+/// - **Binary format**: Returns an error with guidance to use the `--use-builtin` option
+///
+/// # Arguments
+///
+/// * `path` - Path to a magic file or directory. Can be absolute or relative.
+///
+/// # Returns
+///
+/// Returns `Ok(Vec<MagicRule>)` containing all successfully parsed magic rules. For
+/// directories, rules from all files are merged in alphabetical order by filename.
+///
+/// # Errors
+///
+/// This function returns a [`ParseError`] in the following cases:
+///
+/// - **File not found**: The specified path does not exist
+/// - **Unsupported format**: The file is a binary compiled magic file (`.mgc`)
+/// - **Parse errors**: The magic file contains syntax errors or invalid rules
+/// - **I/O errors**: File system errors during reading (permissions, disk errors, etc.)
+///
+/// # Examples
+///
+/// ## Loading a text magic file
+///
+/// ```no_run
+/// use libmagic_rs::parser::load_magic_file;
+/// use std::path::Path;
+///
+/// let rules = load_magic_file(Path::new("/usr/share/misc/magic"))?;
+/// println!("Loaded {} magic rules", rules.len());
+/// # Ok::<(), libmagic_rs::ParseError>(())
+/// ```
+///
+/// ## Loading a directory of magic files
+///
+/// ```no_run
+/// use libmagic_rs::parser::load_magic_file;
+/// use std::path::Path;
+///
+/// let rules = load_magic_file(Path::new("/usr/share/misc/magic.d"))?;
+/// println!("Loaded {} rules from directory", rules.len());
+/// # Ok::<(), libmagic_rs::ParseError>(())
+/// ```
+///
+/// ## Handling binary format errors
+///
+/// ```no_run
+/// use libmagic_rs::parser::load_magic_file;
+/// use std::path::Path;
+///
+/// match load_magic_file(Path::new("/usr/share/misc/magic.mgc")) {
+///     Ok(rules) => println!("Loaded {} rules", rules.len()),
+///     Err(e) => {
+///         eprintln!("Error loading magic file: {}", e);
+///         eprintln!("Hint: Use --use-builtin for binary files");
+///     }
+/// }
+/// # Ok::<(), libmagic_rs::ParseError>(())
+/// ```
+///
+/// # Security
+///
+/// This function delegates to [`parse_text_magic_file()`] or [`load_magic_directory()`]
+/// based on format detection. Security considerations are handled by those functions:
+///
+/// - Rule hierarchy depth is bounded during parsing
+/// - Invalid syntax is rejected with descriptive errors
+/// - Binary `.mgc` files are rejected (not parsed)
+///
+/// Note: File size limits and memory exhaustion protection are not currently implemented.
+/// Large magic files will be loaded entirely into memory.
+///
+/// # See Also
+///
+/// - [`detect_format()`] - Format detection logic
+/// - [`parse_text_magic_file()`] - Text file parser
+/// - [`load_magic_directory()`] - Directory loader
+pub fn load_magic_file(path: &Path) -> Result<Vec<MagicRule>, ParseError> {
+    // Detect the magic file format
+    let format = detect_format(path)?;
+
+    // Dispatch to appropriate handler based on format
+    match format {
+        MagicFileFormat::Text => {
+            // Read file contents and parse as text magic file
+            let content = std::fs::read_to_string(path)?;
+            super::parse_text_magic_file(&content)
+        }
+        MagicFileFormat::Directory => {
+            // Load all magic files from directory
+            load_magic_directory(path)
+        }
+        MagicFileFormat::Binary => {
+            // Binary compiled magic files are not supported
+            Err(ParseError::unsupported_format(
+                0,
+                "binary .mgc file",
+                "Binary compiled magic files (.mgc) are not supported for parsing.\n\
+                 Use the --use-builtin option to use the built-in magic rules instead,\n\
+                 or provide a text-based magic file or directory.",
+            ))
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // ============================================================
+    // Tests for load_magic_directory (6+ test cases)
+    // ============================================================
+
+    #[test]
+    fn test_load_directory_critical_error_io() {
+        use std::path::Path;
+
+        let non_existent = Path::new("/this/should/not/exist/anywhere/at/all");
+        let result = load_magic_directory(non_existent);
+
+        assert!(
+            result.is_err(),
+            "Should return error for non-existent directory"
+        );
+        let err = result.unwrap_err();
+        assert!(err.to_string().contains("Failed to read directory"));
+    }
+
+    #[test]
+    fn test_load_directory_non_critical_error_parse() {
+        use std::fs;
+        use tempfile::TempDir;
+
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+
+        // Create a valid file
+        let valid_path = temp_dir.path().join("valid.magic");
+        fs::write(&valid_path, "0 string \\x01\\x02 valid\n").expect("Failed to write valid file");
+
+        // Create an invalid file
+        let invalid_path = temp_dir.path().join("invalid.magic");
+        fs::write(&invalid_path, "this is invalid syntax\n").expect("Failed to write invalid file");
+
+        // Should succeed, loading only the valid file
+        let rules = load_magic_directory(temp_dir.path()).expect("Should load valid files");
+
+        assert_eq!(rules.len(), 1, "Should load only valid file");
+        assert_eq!(rules[0].message, "valid");
+    }
+
+    #[test]
+    fn test_load_directory_empty_files() {
+        use std::fs;
+        use tempfile::TempDir;
+
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+
+        // Create an empty file
+        let empty_path = temp_dir.path().join("empty.magic");
+        fs::write(&empty_path, "").expect("Failed to write empty file");
+
+        // Create a file with only comments
+        let comments_path = temp_dir.path().join("comments.magic");
+        fs::write(&comments_path, "# Just comments\n# Nothing else\n")
+            .expect("Failed to write comments file");
+
+        // Should succeed with no rules
+        let rules = load_magic_directory(temp_dir.path()).expect("Should handle empty files");
+
+        assert_eq!(rules.len(), 0, "Empty files should contribute no rules");
+    }
+
+    #[test]
+    fn test_load_directory_binary_files() {
+        use std::fs;
+        use tempfile::TempDir;
+
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+
+        // Create a binary file (invalid UTF-8)
+        let binary_path = temp_dir.path().join("binary.dat");
+        fs::write(&binary_path, [0xFF, 0xFE, 0xFF, 0xFE]).expect("Failed to write binary file");
+
+        // Create a valid text file
+        let valid_path = temp_dir.path().join("valid.magic");
+        fs::write(&valid_path, "0 string \\x01\\x02 valid\n").expect("Failed to write valid file");
+
+        // Binary file should cause a critical error (invalid UTF-8)
+        let result = load_magic_directory(temp_dir.path());
+
+        // The function should fail when encountering binary files (critical I/O error)
+        assert!(
+            result.is_err(),
+            "Binary files should cause critical error due to invalid UTF-8"
+        );
+    }
+
+    #[test]
+    fn test_load_directory_mixed_extensions() {
+        use std::fs;
+        use tempfile::TempDir;
+
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+
+        // Create files with different extensions
+        fs::write(
+            temp_dir.path().join("file.magic"),
+            "0 string \\x01\\x02 magic\n",
+        )
+        .expect("Failed to write .magic file");
+        fs::write(
+            temp_dir.path().join("file.txt"),
+            "0 string \\x03\\x04 txt\n",
+        )
+        .expect("Failed to write .txt file");
+        fs::write(temp_dir.path().join("noext"), "0 string \\x05\\x06 noext\n")
+            .expect("Failed to write no-ext file");
+
+        let rules = load_magic_directory(temp_dir.path())
+            .expect("Should load all files regardless of extension");
+
+        assert_eq!(
+            rules.len(),
+            3,
+            "Should process all files regardless of extension"
+        );
+
+        let messages: Vec<&str> = rules.iter().map(|r| r.message.as_str()).collect();
+        assert!(messages.contains(&"magic"));
+        assert!(messages.contains(&"txt"));
+        assert!(messages.contains(&"noext"));
+    }
+
+    #[test]
+    fn test_load_directory_alphabetical_ordering() {
+        use std::fs;
+        use tempfile::TempDir;
+
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+
+        // Create files in non-alphabetical order - using valid magic syntax with hex escapes
+        fs::write(
+            temp_dir.path().join("03-third"),
+            "0 string \\x07\\x08\\x09 third\n",
+        )
+        .expect("Failed to write third file");
+        fs::write(
+            temp_dir.path().join("01-first"),
+            "0 string \\x01\\x02\\x03 first\n",
+        )
+        .expect("Failed to write first file");
+        fs::write(
+            temp_dir.path().join("02-second"),
+            "0 string \\x04\\x05\\x06 second\n",
+        )
+        .expect("Failed to write second file");
+
+        let rules = load_magic_directory(temp_dir.path()).expect("Should load directory in order");
+
+        assert_eq!(rules.len(), 3);
+        // Should be sorted alphabetically by filename
+        assert_eq!(rules[0].message, "first");
+        assert_eq!(rules[1].message, "second");
+        assert_eq!(rules[2].message, "third");
+    }
+
+    // ============================================================
+    // Tests for load_magic_file (5+ test cases)
+    // ============================================================
+
+    #[test]
+    fn test_load_magic_file_text_format() {
+        use std::fs;
+        use tempfile::TempDir;
+
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+        let magic_file = temp_dir.path().join("magic.txt");
+
+        // Create text magic file with valid content
+        fs::write(&magic_file, "0 string \\x7fELF ELF executable\n")
+            .expect("Failed to write magic file");
+
+        // Load using load_magic_file
+        let rules = load_magic_file(&magic_file).expect("Failed to load text magic file");
+
+        assert_eq!(rules.len(), 1);
+        assert_eq!(rules[0].message, "ELF executable");
+    }
+
+    #[test]
+    fn test_load_magic_file_directory_format() {
+        use std::fs;
+        use tempfile::TempDir;
+
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+        let magic_dir = temp_dir.path().join("magic.d");
+        fs::create_dir(&magic_dir).expect("Failed to create magic directory");
+
+        // Create multiple files in directory
+        fs::write(
+            magic_dir.join("00_elf"),
+            "0 string \\x7fELF ELF executable\n",
+        )
+        .expect("Failed to write elf file");
+        fs::write(
+            magic_dir.join("01_zip"),
+            "0 string \\x50\\x4b\\x03\\x04 ZIP archive\n",
+        )
+        .expect("Failed to write zip file");
+
+        // Load using load_magic_file
+        let rules = load_magic_file(&magic_dir).expect("Failed to load directory");
+
+        assert_eq!(rules.len(), 2);
+        assert_eq!(rules[0].message, "ELF executable");
+        assert_eq!(rules[1].message, "ZIP archive");
+    }
+
+    #[test]
+    fn test_load_magic_file_binary_format_error() {
+        use std::fs::File;
+        use std::io::Write;
+        use tempfile::TempDir;
+
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+        let binary_file = temp_dir.path().join("magic.mgc");
+
+        // Create binary file with .mgc magic number
+        let mut file = File::create(&binary_file).expect("Failed to create binary file");
+        let magic_number: [u8; 4] = [0x1C, 0x04, 0x1E, 0xF1]; // Little-endian 0xF11E041C
+        file.write_all(&magic_number)
+            .expect("Failed to write magic number");
+
+        // Attempt to load binary file
+        let result = load_magic_file(&binary_file);
+
+        assert!(result.is_err(), "Should fail to load binary .mgc file");
+
+        let error = result.unwrap_err();
+        let error_msg = error.to_string();
+
+        // Verify error mentions unsupported format and --use-builtin
+        assert!(
+            error_msg.contains("Binary") || error_msg.contains("binary"),
+            "Error should mention binary format: {error_msg}",
+        );
+        assert!(
+            error_msg.contains("--use-builtin") || error_msg.contains("built-in"),
+            "Error should mention --use-builtin option: {error_msg}",
+        );
+    }
+
+    #[test]
+    fn test_load_magic_file_io_error() {
+        use std::path::Path;
+
+        // Try to load non-existent file
+        let non_existent = Path::new("/this/path/should/not/exist/magic.txt");
+        let result = load_magic_file(non_existent);
+
+        assert!(result.is_err(), "Should fail for non-existent file");
+    }
+
+    #[test]
+    fn test_load_magic_file_parse_error_propagation() {
+        use std::fs;
+        use tempfile::TempDir;
+
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+        let invalid_file = temp_dir.path().join("invalid.magic");
+
+        // Create file with invalid syntax (missing offset)
+        fs::write(&invalid_file, "string test invalid\n").expect("Failed to write invalid file");
+
+        // Attempt to load file with parse errors
+        let result = load_magic_file(&invalid_file);
+
+        assert!(result.is_err(), "Should fail for file with parse errors");
+
+        // Error should be a parse error (not I/O error)
+        let error = result.unwrap_err();
+        let error_msg = format!("{error:?}");
+        assert!(
+            error_msg.contains("InvalidSyntax") || error_msg.contains("syntax"),
+            "Error should be parse error: {error_msg}",
+        );
+    }
+}
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 9d0d47b4..5ecf3994 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -49,9 +49,9 @@
 //!
 //! 1. **Format Detection**: [`detect_format()`] examines the path to determine the file type
 //! 2. **Dispatch to Handler**:
-//!    - Text files → [`parse_text_magic_file()`] after reading contents
-//!    - Directories → [`load_magic_directory()`] to load and merge all files
-//!    - Binary files → Returns error suggesting `--use-builtin` option
+//!    - Text files -> [`parse_text_magic_file()`] after reading contents
+//!    - Directories -> [`load_magic_directory()`] to load and merge all files
+//!    - Binary files -> Returns error suggesting `--use-builtin` option
 //! 3. **Return Merged Rules**: All rules are returned in a single `Vec<MagicRule>`
 //!
 //! # Examples
@@ -128,7 +128,11 @@
 //! - Non-critical errors (parse failures in individual files): Logs warning to stderr and continues
 
 pub mod ast;
+mod format;
 pub mod grammar;
+mod hierarchy;
+mod loader;
+pub(crate) mod preprocessing;
 
 // Re-export AST types for convenience
 pub use ast::{Endianness, MagicRule, OffsetSpec, Operator, StrengthModifier, TypeKind, Value};
@@ -136,1051 +140,59 @@ pub use ast::{Endianness, MagicRule, OffsetSpec, Operator, StrengthModifier, Typ
 // Re-export parser functions for convenience
 pub use grammar::{parse_number, parse_offset};
 
-use crate::{
-    error::ParseError,
-    parser::grammar::{
-        has_continuation, is_comment_line, is_empty_line, is_strength_directive, parse_comment,
-        parse_magic_rule, parse_strength_directive,
-    },
-};
-use std::io::Read;
-use std::path::{Path, PathBuf};
+// Re-export format detection and loading
+pub use format::{MagicFileFormat, detect_format};
+pub use loader::{load_magic_directory, load_magic_file};
 
-/// Represents the format of a magic file or directory
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum MagicFileFormat {
-    /// Text-based magic file (human-readable)
-    Text,
-    /// Directory containing multiple magic files (Magdir pattern)
-    Directory,
-    /// Binary compiled magic file (.mgc format)
-    Binary,
-}
-
-/// Detect the format of a magic file or directory
-///
-/// This function examines the filesystem metadata and file contents to determine
-/// whether the path points to a text magic file, a directory, or a binary .mgc file.
-///
-/// # Detection Logic
-///
-/// 1. Check if path is a directory → `MagicFileFormat::Directory`
-/// 2. Read first 4 bytes and check for binary magic number `0xF11E041C` → `MagicFileFormat::Binary`
-/// 3. Otherwise → `MagicFileFormat::Text`
-///
-/// # Arguments
-///
-/// * `path` - Path to the magic file or directory to detect
-///
-/// # Errors
-///
-/// Returns `ParseError::IoError` if the path doesn't exist or cannot be read.
-///
-/// # Notes
-///
-/// This function only detects the format and returns it. It does not validate whether
-/// the format is supported by the parser. Higher-level code should check the returned
-/// format and decide how to handle unsupported formats (e.g., binary .mgc files).
-///
-/// # Examples
-///
-/// ```rust,no_run
-/// use libmagic_rs::parser::detect_format;
-/// use std::path::Path;
-///
-/// let format = detect_format(Path::new("/usr/share/file/magic"))?;
-/// # Ok::<(), libmagic_rs::ParseError>(())
-/// ```
-pub fn detect_format(path: &Path) -> Result<MagicFileFormat, ParseError> {
-    // Check if path exists and is accessible
-    let metadata = std::fs::metadata(path)?;
-
-    // Check if it's a directory
-    if metadata.is_dir() {
-        return Ok(MagicFileFormat::Directory);
-    }
-
-    // Read first 4 bytes to check for binary magic number
-    let mut file = std::fs::File::open(path)?;
-
-    let mut magic_bytes = [0u8; 4];
-
-    match file.read_exact(&mut magic_bytes) {
-        Ok(()) => {
-            // Check for binary magic number 0xF11E041C in little-endian format
-            let magic_number = u32::from_le_bytes(magic_bytes);
-            if magic_number == 0xF11E_041C {
-                return Ok(MagicFileFormat::Binary);
-            }
-            // Not a binary magic file, assume text
-            Ok(MagicFileFormat::Text)
-        }
-        Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
-            // File is too small to be a binary magic file, assume text
-            Ok(MagicFileFormat::Text)
-        }
-        Err(e) => Err(ParseError::invalid_syntax(
-            0,
-            format!("Failed to read magic file: {e}"),
-        )),
-    }
-}
-
-/// Internal structure to track line metadata during preprocessing.
-///
-/// Stores the processed content, original line number, and flags for comment
-/// and strength directive lines in the input magic file.
-#[derive(Debug)]
-struct LineInfo {
-    content: String,
-    line_number: usize,
-    is_comment: bool,
-    /// Optional strength modifier parsed from `!:strength` directive
-    strength_modifier: Option<StrengthModifier>,
-}
-
-impl LineInfo {
-    fn new(content: String, line_number: usize, is_comment: bool) -> Self {
-        Self {
-            content,
-            line_number,
-            is_comment,
-            strength_modifier: None,
-        }
-    }
-
-    fn with_strength(
-        content: String,
-        line_number: usize,
-        strength_modifier: StrengthModifier,
-    ) -> Self {
-        Self {
-            content,
-            line_number,
-            is_comment: false,
-            strength_modifier: Some(strength_modifier),
-        }
-    }
-}
-
-/// Preprocesses raw magic file input by handling comments, empty lines, and continuations.
-///
-/// This function performs the following transformations:
-/// - Removes empty lines from the input
-/// - Handles comment lines (lines starting with '#')
-/// - Processes line continuations (lines ending with '\')
-/// - Concatenates continued lines into single entries
-/// - Preserves original line numbers for error reporting (continued lines
-///   are assigned the line number of the first line in the continuation sequence)
-///
-/// # Arguments
-///
-/// * `input` - The raw magic file content as a string
-///
-/// # Returns
-///
-/// `Result<Vec<LineInfo>, ParseError>` - A vector of processed lines or a parse error
-///
-/// # Errors
-///
-/// Returns an error if:
-/// - Comment lines cannot be parsed
-/// - Input ends with an unterminated line continuation
-/// - The input is malformed
-///
-/// # Examples
-///
-/// ```ignore
-/// let input = r#"0 string 0 Test
-/// >4 byte 1 Child"#;
-/// let lines = preprocess_lines(input)?;
-/// assert_eq!(lines.len(), 2);
-/// # Ok::<(), Box<dyn std::error::Error>>(())
-/// ```
-fn preprocess_lines(input: &str) -> Result<Vec<LineInfo>, ParseError> {
-    let mut lines_info: Vec<LineInfo> = Vec::new();
-    let mut line_buf = String::new();
-    let mut start_line_number: Option<usize> = None;
-    for (i, mut line) in input.lines().enumerate() {
-        if is_empty_line(line) {
-            continue;
-        }
-        if is_comment_line(line) {
-            // Bug 1 fix: If we have an ongoing continuation, discard it before processing comment
-            if !line_buf.is_empty() {
-                line_buf.clear();
-                start_line_number = None;
-            }
-            let parsed_comment = parse_comment(line)
-                .map_err(|_| ParseError::invalid_syntax(i + 1, "Unable to parse comment"))?;
-            line = parsed_comment.1.as_str();
-            lines_info.push(LineInfo::new(line.trim().to_string(), i + 1, true));
-            continue;
-        }
-        // Handle strength directives (!:strength ...)
-        if is_strength_directive(line) {
-            // If we have an ongoing continuation, discard it before processing directive
-            if !line_buf.is_empty() {
-                line_buf.clear();
-                start_line_number = None;
-            }
-            let strength_modifier = parse_strength_directive(line)
-                .map_err(|e| {
-                    ParseError::invalid_syntax(
-                        i + 1,
-                        format!("Failed to parse strength directive: {e}"),
-                    )
-                })?
-                .1;
-            lines_info.push(LineInfo::with_strength(
-                line.trim().to_string(),
-                i + 1,
-                strength_modifier,
-            ));
-            continue;
-        }
-        // Track the starting line number when we begin accumulating a rule
-        if start_line_number.is_none() {
-            start_line_number = Some(i + 1);
-        }
-        line_buf.push_str(line.trim());
-        if has_continuation(line) {
-            if let Some(stripped) = line_buf.strip_suffix('\\') {
-                line_buf = stripped.to_string();
-            }
-            continue;
-        }
-        // Bug 2 fix: Use the stored starting line number instead of calculating from cont_ctr
-        let rule_line_number = start_line_number.unwrap_or(i + 1);
-        lines_info.push(LineInfo::new(
-            std::mem::take(&mut line_buf),
-            rule_line_number,
-            false,
-        ));
-        start_line_number = None;
-    }
-
-    // Handle unterminated continuation at end of input
-    if !line_buf.is_empty() {
-        let last_line = input.lines().count();
-        return Err(ParseError::invalid_syntax(
-            last_line,
-            "Unterminated line continuation",
-        ));
-    }
-
-    Ok(lines_info)
-}
-
-/// Parses a single magic rule line into a `MagicRule` AST node.
-///
-/// This function takes a preprocessed `LineInfo` and converts it into a `MagicRule`
-/// by delegating to the grammar parser. It handles error mapping to include
-/// context about which line failed.
-///
-/// # Arguments
-///
-/// * `line` - The `LineInfo` struct containing the rule text and metadata
-///
-/// # Returns
-///
-/// `Result<MagicRule, ParseError>` - The parsed rule or a parse error
-///
-/// # Errors
-///
-/// Returns an error if:
-/// - The line is marked as a comment
-/// - The rule syntax is invalid
-/// - Required fields are missing
-/// - Value parsing fails
-///
-/// # Examples
-///
-/// ```ignore
-/// let line = LineInfo::new("0 string 0 Test".to_string(), 1, false);
-/// let rule = parse_magic_rule_line(&line)?;
-/// assert_eq!(rule.level, 0);
-/// # Ok::<(), Box<dyn std::error::Error>>(())
-/// ```
-fn parse_magic_rule_line(line: &LineInfo) -> Result<MagicRule, ParseError> {
-    if line.is_comment {
-        return Err(ParseError::invalid_syntax(
-            line.line_number,
-            "Comment lines cannot be parsed as rules",
-        ));
-    }
-    parse_magic_rule(&line.content)
-        .map_err(|e| {
-            ParseError::invalid_syntax(line.line_number, format!("Failed to parse rule: {e}"))
-        })
-        .map(|(_, rule)| rule)
-}
-
-/// Builds a hierarchical structure from a flat list of parsed magic rules.
-///
-/// This function establishes parent-child relationships based on indentation levels.
-/// Rules at deeper indentation levels become children of the most recent rule at a
-/// shallower level. This implements a stack-based algorithm for hierarchy construction.
-///
-/// # Arguments
-///
-/// * `lines` - A vector of preprocessed `LineInfo` structs
-///
-/// # Returns
-///
-/// `Result<Vec<MagicRule>, ParseError>` - Root-level rules with children attached
-///
-/// # Behavior
-///
-/// - Rules with `level=0` are root rules
-/// - Rules with `level=1` become children of the most recent `level=0` rule
-/// - Rules with `level=2` become children of the most recent `level=1` rule
-/// - When indentation decreases, the stack is unwound and completed rules are attached
-/// - Orphaned child rules (starting with '>' but with no preceding parent) are
-///   added to the root list with their hierarchy level preserved
-///
-/// # Errors
-///
-/// Returns an error if:
-/// - Any line contains invalid magic rule syntax
-/// - Rule parsing fails (propagated from `parse_magic_rule_line`)
-///
-/// # Examples
-///
-/// ```ignore
-/// let lines = vec![
-///     LineInfo::new("0 string 0 ELF".to_string(), 1, false),
-///     LineInfo::new(">4 byte 1 32-bit".to_string(), 2, false),
-/// ];
-/// let rules = build_rule_hierarchy(lines)?;
-/// assert_eq!(rules[0].children.len(), 1);
-/// # Ok::<(), Box<dyn std::error::Error>>(())
-/// ```
-fn build_rule_hierarchy(lines: Vec<LineInfo>) -> Result<Vec<MagicRule>, ParseError> {
-    /// Helper to pop a rule from the stack and attach it to its parent or roots
-    fn pop_and_attach(stack: &mut Vec<MagicRule>, roots: &mut Vec<MagicRule>) {
-        if let Some(completed) = stack.pop() {
-            if let Some(parent) = stack.last_mut() {
-                parent.children.push(completed);
-            } else {
-                roots.push(completed);
-            }
-        }
-    }
-
-    let mut stack: Vec<MagicRule> = Vec::new();
-    let mut roots: Vec<MagicRule> = Vec::new();
-    let mut pending_strength: Option<StrengthModifier> = None;
-
-    for line in lines {
-        if line.is_comment {
-            continue;
-        }
-
-        // Handle strength directive: store modifier for next rule
-        if line.strength_modifier.is_some() {
-            pending_strength = line.strength_modifier;
-            continue;
-        }
-
-        let mut rule = parse_magic_rule_line(&line)?;
-
-        // Apply pending strength modifier to this rule
-        if pending_strength.is_some() {
-            rule.strength_modifier = pending_strength.take();
-        }
-
-        // Unwind stack until we find a parent with lower level
-        while stack.last().is_some_and(|top| top.level >= rule.level) {
-            pop_and_attach(&mut stack, &mut roots);
-        }
-
-        stack.push(rule);
-    }
-
-    // Unwind remaining stack
-    while !stack.is_empty() {
-        pop_and_attach(&mut stack, &mut roots);
-    }
-
-    Ok(roots)
-}
-
-/// Parses a complete magic file from raw text input.
-///
-/// This is the main public-facing parser function that orchestrates the complete
-/// parsing pipeline: preprocessing, parsing individual rules, and building the
-/// hierarchical structure.
-///
-/// # Arguments
-///
-/// * `input` - The raw magic file content as a string
-///
-/// # Returns
-///
-/// `Result<Vec<MagicRule>, ParseError>` - A vector of root rules with nested children
-///
-/// # Errors
-///
-/// Returns an error if any stage of parsing fails:
-/// - Preprocessing errors
-/// - Rule parsing errors
-/// - Hierarchy building errors
-///
-/// # Example
-///
-/// ```ignore
-/// use libmagic_rs::parser::parse_text_magic_file;
-///
-/// let magic = r#"0 string \x7fELF ELF file
-/// >4 byte 1 32-bit
-/// >4 byte 2 64-bit"#;
-///
-/// let rules = parse_text_magic_file(magic)?;
-/// assert_eq!(rules.len(), 1);
-/// assert_eq!(rules[0].message, "ELF file");
-/// # Ok::<(), Box<dyn std::error::Error>>(())
-/// ```
-pub fn parse_text_magic_file(input: &str) -> Result<Vec<MagicRule>, ParseError> {
-    let lines = preprocess_lines(input)?;
-    build_rule_hierarchy(lines)
-}
-
-/// Loads and parses all magic files from a directory, merging them into a single rule set.
-///
-/// This function reads all regular files in the specified directory, parses each as a magic file,
-/// and combines the resulting rules into a single `Vec<MagicRule>`. Files are processed in
-/// alphabetical order by filename to ensure deterministic results.
-///
-/// # Error Handling Strategy
-///
-/// This function distinguishes between critical and non-critical errors:
-///
-/// - **Critical errors** (I/O failures, directory access issues, encoding problems):
-///   These cause immediate failure and return a `ParseError`. The function stops processing
-///   and propagates the error to the caller.
-///
-/// - **Non-critical errors** (individual file parse failures):
-///   These are logged to stderr with a warning message and the file is skipped. Processing
-///   continues with remaining files.
-///
-/// # Behavior
-///
-/// - Subdirectories are skipped (not recursively processed)
-/// - Symbolic links are skipped
-/// - Empty directories return an empty rules vector
-/// - Files are processed in alphabetical order by filename
-/// - All successfully parsed rules are merged in order
-///
-/// # Examples
-///
-/// Loading a directory of magic files:
-///
-/// ```rust,no_run
-/// use libmagic_rs::parser::load_magic_directory;
-/// use std::path::Path;
-///
-/// let rules = load_magic_directory(Path::new("/usr/share/file/magic.d"))?;
-/// println!("Loaded {} rules from directory", rules.len());
-/// # Ok::<(), libmagic_rs::ParseError>(())
-/// ```
-///
-/// Creating a Magdir-style directory structure:
-///
-/// ```rust,no_run
-/// use libmagic_rs::parser::load_magic_directory;
-/// use std::path::Path;
-///
-/// // Directory structure:
-/// // magic.d/
-/// //   ├── 01-elf
-/// //   ├── 02-archive
-/// //   └── 03-text
-///
-/// let rules = load_magic_directory(Path::new("./magic.d"))?;
-/// // Rules from all three files are merged in alphabetical order
-/// # Ok::<(), libmagic_rs::ParseError>(())
-/// ```
-///
-/// # Errors
-///
-/// Returns `ParseError` if:
-/// - The directory does not exist or cannot be accessed
-/// - Directory entries cannot be read
-/// - A file cannot be read due to I/O errors
-/// - A file contains invalid UTF-8 encoding
-///
-/// # Panics
-///
-/// This function does not panic under normal operation.
-#[allow(clippy::print_stderr)]
-pub fn load_magic_directory(dir_path: &Path) -> Result<Vec<MagicRule>, ParseError> {
-    use std::fs;
-
-    // Read directory entries
-    let entries = fs::read_dir(dir_path).map_err(|e| {
-        ParseError::invalid_syntax(
-            0,
-            format!("Failed to read directory '{}': {}", dir_path.display(), e),
-        )
-    })?;
-
-    // Collect and sort entries by filename for deterministic ordering
-    let mut file_paths: Vec<std::path::PathBuf> = Vec::new();
-    for entry in entries {
-        let entry = entry.map_err(|e| {
-            ParseError::invalid_syntax(
-                0,
-                format!(
-                    "Failed to read directory entry in '{}': {}",
-                    dir_path.display(),
-                    e
-                ),
-            )
-        })?;
-
-        let path = entry.path();
-        let file_type = entry.file_type().map_err(|e| {
-            ParseError::invalid_syntax(
-                0,
-                format!("Failed to read file type for '{}': {}", path.display(), e),
-            )
-        })?;
-
-        // Only process regular files, skip directories and symlinks
-        if file_type.is_file() && !file_type.is_symlink() {
-            file_paths.push(path);
-        }
-    }
-
-    // Sort by filename for deterministic ordering
-    file_paths.sort_by_key(|path| path.file_name().map(std::ffi::OsStr::to_os_string));
-
-    // Accumulate rules from all files
-    let mut all_rules = Vec::new();
-    let mut parse_failures: Vec<(PathBuf, ParseError)> = Vec::new();
-    let file_count = file_paths.len();
-
-    for path in file_paths {
-        // Read file contents
-        let contents = match fs::read_to_string(&path) {
-            Ok(contents) => contents,
-            Err(e) => {
-                // I/O errors are critical
-                return Err(ParseError::invalid_syntax(
-                    0,
-                    format!("Failed to read file '{}': {}", path.display(), e),
-                ));
-            }
-        };
-
-        // Parse the file
-        match parse_text_magic_file(&contents) {
-            Ok(rules) => {
-                // Successfully parsed - merge rules
-                all_rules.extend(rules);
-            }
-            Err(e) => {
-                // Track parse failures for reporting
-                parse_failures.push((path, e));
-            }
-        }
-    }
-
-    // If all files failed to parse, return an error
-    if all_rules.is_empty() && !parse_failures.is_empty() {
-        use std::fmt::Write;
-
-        let failure_details: Vec<String> = parse_failures
-            .iter()
-            .take(3) // Limit to first 3 failures for brevity
-            .map(|(path, e)| format!("  - {}: {}", path.display(), e))
-            .collect();
-
-        let mut message = format!("All {file_count} magic file(s) in directory failed to parse");
-        if !failure_details.is_empty() {
-            message.push_str(":\n");
-            message.push_str(&failure_details.join("\n"));
-            if parse_failures.len() > 3 {
-                let _ = write!(message, "\n  ... and {} more", parse_failures.len() - 3);
-            }
-        }
-
-        return Err(ParseError::invalid_syntax(0, message));
-    }
-
-    // Log warnings for partial failures (some files parsed, some failed)
-    // Note: Using eprintln for now; consider a logging framework in the future
-    #[allow(clippy::print_stderr)]
-    for (path, e) in &parse_failures {
-        eprintln!("Warning: Failed to parse '{}': {}", path.display(), e);
-    }
-
-    Ok(all_rules)
-}
-
-/// Loads magic rules from a file or directory, automatically detecting the format.
-///
-/// This is the unified entry point for loading magic rules from the filesystem. It
-/// automatically detects whether the path points to a text magic file, a directory
-/// containing magic files, or a binary compiled magic file, and dispatches to the
-/// appropriate handler.
-///
-/// # Format Detection and Handling
-///
-/// The function uses [`detect_format()`] to determine the file type and handles each
-/// format as follows:
-///
-/// - **Text format**: Reads the file contents and parses using [`parse_text_magic_file()`]
-/// - **Directory format**: Loads all magic files from the directory using [`load_magic_directory()`]
-/// - **Binary format**: Returns an error with guidance to use the `--use-builtin` option
-///
-/// # Arguments
-///
-/// * `path` - Path to a magic file or directory. Can be absolute or relative.
-///
-/// # Returns
-///
-/// Returns `Ok(Vec<MagicRule>)` containing all successfully parsed magic rules. For
-/// directories, rules from all files are merged in alphabetical order by filename.
-///
-/// # Errors
-///
-/// This function returns a [`ParseError`] in the following cases:
-///
-/// - **File not found**: The specified path does not exist
-/// - **Unsupported format**: The file is a binary compiled magic file (`.mgc`)
-/// - **Parse errors**: The magic file contains syntax errors or invalid rules
-/// - **I/O errors**: File system errors during reading (permissions, disk errors, etc.)
-///
-/// # Examples
-///
-/// ## Loading a text magic file
-///
-/// ```no_run
-/// use libmagic_rs::parser::load_magic_file;
-/// use std::path::Path;
-///
-/// let rules = load_magic_file(Path::new("/usr/share/misc/magic"))?;
-/// println!("Loaded {} magic rules", rules.len());
-/// # Ok::<(), libmagic_rs::ParseError>(())
-/// ```
-///
-/// ## Loading a directory of magic files
-///
-/// ```no_run
-/// use libmagic_rs::parser::load_magic_file;
-/// use std::path::Path;
-///
-/// let rules = load_magic_file(Path::new("/usr/share/misc/magic.d"))?;
-/// println!("Loaded {} rules from directory", rules.len());
-/// # Ok::<(), libmagic_rs::ParseError>(())
-/// ```
-///
-/// ## Handling binary format errors
-///
-/// ```no_run
-/// use libmagic_rs::parser::load_magic_file;
-/// use std::path::Path;
-///
-/// match load_magic_file(Path::new("/usr/share/misc/magic.mgc")) {
-///     Ok(rules) => println!("Loaded {} rules", rules.len()),
-///     Err(e) => {
-///         eprintln!("Error loading magic file: {}", e);
-///         eprintln!("Hint: Use --use-builtin for binary files");
-///     }
-/// }
-/// # Ok::<(), libmagic_rs::ParseError>(())
-/// ```
-///
-/// # Security
-///
-/// This function delegates to [`parse_text_magic_file()`] or [`load_magic_directory()`]
-/// based on format detection. Security considerations are handled by those functions:
-///
-/// - Rule hierarchy depth is bounded during parsing
-/// - Invalid syntax is rejected with descriptive errors
-/// - Binary `.mgc` files are rejected (not parsed)
-///
-/// Note: File size limits and memory exhaustion protection are not currently implemented.
-/// Large magic files will be loaded entirely into memory.
-///
-/// # See Also
-///
-/// - [`detect_format()`] - Format detection logic
-/// - [`parse_text_magic_file()`] - Text file parser
-/// - [`load_magic_directory()`] - Directory loader
-pub fn load_magic_file(path: &Path) -> Result<Vec<MagicRule>, ParseError> {
-    // Detect the magic file format
-    let format = detect_format(path)?;
-
-    // Dispatch to appropriate handler based on format
-    match format {
-        MagicFileFormat::Text => {
-            // Read file contents and parse as text magic file
-            let content = std::fs::read_to_string(path)?;
-            parse_text_magic_file(&content)
-        }
-        MagicFileFormat::Directory => {
-            // Load all magic files from directory
-            load_magic_directory(path)
-        }
-        MagicFileFormat::Binary => {
-            // Binary compiled magic files are not supported
-            Err(ParseError::unsupported_format(
-                0,
-                "binary .mgc file",
-                "Binary compiled magic files (.mgc) are not supported for parsing.\n\
-                 Use the --use-builtin option to use the built-in magic rules instead,\n\
-                 or provide a text-based magic file or directory.",
-            ))
-        }
-    }
-}
-
-#[cfg(test)]
-mod unit_tests {
-    use super::*;
-
-    fn li(line_number: usize, content: &str) -> LineInfo {
-        LineInfo {
-            content: content.to_string(),
-            line_number,
-            is_comment: false,
-            strength_modifier: None,
-        }
-    }
-
-    fn li_comment(line_number: usize, content: &str) -> LineInfo {
-        LineInfo {
-            content: content.to_string(),
-            line_number,
-            is_comment: true,
-            strength_modifier: None,
-        }
-    }
-
-    // ============================================================
-    // Tests for parse_magic_rule_line (10+ test cases)
-    // ============================================================
-
-    #[test]
-    fn test_parse_magic_rule_line_simple_string() {
-        let line = li(1, "0 string \\x7fELF ELF executable");
-        let rule = parse_magic_rule_line(&line).unwrap();
-        assert_eq!(rule.level, 0);
-        assert_eq!(rule.message, "ELF executable");
-    }
-
-    #[test]
-    fn test_parse_magic_rule_line_byte_type() {
-        let line = li(1, "0 byte 1 ELF");
-        let rule = parse_magic_rule_line(&line).unwrap();
-        assert_eq!(rule.level, 0);
-        assert!(matches!(rule.typ, TypeKind::Byte));
-    }
-
-    #[test]
-    fn test_parse_magic_rule_line_with_child_indentation() {
-        let line = li(2, ">4 byte 1 32-bit");
-        let rule = parse_magic_rule_line(&line).unwrap();
-        assert_eq!(rule.level, 1);
-    }
-
-    #[test]
-    fn test_parse_magic_rule_line_deep_indentation() {
-        let line = li(3, ">>>8 long = 0x12345678 Complex match");
-        let rule = parse_magic_rule_line(&line).unwrap();
-        assert_eq!(rule.level, 3);
-    }
-
-    #[test]
-    fn test_parse_magic_rule_line_not_equal_operator() {
-        let line = li(1, "0 byte != 0 Non-zero");
-        let rule = parse_magic_rule_line(&line).unwrap();
-        assert_eq!(rule.op, Operator::NotEqual);
-    }
-
-    #[test]
-    fn test_parse_magic_rule_line_greater_operator() {
-        let line = li(1, "0 long = 1000 Number");
-        let rule = parse_magic_rule_line(&line).unwrap();
-        assert_eq!(rule.op, Operator::Equal);
-    }
-
-    #[test]
-    fn test_parse_magic_rule_line_less_operator() {
-        let line = li(1, "0 long != 256 Not equal");
-        let rule = parse_magic_rule_line(&line).unwrap();
-        assert_eq!(rule.op, Operator::NotEqual);
-    }
-
-    #[test]
-    fn test_parse_magic_rule_line_bitwise_and_operator() {
-        let line = li(1, "0 byte & 0xFF Bitmask");
-        let rule = parse_magic_rule_line(&line).unwrap();
-        assert_eq!(rule.op, Operator::BitwiseAnd);
-    }
-
-    #[test]
-    fn test_parse_magic_rule_line_comment_line_error() {
-        let line = li_comment(1, "This is a comment");
-        let result = parse_magic_rule_line(&line);
-        assert!(result.is_err());
-    }
-
-    #[test]
-    fn test_parse_magic_rule_line_hex_offset() {
-        let line = li(1, "0x100 byte 1 PDF document");
-        let rule = parse_magic_rule_line(&line).unwrap();
-        match rule.offset {
-            OffsetSpec::Absolute(offset) => assert_eq!(offset, 0x100),
-            _ => panic!("Expected absolute offset"),
-        }
-    }
-
-    #[test]
-    fn test_parse_magic_rule_line_string_with_spaces() {
-        let line = li(1, "0 byte 1 Long message with multiple words");
-        let rule = parse_magic_rule_line(&line).unwrap();
-        assert_eq!(rule.message, "Long message with multiple words");
-    }
-
-    #[test]
-    fn test_parse_magic_rule_line_short_type() {
-        let line = li(1, "0 short 0x4d5a MS-DOS executable");
-        let rule = parse_magic_rule_line(&line).unwrap();
-        assert!(matches!(rule.typ, TypeKind::Short { .. }));
-    }
-
-    // ============================================================
-    // Tests for preprocess_lines (10+ test cases)
-    // ============================================================
-
-    #[test]
-    fn test_preprocess_lines_single_rule() {
-        let input = "0 string 0 Test";
-        let lines = preprocess_lines(input).unwrap();
-        assert_eq!(lines.len(), 1);
-        assert_eq!(lines[0].content, "0 string 0 Test");
-        assert!(!lines[0].is_comment);
-    }
-
-    #[test]
-    fn test_preprocess_lines_multiple_rules() {
-        let input = "0 string 0 Test\n0 byte 1 Byte";
-        let lines = preprocess_lines(input).unwrap();
-        assert_eq!(lines.len(), 2);
-        assert_eq!(lines[0].content, "0 string 0 Test");
-        assert_eq!(lines[1].content, "0 byte 1 Byte");
-    }
-
-    #[test]
-    fn test_preprocess_lines_with_comments() {
-        let input = "# Comment\n0 string 0 Test";
-        let lines = preprocess_lines(input).unwrap();
-        assert_eq!(lines.len(), 2);
-        assert!(lines[0].is_comment);
-        assert!(!lines[1].is_comment);
-    }
-
-    #[test]
-    fn test_preprocess_lines_empty_lines() {
-        let input = "0 string 0 Test\n\n0 byte 1 Byte";
-        let lines = preprocess_lines(input).unwrap();
-        assert_eq!(lines.len(), 2);
-    }
-
-    #[test]
-    fn test_preprocess_lines_leading_empty_lines() {
-        let input = "\n\n0 string 0 Test";
-        let lines = preprocess_lines(input).unwrap();
-        assert_eq!(lines.len(), 1);
-        assert_eq!(lines[0].content, "0 string 0 Test");
-    }
-
-    #[test]
-    fn test_preprocess_lines_trailing_empty_lines() {
-        let input = "0 string 0 Test\n\n";
-        let lines = preprocess_lines(input).unwrap();
-        assert_eq!(lines.len(), 1);
-    }
-
-    #[test]
-    fn test_preprocess_lines_line_continuation() {
-        let input = "0 string 0 Long message \\\ncontinued here";
-        let lines = preprocess_lines(input).unwrap();
-        assert_eq!(lines.len(), 1);
-        assert_eq!(lines[0].content, "0 string 0 Long message continued here");
-    }
-
-    #[test]
-    fn test_preprocess_lines_multiple_continuations() {
-        let input = "0 string 0 Multi \\\nline \\\ncontinuation";
-        let lines = preprocess_lines(input).unwrap();
-        assert_eq!(lines.len(), 1);
-        assert_eq!(lines[0].content, "0 string 0 Multi line continuation");
-    }
-
-    #[test]
-    fn test_preprocess_lines_mixed_comments_and_rules() {
-        let input = "# Header\n0 string 0 Test\n# Another comment\n>4 byte 1 Child";
-        let lines = preprocess_lines(input).unwrap();
-        assert_eq!(lines.len(), 4);
-        assert!(lines[0].is_comment);
-        assert!(!lines[1].is_comment);
-        assert!(lines[2].is_comment);
-        assert!(!lines[3].is_comment);
-    }
-
-    #[test]
-    fn test_preprocess_lines_preserves_line_numbers() {
-        let input = "0 string 0 Test\n>4 byte 1 Child";
-        let lines = preprocess_lines(input).unwrap();
-        assert_eq!(lines[0].line_number, 1);
-        assert_eq!(lines[1].line_number, 2);
-    }
-
-    #[test]
-    fn test_preprocess_lines_empty_input() {
-        let input = "";
-        let lines = preprocess_lines(input).unwrap();
-        assert_eq!(lines.len(), 0);
-    }
-
-    #[test]
-    fn test_preprocess_lines_only_comments() {
-        let input = "# Comment 1\n# Comment 2\n# Comment 3";
-        let lines = preprocess_lines(input).unwrap();
-        assert_eq!(lines.len(), 3);
-        assert!(lines.iter().all(|l| l.is_comment));
-    }
-
-    // ============================================================
-    // Tests for build_rule_hierarchy (10+ test cases)
-    // ============================================================
-
-    #[test]
-    fn test_build_rule_hierarchy_single_root() {
-        let lines = vec![li(1, "0 string \\x7fELF ELF executable")];
-        let roots = build_rule_hierarchy(lines).unwrap();
-        assert_eq!(roots.len(), 1);
-        assert_eq!(roots[0].level, 0);
-    }
-
-    #[test]
-    fn test_build_rule_hierarchy_root_with_one_child() {
-        let lines = vec![
-            li(1, "0 string \\x7fELF ELF executable"),
-            li(2, ">4 byte 1 32-bit"),
-        ];
-        let roots = build_rule_hierarchy(lines).unwrap();
-        assert_eq!(roots.len(), 1);
-        assert_eq!(roots[0].children.len(), 1);
-    }
-
-    #[test]
-    fn test_build_rule_hierarchy_root_with_multiple_children() {
-        let lines = vec![
-            li(1, "0 string \\x7fELF ELF executable"),
-            li(2, ">4 byte 1 32-bit"),
-            li(3, ">4 byte 2 64-bit"),
-        ];
-        let roots = build_rule_hierarchy(lines).unwrap();
-        assert_eq!(roots.len(), 1);
-        assert_eq!(roots[0].children.len(), 2);
-    }
-
-    #[test]
-    fn test_build_rule_hierarchy_nested_three_levels() {
-        let lines = vec![
-            li(1, "0 string \\x7fELF ELF executable"),
-            li(2, ">4 byte 1 class"),
-            li(3, ">>5 byte 1 subtype"),
-        ];
-        let roots = build_rule_hierarchy(lines).unwrap();
-        assert_eq!(roots[0].children.len(), 1);
-        assert_eq!(roots[0].children[0].children.len(), 1);
-        assert_eq!(roots[0].children[0].children[0].level, 2);
-    }
-
-    #[test]
-    fn test_build_rule_hierarchy_multiple_roots() {
-        let lines = vec![
-            li(1, r#"0 string "ELF" "ELF executable""#),
-            li(2, r#"0 string "%PDF" "PDF document""#),
-        ];
-        let roots = build_rule_hierarchy(lines).unwrap();
-        assert_eq!(roots.len(), 2);
-    }
-
-    #[test]
-    fn test_build_rule_hierarchy_sibling_rules() {
-        let lines = vec![
-            li(1, "0 byte 1 Root"),
-            li(2, ">4 byte 1 Child1"),
-            li(3, ">4 byte 2 Child2"),
-            li(4, "0 byte 2 Root2"),
-        ];
-        let roots = build_rule_hierarchy(lines).unwrap();
-        assert_eq!(roots.len(), 2);
-        assert_eq!(roots[0].children.len(), 2);
-    }
-
-    #[test]
-    fn test_build_rule_hierarchy_deep_nesting() {
-        let lines = vec![
-            li(1, "0 byte 1 L0"),
-            li(2, ">4 byte 1 L1"),
-            li(3, ">>5 byte 2 L2"),
-            li(4, ">>>6 byte 3 L3"),
-            li(5, ">>>>7 byte 4 L4"),
-        ];
-        let roots = build_rule_hierarchy(lines).unwrap();
-        assert_eq!(roots.len(), 1);
-        assert_eq!(
-            roots[0].children[0].children[0].children[0].children.len(),
-            1
-        );
-    }
+// Internal re-exports for sibling modules and tests
+pub(crate) use hierarchy::build_rule_hierarchy;
+pub(crate) use preprocessing::preprocess_lines;
 
-    #[test]
-    fn test_build_rule_hierarchy_return_to_root_level() {
-        let lines = vec![
-            li(1, "0 byte 1 Root1"),
-            li(2, ">4 byte 1 Child"),
-            li(3, "0 byte 2 Root2"),
-        ];
-        let roots = build_rule_hierarchy(lines).unwrap();
-        assert_eq!(roots.len(), 2);
-        assert_eq!(roots[0].children.len(), 1);
-        assert_eq!(roots[1].children.len(), 0);
-    }
+use crate::error::ParseError;
 
-    #[test]
-    fn test_build_rule_hierarchy_orphaned_child() {
-        let lines = vec![li(1, ">4 byte 1 Orphaned child")];
-        let roots = build_rule_hierarchy(lines).unwrap();
-        assert_eq!(roots.len(), 1);
-        assert_eq!(roots[0].level, 1);
-    }
+/// Parses a complete magic file from raw text input.
+///
+/// This is the main public-facing parser function that orchestrates the complete
+/// parsing pipeline: preprocessing, parsing individual rules, and building the
+/// hierarchical structure.
+///
+/// # Arguments
+///
+/// * `input` - The raw magic file content as a string
+///
+/// # Returns
+///
+/// `Result<Vec<MagicRule>, ParseError>` - A vector of root rules with nested children
+///
+/// # Errors
+///
+/// Returns an error if any stage of parsing fails:
+/// - Preprocessing errors
+/// - Rule parsing errors
+/// - Hierarchy building errors
+///
+/// # Example
+///
+/// ```ignore
+/// use libmagic_rs::parser::parse_text_magic_file;
+///
+/// let magic = r#"0 string \x7fELF ELF file
+/// >4 byte 1 32-bit
+/// >4 byte 2 64-bit"#;
+///
+/// let rules = parse_text_magic_file(magic)?;
+/// assert_eq!(rules.len(), 1);
+/// assert_eq!(rules[0].message, "ELF file");
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+pub fn parse_text_magic_file(input: &str) -> Result<Vec<MagicRule>, ParseError> {
+    let lines = preprocess_lines(input)?;
+    build_rule_hierarchy(lines)
+}
 
-    #[test]
-    fn test_build_rule_hierarchy_complex_structure() {
-        let lines = vec![
-            li(1, "0 byte 1 Root1"),
-            li(2, ">4 byte 1 C1"),
-            li(3, ">4 byte 2 C2"),
-            li(4, ">>6 byte 3 GC1"),
-            li(5, "0 byte 2 Root2"),
-            li(6, ">4 byte 4 C3"),
-        ];
-        let roots = build_rule_hierarchy(lines).unwrap();
-        assert_eq!(roots.len(), 2);
-        assert_eq!(roots[0].children.len(), 2);
-        assert_eq!(roots[0].children[1].children.len(), 1);
-        assert_eq!(roots[1].children.len(), 1);
-    }
+#[cfg(test)]
+mod unit_tests {
+    use super::*;
 
     // ============================================================
     // Tests for parse_text_magic_file (10+ test cases)
@@ -1427,7 +439,7 @@ continued";
     }
 
     // ============================================================
-    // Overflow protection tests (from pr-test-analyzer)
+    // Overflow protection tests
     // ============================================================
 
     #[test]
@@ -1463,63 +475,9 @@ continued";
     }
 
     // ============================================================
-    // Continuation edge case tests (from pr-test-analyzer)
-    // ============================================================
-
-    #[test]
-    fn test_continuation_at_eof() {
-        // Continuation on last line with no following line - should error
-        let input = "0 string 0 Test \\";
-        let result = preprocess_lines(input);
-        assert!(
-            result.is_err(),
-            "Should error on unterminated continuation at EOF"
-        );
-        let err = result.unwrap_err();
-        assert!(
-            format!("{err:?}").contains("Unterminated"),
-            "Error should mention unterminated continuation"
-        );
-    }
-
-    #[test]
-    fn test_continuation_with_empty_next() {
-        // Empty line after continuation causes unterminated continuation
-        // (empty lines are skipped but continuation state persists)
-        let input = "0 string 0 Test \\\n\n0 byte 1 Next";
-        let lines = preprocess_lines(input).unwrap();
-        // The continuation carries through the empty line, so "Next" gets appended
-        assert_eq!(lines.len(), 1);
-        assert_eq!(lines[0].content, "0 string 0 Test 0 byte 1 Next");
-    }
-
-    #[test]
-    fn test_continuation_into_empty_then_rule() {
-        let input = "0 string 0 First \\\n\ncontinued";
-        let lines = preprocess_lines(input).unwrap();
-        assert_eq!(lines.len(), 1);
-        assert_eq!(lines[0].content, "0 string 0 First continued");
-    }
-
-    // ============================================================
-    // Line number accuracy tests (from pr-test-analyzer)
+    // Line number accuracy test (uses parse_text_magic_file)
     // ============================================================
 
-    #[test]
-    fn test_line_numbers_with_continuations() {
-        let input = "0 string 0 test1\n0 string 0 multi \\\nline \\\ntest\n0 string 0 test2";
-        let lines = preprocess_lines(input).unwrap();
-
-        // Line 1: "0 string 0 test1" should report line 1
-        assert_eq!(lines[0].line_number, 1);
-
-        // Line 2-4 continuation should report line 2 (first line of continuation)
-        assert_eq!(lines[1].line_number, 2);
-
-        // Line 5: "0 string 0 test2" should report line 5
-        assert_eq!(lines[2].line_number, 5);
-    }
-
     #[test]
     fn test_error_reports_correct_line_for_continuation() {
         // When a continued rule fails to parse, error should show the starting line
@@ -1538,66 +496,6 @@ continued";
             Ok(_) => panic!("Expected InvalidSyntax error"),
         }
     }
-
-    #[test]
-    fn test_line_numbers_with_mixed_content() {
-        let input = "# Comment line 1\n0 string 0 rule1\n\n# Another comment\n0 string 0 rule2 \\\ncontinued";
-        let lines = preprocess_lines(input).unwrap();
-
-        assert_eq!(lines.len(), 4);
-        assert_eq!(lines[0].line_number, 1); // Comment
-        assert_eq!(lines[1].line_number, 2); // rule1
-        assert_eq!(lines[2].line_number, 4); // Another comment
-        assert_eq!(lines[3].line_number, 5); // rule2 (continued on line 6)
-    }
-
-    // ============================================================
-    // Bug reproduction tests
-    // ============================================================
-
-    #[test]
-    fn test_bug1_comment_during_continuation() {
-        // Bug 1: Comment during continuation should not corrupt line_buf
-        // The partial rule should be discarded, leaving only the comment and new rule
-        let input = "0 string 0 Partial rule \\\n# This is a comment\n0 byte 1 New rule";
-        let lines = preprocess_lines(input).unwrap();
-
-        // The partial rule is discarded, so we should have 2 lines: comment and new rule
-        assert_eq!(lines.len(), 2);
-        // The comment should be separate and not contain rule content
-        let comment_line = lines.iter().find(|l| l.is_comment).unwrap();
-        assert!(!comment_line.content.contains("Partial rule"));
-        assert_eq!(comment_line.content, "This is a comment");
-        // The new rule should be intact
-        let rule_line = lines
-            .iter()
-            .find(|l| !l.is_comment && l.content.contains("New rule"))
-            .unwrap();
-        assert_eq!(rule_line.content, "0 byte 1 New rule");
-    }
-
-    #[test]
-    fn test_bug2_empty_line_in_continuation() {
-        // Bug 2: Empty line in continuation should not break line number calculation
-        let input = "0 string 0 Test \\\n\ncontinued here";
-        let lines = preprocess_lines(input).unwrap();
-
-        assert_eq!(lines.len(), 1);
-        // Line number should point to line 1 (where the rule started), not line 3
-        assert_eq!(lines[0].line_number, 1);
-        assert_eq!(lines[0].content, "0 string 0 Test continued here");
-    }
-
-    #[test]
-    fn test_bug2_multiple_empty_lines_in_continuation() {
-        // Multiple empty lines in continuation
-        let input = "0 string 0 Test \\\n\n\ncontinued here";
-        let lines = preprocess_lines(input).unwrap();
-
-        assert_eq!(lines.len(), 1);
-        // Line number should still point to line 1
-        assert_eq!(lines[0].line_number, 1);
-    }
 }
 
 #[cfg(test)]
@@ -1672,425 +570,3 @@ mod output_test {
         }
     }
 }
-
-#[cfg(test)]
-mod format_detection_tests {
-    use super::*;
-    use std::fs;
-    use std::io::Write;
-
-    #[test]
-    fn test_detect_format_text_file() {
-        let temp_dir = std::env::temp_dir();
-        let text_file = temp_dir.join("test_text_magic.txt");
-        fs::write(&text_file, "# Magic file\n0 string test Test").unwrap();
-
-        let format = detect_format(&text_file).unwrap();
-        assert_eq!(format, MagicFileFormat::Text);
-
-        fs::remove_file(&text_file).unwrap();
-    }
-
-    #[test]
-    fn test_detect_format_directory() {
-        let temp_dir = std::env::temp_dir().join("test_magic_dir");
-        fs::create_dir_all(&temp_dir).unwrap();
-
-        let format = detect_format(&temp_dir).unwrap();
-        assert_eq!(format, MagicFileFormat::Directory);
-
-        fs::remove_dir_all(&temp_dir).unwrap();
-    }
-
-    #[test]
-    fn test_detect_format_binary_mgc() {
-        let temp_dir = std::env::temp_dir();
-        let binary_file = temp_dir.join("test_binary.mgc");
-
-        // Write binary magic number 0xF11E041C in little-endian
-        let mut file = fs::File::create(&binary_file).unwrap();
-        file.write_all(&[0x1C, 0x04, 0x1E, 0xF1]).unwrap();
-        file.write_all(b"additional binary data").unwrap();
-
-        let result = detect_format(&binary_file);
-        assert!(result.is_ok());
-
-        match result.unwrap() {
-            MagicFileFormat::Binary => {
-                // Expected result
-            }
-            other => panic!("Expected Binary format, got {other:?}"),
-        }
-
-        fs::remove_file(&binary_file).unwrap();
-    }
-
-    #[test]
-    fn test_detect_format_nonexistent_path() {
-        let nonexistent = std::env::temp_dir().join("nonexistent_magic_file.txt");
-
-        let result = detect_format(&nonexistent);
-        assert!(result.is_err());
-
-        match result.unwrap_err() {
-            ParseError::IoError(e) => {
-                assert_eq!(e.kind(), std::io::ErrorKind::NotFound);
-            }
-            other => panic!("Expected IoError, got: {other:?}"),
-        }
-    }
-
-    #[test]
-    fn test_detect_format_empty_file() {
-        let temp_dir = std::env::temp_dir();
-        let empty_file = temp_dir.join("test_empty_magic.txt");
-        fs::write(&empty_file, "").unwrap();
-
-        // Empty files should be detected as text (too small for binary magic)
-        let format = detect_format(&empty_file).unwrap();
-        assert_eq!(format, MagicFileFormat::Text);
-
-        fs::remove_file(&empty_file).unwrap();
-    }
-
-    #[test]
-    fn test_detect_format_small_file() {
-        let temp_dir = std::env::temp_dir();
-        let small_file = temp_dir.join("test_small_magic.txt");
-        fs::write(&small_file, "ab").unwrap(); // Only 2 bytes
-
-        // Small files should be detected as text
-        let format = detect_format(&small_file).unwrap();
-        assert_eq!(format, MagicFileFormat::Text);
-
-        fs::remove_file(&small_file).unwrap();
-    }
-
-    #[test]
-    fn test_detect_format_text_with_binary_content() {
-        let temp_dir = std::env::temp_dir();
-        let binary_text_file = temp_dir.join("test_binary_text.txt");
-
-        // Write binary data that's NOT the magic number
-        let mut file = fs::File::create(&binary_text_file).unwrap();
-        file.write_all(&[0xFF, 0xFE, 0xFD, 0xFC]).unwrap();
-        file.write_all(b"some text").unwrap();
-
-        // Should be detected as text (wrong magic number)
-        let format = detect_format(&binary_text_file).unwrap();
-        assert_eq!(format, MagicFileFormat::Text);
-
-        fs::remove_file(&binary_text_file).unwrap();
-    }
-
-    #[test]
-    fn test_magic_file_format_enum_equality() {
-        assert_eq!(MagicFileFormat::Text, MagicFileFormat::Text);
-        assert_eq!(MagicFileFormat::Directory, MagicFileFormat::Directory);
-        assert_eq!(MagicFileFormat::Binary, MagicFileFormat::Binary);
-
-        assert_ne!(MagicFileFormat::Text, MagicFileFormat::Directory);
-        assert_ne!(MagicFileFormat::Text, MagicFileFormat::Binary);
-        assert_ne!(MagicFileFormat::Directory, MagicFileFormat::Binary);
-    }
-
-    #[test]
-    fn test_magic_file_format_debug() {
-        let text_format = MagicFileFormat::Text;
-        let debug_str = format!("{text_format:?}");
-        assert!(debug_str.contains("Text"));
-    }
-
-    #[test]
-    fn test_magic_file_format_clone() {
-        let original = MagicFileFormat::Directory;
-        let cloned = original;
-        assert_eq!(original, cloned);
-    }
-
-    #[test]
-    fn test_magic_file_format_copy() {
-        let original = MagicFileFormat::Binary;
-        let copied = original; // Copy trait allows this
-        assert_eq!(original, copied);
-    }
-
-    // ============================================================
-    // Tests for load_magic_directory (6+ test cases)
-    // ============================================================
-
-    #[test]
-    fn test_load_directory_critical_error_io() {
-        use std::path::Path;
-
-        let non_existent = Path::new("/this/should/not/exist/anywhere/at/all");
-        let result = load_magic_directory(non_existent);
-
-        assert!(
-            result.is_err(),
-            "Should return error for non-existent directory"
-        );
-        let err = result.unwrap_err();
-        assert!(err.to_string().contains("Failed to read directory"));
-    }
-
-    #[test]
-    fn test_load_directory_non_critical_error_parse() {
-        use std::fs;
-        use tempfile::TempDir;
-
-        let temp_dir = TempDir::new().expect("Failed to create temp dir");
-
-        // Create a valid file
-        let valid_path = temp_dir.path().join("valid.magic");
-        fs::write(&valid_path, "0 string \\x01\\x02 valid\n").expect("Failed to write valid file");
-
-        // Create an invalid file
-        let invalid_path = temp_dir.path().join("invalid.magic");
-        fs::write(&invalid_path, "this is invalid syntax\n").expect("Failed to write invalid file");
-
-        // Should succeed, loading only the valid file
-        let rules = load_magic_directory(temp_dir.path()).expect("Should load valid files");
-
-        assert_eq!(rules.len(), 1, "Should load only valid file");
-        assert_eq!(rules[0].message, "valid");
-    }
-
-    #[test]
-    fn test_load_directory_empty_files() {
-        use std::fs;
-        use tempfile::TempDir;
-
-        let temp_dir = TempDir::new().expect("Failed to create temp dir");
-
-        // Create an empty file
-        let empty_path = temp_dir.path().join("empty.magic");
-        fs::write(&empty_path, "").expect("Failed to write empty file");
-
-        // Create a file with only comments
-        let comments_path = temp_dir.path().join("comments.magic");
-        fs::write(&comments_path, "# Just comments\n# Nothing else\n")
-            .expect("Failed to write comments file");
-
-        // Should succeed with no rules
-        let rules = load_magic_directory(temp_dir.path()).expect("Should handle empty files");
-
-        assert_eq!(rules.len(), 0, "Empty files should contribute no rules");
-    }
-
-    #[test]
-    fn test_load_directory_binary_files() {
-        use std::fs;
-        use tempfile::TempDir;
-
-        let temp_dir = TempDir::new().expect("Failed to create temp dir");
-
-        // Create a binary file (invalid UTF-8)
-        let binary_path = temp_dir.path().join("binary.dat");
-        fs::write(&binary_path, [0xFF, 0xFE, 0xFF, 0xFE]).expect("Failed to write binary file");
-
-        // Create a valid text file
-        let valid_path = temp_dir.path().join("valid.magic");
-        fs::write(&valid_path, "0 string \\x01\\x02 valid\n").expect("Failed to write valid file");
-
-        // Binary file should cause a critical error (invalid UTF-8)
-        let result = load_magic_directory(temp_dir.path());
-
-        // The function should fail when encountering binary files (critical I/O error)
-        assert!(
-            result.is_err(),
-            "Binary files should cause critical error due to invalid UTF-8"
-        );
-    }
-
-    #[test]
-    fn test_load_directory_mixed_extensions() {
-        use std::fs;
-        use tempfile::TempDir;
-
-        let temp_dir = TempDir::new().expect("Failed to create temp dir");
-
-        // Create files with different extensions
-        fs::write(
-            temp_dir.path().join("file.magic"),
-            "0 string \\x01\\x02 magic\n",
-        )
-        .expect("Failed to write .magic file");
-        fs::write(
-            temp_dir.path().join("file.txt"),
-            "0 string \\x03\\x04 txt\n",
-        )
-        .expect("Failed to write .txt file");
-        fs::write(temp_dir.path().join("noext"), "0 string \\x05\\x06 noext\n")
-            .expect("Failed to write no-ext file");
-
-        let rules = load_magic_directory(temp_dir.path())
-            .expect("Should load all files regardless of extension");
-
-        assert_eq!(
-            rules.len(),
-            3,
-            "Should process all files regardless of extension"
-        );
-
-        let messages: Vec<&str> = rules.iter().map(|r| r.message.as_str()).collect();
-        assert!(messages.contains(&"magic"));
-        assert!(messages.contains(&"txt"));
-        assert!(messages.contains(&"noext"));
-    }
-
-    #[test]
-    fn test_load_directory_alphabetical_ordering() {
-        use std::fs;
-        use tempfile::TempDir;
-
-        let temp_dir = TempDir::new().expect("Failed to create temp dir");
-
-        // Create files in non-alphabetical order - using valid magic syntax with hex escapes
-        fs::write(
-            temp_dir.path().join("03-third"),
-            "0 string \\x07\\x08\\x09 third\n",
-        )
-        .expect("Failed to write third file");
-        fs::write(
-            temp_dir.path().join("01-first"),
-            "0 string \\x01\\x02\\x03 first\n",
-        )
-        .expect("Failed to write first file");
-        fs::write(
-            temp_dir.path().join("02-second"),
-            "0 string \\x04\\x05\\x06 second\n",
-        )
-        .expect("Failed to write second file");
-
-        let rules = load_magic_directory(temp_dir.path()).expect("Should load directory in order");
-
-        assert_eq!(rules.len(), 3);
-        // Should be sorted alphabetically by filename
-        assert_eq!(rules[0].message, "first");
-        assert_eq!(rules[1].message, "second");
-        assert_eq!(rules[2].message, "third");
-    }
-
-    // ============================================================
-    // Tests for load_magic_file (5+ test cases)
-    // ============================================================
-
-    #[test]
-    fn test_load_magic_file_text_format() {
-        use std::fs;
-        use tempfile::TempDir;
-
-        let temp_dir = TempDir::new().expect("Failed to create temp dir");
-        let magic_file = temp_dir.path().join("magic.txt");
-
-        // Create text magic file with valid content
-        fs::write(&magic_file, "0 string \\x7fELF ELF executable\n")
-            .expect("Failed to write magic file");
-
-        // Load using load_magic_file
-        let rules = load_magic_file(&magic_file).expect("Failed to load text magic file");
-
-        assert_eq!(rules.len(), 1);
-        assert_eq!(rules[0].message, "ELF executable");
-    }
-
-    #[test]
-    fn test_load_magic_file_directory_format() {
-        use std::fs;
-        use tempfile::TempDir;
-
-        let temp_dir = TempDir::new().expect("Failed to create temp dir");
-        let magic_dir = temp_dir.path().join("magic.d");
-        fs::create_dir(&magic_dir).expect("Failed to create magic directory");
-
-        // Create multiple files in directory
-        fs::write(
-            magic_dir.join("00_elf"),
-            "0 string \\x7fELF ELF executable\n",
-        )
-        .expect("Failed to write elf file");
-        fs::write(
-            magic_dir.join("01_zip"),
-            "0 string \\x50\\x4b\\x03\\x04 ZIP archive\n",
-        )
-        .expect("Failed to write zip file");
-
-        // Load using load_magic_file
-        let rules = load_magic_file(&magic_dir).expect("Failed to load directory");
-
-        assert_eq!(rules.len(), 2);
-        assert_eq!(rules[0].message, "ELF executable");
-        assert_eq!(rules[1].message, "ZIP archive");
-    }
-
-    #[test]
-    fn test_load_magic_file_binary_format_error() {
-        use std::fs::File;
-        use std::io::Write;
-        use tempfile::TempDir;
-
-        let temp_dir = TempDir::new().expect("Failed to create temp dir");
-        let binary_file = temp_dir.path().join("magic.mgc");
-
-        // Create binary file with .mgc magic number
-        let mut file = File::create(&binary_file).expect("Failed to create binary file");
-        let magic_number: [u8; 4] = [0x1C, 0x04, 0x1E, 0xF1]; // Little-endian 0xF11E041C
-        file.write_all(&magic_number)
-            .expect("Failed to write magic number");
-
-        // Attempt to load binary file
-        let result = load_magic_file(&binary_file);
-
-        assert!(result.is_err(), "Should fail to load binary .mgc file");
-
-        let error = result.unwrap_err();
-        let error_msg = error.to_string();
-
-        // Verify error mentions unsupported format and --use-builtin
-        assert!(
-            error_msg.contains("Binary") || error_msg.contains("binary"),
-            "Error should mention binary format: {error_msg}",
-        );
-        assert!(
-            error_msg.contains("--use-builtin") || error_msg.contains("built-in"),
-            "Error should mention --use-builtin option: {error_msg}",
-        );
-    }
-
-    #[test]
-    fn test_load_magic_file_io_error() {
-        use std::path::Path;
-
-        // Try to load non-existent file
-        let non_existent = Path::new("/this/path/should/not/exist/magic.txt");
-        let result = load_magic_file(non_existent);
-
-        assert!(result.is_err(), "Should fail for non-existent file");
-    }
-
-    #[test]
-    fn test_load_magic_file_parse_error_propagation() {
-        use std::fs;
-        use tempfile::TempDir;
-
-        let temp_dir = TempDir::new().expect("Failed to create temp dir");
-        let invalid_file = temp_dir.path().join("invalid.magic");
-
-        // Create file with invalid syntax (missing offset)
-        fs::write(&invalid_file, "string test invalid\n").expect("Failed to write invalid file");
-
-        // Attempt to load file with parse errors
-        let result = load_magic_file(&invalid_file);
-
-        assert!(result.is_err(), "Should fail for file with parse errors");
-
-        // Error should be a parse error (not I/O error)
-        let error = result.unwrap_err();
-        let error_msg = format!("{error:?}");
-        assert!(
-            error_msg.contains("InvalidSyntax") || error_msg.contains("syntax"),
-            "Error should be parse error: {error_msg}",
-        );
-    }
-}
diff --git a/src/parser/preprocessing.rs b/src/parser/preprocessing.rs
new file mode 100644
index 00000000..1f9e1004
--- /dev/null
+++ b/src/parser/preprocessing.rs
@@ -0,0 +1,521 @@
+//! Line preprocessing for magic file parsing.
+//!
+//! Handles comment removal, empty line filtering, line continuations,
+//! and strength directive parsing during magic file preprocessing.
+
+use crate::error::ParseError;
+use crate::parser::ast::{MagicRule, StrengthModifier};
+use crate::parser::grammar::{
+    has_continuation, is_comment_line, is_empty_line, is_strength_directive, parse_comment,
+    parse_magic_rule, parse_strength_directive,
+};
+
+/// Internal structure to track line metadata during preprocessing.
+///
+/// Stores the processed content, original line number, and flags for comment
+/// and strength directive lines in the input magic file.
+#[derive(Debug)]
+pub(crate) struct LineInfo {
+    pub(crate) content: String,
+    pub(crate) line_number: usize,
+    pub(crate) is_comment: bool,
+    /// Optional strength modifier parsed from `!:strength` directive
+    pub(crate) strength_modifier: Option<StrengthModifier>,
+}
+
+impl LineInfo {
+    pub(crate) fn new(content: String, line_number: usize, is_comment: bool) -> Self {
+        Self {
+            content,
+            line_number,
+            is_comment,
+            strength_modifier: None,
+        }
+    }
+
+    pub(crate) fn with_strength(
+        content: String,
+        line_number: usize,
+        strength_modifier: StrengthModifier,
+    ) -> Self {
+        Self {
+            content,
+            line_number,
+            is_comment: false,
+            strength_modifier: Some(strength_modifier),
+        }
+    }
+}
+
+/// Preprocesses raw magic file input by handling comments, empty lines, and continuations.
+///
+/// This function performs the following transformations:
+/// - Removes empty lines from the input
+/// - Handles comment lines (lines starting with '#')
+/// - Processes line continuations (lines ending with '\')
+/// - Concatenates continued lines into single entries
+/// - Preserves original line numbers for error reporting (continued lines
+///   are assigned the line number of the first line in the continuation sequence)
+///
+/// # Arguments
+///
+/// * `input` - The raw magic file content as a string
+///
+/// # Returns
+///
+/// `Result<Vec<LineInfo>, ParseError>` - A vector of processed lines or a parse error
+///
+/// # Errors
+///
+/// Returns an error if:
+/// - Comment lines cannot be parsed
+/// - Input ends with an unterminated line continuation
+/// - The input is malformed
+pub(crate) fn preprocess_lines(input: &str) -> Result<Vec<LineInfo>, ParseError> {
+    let mut lines_info: Vec<LineInfo> = Vec::new();
+    let mut line_buf = String::new();
+    let mut start_line_number: Option<usize> = None;
+    let mut last_line_num = 0usize;
+    for (i, mut line) in input.lines().enumerate() {
+        last_line_num = i + 1;
+        if is_empty_line(line) {
+            continue;
+        }
+        if is_comment_line(line) {
+            // Bug 1 fix: If we have an ongoing continuation, discard it before processing comment
+            if !line_buf.is_empty() {
+                line_buf.clear();
+                start_line_number = None;
+            }
+            let parsed_comment = parse_comment(line)
+                .map_err(|_| ParseError::invalid_syntax(i + 1, "Unable to parse comment"))?;
+            line = parsed_comment.1.as_str();
+            lines_info.push(LineInfo::new(line.trim().to_string(), i + 1, true));
+            continue;
+        }
+        // Handle strength directives (!:strength ...)
+        if is_strength_directive(line) {
+            // If we have an ongoing continuation, discard it before processing directive
+            if !line_buf.is_empty() {
+                line_buf.clear();
+                start_line_number = None;
+            }
+            let strength_modifier = parse_strength_directive(line)
+                .map_err(|e| {
+                    ParseError::invalid_syntax(
+                        i + 1,
+                        format!("Failed to parse strength directive: {e}"),
+                    )
+                })?
+                .1;
+            lines_info.push(LineInfo::with_strength(
+                line.trim().to_string(),
+                i + 1,
+                strength_modifier,
+            ));
+            continue;
+        }
+        // Track the starting line number when we begin accumulating a rule
+        if start_line_number.is_none() {
+            start_line_number = Some(i + 1);
+        }
+        line_buf.push_str(line.trim());
+        if has_continuation(line) {
+            // Remove trailing backslash in-place (O(1)) instead of
+            // strip_suffix().to_string() which allocates a new String
+            line_buf.pop();
+            continue;
+        }
+        // Bug 2 fix: Use the stored starting line number instead of calculating from cont_ctr
+        let rule_line_number = start_line_number.unwrap_or(i + 1);
+        lines_info.push(LineInfo::new(
+            std::mem::take(&mut line_buf),
+            rule_line_number,
+            false,
+        ));
+        start_line_number = None;
+    }
+
+    // Handle unterminated continuation at end of input
+    if !line_buf.is_empty() {
+        return Err(ParseError::invalid_syntax(
+            last_line_num,
+            "Unterminated line continuation",
+        ));
+    }
+
+    Ok(lines_info)
+}
+
+/// Parses a single magic rule line into a `MagicRule` AST node.
+///
+/// This function takes a preprocessed `LineInfo` and converts it into a `MagicRule`
+/// by delegating to the grammar parser. It handles error mapping to include
+/// context about which line failed.
+///
+/// # Arguments
+///
+/// * `line` - The `LineInfo` struct containing the rule text and metadata
+///
+/// # Returns
+///
+/// `Result<MagicRule, ParseError>` - The parsed rule or a parse error
+///
+/// # Errors
+///
+/// Returns an error if:
+/// - The line is marked as a comment
+/// - The rule syntax is invalid
+/// - Required fields are missing
+/// - Value parsing fails
+pub(crate) fn parse_magic_rule_line(line: &LineInfo) -> Result<MagicRule, ParseError> {
+    if line.is_comment {
+        return Err(ParseError::invalid_syntax(
+            line.line_number,
+            "Comment lines cannot be parsed as rules",
+        ));
+    }
+    parse_magic_rule(&line.content)
+        .map_err(|e| {
+            ParseError::invalid_syntax(line.line_number, format!("Failed to parse rule: {e}"))
+        })
+        .map(|(_, rule)| rule)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::parser::ast::{OffsetSpec, Operator, TypeKind};
+
+    fn li(line_number: usize, content: &str) -> LineInfo {
+        LineInfo {
+            content: content.to_string(),
+            line_number,
+            is_comment: false,
+            strength_modifier: None,
+        }
+    }
+
+    fn li_comment(line_number: usize, content: &str) -> LineInfo {
+        LineInfo {
+            content: content.to_string(),
+            line_number,
+            is_comment: true,
+            strength_modifier: None,
+        }
+    }
+
+    // ============================================================
+    // Tests for parse_magic_rule_line (12 test cases)
+    // ============================================================
+
+    #[test]
+    fn test_parse_magic_rule_line_simple_string() {
+        let line = li(1, "0 string \\x7fELF ELF executable");
+        let rule = parse_magic_rule_line(&line).unwrap();
+        assert_eq!(rule.level, 0);
+        assert_eq!(rule.message, "ELF executable");
+    }
+
+    #[test]
+    fn test_parse_magic_rule_line_byte_type() {
+        let line = li(1, "0 byte 1 ELF");
+        let rule = parse_magic_rule_line(&line).unwrap();
+        assert_eq!(rule.level, 0);
+        assert!(matches!(rule.typ, TypeKind::Byte));
+    }
+
+    #[test]
+    fn test_parse_magic_rule_line_with_child_indentation() {
+        let line = li(2, ">4 byte 1 32-bit");
+        let rule = parse_magic_rule_line(&line).unwrap();
+        assert_eq!(rule.level, 1);
+    }
+
+    #[test]
+    fn test_parse_magic_rule_line_deep_indentation() {
+        let line = li(3, ">>>8 long = 0x12345678 Complex match");
+        let rule = parse_magic_rule_line(&line).unwrap();
+        assert_eq!(rule.level, 3);
+    }
+
+    #[test]
+    fn test_parse_magic_rule_line_not_equal_operator() {
+        let line = li(1, "0 byte != 0 Non-zero");
+        let rule = parse_magic_rule_line(&line).unwrap();
+        assert_eq!(rule.op, Operator::NotEqual);
+    }
+
+    #[test]
+    fn test_parse_magic_rule_line_greater_operator() {
+        let line = li(1, "0 long = 1000 Number");
+        let rule = parse_magic_rule_line(&line).unwrap();
+        assert_eq!(rule.op, Operator::Equal);
+    }
+
+    #[test]
+    fn test_parse_magic_rule_line_less_operator() {
+        let line = li(1, "0 long != 256 Not equal");
+        let rule = parse_magic_rule_line(&line).unwrap();
+        assert_eq!(rule.op, Operator::NotEqual);
+    }
+
+    #[test]
+    fn test_parse_magic_rule_line_bitwise_and_operator() {
+        let line = li(1, "0 byte & 0xFF Bitmask");
+        let rule = parse_magic_rule_line(&line).unwrap();
+        assert_eq!(rule.op, Operator::BitwiseAnd);
+    }
+
+    #[test]
+    fn test_parse_magic_rule_line_comment_line_error() {
+        let line = li_comment(1, "This is a comment");
+        let result = parse_magic_rule_line(&line);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_parse_magic_rule_line_hex_offset() {
+        let line = li(1, "0x100 byte 1 PDF document");
+        let rule = parse_magic_rule_line(&line).unwrap();
+        match rule.offset {
+            OffsetSpec::Absolute(offset) => assert_eq!(offset, 0x100),
+            _ => panic!("Expected absolute offset"),
+        }
+    }
+
+    #[test]
+    fn test_parse_magic_rule_line_string_with_spaces() {
+        let line = li(1, "0 byte 1 Long message with multiple words");
+        let rule = parse_magic_rule_line(&line).unwrap();
+        assert_eq!(rule.message, "Long message with multiple words");
+    }
+
+    #[test]
+    fn test_parse_magic_rule_line_short_type() {
+        let line = li(1, "0 short 0x4d5a MS-DOS executable");
+        let rule = parse_magic_rule_line(&line).unwrap();
+        assert!(matches!(rule.typ, TypeKind::Short { .. }));
+    }
+
+    // ============================================================
+    // Tests for preprocess_lines (12 test cases)
+    // ============================================================
+
+    #[test]
+    fn test_preprocess_lines_single_rule() {
+        let input = "0 string 0 Test";
+        let lines = preprocess_lines(input).unwrap();
+        assert_eq!(lines.len(), 1);
+        assert_eq!(lines[0].content, "0 string 0 Test");
+        assert!(!lines[0].is_comment);
+    }
+
+    #[test]
+    fn test_preprocess_lines_multiple_rules() {
+        let input = "0 string 0 Test\n0 byte 1 Byte";
+        let lines = preprocess_lines(input).unwrap();
+        assert_eq!(lines.len(), 2);
+        assert_eq!(lines[0].content, "0 string 0 Test");
+        assert_eq!(lines[1].content, "0 byte 1 Byte");
+    }
+
+    #[test]
+    fn test_preprocess_lines_with_comments() {
+        let input = "# Comment\n0 string 0 Test";
+        let lines = preprocess_lines(input).unwrap();
+        assert_eq!(lines.len(), 2);
+        assert!(lines[0].is_comment);
+        assert!(!lines[1].is_comment);
+    }
+
+    #[test]
+    fn test_preprocess_lines_empty_lines() {
+        let input = "0 string 0 Test\n\n0 byte 1 Byte";
+        let lines = preprocess_lines(input).unwrap();
+        assert_eq!(lines.len(), 2);
+    }
+
+    #[test]
+    fn test_preprocess_lines_leading_empty_lines() {
+        let input = "\n\n0 string 0 Test";
+        let lines = preprocess_lines(input).unwrap();
+        assert_eq!(lines.len(), 1);
+        assert_eq!(lines[0].content, "0 string 0 Test");
+    }
+
+    #[test]
+    fn test_preprocess_lines_trailing_empty_lines() {
+        let input = "0 string 0 Test\n\n";
+        let lines = preprocess_lines(input).unwrap();
+        assert_eq!(lines.len(), 1);
+    }
+
+    #[test]
+    fn test_preprocess_lines_line_continuation() {
+        let input = "0 string 0 Long message \\\ncontinued here";
+        let lines = preprocess_lines(input).unwrap();
+        assert_eq!(lines.len(), 1);
+        assert_eq!(lines[0].content, "0 string 0 Long message continued here");
+    }
+
+    #[test]
+    fn test_preprocess_lines_multiple_continuations() {
+        let input = "0 string 0 Multi \\\nline \\\ncontinuation";
+        let lines = preprocess_lines(input).unwrap();
+        assert_eq!(lines.len(), 1);
+        assert_eq!(lines[0].content, "0 string 0 Multi line continuation");
+    }
+
+    #[test]
+    fn test_preprocess_lines_mixed_comments_and_rules() {
+        let input = "# Header\n0 string 0 Test\n# Another comment\n>4 byte 1 Child";
+        let lines = preprocess_lines(input).unwrap();
+        assert_eq!(lines.len(), 4);
+        assert!(lines[0].is_comment);
+        assert!(!lines[1].is_comment);
+        assert!(lines[2].is_comment);
+        assert!(!lines[3].is_comment);
+    }
+
+    #[test]
+    fn test_preprocess_lines_preserves_line_numbers() {
+        let input = "0 string 0 Test\n>4 byte 1 Child";
+        let lines = preprocess_lines(input).unwrap();
+        assert_eq!(lines[0].line_number, 1);
+        assert_eq!(lines[1].line_number, 2);
+    }
+
+    #[test]
+    fn test_preprocess_lines_empty_input() {
+        let input = "";
+        let lines = preprocess_lines(input).unwrap();
+        assert_eq!(lines.len(), 0);
+    }
+
+    #[test]
+    fn test_preprocess_lines_only_comments() {
+        let input = "# Comment 1\n# Comment 2\n# Comment 3";
+        let lines = preprocess_lines(input).unwrap();
+        assert_eq!(lines.len(), 3);
+        assert!(lines.iter().all(|l| l.is_comment));
+    }
+
+    // ============================================================
+    // Continuation edge case tests
+    // ============================================================
+
+    #[test]
+    fn test_continuation_at_eof() {
+        // Continuation on last line with no following line - should error
+        let input = "0 string 0 Test \\";
+        let result = preprocess_lines(input);
+        assert!(
+            result.is_err(),
+            "Should error on unterminated continuation at EOF"
+        );
+        let err = result.unwrap_err();
+        assert!(
+            format!("{err:?}").contains("Unterminated"),
+            "Error should mention unterminated continuation"
+        );
+    }
+
+    #[test]
+    fn test_continuation_with_empty_next() {
+        // Empty line after continuation causes unterminated continuation
+        // (empty lines are skipped but continuation state persists)
+        let input = "0 string 0 Test \\\n\n0 byte 1 Next";
+        let lines = preprocess_lines(input).unwrap();
+        // The continuation carries through the empty line, so "Next" gets appended
+        assert_eq!(lines.len(), 1);
+        assert_eq!(lines[0].content, "0 string 0 Test 0 byte 1 Next");
+    }
+
+    #[test]
+    fn test_continuation_into_empty_then_rule() {
+        let input = "0 string 0 First \\\n\ncontinued";
+        let lines = preprocess_lines(input).unwrap();
+        assert_eq!(lines.len(), 1);
+        assert_eq!(lines[0].content, "0 string 0 First continued");
+    }
+
+    // ============================================================
+    // Line number accuracy tests
+    // ============================================================
+
+    #[test]
+    fn test_line_numbers_with_continuations() {
+        let input = "0 string 0 test1\n0 string 0 multi \\\nline \\\ntest\n0 string 0 test2";
+        let lines = preprocess_lines(input).unwrap();
+
+        // Line 1: "0 string 0 test1" should report line 1
+        assert_eq!(lines[0].line_number, 1);
+
+        // Line 2-4 continuation should report line 2 (first line of continuation)
+        assert_eq!(lines[1].line_number, 2);
+
+        // Line 5: "0 string 0 test2" should report line 5
+        assert_eq!(lines[2].line_number, 5);
+    }
+
+    #[test]
+    fn test_line_numbers_with_mixed_content() {
+        let input = "# Comment line 1\n0 string 0 rule1\n\n# Another comment\n0 string 0 rule2 \\\ncontinued";
+        let lines = preprocess_lines(input).unwrap();
+
+        assert_eq!(lines.len(), 4);
+        assert_eq!(lines[0].line_number, 1); // Comment
+        assert_eq!(lines[1].line_number, 2); // rule1
+        assert_eq!(lines[2].line_number, 4); // Another comment
+        assert_eq!(lines[3].line_number, 5); // rule2 (continued on line 6)
+    }
+
+    // ============================================================
+    // Bug reproduction tests
+    // ============================================================
+
+    #[test]
+    fn test_bug1_comment_during_continuation() {
+        // Bug 1: Comment during continuation should not corrupt line_buf
+        // The partial rule should be discarded, leaving only the comment and new rule
+        let input = "0 string 0 Partial rule \\\n# This is a comment\n0 byte 1 New rule";
+        let lines = preprocess_lines(input).unwrap();
+
+        // The partial rule is discarded, so we should have 2 lines: comment and new rule
+        assert_eq!(lines.len(), 2);
+        // The comment should be separate and not contain rule content
+        let comment_line = lines.iter().find(|l| l.is_comment).unwrap();
+        assert!(!comment_line.content.contains("Partial rule"));
+        assert_eq!(comment_line.content, "This is a comment");
+        // The new rule should be intact
+        let rule_line = lines
+            .iter()
+            .find(|l| !l.is_comment && l.content.contains("New rule"))
+            .unwrap();
+        assert_eq!(rule_line.content, "0 byte 1 New rule");
+    }
+
+    #[test]
+    fn test_bug2_empty_line_in_continuation() {
+        // Bug 2: Empty line in continuation should not break line number calculation
+        let input = "0 string 0 Test \\\n\ncontinued here";
+        let lines = preprocess_lines(input).unwrap();
+
+        assert_eq!(lines.len(), 1);
+        // Line number should point to line 1 (where the rule started), not line 3
+        assert_eq!(lines[0].line_number, 1);
+        assert_eq!(lines[0].content, "0 string 0 Test continued here");
+    }
+
+    #[test]
+    fn test_bug2_multiple_empty_lines_in_continuation() {
+        // Multiple empty lines in continuation
+        let input = "0 string 0 Test \\\n\n\ncontinued here";
+        let lines = preprocess_lines(input).unwrap();
+
+        assert_eq!(lines.len(), 1);
+        // Line number should still point to line 1
+        assert_eq!(lines[0].line_number, 1);
+    }
+}
diff --git a/tests/compatibility_tests.rs b/tests/compatibility_tests.rs
index 2b10c8ab..09930dee 100644
--- a/tests/compatibility_tests.rs
+++ b/tests/compatibility_tests.rs
@@ -34,14 +34,12 @@ enum TestStatus {
 /// Compatibility test runner
 struct CompatibilityTestRunner {
     test_dir: PathBuf,
-    magic_file: PathBuf,
     rmagic_binary: PathBuf,
 }
 
 impl CompatibilityTestRunner {
     fn new() -> Result<Self, Box<dyn std::error::Error>> {
         let test_dir = PathBuf::from("third_party/tests");
-        let magic_file = PathBuf::from("third_party/magic.mgc");
         let rmagic_binary = find_rmagic_binary()?;
 
         if !test_dir.exists() {
@@ -51,13 +49,8 @@ impl CompatibilityTestRunner {
             );
         }
 
-        if !magic_file.exists() {
-            return Err("Magic file not found. Ensure third_party/magic.mgc exists.".into());
-        }
-
         Ok(Self {
             test_dir,
-            magic_file,
             rmagic_binary,
         })
     }
@@ -86,8 +79,7 @@ impl CompatibilityTestRunner {
     /// Run rmagic against a test file
     fn run_rmagic(&self, test_file: &Path) -> Result<String, Box<dyn std::error::Error>> {
         let output = Command::new(&self.rmagic_binary)
-            .arg("--magic-file")
-            .arg(&self.magic_file)
+            .arg("--use-builtin")
             .arg(test_file)
             .stdout(Stdio::piped())
             .stderr(Stdio::piped())
diff --git a/tests/evaluator_tests.rs b/tests/evaluator_tests.rs
new file mode 100644
index 00000000..bba2f187
--- /dev/null
+++ b/tests/evaluator_tests.rs
@@ -0,0 +1,244 @@
+//! Evaluator integration tests
+//!
+//! Tests for confidence calculation, rule ordering, and evaluation behavior
+//! through the public `MagicDatabase` API.
+
+use libmagic_rs::{EvaluationConfig, MagicDatabase};
+
+// ============================================================
+// Confidence Calculation Tests
+// ============================================================
+
+#[test]
+fn test_confidence_nonzero_for_known_type() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_buffer(b"\x7fELF\x02\x01\x01\x00").unwrap();
+    assert!(
+        result.confidence > 0.0,
+        "ELF detection should have non-zero confidence, got {}",
+        result.confidence
+    );
+}
+
+#[test]
+fn test_confidence_zero_for_unknown_type() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_buffer(b"random unknown content").unwrap();
+    assert!(
+        (result.confidence - 0.0).abs() < f64::EPSILON,
+        "Unknown type should have zero confidence"
+    );
+}
+
+#[test]
+fn test_confidence_matches_first_match() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_buffer(b"\x7fELF\x02\x01\x01\x00").unwrap();
+    if let Some(first) = result.matches.first() {
+        assert!(
+            (result.confidence - first.confidence).abs() < f64::EPSILON,
+            "Result confidence should equal first match confidence"
+        );
+    }
+}
+
+// ============================================================
+// Rule Ordering Tests
+// ============================================================
+
+#[test]
+fn test_elf_detected_before_generic() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_buffer(b"\x7fELF\x02\x01\x01\x00").unwrap();
+    assert!(
+        result.description.contains("ELF"),
+        "ELF should be detected, got: {}",
+        result.description
+    );
+}
+
+#[test]
+fn test_pdf_detected_correctly() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_buffer(b"%PDF-\x00\x00\x00").unwrap();
+    assert!(
+        result.description.contains("PDF"),
+        "PDF should be detected, got: {}",
+        result.description
+    );
+}
+
+#[test]
+fn test_png_detected_correctly() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db
+        .evaluate_buffer(b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR")
+        .unwrap();
+    assert!(
+        result.description.contains("PNG"),
+        "PNG should be detected, got: {}",
+        result.description
+    );
+}
+
+#[test]
+fn test_jpeg_detected_correctly() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db
+        .evaluate_buffer(b"\xff\xd8\xff\xe0\x00\x10JFIF\x00")
+        .unwrap();
+    assert!(
+        result.description.contains("JPEG") || result.description.contains("JFIF"),
+        "JPEG should be detected, got: {}",
+        result.description
+    );
+}
+
+#[test]
+fn test_zip_detected_correctly() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_buffer(b"PK\x03\x04rest of zip").unwrap();
+    assert!(
+        result.description.contains("ZIP") || result.description.contains("Zip"),
+        "ZIP should be detected, got: {}",
+        result.description
+    );
+}
+
+#[test]
+fn test_gzip_detected_correctly() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db
+        .evaluate_buffer(b"\x1f\x8b\x08\x00\x00\x00\x00\x00")
+        .unwrap();
+    assert!(
+        result.description.to_lowercase().contains("gzip"),
+        "GZIP should be detected, got: {}",
+        result.description
+    );
+}
+
+// ============================================================
+// Configuration Tests
+// ============================================================
+
+#[test]
+fn test_evaluate_with_performance_config() {
+    let config = EvaluationConfig::performance();
+    let db = MagicDatabase::with_builtin_rules_and_config(config).unwrap();
+    let result = db.evaluate_buffer(b"\x7fELF\x02\x01\x01\x00").unwrap();
+    assert!(result.description.contains("ELF"));
+}
+
+#[test]
+fn test_evaluate_with_comprehensive_config() {
+    let config = EvaluationConfig::comprehensive();
+    let db = MagicDatabase::with_builtin_rules_and_config(config).unwrap();
+    let result = db.evaluate_buffer(b"\x7fELF\x02\x01\x01\x00").unwrap();
+    assert!(result.description.contains("ELF"));
+}
+
+#[test]
+fn test_evaluate_with_mime_types_enabled() {
+    let config = EvaluationConfig {
+        enable_mime_types: true,
+        ..EvaluationConfig::default()
+    };
+    let db = MagicDatabase::with_builtin_rules_and_config(config).unwrap();
+    let result = db.evaluate_buffer(b"\x7fELF\x02\x01\x01\x00").unwrap();
+    assert!(
+        result.mime_type.is_some(),
+        "MIME type should be present when enabled"
+    );
+}
+
+#[test]
+fn test_evaluate_without_mime_types() {
+    let config = EvaluationConfig {
+        enable_mime_types: false,
+        ..EvaluationConfig::default()
+    };
+    let db = MagicDatabase::with_builtin_rules_and_config(config).unwrap();
+    let result = db.evaluate_buffer(b"\x7fELF\x02\x01\x01\x00").unwrap();
+    assert!(
+        result.mime_type.is_none(),
+        "MIME type should be absent when disabled"
+    );
+}
+
+#[test]
+fn test_invalid_config_rejected() {
+    let config = EvaluationConfig {
+        max_recursion_depth: 0,
+        ..EvaluationConfig::default()
+    };
+    let result = MagicDatabase::with_builtin_rules_and_config(config);
+    assert!(result.is_err(), "Zero recursion depth should be rejected");
+}
+
+// ============================================================
+// Metadata Tests
+// ============================================================
+
+#[test]
+fn test_metadata_populated_for_buffer() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_buffer(b"\x7fELF\x02\x01\x01\x00").unwrap();
+
+    assert_eq!(result.metadata.file_size, 8);
+    assert!(result.metadata.evaluation_time_ms >= 0.0);
+    assert!(result.metadata.rules_evaluated > 0);
+    assert!(result.metadata.magic_file.is_none());
+    assert!(!result.metadata.timed_out);
+}
+
+#[test]
+fn test_metadata_for_no_match() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_buffer(b"nothing matches this").unwrap();
+
+    assert_eq!(result.description, "data");
+    assert!(result.metadata.rules_evaluated > 0);
+}
+
+// ============================================================
+// Edge Cases
+// ============================================================
+
+#[test]
+fn test_evaluate_empty_buffer() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_buffer(b"").unwrap();
+    assert_eq!(result.description, "data");
+}
+
+#[test]
+fn test_evaluate_single_byte_buffer() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_buffer(b"\x00").unwrap();
+    // Should not panic, may or may not match
+    assert!(!result.description.is_empty());
+}
+
+#[test]
+fn test_evaluate_all_zeros() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_buffer(&[0u8; 1024]).unwrap();
+    assert!(!result.description.is_empty());
+}
+
+#[test]
+fn test_evaluate_all_ones() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_buffer(&[0xFF; 1024]).unwrap();
+    assert!(!result.description.is_empty());
+}
+
+#[test]
+fn test_evaluate_partial_magic_header() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    // Only first byte of ELF magic
+    let result = db.evaluate_buffer(b"\x7f").unwrap();
+    // Should not crash, might not match
+    assert!(!result.description.is_empty());
+}
diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs
new file mode 100644
index 00000000..dd08776b
--- /dev/null
+++ b/tests/integration_tests.rs
@@ -0,0 +1,374 @@
+//! End-to-end integration tests for core flows
+//!
+//! Tests the complete workflow: load database, evaluate files/buffers,
+//! verify output formatting and metadata.
+
+use std::fs;
+use std::io::Write;
+use std::path::PathBuf;
+
+use libmagic_rs::{EvaluationConfig, MagicDatabase};
+use tempfile::TempDir;
+
+// ============================================================
+// Built-in Rules End-to-End
+// ============================================================
+
+#[test]
+fn test_builtin_rules_detect_elf() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_buffer(b"\x7fELF\x02\x01\x01\x00").unwrap();
+    assert!(result.description.contains("ELF"));
+    assert!(result.confidence > 0.0);
+}
+
+#[test]
+fn test_builtin_rules_detect_pdf() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_buffer(b"%PDF-\x00\x00\x00").unwrap();
+    assert!(result.description.contains("PDF"));
+}
+
+#[test]
+fn test_builtin_rules_detect_zip() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_buffer(b"PK\x03\x04").unwrap();
+    assert!(
+        result.description.contains("ZIP") || result.description.contains("Zip"),
+        "Expected ZIP detection, got: {}",
+        result.description
+    );
+}
+
+#[test]
+fn test_builtin_rules_detect_png() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db
+        .evaluate_buffer(b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR")
+        .unwrap();
+    assert!(result.description.contains("PNG"));
+}
+
+#[test]
+fn test_builtin_rules_detect_jpeg() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db
+        .evaluate_buffer(b"\xff\xd8\xff\xe0\x00\x10JFIF\x00")
+        .unwrap();
+    assert!(
+        result.description.contains("JPEG") || result.description.contains("JFIF"),
+        "Expected JPEG detection, got: {}",
+        result.description
+    );
+}
+
+#[test]
+fn test_builtin_rules_detect_gif() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_buffer(b"GIF8\x00\x00").unwrap();
+    assert!(
+        result.description.contains("GIF"),
+        "Expected GIF detection, got: {}",
+        result.description
+    );
+}
+
+#[test]
+fn test_builtin_rules_unknown_fallback() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_buffer(b"random bytes").unwrap();
+    assert_eq!(result.description, "data");
+}
+
+// ============================================================
+// Load from File End-to-End
+// ============================================================
+
+#[test]
+fn test_load_from_magic_file_and_evaluate() {
+    let temp_dir = TempDir::new().unwrap();
+    let magic_path = temp_dir.path().join("test.magic");
+
+    // Write a simple magic file using quoted string value
+    let mut f = fs::File::create(&magic_path).unwrap();
+    writeln!(f, "0 string \"TESTMAGIC\" Test file format").unwrap();
+
+    let db = MagicDatabase::load_from_file(&magic_path).unwrap();
+    let result = db.evaluate_buffer(b"TESTMAGIC\x00some data").unwrap();
+    assert!(
+        result.description.contains("Test file format"),
+        "Expected custom rule match, got: {}",
+        result.description
+    );
+}
+
+#[test]
+fn test_load_from_file_with_config() {
+    let temp_dir = TempDir::new().unwrap();
+    let magic_path = temp_dir.path().join("test.magic");
+
+    let mut f = fs::File::create(&magic_path).unwrap();
+    writeln!(f, "0 string \"HELLO\" Hello file").unwrap();
+
+    let config = EvaluationConfig {
+        enable_mime_types: true,
+        ..EvaluationConfig::default()
+    };
+    let db = MagicDatabase::load_from_file_with_config(&magic_path, config).unwrap();
+    let result = db.evaluate_buffer(b"HELLO\x00world").unwrap();
+    assert!(result.description.contains("Hello file"));
+}
+
+#[test]
+fn test_source_path_tracked() {
+    let temp_dir = TempDir::new().unwrap();
+    let magic_path = temp_dir.path().join("test.magic");
+
+    let mut f = fs::File::create(&magic_path).unwrap();
+    writeln!(f, "0 string \"X\" Test").unwrap();
+
+    let db = MagicDatabase::load_from_file(&magic_path).unwrap();
+    assert!(db.source_path().is_some());
+}
+
+#[test]
+fn test_source_path_none_for_builtin() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    assert!(db.source_path().is_none());
+}
+
+#[test]
+fn test_load_nonexistent_file_errors() {
+    let result = MagicDatabase::load_from_file("/nonexistent/magic/file");
+    assert!(result.is_err());
+}
+
+// ============================================================
+// Evaluate File End-to-End
+// ============================================================
+
+#[test]
+fn test_evaluate_file_with_builtin() {
+    let temp_dir = TempDir::new().unwrap();
+    let test_file = temp_dir.path().join("test.elf");
+
+    // Write ELF header
+    fs::write(
+        &test_file,
+        b"\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+    )
+    .unwrap();
+
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_file(&test_file).unwrap();
+    assert!(result.description.contains("ELF"));
+    assert!(result.metadata.file_size > 0);
+}
+
+#[test]
+fn test_evaluate_empty_file() {
+    let temp_dir = TempDir::new().unwrap();
+    let test_file = temp_dir.path().join("empty.bin");
+    fs::write(&test_file, b"").unwrap();
+
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_file(&test_file).unwrap();
+    assert_eq!(result.metadata.file_size, 0);
+}
+
+#[test]
+fn test_evaluate_nonexistent_file_errors() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_file("/nonexistent/test/file.bin");
+    assert!(result.is_err());
+}
+
+// ============================================================
+// Multiple Format Detection
+// ============================================================
+
+#[test]
+fn test_multiple_formats_detected_correctly() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+
+    let test_cases: Vec<(&[u8], &str)> = vec![
+        (b"\x7fELF\x02\x01\x01\x00", "ELF"),
+        (b"%PDF-\x00\x00\x00", "PDF"),
+        (b"PK\x03\x04rest", "ZIP"),
+        (b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR", "PNG"),
+        (b"GIF8\x00\x00", "GIF"),
+    ];
+
+    for (input, expected) in test_cases {
+        let result = db.evaluate_buffer(input).unwrap();
+        assert!(
+            result.description.contains(expected),
+            "Expected '{}' for input {:?}, got: '{}'",
+            expected,
+            &input[..4.min(input.len())],
+            result.description
+        );
+    }
+}
+
+// ============================================================
+// Custom Magic Rules End-to-End
+// ============================================================
+
+#[test]
+fn test_custom_rules_with_hierarchy() {
+    let temp_dir = TempDir::new().unwrap();
+    let magic_path = temp_dir.path().join("custom.magic");
+
+    let mut f = fs::File::create(&magic_path).unwrap();
+    writeln!(f, "0 belong 0x7f454c46 ELF").unwrap();
+    writeln!(f, ">4 byte 1 32-bit").unwrap();
+    writeln!(f, ">4 byte 2 64-bit").unwrap();
+
+    let db = MagicDatabase::load_from_file(&magic_path).unwrap();
+
+    // Test 32-bit ELF (5 bytes: 4-byte magic + class byte)
+    let result = db.evaluate_buffer(b"\x7fELF\x01").unwrap();
+    assert!(
+        result.description.contains("32-bit"),
+        "Expected 32-bit, got: {}",
+        result.description
+    );
+
+    // Test 64-bit ELF
+    let result = db.evaluate_buffer(b"\x7fELF\x02").unwrap();
+    assert!(
+        result.description.contains("64-bit"),
+        "Expected 64-bit, got: {}",
+        result.description
+    );
+}
+
+#[test]
+fn test_custom_rules_no_match_fallback() {
+    let temp_dir = TempDir::new().unwrap();
+    let magic_path = temp_dir.path().join("custom.magic");
+
+    let mut f = fs::File::create(&magic_path).unwrap();
+    writeln!(f, "0 string \"RARE\" Rare format").unwrap();
+
+    let db = MagicDatabase::load_from_file(&magic_path).unwrap();
+    let result = db.evaluate_buffer(b"no match here").unwrap();
+    assert_eq!(result.description, "data");
+}
+
+// ============================================================
+// Load from Directory End-to-End
+// ============================================================
+
+#[test]
+fn test_load_directory_of_magic_files() {
+    let temp_dir = TempDir::new().unwrap();
+
+    // Create multiple magic files using belong for binary magic numbers
+    let mut f1 = fs::File::create(temp_dir.path().join("images.magic")).unwrap();
+    writeln!(f1, "0 belong 0x89504e47 PNG image").unwrap();
+
+    let mut f2 = fs::File::create(temp_dir.path().join("docs.magic")).unwrap();
+    writeln!(f2, "0 string \"%PDF-\" PDF document").unwrap();
+
+    let db = MagicDatabase::load_from_file(temp_dir.path()).unwrap();
+
+    let png_result = db
+        .evaluate_buffer(b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR")
+        .unwrap();
+    assert!(
+        png_result.description.contains("PNG"),
+        "Expected PNG from directory load, got: {}",
+        png_result.description
+    );
+
+    let pdf_result = db.evaluate_buffer(b"%PDF-\x001.4").unwrap();
+    assert!(
+        pdf_result.description.contains("PDF"),
+        "Expected PDF from directory load, got: {}",
+        pdf_result.description
+    );
+}
+
+// ============================================================
+// Config Accessor
+// ============================================================
+
+#[test]
+fn test_config_accessor() {
+    let config = EvaluationConfig {
+        max_recursion_depth: 15,
+        timeout_ms: Some(5000),
+        ..EvaluationConfig::default()
+    };
+    let db = MagicDatabase::with_builtin_rules_and_config(config).unwrap();
+    assert_eq!(db.config().max_recursion_depth, 15);
+    assert_eq!(db.config().timeout_ms, Some(5000));
+}
+
+// ============================================================
+// Metadata in File Evaluation
+// ============================================================
+
+#[test]
+fn test_file_evaluation_metadata() {
+    let temp_dir = TempDir::new().unwrap();
+    let test_file = temp_dir.path().join("test.pdf");
+    let content = b"%PDF-\x00some pdf content here for testing";
+    fs::write(&test_file, content).unwrap();
+
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+    let result = db.evaluate_file(&test_file).unwrap();
+
+    assert_eq!(result.metadata.file_size, content.len() as u64);
+    assert!(result.metadata.evaluation_time_ms >= 0.0);
+    assert!(result.metadata.rules_evaluated > 0);
+    assert!(!result.metadata.timed_out);
+}
+
+// ============================================================
+// Multiple Evaluations on Same Database
+// ============================================================
+
+#[test]
+fn test_reuse_database_multiple_evaluations() {
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+
+    let elf = db.evaluate_buffer(b"\x7fELF\x02\x01\x01\x00").unwrap();
+    let pdf = db.evaluate_buffer(b"%PDF-\x001.4").unwrap();
+    let unknown = db.evaluate_buffer(b"nothing here").unwrap();
+
+    assert!(elf.description.contains("ELF"));
+    assert!(pdf.description.contains("PDF"));
+    assert_eq!(unknown.description, "data");
+}
+
+// ============================================================
+// Test Fixture Helper
+// ============================================================
+
+fn create_test_file(dir: &std::path::Path, name: &str, content: &[u8]) -> PathBuf {
+    let path = dir.join(name);
+    fs::write(&path, content).unwrap();
+    path
+}
+
+#[test]
+fn test_evaluate_multiple_files() {
+    let temp_dir = TempDir::new().unwrap();
+    let db = MagicDatabase::with_builtin_rules().unwrap();
+
+    let elf_file = create_test_file(
+        temp_dir.path(),
+        "test.elf",
+        b"\x7fELF\x02\x01\x01\x00\x00\x00",
+    );
+    let pdf_file = create_test_file(temp_dir.path(), "test.pdf", b"%PDF-\x001.4 content");
+
+    let elf_result = db.evaluate_file(&elf_file).unwrap();
+    let pdf_result = db.evaluate_file(&pdf_file).unwrap();
+
+    assert!(elf_result.description.contains("ELF"));
+    assert!(pdf_result.description.contains("PDF"));
+}
diff --git a/tests/mime_tests.rs b/tests/mime_tests.rs
new file mode 100644
index 00000000..c917be04
--- /dev/null
+++ b/tests/mime_tests.rs
@@ -0,0 +1,226 @@
+//! MIME type mapping integration tests
+//!
+//! Tests for MIME type detection through the public API,
+//! including hardcoded mappings, prefix matching, and case insensitivity.
+
+use libmagic_rs::mime::MimeMapper;
+use libmagic_rs::{EvaluationConfig, MagicDatabase};
+
+// ============================================================
+// Integration: MIME via MagicDatabase
+// ============================================================
+
+#[test]
+fn test_mime_enabled_returns_type_for_elf() {
+    let config = EvaluationConfig {
+        enable_mime_types: true,
+        ..EvaluationConfig::default()
+    };
+    let db = MagicDatabase::with_builtin_rules_and_config(config).unwrap();
+    let result = db.evaluate_buffer(b"\x7fELF\x02\x01\x01\x00").unwrap();
+    assert_eq!(
+        result.mime_type,
+        Some("application/x-executable".to_string())
+    );
+}
+
+#[test]
+fn test_mime_enabled_returns_type_for_pdf() {
+    let config = EvaluationConfig {
+        enable_mime_types: true,
+        ..EvaluationConfig::default()
+    };
+    let db = MagicDatabase::with_builtin_rules_and_config(config).unwrap();
+    let result = db.evaluate_buffer(b"%PDF-\x00\x00\x00").unwrap();
+    assert_eq!(result.mime_type, Some("application/pdf".to_string()));
+}
+
+#[test]
+fn test_mime_enabled_returns_type_for_png() {
+    let config = EvaluationConfig {
+        enable_mime_types: true,
+        ..EvaluationConfig::default()
+    };
+    let db = MagicDatabase::with_builtin_rules_and_config(config).unwrap();
+    let result = db
+        .evaluate_buffer(b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR")
+        .unwrap();
+    assert_eq!(result.mime_type, Some("image/png".to_string()));
+}
+
+#[test]
+fn test_mime_enabled_returns_type_for_jpeg() {
+    let config = EvaluationConfig {
+        enable_mime_types: true,
+        ..EvaluationConfig::default()
+    };
+    let db = MagicDatabase::with_builtin_rules_and_config(config).unwrap();
+    let result = db
+        .evaluate_buffer(b"\xff\xd8\xff\xe0\x00\x10JFIF\x00")
+        .unwrap();
+    assert_eq!(result.mime_type, Some("image/jpeg".to_string()));
+}
+
+#[test]
+fn test_mime_enabled_returns_type_for_zip() {
+    let config = EvaluationConfig {
+        enable_mime_types: true,
+        ..EvaluationConfig::default()
+    };
+    let db = MagicDatabase::with_builtin_rules_and_config(config).unwrap();
+    let result = db.evaluate_buffer(b"PK\x03\x04rest of zip").unwrap();
+    assert_eq!(result.mime_type, Some("application/zip".to_string()));
+}
+
+#[test]
+fn test_mime_disabled_returns_none() {
+    let config = EvaluationConfig {
+        enable_mime_types: false,
+        ..EvaluationConfig::default()
+    };
+    let db = MagicDatabase::with_builtin_rules_and_config(config).unwrap();
+    let result = db.evaluate_buffer(b"\x7fELF\x02\x01\x01\x00").unwrap();
+    assert_eq!(result.mime_type, None);
+}
+
+#[test]
+fn test_mime_unknown_data_returns_none() {
+    let config = EvaluationConfig {
+        enable_mime_types: true,
+        ..EvaluationConfig::default()
+    };
+    let db = MagicDatabase::with_builtin_rules_and_config(config).unwrap();
+    let result = db.evaluate_buffer(b"random unknown content").unwrap();
+    assert_eq!(result.mime_type, None);
+}
+
+// ============================================================
+// MimeMapper Direct Tests
+// ============================================================
+
+#[test]
+fn test_mapper_hardcoded_executables() {
+    let mapper = MimeMapper::new();
+    assert_eq!(
+        mapper.get_mime_type("ELF 64-bit LSB executable"),
+        Some("application/x-executable".to_string())
+    );
+    assert_eq!(
+        mapper.get_mime_type("PE32 executable (DLL)"),
+        Some("application/vnd.microsoft.portable-executable".to_string())
+    );
+    assert_eq!(
+        mapper.get_mime_type("Mach-O 64-bit executable"),
+        Some("application/x-mach-binary".to_string())
+    );
+}
+
+#[test]
+fn test_mapper_hardcoded_archives() {
+    let mapper = MimeMapper::new();
+    assert_eq!(
+        mapper.get_mime_type("Zip archive data"),
+        Some("application/zip".to_string())
+    );
+    assert_eq!(
+        mapper.get_mime_type("gzip compressed data"),
+        Some("application/gzip".to_string())
+    );
+    assert_eq!(
+        mapper.get_mime_type("POSIX tar archive"),
+        Some("application/x-tar".to_string())
+    );
+    assert_eq!(
+        mapper.get_mime_type("RAR archive data"),
+        Some("application/vnd.rar".to_string())
+    );
+    assert_eq!(
+        mapper.get_mime_type("7-zip archive"),
+        Some("application/x-7z-compressed".to_string())
+    );
+}
+
+#[test]
+fn test_mapper_hardcoded_images() {
+    let mapper = MimeMapper::new();
+    assert_eq!(
+        mapper.get_mime_type("JPEG image data"),
+        Some("image/jpeg".to_string())
+    );
+    assert_eq!(
+        mapper.get_mime_type("PNG image data, 800x600"),
+        Some("image/png".to_string())
+    );
+    assert_eq!(
+        mapper.get_mime_type("GIF image data, version 89a"),
+        Some("image/gif".to_string())
+    );
+    assert_eq!(
+        mapper.get_mime_type("BMP image data"),
+        Some("image/bmp".to_string())
+    );
+    assert_eq!(
+        mapper.get_mime_type("WebP image data"),
+        Some("image/webp".to_string())
+    );
+}
+
+#[test]
+fn test_mapper_hardcoded_documents() {
+    let mapper = MimeMapper::new();
+    assert_eq!(
+        mapper.get_mime_type("PDF document, version 1.4"),
+        Some("application/pdf".to_string())
+    );
+    assert_eq!(
+        mapper.get_mime_type("PostScript document"),
+        Some("application/postscript".to_string())
+    );
+}
+
+#[test]
+fn test_mapper_hardcoded_web() {
+    let mapper = MimeMapper::new();
+    assert_eq!(
+        mapper.get_mime_type("HTML document"),
+        Some("text/html".to_string())
+    );
+    assert_eq!(
+        mapper.get_mime_type("XML document"),
+        Some("application/xml".to_string())
+    );
+    assert_eq!(
+        mapper.get_mime_type("JSON data"),
+        Some("application/json".to_string())
+    );
+}
+
+#[test]
+fn test_mapper_case_insensitive() {
+    let mapper = MimeMapper::new();
+    assert_eq!(
+        mapper.get_mime_type("elf executable"),
+        mapper.get_mime_type("ELF EXECUTABLE")
+    );
+    assert_eq!(
+        mapper.get_mime_type("png image"),
+        mapper.get_mime_type("PNG IMAGE")
+    );
+}
+
+#[test]
+fn test_mapper_prefers_longer_match() {
+    let mapper = MimeMapper::new();
+    // "gzip" should match before "zip" for "gzip compressed"
+    assert_eq!(
+        mapper.get_mime_type("gzip compressed data"),
+        Some("application/gzip".to_string())
+    );
+}
+
+#[test]
+fn test_mapper_no_match_returns_none() {
+    let mapper = MimeMapper::new();
+    assert_eq!(mapper.get_mime_type("data"), None);
+    assert_eq!(mapper.get_mime_type("unknown binary format"), None);
+}
diff --git a/tests/property_tests.rs b/tests/property_tests.rs
new file mode 100644
index 00000000..4e486844
--- /dev/null
+++ b/tests/property_tests.rs
@@ -0,0 +1,217 @@
+//! Property-based tests for libmagic-rs
+//!
+//! Uses proptest to verify properties that should hold for all valid inputs:
+//! - Evaluator never panics on any buffer
+//! - Buffer access is always bounds-checked
+//! - Metadata is consistent
+//! - Serde roundtrips preserve data
+
+use proptest::prelude::*;
+
+use libmagic_rs::{
+    EvaluationConfig, MagicDatabase, MagicRule, OffsetSpec, Operator, TypeKind, Value,
+};
+
+/// Generate a valid OffsetSpec for testing
+fn arb_offset_spec() -> impl Strategy<Value = OffsetSpec> {
+    prop_oneof![
+        (-1000i64..=1000i64).prop_map(OffsetSpec::Absolute),
+        (-100i64..=100i64).prop_map(OffsetSpec::Relative),
+        (-100i64..=0i64).prop_map(OffsetSpec::FromEnd),
+    ]
+}
+
+/// Generate a valid TypeKind for testing
+fn arb_type_kind() -> impl Strategy<Value = TypeKind> {
+    prop_oneof![
+        Just(TypeKind::Byte),
+        (any::<bool>(), any::<bool>()).prop_map(|(is_big, signed)| {
+            TypeKind::Short {
+                endian: if is_big {
+                    libmagic_rs::Endianness::Big
+                } else {
+                    libmagic_rs::Endianness::Little
+                },
+                signed,
+            }
+        }),
+        (any::<bool>(), any::<bool>()).prop_map(|(is_big, signed)| {
+            TypeKind::Long {
+                endian: if is_big {
+                    libmagic_rs::Endianness::Big
+                } else {
+                    libmagic_rs::Endianness::Little
+                },
+                signed,
+            }
+        }),
+        (0usize..256usize).prop_map(|len| TypeKind::String {
+            max_length: Some(len),
+        }),
+    ]
+}
+
+/// Generate a valid Operator for testing
+fn arb_operator() -> impl Strategy<Value = Operator> {
+    prop_oneof![
+        Just(Operator::Equal),
+        Just(Operator::NotEqual),
+        Just(Operator::BitwiseAnd),
+        (0u64..=255u64).prop_map(Operator::BitwiseAndMask),
+    ]
+}
+
+/// Generate a valid Value for testing
+fn arb_value() -> impl Strategy<Value = Value> {
+    prop_oneof![
+        (0u64..=u32::MAX as u64).prop_map(Value::Uint),
+        (i32::MIN as i64..=i32::MAX as i64).prop_map(Value::Int),
+        prop::collection::vec(any::<u8>(), 0..32).prop_map(Value::Bytes),
+        "[a-zA-Z0-9 ]{0,32}".prop_map(Value::String),
+    ]
+}
+
+/// Generate a valid MagicRule for testing
+fn arb_magic_rule() -> impl Strategy<Value = MagicRule> {
+    (
+        arb_offset_spec(),
+        arb_type_kind(),
+        arb_operator(),
+        arb_value(),
+        "[a-zA-Z0-9 _-]{1,64}",
+    )
+        .prop_map(|(offset, typ, op, value, message)| MagicRule {
+            offset,
+            typ,
+            op,
+            value,
+            message,
+            children: vec![],
+            level: 0,
+            strength_modifier: None,
+        })
+}
+
+/// Generate arbitrary binary data for testing
+fn arb_buffer() -> impl Strategy<Value = Vec<u8>> {
+    prop::collection::vec(any::<u8>(), 0..1024)
+}
+
+// =============================================================================
+// Property Tests
+// =============================================================================
+
+proptest! {
+    /// Property: Evaluation should never panic on any valid buffer
+    #[test]
+    fn prop_evaluation_never_panics(buffer in arb_buffer()) {
+        let db = MagicDatabase::with_builtin_rules()
+            .expect("builtin rules should load");
+
+        let result = db.evaluate_buffer(&buffer);
+
+        match result {
+            Ok(eval_result) => {
+                prop_assert!(!eval_result.description.is_empty());
+                prop_assert!(eval_result.confidence >= 0.0);
+                prop_assert!(eval_result.confidence <= 1.0);
+            }
+            Err(e) => {
+                prop_assert!(!e.to_string().is_empty());
+            }
+        }
+    }
+
+    /// Property: EvaluationConfig validation accepts reasonable values
+    #[test]
+    fn prop_config_validation_consistent(
+        recursion_depth in 1u32..100u32,
+        string_length in 1usize..10000usize,
+        timeout in 1u64..100000u64
+    ) {
+        let config = EvaluationConfig {
+            max_recursion_depth: recursion_depth,
+            max_string_length: string_length,
+            stop_at_first_match: true,
+            enable_mime_types: false,
+            timeout_ms: Some(timeout),
+        };
+
+        prop_assert!(config.validate().is_ok());
+    }
+
+    /// Property: Evaluation result metadata is consistent with input
+    #[test]
+    fn prop_metadata_valid(buffer in arb_buffer()) {
+        let db = MagicDatabase::with_builtin_rules()
+            .expect("builtin rules should load");
+
+        let result = db.evaluate_buffer(&buffer)
+            .expect("should evaluate");
+
+        prop_assert_eq!(result.metadata.file_size as usize, buffer.len());
+        prop_assert!(result.metadata.evaluation_time_ms >= 0.0);
+        prop_assert!(result.metadata.rules_evaluated > 0);
+    }
+
+    /// Property: Arbitrary rules should serialize/deserialize consistently
+    #[test]
+    fn prop_rule_serde_roundtrip(rule in arb_magic_rule()) {
+        let json = serde_json::to_string(&rule)
+            .expect("should serialize");
+
+        let deserialized: MagicRule = serde_json::from_str(&json)
+            .expect("should deserialize");
+
+        prop_assert_eq!(rule.message, deserialized.message);
+        prop_assert_eq!(rule.level, deserialized.level);
+    }
+}
+
+// =============================================================================
+// Known-pattern detection (regular tests, not property tests)
+// =============================================================================
+
+#[test]
+fn test_elf_detection() {
+    let db = MagicDatabase::with_builtin_rules().expect("builtin rules should load");
+    let elf_buffer = vec![0x7f, b'E', b'L', b'F', 2, 1, 1, 0];
+
+    let result = db.evaluate_buffer(&elf_buffer).expect("should evaluate");
+    assert!(
+        result.description.contains("ELF"),
+        "Expected ELF detection, got: {}",
+        result.description
+    );
+}
+
+#[test]
+fn test_zip_detection() {
+    let db = MagicDatabase::with_builtin_rules().expect("builtin rules should load");
+    let zip_buffer = vec![0x50, 0x4b, 0x03, 0x04];
+
+    let result = db.evaluate_buffer(&zip_buffer).expect("should evaluate");
+    assert!(
+        result.description.contains("ZIP"),
+        "Expected ZIP detection, got: {}",
+        result.description
+    );
+}
+
+#[test]
+fn test_empty_buffer_handled() {
+    let db = MagicDatabase::with_builtin_rules().expect("builtin rules should load");
+
+    let result = db.evaluate_buffer(&[]).expect("should evaluate");
+    assert!(!result.description.is_empty());
+}
+
+#[test]
+fn test_zero_recursion_fails_validation() {
+    let config = EvaluationConfig {
+        max_recursion_depth: 0,
+        ..EvaluationConfig::default()
+    };
+
+    assert!(config.validate().is_err());
+}
diff --git a/tests/tags_tests.rs b/tests/tags_tests.rs
new file mode 100644
index 00000000..83c7ce51
--- /dev/null
+++ b/tests/tags_tests.rs
@@ -0,0 +1,159 @@
+//! Tag extraction integration tests
+//!
+//! Tests for keyword extraction, case insensitivity, rule path tags,
+//! and custom keyword support.
+
+use libmagic_rs::tags::TagExtractor;
+
+// ============================================================
+// Keyword Extraction
+// ============================================================
+
+#[test]
+fn test_extract_single_keyword() {
+    let extractor = TagExtractor::new();
+    let tags = extractor.extract_tags("ELF 64-bit executable");
+    assert!(tags.contains(&"executable".to_string()));
+}
+
+#[test]
+fn test_extract_multiple_keywords() {
+    let extractor = TagExtractor::new();
+    let tags = extractor.extract_tags("compressed archive with encrypted data");
+    assert!(tags.contains(&"archive".to_string()));
+    assert!(tags.contains(&"compressed".to_string()));
+    assert!(tags.contains(&"encrypted".to_string()));
+    assert!(tags.contains(&"data".to_string()));
+}
+
+#[test]
+fn test_extract_all_default_keywords() {
+    let extractor = TagExtractor::new();
+
+    let keyword_descriptions = [
+        ("ELF executable", "executable"),
+        ("Zip archive", "archive"),
+        ("PNG image data", "image"),
+        ("MPEG video stream", "video"),
+        ("FLAC audio bitstream", "audio"),
+        ("PDF document", "document"),
+        ("gzip compressed", "compressed"),
+        ("AES encrypted", "encrypted"),
+        ("ASCII text", "text"),
+        ("binary data", "binary"),
+        ("raw data", "data"),
+        ("Python script", "script"),
+        ("TrueType font", "font"),
+        ("SQLite database", "database"),
+        ("Excel spreadsheet", "spreadsheet"),
+    ];
+
+    for (description, expected_tag) in &keyword_descriptions {
+        let tags = extractor.extract_tags(description);
+        assert!(
+            tags.contains(&expected_tag.to_string()),
+            "Expected tag '{}' from description '{}'",
+            expected_tag,
+            description
+        );
+    }
+}
+
+#[test]
+fn test_no_match_returns_empty() {
+    let extractor = TagExtractor::new();
+    let tags = extractor.extract_tags("unknown format xyz");
+    assert!(tags.is_empty());
+}
+
+#[test]
+fn test_tags_are_sorted() {
+    let extractor = TagExtractor::new();
+    let tags = extractor.extract_tags("compressed archive with encrypted data");
+    let mut sorted = tags.clone();
+    sorted.sort();
+    assert_eq!(tags, sorted);
+}
+
+// ============================================================
+// Case Insensitivity
+// ============================================================
+
+#[test]
+fn test_case_insensitive_matching() {
+    let extractor = TagExtractor::new();
+    assert_eq!(
+        extractor.extract_tags("EXECUTABLE file"),
+        extractor.extract_tags("executable file")
+    );
+}
+
+#[test]
+fn test_mixed_case_matching() {
+    let extractor = TagExtractor::new();
+    let tags = extractor.extract_tags("Executable Archive Compressed");
+    assert!(tags.contains(&"executable".to_string()));
+    assert!(tags.contains(&"archive".to_string()));
+    assert!(tags.contains(&"compressed".to_string()));
+}
+
+// ============================================================
+// Custom Keywords
+// ============================================================
+
+#[test]
+fn test_custom_keywords() {
+    let extractor = TagExtractor::with_keywords(vec!["custom", "special"]);
+    let tags = extractor.extract_tags("This has custom and special content");
+    assert!(tags.contains(&"custom".to_string()));
+    assert!(tags.contains(&"special".to_string()));
+    assert!(!tags.contains(&"executable".to_string()));
+}
+
+#[test]
+fn test_custom_keywords_case_normalized() {
+    let extractor = TagExtractor::with_keywords(vec!["UPPER", "MiXeD"]);
+    let tags = extractor.extract_tags("upper and mixed content");
+    assert!(tags.contains(&"upper".to_string()));
+    assert!(tags.contains(&"mixed".to_string()));
+}
+
+#[test]
+fn test_keyword_count() {
+    let extractor = TagExtractor::new();
+    assert!(
+        extractor.keyword_count() >= 15,
+        "Default extractor should have at least 15 keywords"
+    );
+
+    let custom = TagExtractor::with_keywords(vec!["a", "b", "c"]);
+    assert_eq!(custom.keyword_count(), 3);
+}
+
+// ============================================================
+// Rule Path Tags
+// ============================================================
+
+#[test]
+fn test_extract_rule_path_basic() {
+    let extractor = TagExtractor::new();
+    let messages = ["ELF magic", "64-bit LSB", "executable"];
+    let tags = extractor.extract_rule_path(messages.iter().copied());
+    assert_eq!(tags, vec!["elf-magic", "64-bit-lsb", "executable"]);
+}
+
+#[test]
+fn test_extract_rule_path_removes_special_chars() {
+    let extractor = TagExtractor::new();
+    let messages = ["File (version 1.0)", "Data: test!"];
+    let tags = extractor.extract_rule_path(messages.iter().copied());
+    assert_eq!(tags, vec!["file-version-10", "data-test"]);
+}
+
+#[test]
+fn test_extract_rule_path_empty() {
+    let extractor = TagExtractor::new();
+    let messages: Vec<&str> = vec![];
+    let tags = extractor.extract_rule_path(messages.iter().copied());
+    assert!(tags.is_empty());
+}