diff --git a/.serena/project.yml b/.serena/project.yml
index d88a6623..608f2cfd 100644
--- a/.serena/project.yml
+++ b/.serena/project.yml
@@ -1,13 +1,15 @@
+
+
 # list of languages for which language servers are started; choose from:
 #   al                  bash                clojure             cpp                 csharp
 #   csharp_omnisharp    dart                elixir              elm                 erlang
 #   fortran             fsharp              go                  groovy              haskell
 #   java                julia               kotlin              lua                 markdown
 #   matlab              nix                 pascal              perl                php
-#   powershell          python              python_jedi         r                   rego
-#   ruby                ruby_solargraph     rust                scala               swift
-#   terraform           toml                typescript          typescript_vts      vue
-#   yaml                zig
+#   php_phpactor        powershell          python              python_jedi         r
+#   rego                ruby                ruby_solargraph     rust                scala
+#   swift               terraform           toml                typescript          typescript_vts
+#   vue                 yaml                zig
 #   (This list may be outdated. For the current list, see values of Language enum here:
 #   https://github.com/oraios/serena/blob/main/src/solidlsp/ls_config.py
 #   For some languages, there are alternative language servers, e.g. csharp_omnisharp, ruby_solargraph.)
@@ -16,8 +18,8 @@
 #   - For JavaScript, use typescript
 #   - For Free Pascal/Lazarus, use pascal
 # Special requirements:
-#   - csharp: Requires the presence of a .sln file in the project folder.
-#   - pascal: Requires Free Pascal Compiler (fpc) and optionally Lazarus.
+#   Some languages require additional setup/installations.
+#   See here for details: https://oraios.github.io/serena/01-about/020_programming-languages.html#language-servers
 # When using multiple languages, the first language server that supports a given file will be used for that file.
 # The first language is the default language and the respective language server will be used as a fallback.
 # Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored.
@@ -31,8 +33,9 @@ encoding: "utf-8"
 # whether to use project's .gitignore files to ignore files
 ignore_all_files_in_gitignore: true
 
-# list of additional paths to ignore in all projects
-# same syntax as gitignore, so you can use * and **
+# list of additional paths to ignore in this project.
+# Same syntax as gitignore, so you can use * and **.
+# Note: global ignored_paths from serena_config.yml are also applied additively.
 ignored_paths: []
 
 # whether the project is in read-only mode
@@ -40,7 +43,9 @@ ignored_paths: []
 # Added on 2025-04-18
 read_only: false
 
-# list of tool names to exclude. We recommend not excluding any tools, see the readme for more details.
+# list of tool names to exclude.
+# This extends the existing exclusions (e.g. from the global configuration).
+#
 # Below is the complete list of tools for convenience.
 # To make sure you have the latest list of tools, and to view their descriptions, 
 # execute `uv run scripts/print_tool_overview.py`.
@@ -87,7 +92,8 @@ initial_prompt: ""
 # the name by which the project can be referenced within Serena
 project_name: "libmagic-rs"
 
-# list of tools to include that would otherwise be disabled (particularly optional tools that are disabled by default)
+# list of tools to include that would otherwise be disabled (particularly optional tools that are disabled by default).
+# This extends the existing inclusions (e.g. from the global configuration).
 included_optional_tools: []
 
 # list of mode names to that are always to be included in the set of active modes
@@ -108,3 +114,39 @@ default_modes:
 # fixed set of tools to use as the base tool set (if non-empty), replacing Serena's default set of tools.
 # This cannot be combined with non-empty excluded_tools or included_optional_tools.
 fixed_tools: []
+
+# time budget (seconds) per tool call for the retrieval of additional symbol information
+# such as docstrings or parameter information.
+# This overrides the corresponding setting in the global configuration; see the documentation there.
+# If null or missing, use the setting from the global configuration.
+symbol_info_budget:
+
+# The language backend to use for this project.
+# If not set, the global setting from serena_config.yml is used.
+# Valid values: LSP, JetBrains
+# Note: the backend is fixed at startup. If a project with a different backend
+# is activated post-init, an error will be returned.
+language_backend:
+
+# line ending convention to use when writing source files.
+# Possible values: unset (use global setting), "lf", "crlf", or "native" (platform default)
+# This does not affect Serena's own files (e.g. memories and configuration files), which always use native line endings.
+line_ending:
+
+# list of regex patterns which, when matched, mark a memory entry as read-only.
+# Extends the list from the global configuration, merging the two lists.
+read_only_memory_patterns: []
+
+# list of regex patterns for memories to completely ignore.
+# Matching memories will not appear in list_memories or activate_project output
+# and cannot be accessed via read_memory or write_memory.
+# To access ignored memory files, use the read_file tool on the raw file path.
+# Extends the list from the global configuration, merging the two lists.
+# Example: ["_archive/.*", "_episodes/.*"]
+ignored_memory_patterns: []
+
+# advanced configuration option for setting language-server-specific options.
+# Maps the language key to the options.
+# Have a look at the docstring of the constructors of the LS implementations within solidlsp (e.g., for C# or PHP) to see which options are available.
+# No documentation on options means no options are available.
+ls_specific_settings: {}
diff --git a/AGENTS.md b/AGENTS.md
index 1e0fc93f..3328ad90 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -209,32 +209,19 @@ cargo test --doc   # Test documentation examples
 - **Operators**: `=` (equal), `!=` (not equal), `<` (less than), `>` (greater than), `<=` (less equal), `>=` (greater equal), `&` (bitwise AND with optional mask), `^` (bitwise XOR), `~` (bitwise NOT), `x` (any value)
 - **Nested Rules**: Hierarchical rule evaluation with proper indentation
 - **String Matching**: Exact string matching with null-termination and Pascal string (length-prefixed) support
+- **Regex type**: Binary-safe regex matching via `regex::bytes::Regex`. Full flag support: `/c` (case-insensitive), `/s` (anchor advances to match-start instead of match-end), `/l` (scan window is measured in lines instead of bytes). Flags combine in any order (`regex/cs`, `regex/csl`, `regex/lc`). Numeric counts are honored: `regex/100` scans at most 100 bytes; `regex/1l` scans at most 1 line. Multi-line regex matching is always on (matching libmagic's unconditional `REG_NEWLINE`), so `^` and `$` match at line boundaries regardless of `/l`. Every scan window is capped at 8192 bytes (`FILE_REGEX_MAX`) regardless of the user's count.
+- **Search type**: Bounded literal pattern scan via `memchr::memmem::find`; `search/N` caps the scan window to `N` bytes from the offset. The range is **mandatory** and stored as `NonZeroUsize`, so bare `search` and `search/0` are parse errors (matching GNU `file` magic(5)). Anchor advance follows GNU `file` semantics (match-end, not window-end) so relative-offset children resolve to the byte immediately after the matched pattern.
 
 ### Planned Features (v1.0+)
 
-- Regex type: Pattern matching with binary-safe regex support
-- Search type: Multi-pattern string searching
-
-### Future Enhancement: Binary-Safe Regex Handling
-
-> **Note:** The following is planned for future releases and is not yet implemented.
-
-```rust
-// Use regex crate with bytes feature for binary-safe matching
-pub trait BinaryRegex {
-    fn find_at(&self, haystack: &[u8], start: usize) -> Option<Match>;
-}
-
-impl BinaryRegex for regex::bytes::Regex {
-    /* ... */
-}
-```
+- Aho-Corasick multi-pattern search optimization for `search/` rules.
+- `!:mime`/`!:ext`/`!:apple` directive evaluation (currently only `!:strength` is parsed).
+- `use`/`name` named test directives for rule reuse.
 
 ## Current Limitations (v0.1.0)
 
 ### Type System
 
-- No regex/search pattern matching
 - 64-bit integer types: `quad`/`uquad`, `bequad`/`ubequad`, `lequad`/`ulequad` are implemented; `qquad` (128-bit) is not yet supported
 - String evaluation reads until first NUL or end-of-buffer by default; `pstring` reads a length-prefixed Pascal string; `max_length: Some(_)` is supported internally but no dedicated fixed-length string parser syntax exists yet
 - `pstring` supports 1-byte (`/B`), 2-byte big-endian (`/H`), 2-byte little-endian (`/h`), 4-byte big-endian (`/L`), and 4-byte little-endian (`/l`) length prefixes, plus the `/J` flag (stored length includes prefix width). All flags are combinable (e.g., `pstring/HJ`) and fully implemented.
@@ -317,7 +304,7 @@ sample.bin: ELF 64-bit LSB executable, x86-64, version 1 (SYSV)
 
 ### Adding New Type Support
 
-> **Note:** Currently implemented types are `Byte`, `Short`, `Long`, `Quad`, `Float`, `Double`, `Date`, `QDate`, `String`, and `PString`. Regex and search types are planned for future releases.
+> **Note:** Currently implemented types are `Byte`, `Short`, `Long`, `Quad`, `Float`, `Double`, `Date`, `QDate`, `String`, `PString`, `Regex`, and `Search`. See "Current Limitations" for the remaining gaps in regex/search flag coverage.
 
 1. Extend `TypeKind` enum in `src/parser/ast.rs`
 2. Add keyword parsing in `src/parser/types.rs` (`parse_type_keyword` and `type_keyword_to_kind`)
@@ -464,14 +451,15 @@ CI must pass before merge. Mergify merge protections enforce these checks. Bot P
 - `nom`: Parser combinators
 - `serde`: Serialization
 - `clap`: CLI argument parsing
-- `regex`: Pattern matching (used in tests; regex *type* for magic rules is planned)
+- `regex`: Binary-safe pattern matching via `regex::bytes::Regex` for `TypeKind::Regex` evaluation
+- `memchr`: SIMD-accelerated literal pattern search, used for `TypeKind::Search`
 - `aho-corasick`: Multi-pattern search (planned, not yet added)
 
 ### Development Phases
 
 1. **MVP (v0.1.0)** - CURRENT: Basic parsing and evaluation with byte/short/long/quad/string types, equality and bitwise AND operators, built-in rules for 10 common formats
 2. **Enhanced Features (v0.2)**: Comparison operators (`>`, `<`), indirect offset improvements, strength-based rule ordering
-3. **Advanced Types (v0.3)**: Regex type, search patterns
+3. **Advanced Types (v0.3)**: Regex flag completeness (`/s`, proper `/l` line-count semantics, `regex/Nl`), search range enforcement, 8192-byte default regex range
 4. **Full Compatibility (v0.4)**: Complete libmagic syntax support, all special directives, named tests
 5. **Production Ready (v1.0)**: Stable API, complete documentation, 95%+ compatibility with GNU file
 
diff --git a/Cargo.lock b/Cargo.lock
index 0b9f8241..8a480c9e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -258,9 +258,9 @@ dependencies = [
 
 [[package]]
 name = "clap_complete"
-version = "4.6.0"
+version = "4.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "19c9f1dde76b736e3681f28cec9d5a61299cbaae0fce80a68e43724ad56031eb"
+checksum = "406e68b4de5c59cfb8f750a7cbd4d31ae153788b8352167c1e5f4fc26e8c91e9"
 dependencies = [
  "clap",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 6d7a4a6b..da04e1d0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -147,13 +147,14 @@ cfg-if = "1.0.4"
 chrono = { version = "0.4.41", default-features = false, features = ["std", "clock"] }
 clap = { version = "4.6.0", features = ["derive"] }
 clap-stdin = "0.8.1"
-clap_complete = "4.6.0"
+clap_complete = "4.6.1"
 ctrlc = { version = "3.5.2", features = ["termination"] }
 env_logger = "0.11"
 log = "0.4"
 memchr = "2.8.0"
 memmap2 = "0.9.10"
 nom = "8.0.0"
+regex = "1.12.3"
 serde = { version = "1.0.228", features = ["derive"] }
 serde_json = "1.0.149"
 thiserror = "2.0.18"
@@ -171,7 +172,6 @@ insta = { version = "1.47.2", features = ["json"] }
 nix = { version = "0.31.2", features = ["fs"] }
 predicates = "3.1.4"
 proptest = "1.11.0"
-regex = "1.12.3"
 tempfile = "3.27.0"
 
 [[bench]]
diff --git a/GOTCHAS.md b/GOTCHAS.md
index 2b88e42a..5acfcc8a 100644
--- a/GOTCHAS.md
+++ b/GOTCHAS.md
@@ -26,7 +26,7 @@ Serialization functions live in `src/parser/codegen.rs`, shared by both `build.r
 
 ### 2.1 `TypeKind` Exhaustive Matches
 
-Adding a variant to `TypeKind` requires updating exhaustive matches in 10+ files: `ast`, `grammar`, `types`, `codegen`, `strength`, `property_tests`, `evaluator/types/mod.rs` (`read_typed_value`, `coerce_value_to_type`, **`bytes_consumed`** -- variable-width variants must be matched explicitly or relative-offset anchors will silently corrupt), `output/mod.rs` (2 length matches), `output/json.rs` (`format_value_as_hex`), and `grammar/tests.rs` (stale assertions). Note: `coerce_value_to_type`, output matches, and `bytes_consumed` use catch-all `_ =>` so they compile without changes but may need semantic updates -- `bytes_consumed` will fire a `debug_assert` in test/dev builds for unhandled variable-width variants.
+Adding a variant to `TypeKind` requires updating exhaustive matches in 10+ files: `ast`, `grammar`, `types`, `codegen` (`serialize_type_kind` -- easy to forget; build.rs is a separate compilation unit so the error surfaces there first), `strength`, `property_tests`, `evaluator/types/mod.rs` (`read_typed_value`, `coerce_value_to_type`, **`bytes_consumed`** -- variable-width variants must be matched explicitly or relative-offset anchors will silently corrupt), `output/mod.rs` (2 length matches), `output/json.rs` (`format_value_as_hex`), and `grammar/tests.rs` (stale assertions). Note: `coerce_value_to_type`, output matches, and `bytes_consumed` use catch-all `_ =>` so they compile without changes but may need semantic updates -- `bytes_consumed` will fire a `debug_assert` in test/dev builds for unhandled variable-width variants.
 
 ### 2.2 `Operator` Exhaustive Matches
 
@@ -38,6 +38,36 @@ Adding a variant to `Value` requires updating: `ast`, `codegen`, `strength`, `pr
 
 - **Note:** `Value` no longer derives `Eq` (removed when `Value::Float(f64)` was added) -- no production code depends on `Value: Eq`.
 
+### 2.4 Pattern-Bearing Types Bypass `apply_operator` in the Engine
+
+`TypeKind::Regex` and `TypeKind::Search` are evaluated by **logical match** in `evaluate_single_rule_with_anchor` (`src/evaluator/engine/mod.rs`), not by string equality against `rule.value`. The engine calls `types::read_pattern_match`, which returns `Result<Option<Value>, _>`: `Some(v)` means the pattern matched (possibly zero-width) and `None` means it did not. The engine translates that `Option` directly into `Equal`/`NotEqual`. Comparing matched text to the pattern literal via `apply_operator` would fail for any regex with metacharacters (e.g., matched `"123"` vs pattern `"[0-9]+"`). **Non-equality operators on pattern-bearing types are rejected as `TypeReadError::UnsupportedType`** — an earlier revision fell through to `apply_operator` and silently produced lexicographic ordering comparisons against the pattern source text. If you add a new pattern-bearing `TypeKind` variant, add its arm to both `read_pattern_match` and `bytes_consumed_with_pattern`; the engine's special-case match is keyed on the `Regex | Search` pair so you must add new variants there too.
+
+### 2.5 Zero-Width Regex Matches vs Misses
+
+`read_regex` returns `Ok(Some(Value::String("")))` for a legitimate zero-width match (`^`, `a*`, `\b`, `.{0}`) and `Ok(None)` for a genuine miss. An earlier revision collapsed both cases to `Value::String(String::new())` and distinguished them by `is_empty()`, which broke every pattern that legitimately matches zero bytes. The structured `Option` is the invariant — do not re-flatten it. `read_typed_value_with_pattern` does collapse `None` to `Value::String(String::new())` for back-compat with its single-`Value` return shape, but the engine does not go through that function for pattern types; it calls `read_pattern_match` directly.
+
+### 2.6 Search Anchor Advance Is Match-End, Not Window-End
+
+`search_bytes_consumed` returns `match_idx + pattern.len()` — the byte just past the matched pattern — not `range` (the window size). This matches GNU `file` semantics: `src/softmagic.c` `FILE_SEARCH` in `moffset()` computes `o = ms->search.offset + vlen - offset` where `ms->search.offset` has already been advanced by `idx` (the match index inside the window) in `magiccheck`, and `vlen = m->vallen` (the pattern length). An earlier revision returned the full window size, which silently corrupted relative-offset children of every successful `search` rule (e.g., `search/256 "MAGIC"` at index 4 advanced the anchor by 256 instead of by 9). The fix threaded the pattern through `bytes_consumed_with_pattern` for `TypeKind::Search` so the scan can be re-run at anchor-advance time. Search does not currently support a `/s`-style start-offset flag; if one is added, match-end can become match-start.
+
+### 2.7 Regex `/l` Is Scan Window Bounds, Not Multi-Line Toggle
+
+`RegexFlags::line_based` (the `/l` suffix) controls *only* the scan window extent: when set, `count` is interpreted as a line count and `compute_window` walks line terminators (both `\n` and `\r\n`, each counting as one terminator) to bound the scan. It does **not** toggle regex multi-line matching — libmagic always compiles with `REG_NEWLINE` (unconditional at `src/softmagic.c::alloc_regex` line 2123), so `^` and `$` match at line boundaries for every regex rule regardless of `/l`. An earlier revision of this crate wrapped line-based patterns in `^(?:...)` and only set `multi_line(true)` when `/l` was set; that was wrong on both counts and has been removed. `build_regex` now unconditionally sets `multi_line(true)` and `dot_matches_new_line(false)` for all patterns.
+
+### 2.8 Regex Scan Window Is Always Capped at 8192 Bytes
+
+Every regex rule is subject to the `REGEX_MAX_BYTES` (8192) hard cap, matching GNU `file`'s `FILE_REGEX_MAX` (`src/file.h:522`). This applies:
+
+- When `count` is `None` (default scan).
+- When `count` is `Some(n)` with `n > 8192` (explicit counts are clamped).
+- When `flags.line_based` is set (the line-based walk stops after 8192 bytes even if the Nth terminator has not been reached yet).
+
+The cap is a DoS mitigation: without it, a malicious regex against a multi-GB buffer combined with `EvaluationConfig::default()` (no timeout — see S13.1) can hang the evaluator. It is enforced inside `compute_window` in `src/evaluator/types/regex.rs`. Do not add a path that bypasses the cap, even for "trusted" rules — the cap is also what makes the regex evaluator's worst-case runtime bounded.
+
+### 2.9 Regex `/s` Flag Affects Anchor Advance Only, Not Match Result
+
+`RegexFlags::start_offset` (the `/s` suffix) controls *only* `regex_bytes_consumed`: when set, the anchor advance is `m.start()` (match-start) instead of `m.end()` (match-end). The match *result* (whether a pattern matches, and what matched text is returned) is unchanged. This matches libmagic's `REGEX_OFFSET_START` flag, which zeros the `rm_len` contribution in `moffset()` but does not alter the regex scan itself. Tests for `/s` must exercise `regex_bytes_consumed` directly or check the resolved offset of a `Relative(N)` child rule; checking `read_regex` alone won't detect a broken `/s` implementation.
+
 ## 3. Parser Architecture
 
 ### 3.1 Type Keyword Parsing Split
@@ -77,6 +107,10 @@ Lowercase pointer specifiers (`.s`, `.l`, `.q`) map to **little-endian**, not na
 
 The load-bearing invariant is that the anchor is updated *before recursing into children* (so children and their followers see the new anchor). The current code also happens to set the anchor before `matches.push(...)`, but the push-ordering relative to `set_last_match_end` is incidental for anchor correctness -- only the ordering before the `evaluate_rules` recursion call matters. (Future code that reads the anchor while iterating `matches` would make this ordering load-bearing, so do not "optimize" the order without checking call sites first.) `bytes_consumed()` (in `evaluator/types/mod.rs`) is the source of truth for advance distance; for variable-width types it re-derives consumption from the buffer rather than trusting `Value::String.len()` (which can drift from the original byte length via `from_utf8_lossy`). Pascal-string consumption is also clamped against the remaining buffer to prevent attacker-controlled length prefixes from poisoning the anchor to `usize::MAX`.
 
+### 3.9 `parse_text_magic_file` is Fail-Fast, Not Skip-on-Error
+
+`build_rule_hierarchy` propagates any `parse_magic_rule_line` error immediately, so a single unparseable rule (e.g., a child using unsupported `&+N` relative-offset syntax or an unquoted `$VAR` string value -- see S3.6) causes the **entire file load** to fail with `ParseError::InvalidSyntax`. There is no skip-and-continue mode. When writing corpus tests against third_party `.magic` files that mix supported and unsupported syntax, bypass the parser and build the equivalent `MagicRule` tree programmatically via the AST; the runtime evaluator can still be exercised end-to-end against the real testfile buffer. See `tests/evaluator_tests.rs::test_regex_eol_corpus` for a worked example.
+
 ## 4. Module Visibility & Re-exports
 
 ### 4.1 Private Engine Module
@@ -130,6 +164,10 @@ Middle-endian date keywords are NOT supported. They were removed until real midd
 
 libmagic types are signed by default (`byte`, `short`, `long`, `quad`). Unsigned variants use `u` prefix (`ubyte`, `ushort`, `ulong`, `uquad`, etc.).
 
+### 6.4 `TypeKind::String { max_length: None }` Against Buffers Without NUL
+
+`read_string` with `max_length: None` reads until the first NUL or end of buffer. On NUL-free buffers (raw ASCII text, JSON, log lines, etc.) it reads the *entire remaining buffer*, and equality comparison against a short target value then fails. Programmatic rules built against such buffers must set `max_length: Some(target_len)` explicitly. Text magic rules (`string "MZ"`) typically work anyway because real executable headers contain NULs within the first few bytes.
+
 ## 7. Testing
 
 ### 7.1 Doctest Import Paths
@@ -224,3 +262,13 @@ All tags and commits MUST be signed -- use `git tag -s` and `git commit -s -S`.
 - **Rule:** Library consumers embedding libmagic-rs in services or untrusted-input pipelines should **not** use `EvaluationConfig::default()`. Use `EvaluationConfig::performance()` (which sets `timeout_ms: Some(1000)`) as the safe preset, or construct a config explicitly with a non-`None` timeout sized for your workload.
 - **Validation:** `timeout_ms` is clamped to `MAX_SAFE_TIMEOUT_MS` (5 minutes) by config validation and must be `> 0` if specified -- see the validation logic in `src/config.rs`.
 - **Note:** `Default` cannot be changed to set a timeout without breaking API expectations of callers who deliberately want no timeout (e.g., CLI one-shot invocations). The gotcha is that the unsafe default is the ergonomic choice; document the tradeoff prominently in any new consumer-facing docs.
+
+## 14. Output Formatting
+
+### 14.1 `\b` (Backspace) Prefix in Rule Messages Suppresses Leading Space
+
+`MagicDatabase::build_result` concatenates rule messages with a space separator, **except** when a message starts with `\u{0008}` (backspace / `\b`), in which case the backspace is stripped and no leading space is inserted. This mirrors GNU `file`'s description formatting (used by rules like `>&1 regex/1l ... \b, version %s` to produce `Ansible Vault text, version 1.1` instead of `Ansible Vault text , version 1.1`). Tests that manually simulate the concatenation path (e.g., corpus tests that bypass `load_from_file` -- see S3.9) must honor this convention or their assertions will diverge from the real evaluator output.
+
+### 14.2 `%s` (and Other printf-Style Format Specifiers) Are Not Substituted
+
+Magic rule messages like `\b, version %s` are passed through verbatim to the final concatenated description -- the evaluator does not implement printf-style format substitution. Captured values from regex/search/pattern matches live on `RuleMatch.value`, not embedded in `RuleMatch.message`. Tests or output checks that expect substituted text (e.g., "version 1.1") must either hardcode the expected token in the rule's message or assert against `RuleMatch.value` directly.
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index a8cf7305..d442e328 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -94,7 +94,14 @@ libmagic-rs/
 │   │   ├── mod.rs          # Public API surface with re-exports, EvaluationContext, RuleMatch
 │   │   ├── engine.rs       # Core evaluation logic (evaluate_single_rule, evaluate_rules, evaluate_rules_with_config)
 │   │   ├── offset.rs       # Offset resolution
-│   │   ├── types.rs        # Type reading with bounds checking
+│   │   ├── types/          # Type reading subsystem
+│   │   │   ├── mod.rs      # Type dispatch and pattern matching
+│   │   │   ├── numeric.rs  # Byte, Short, Long, Quad
+│   │   │   ├── float.rs    # Float, Double
+│   │   │   ├── date.rs     # Date, QDate
+│   │   │   ├── string.rs   # String, PString
+│   │   │   ├── regex.rs    # Regex pattern matching
+│   │   │   └── search.rs   # Search literal scanning
 │   │   ├── operators.rs    # Comparison operations
 │   │   └── strength.rs     # Strength calculation and sorting
 │   │
@@ -288,6 +295,8 @@ pub struct MagicRule {
 - `Long { endian: Endianness, signed: bool }` - 32-bit integer
 - `Quad { endian: Endianness, signed: bool }` - 64-bit integer
 - `String { max_length: Option<usize> }` - Null-terminated string
+- `Regex { flags: RegexFlags, count: Option<NonZeroU32> }` - Regular expression matching
+- `Search { range: NonZeroUsize }` - Bounded literal pattern search
 
 **Hierarchical Structure:**
 
@@ -465,6 +474,8 @@ Vetted dependencies with minimal unsafe:
 - `memmap2` - Memory mapping (audited)
 - `nom` - Parsing (no unsafe)
 - `thiserror` - Error handling (no unsafe)
+- `regex` - Pattern matching (production dependency)
+- `memchr` - Fast byte searching (production dependency)
 
 ---
 
@@ -499,7 +510,7 @@ The evaluation hot path is optimized for:
 
 1. Add variant to `TypeKind` enum (`ast.rs`)
 2. Add parsing logic (`grammar/mod.rs`)
-3. Add reading logic (`types.rs`)
+3. Add reading logic in `evaluator/types/` (as a submodule for complex types or in `types/mod.rs` for simple ones)
 4. Add serialization support (`build_helpers.rs`)
 5. Add tests
 6. Update documentation
diff --git a/docs/solutions/integration-issues/implementing-variable-width-typekind-variant.md b/docs/solutions/integration-issues/implementing-variable-width-typekind-variant.md
new file mode 100644
index 00000000..8086fb61
--- /dev/null
+++ b/docs/solutions/integration-issues/implementing-variable-width-typekind-variant.md
@@ -0,0 +1,133 @@
+---
+title: Implementing regex and search evaluator types in libmagic-rs
+category: integration-issues
+date: 2026-04-10
+tags: [rust, evaluator, regex, search, typekind, libmagic, exhaustive-match]
+severity: medium
+components: [evaluator/types, parser/codegen, parser/grammar]
+related_issues: [39]
+---
+
+## Problem
+
+Implementing evaluator support for `TypeKind::Regex` and `TypeKind::Search` in libmagic-rs exposed five interlocking issues: a stale `regex` crate feature flag, a dispatch signature that could not carry pattern operands to the type-reading layer, a missing anchor-advance path for variable-width regex matches, a build-script exhaustive-match failure that surfaced before the library error, and a clippy `doc_markdown` lint on module-level docs.
+
+## Root Cause
+
+1. `regex` v1.12+ exposes `regex::bytes::RegexBuilder` unconditionally; declaring `features = ["bytes"]` references a feature that no longer exists, so cargo rejects the manifest.
+2. `read_typed_value(buffer, offset, type_kind)` was designed for fixed-shape numeric and string types that need only the buffer and offset. Regex and Search are fundamentally different — they require the rule's *value operand* (the pattern) at read time to compile the regex or locate the needle.
+3. `bytes_consumed` (the source of truth for advancing `EvaluationContext::last_match_end` per GOTCHAS.md S3.8) re-derives consumption from the buffer for variable-width types. Regex matches have buffer-dependent lengths, so the anchor advance cannot be computed without re-running the regex.
+4. `src/parser/codegen.rs` is included by `build.rs` via `#[path]` (GOTCHAS.md S1.2). Adding `TypeKind` variants breaks `serialize_type_kind`'s exhaustive match, and cargo surfaces the build-script compilation failure *before* the library error — a trap not previously documented in S2.1.
+5. Clippy's pedantic `doc_markdown` lint flags unquoted identifiers like `TypeKind` in rustdoc, and each identifier must be individually backticked.
+
+## Solution
+
+**Manifest fix:** Drop the nonexistent feature flag in `Cargo.toml`:
+
+```toml
+regex = "1.12.3"
+```
+
+**Dispatch threading:** Add `read_typed_value_with_pattern(buffer, offset, type_kind, pattern: Option<&Value>)` as a new entry point alongside the existing 3-arg `read_typed_value`, which becomes a thin wrapper that forwards `pattern: None`. The engine calls the pattern-aware form uniformly; the 3-arg convenience wrapper is retained so the ~30 existing call sites (`read_typed_value(buf, off, &kind)`) compile unchanged. Add a parallel `bytes_consumed_with_pattern` so the anchor-advance path can reach the pattern operand for `TypeKind::Regex` and `TypeKind::Search`.
+
+Additionally, expose a `read_pattern_match(buffer, offset, type_kind, pattern) -> Result<Option<Value>, TypeReadError>` helper for the engine's pattern-bearing code path. `Option<Value>` is the structured "no match" signal: a genuine miss returns `None`, while a legitimate zero-width regex match (e.g., `^`, `a*`, `\b`) returns `Some(Value::String(String::new()))`. `read_typed_value_with_pattern` collapses `None` to `Value::String(String::new())` for back-compat with the single-`Value` return shape; the engine path uses `read_pattern_match` directly and drives its own `Equal`/`NotEqual` decision from the `Option` discriminant.
+
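+**Regex reader** (`src/evaluator/types/regex.rs`) — the first revision used a `build_regex` helper that wrapped the pattern in `^(?:...)` when `/l` was set so bare, unanchored patterns could not match mid-line. That wrapping (and the conditional `multi_line`) was later removed as incorrect — see GOTCHAS.md S2.7; `build_regex` now always sets `multi_line(true)` and `/l` only bounds the scan window. The original shape is shown here for reference: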
+
+```rust
+fn build_regex(
+    pattern: &str,
+    case_insensitive: bool,
+    start_of_line: bool,
+) -> Result<Regex, regex::Error> {
+    let owned;
+    let effective_pattern: &str = if start_of_line {
+        owned = format!("^(?:{pattern})");
+        &owned
+    } else {
+        pattern
+    };
+    RegexBuilder::new(effective_pattern)
+        .case_insensitive(case_insensitive)
+        .multi_line(start_of_line)
+        .build()
+}
+
+pub fn read_regex(
+    buffer: &[u8],
+    offset: usize,
+    pattern: &str,
+    case_insensitive: bool,
+    start_of_line: bool,
+) -> Result<Option<Value>, TypeReadError> {
+    if offset >= buffer.len() { return Err(BufferOverrun { .. }); }
+    let regex = build_regex(pattern, case_insensitive, start_of_line)
+        .map_err(|e| UnsupportedType {
+            type_name: format!("regex compile error: {e}"),
+        })?;
+    let remaining = &buffer[offset..];
+    Ok(regex.find(remaining).map(|m| {
+        Value::String(String::from_utf8_lossy(m.as_bytes()).into_owned())
+    }))
+}
+```
+
+**Search reader** (`src/evaluator/types/search.rs`):
+
+```rust
+pub fn read_search(
+    buffer: &[u8],
+    offset: usize,
+    pattern: &[u8],
+    range: Option<usize>,
+) -> Result<Option<Value>, TypeReadError> {
+    if offset >= buffer.len() { return Err(BufferOverrun { .. }); }
+    let remaining = &buffer[offset..];
+    let window_len = range.map_or(remaining.len(), |n| n.min(remaining.len()));
+    let window = &remaining[..window_len];
+    Ok(memchr::memmem::find(window, pattern).map(|_| {
+        Value::String(String::from_utf8_lossy(pattern).into_owned())
+    }))
+}
+```
+
+`None` is the structured "no match" signal, which lets the engine distinguish a zero-width regex match from a genuine miss without reusing `Value::String(String::new())` as a sentinel.
+
+**Anchor advance:** In `bytes_consumed_with_pattern`, the `Regex` arm re-runs the regex via `regex_bytes_consumed(...)` and returns `m.end()`. The `Search` arm re-runs `memchr::memmem::find` against the window and returns `match_idx + pattern.len()` — the byte just past the matched needle, matching GNU `file`'s `softmagic.c` `FILE_SEARCH` path where `ms->search.offset += idx` and then `moffset()` adds `vlen = m->vallen`. An earlier revision of this PR advanced by the full window size (`range`); that was wrong and caused relative-offset children to land far past the intended byte.
+
+**Engine pattern-bearing code path:** In `evaluate_single_rule_with_anchor`, split the flow into two arms. For `TypeKind::Regex | Search`, call `read_pattern_match` and translate its `Option` result directly into `Equal` (`Some` → match) / `NotEqual` (`None` → match) — no `apply_operator` call. Any other operator on a pattern-bearing type is rejected as `TypeReadError::UnsupportedType` because it has no well-defined semantics (ordering a matched string against the pattern literal produces nonsense). For all other types, continue through `read_typed_value_with_pattern` + `coerce_value_to_type` + `apply_operator` as before.
+
+**Codegen:** Add `Regex { .. }` and `Search { .. }` arms to `serialize_type_kind` in `src/parser/codegen.rs`. Verify `cargo check` against `build.rs` output, not just the library.
+
+**Doc lint:** Backtick identifiers individually in module docs: `` //! Implements the `regex` `TypeKind`. ``
+
+## Prevention
+
+- **Verify crate features on docs.rs before adding them.** The `regex` crate dropped the `bytes` feature by v1.12 (`regex::bytes` is unconditional). Check `https://docs.rs/<crate>/<version>/` for the exact feature list before editing `Cargo.toml`. A wasted `cargo build` cycle is the cheap failure mode; a silently-disabled feature is the expensive one.
+- **When adding a `TypeKind` variant, walk GOTCHAS S2.1 in order, then verify the build.rs pipeline.** The hidden site is `serialize_type_kind` in `src/parser/codegen.rs` — it is included via `#[path]` in `build.rs`, so omissions surface as confusing `E0004`/`E0599` errors from `build.rs` *before* any library file compiles. Run `cargo clean && cargo check` after editing `TypeKind` to shake these out early.
+- **`bytes_consumed` is load-bearing for relative offsets.** Any variable-width variant (`Regex`, `Search`, `String`, `PString`, future additions) MUST have an explicit arm in `bytes_consumed` in `src/evaluator/types/mod.rs`. The catch-all `_ =>` arm fires a `debug_assert` in dev/test, but release builds will silently corrupt the GNU `file` anchor for any downstream `Relative(N)` sibling. Treat missing arms as a correctness bug, not a lint.
+- **Sibling functions beat signature extensions when the new concern is narrow.** The earlier design in this solution suggested extending `read_typed_value` in place; the current implementation instead added a sibling `read_typed_value_with_pattern` and kept `read_typed_value` as a zero-cost wrapper. The sibling approach avoided updating ~30 existing call sites that would otherwise have to pass `None` for the new argument. When only a narrow slice of callers needs the new capability, a sibling function is cheaper and easier to review.
+- **Do not overload `Value::String("")` as a "no match" sentinel.** A zero-width regex match (`^`, `a*`, `\b`) returns a valid empty matched string that is not a miss. Use `Result<Option<Value>, _>` or a dedicated sentinel variant when the reader needs to distinguish "found nothing" from "found zero bytes." The engine path must work from the `Option`, not from `is_empty()`.
+- **Search advances by match-end, not window-end.** The GNU `file` contract is `anchor += match_idx + pattern.len()`; the full search window size is only used as a bound on the scan. Getting this wrong silently corrupts relative-offset children of every successful search rule with no test failure for any rule that does not chain children.
+- **Pattern-bearing types reject non-equality operators.** `regex < "foo"` and `search & 0xff` are magic-file semantic bugs. The engine should return a structured error rather than falling through to `apply_operator`, which produces garbage ordering comparisons against the pattern literal.
+- **Backtick every Rust identifier individually in doc comments.** Clippy `doc_markdown` fires on bare `TypeKind` even inside a sentence like "extends `read_typed_value` for TypeKind::Regex". Write `` `TypeKind::Regex` `` as a separate backticked span.
+
+## Testing
+
+- **Unit tests for `read_regex` and `read_search`** (added this session): basic match, no-match, case-insensitive flag, start-of-line anchor, non-zero offset handling, bounded search range, invalid/unparseable pattern error path, and binary (non-UTF-8) buffer handling.
+- **Start-of-line anchoring negative test.** With `/l` enabled, a bare (unanchored) pattern like `"line"` that appears only mid-line must return `None` (the structured no-match signal), not an empty string. The `build_regex` helper's `^(?:...)` wrapper is what makes this correct — test it explicitly so a future refactor does not regress.
+- **Anchor-advance regression tests.** After a successful `Regex` or `Search` match at offset `O` consuming `N` bytes, assert `EvaluationContext::last_match_end() == O + N`. Add a parallel test for the no-match path (anchor must not advance).
+- **Sibling-after-regex integration test.** Construct a `MagicRule` tree where a `Regex` parent match is followed by a sibling with `OffsetSpec::Relative(+K)`; verify the sibling reads from `anchor + K`, not from absolute `K`. Repeat for `Search` and for `Relative(-K)` to cover both directions.
+- **Property test hook.** Add `Regex` and `Search` arms to `arb_type_kind` in `tests/property_tests.rs` so the codegen round-trip and strength-calculation invariants exercise the new variants automatically.
+
+## Related Documentation
+
+- `GOTCHAS.md` S2.1 — TypeKind exhaustive-match checklist across 10+ files (ast, grammar, types, codegen, strength, property_tests, evaluator/types, output, grammar/tests); catch-all arms in `bytes_consumed` will fire `debug_assert` for variable-width variants.
+- `GOTCHAS.md` S3.1 — parser type-keyword split between `src/parser/types.rs` (`parse_type_keyword` / `type_keyword_to_kind`) and `src/parser/grammar/mod.rs` for suffixes.
+- `GOTCHAS.md` S1.2 / S1.3 — build.rs / codegen serialization boundary and generated-import sync (`generate_builtin_rules` in `src/parser/codegen.rs`).
+- `GOTCHAS.md` S3.8 — `bytes_consumed` as source of truth for `EvaluationContext::last_match_end` anchor advance.
+- `GOTCHAS.md` S8.1 — `enum_variant_names` clippy guidance for same-suffix variants; S10.3 — public enum variants require `# Examples` rustdoc (clippy enforced).
+- `AGENTS.md` "Adding New Type Support" — 7-step procedure for new `TypeKind` variants.
+- GitHub issue **#39** — parent ticket tracking regex and search type evaluator support.
+
+No prior solution doc specifically covers regex/search type matching, the build.rs/codegen indirect-error surface, or `clippy::doc_markdown` fixes.
diff --git a/docs/src/ast-structures.md b/docs/src/ast-structures.md
index 2058335b..fd0abb44 100644
--- a/docs/src/ast-structures.md
+++ b/docs/src/ast-structures.md
@@ -191,6 +191,17 @@ pub enum TypeKind {
         length_width: PStringLengthWidth,
         length_includes_itself: bool,
     },
+
+    /// Regular expression pattern matching
+    Regex {
+        flags: RegexFlags,
+        count: Option<NonZeroU32>,
+    },
+
+    /// Bounded literal byte sequence search
+    Search {
+        range: NonZeroUsize,
+    },
 }
 ```
 
@@ -328,6 +339,140 @@ let limited_pstring = TypeKind::PString {
 };
 ```
 
+### Regex (Regular Expression Pattern Matching)
+
+The `Regex` variant matches POSIX-extended regular expression patterns against file buffers. Patterns are binary-safe and always compiled with multi-line mode enabled (matching `^` and `$` at line boundaries). The scan window is capped at 8192 bytes regardless of the count parameter.
+
+**Structure:**
+
+```rust
+Regex {
+    flags: RegexFlags,
+    count: Option<NonZeroU32>,
+}
+```
+
+**Fields:**
+
+- `flags`: Modifier flags from the `/[csl]` suffix (case-insensitive, start-offset, line-based)
+- `count`: Optional numeric scan limit, interpreted as bytes or lines depending on `flags.line_based`
+
+**Example:**
+
+```text
+0    regex    [0-9]+       Numeric content
+0    regex/1l  ^#!/        Shebang on first line
+0    regex/cs  json        Case-insensitive "json" anywhere
+```
+
+**Behavior:**
+
+- Returns `Value::String` containing the matched text
+- Scan window capped at 8192 bytes (GNU `file` `FILE_REGEX_MAX`)
+- Multi-line mode unconditional (`^`/`$` match line boundaries, `.` does not match newlines)
+- Zero-width matches (e.g., `^`, `a*`) return `Value::String("")` and are distinguished from no-match
+- Only supports `Equal` and `NotEqual` operators; other comparison operators return `TypeReadError::UnsupportedType`
+
+### RegexFlags Struct
+
+The `RegexFlags` struct specifies regex behavior modifiers. All flags default to `false` via `RegexFlags::default`.
+
+```rust
+pub struct RegexFlags {
+    /// `/c` - case-insensitive matching
+    pub case_insensitive: bool,
+    /// `/s` - advance anchor to match-start instead of match-end
+    pub start_offset: bool,
+    /// `/l` - measure scan window in lines instead of bytes
+    pub line_based: bool,
+}
+```
+
+**Flag combinations:**
+
+- `/c` - case-insensitive matching
+- `/s` - anchor advances to match-start (for chaining child rules)
+- `/l` - count parameter measured in lines (80 bytes per line, capped at 8192 total)
+- `/cs`, `/cl`, `/sl`, `/csl` - any combination of flags
+
+**Examples:**
+
+```rust
+use libmagic_rs::parser::ast::{TypeKind, RegexFlags};
+use std::num::NonZeroU32;
+
+// Plain regex with 8192-byte default scan window
+let plain_regex = TypeKind::Regex {
+    flags: RegexFlags::default(),
+    count: None,
+};
+
+// First line only (1 line, capped at 8192 bytes)
+let first_line = TypeKind::Regex {
+    flags: RegexFlags {
+        line_based: true,
+        ..RegexFlags::default()
+    },
+    count: NonZeroU32::new(1),
+};
+
+// Case-insensitive with anchor at match-start
+let case_start = TypeKind::Regex {
+    flags: RegexFlags {
+        case_insensitive: true,
+        start_offset: true,
+        line_based: false,
+    },
+    count: None,
+};
+```
+
+### Search (Bounded Literal Byte Sequence Search)
+
+The `Search` variant scans for a literal byte pattern within a bounded range. Unlike `String`, which matches only at the exact offset, `Search` scans forward up to `range` bytes for the first occurrence.
+
+**Structure:**
+
+```rust
+Search {
+    range: NonZeroUsize,
+}
+```
+
+**Fields:**
+
+- `range`: Mandatory scan window width in bytes (must be non-zero per GNU `file` magic(5) specification)
+
+**Example:**
+
+```text
+0    search/256    PK\003\004    ZIP archive within first 256 bytes
+```
+
+**Behavior:**
+
+- Returns `Value::String` containing the matched bytes if found within range
+- Anchor advances to the end of the matched pattern (`match_idx + pattern.len()`), not by the full search window
+- Only supports `Equal` and `NotEqual` operators
+- Range is mandatory; `search/0` or bare `search` are parse errors
+
+**Examples:**
+
+```rust
+use libmagic_rs::parser::ast::TypeKind;
+use std::num::NonZeroUsize;
+
+// Scan up to 256 bytes for the pattern
+let bounded_search = TypeKind::Search {
+    range: NonZeroUsize::new(256).unwrap(),
+};
+
+// Scan up to 1024 bytes
+let wide_search = TypeKind::Search {
+    range: NonZeroUsize::new(1024).unwrap(),
+};
+```
+
 ### Endianness Options
 
 ```rust
@@ -520,9 +665,11 @@ let script_rule = MagicRule {
 
 1. **Use `Byte { signed }`** for single-byte values and flags, specifying signedness
 2. **Use `Short/Long/Quad`** with explicit endianness and signedness for multi-byte integers
-3. **Use `String`** with length limits for text patterns
+3. **Use `String`** with length limits for text patterns at exact offsets
 4. **Use `PString`** for Pascal-style length-prefixed strings
-5. **Use `Bytes`** for exact binary sequences
+5. **Use `Regex`** for pattern matching (complex patterns, line-based checks, case-insensitive matching)
+6. **Use `Search`** for simple substring matching within a bounded range (faster than regex for literal patterns)
+7. **Use `Bytes`** for exact binary sequences
 
 ### Performance Considerations
 
diff --git a/docs/src/compatibility.md b/docs/src/compatibility.md
index 444d5eca..3f5d7327 100644
--- a/docs/src/compatibility.md
+++ b/docs/src/compatibility.md
@@ -75,9 +75,9 @@ $ rmagic --json example.elf
 | Hierarchical rules | ✅       | ✅     | Complete | Parent-child relationships             |
 | Indirect offsets   | ✅       | ✅     | Complete | Pointer dereferencing                  |
 | Relative offsets   | ✅       | ✅     | Complete | Position-relative addressing (PR #211) |
-| Search patterns    | ✅       | 📋     | Planned  | Pattern searching in ranges            |
+| Search patterns    | ✅       | ✅     | Complete | Pattern searching in ranges (PR #214)  |
 | Bitwise operations | ✅       | ✅     | Complete | AND, XOR, NOT operations               |
-| String operations  | ✅       | 📋     | Planned  | Case-insensitive, regex                |
+| String operations  | ✅       | ✅     | Complete | Case-insensitive, regex (PR #214)      |
 | Date/time formats  | ✅       | ✅     | Complete | 32-bit and 64-bit timestamps           |
 | Floating point     | ✅       | ✅     | Complete | Float, double with endianness          |
 | Unicode support    | ✅       | 📋     | Planned  | UTF-8, UTF-16 strings                  |
diff --git a/docs/src/evaluator.md b/docs/src/evaluator.md
index b74d0711..258d7865 100644
--- a/docs/src/evaluator.md
+++ b/docs/src/evaluator.md
@@ -132,16 +132,30 @@ Interprets bytes according to type specifications. The types module is organized
 - **QDate**: 64-bit Unix timestamps (signed seconds since epoch) with configurable endianness and UTC/local time formatting
 - **String**: Byte sequences with length limits
 - **PString**: Pascal-style length-prefixed strings with 1-byte (`/B`), 2-byte (`/H` or `/h`), or 4-byte (`/L` or `/l`) length prefixes, supporting big-endian and little-endian byte order
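+- **Regex**: Binary-safe regex matching via `regex::bytes::Regex`, always compiled in multi-line mode; the `/c` flag enables case-insensitive matching, `/s` advances the anchor to match-start instead of match-end, and `/l` measures the scan window in lines instead of bytes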
+- **Search**: Bounded literal pattern scan via `memchr::memmem::find`; `search/N` caps the scan window to `N` bytes from the offset
 - **Bounds checking**: Prevents buffer overruns
 
 ```rust
+// Non-pattern types use the 3-arg convenience wrapper:
 pub fn read_typed_value(
     buffer: &[u8],
     offset: usize,
     type_kind: &TypeKind,
 ) -> Result<Value, TypeReadError>
+
+// Pattern-bearing types (Regex, Search) thread the rule's value operand
+// through as the match pattern:
+pub fn read_typed_value_with_pattern(
+    buffer: &[u8],
+    offset: usize,
+    type_kind: &TypeKind,
+    pattern: Option<&Value>,
+) -> Result<Value, TypeReadError>
 ```
 
+For value-based types the engine calls `read_typed_value_with_pattern`, passing `Some(&rule.value)`; the convenience `read_typed_value` is a thin wrapper that forwards `pattern: None`. For pattern-bearing types (`Regex`, `Search`), `read_typed_value_with_pattern` collapses a genuine "no match" to `Value::String(String::new())` so the back-compat single-`Value` return shape is preserved; the engine therefore bypasses it for those types and calls `read_pattern_match` directly, which returns `Result<Option<Value>, _>` so zero-width matches (e.g. `^`, `a*`) can be distinguished from genuine misses.
+
 The `read_byte` function signature changed in v0.2.0 to accept three parameters (`buffer`, `offset`, and `signed`) instead of two, allowing explicit control over signed vs unsigned byte interpretation.
 
 **Floating-Point Type Reading (`evaluator/types/float.rs`):**
diff --git a/docs/src/parser.md b/docs/src/parser.md
index b59adde2..71ba7bef 100644
--- a/docs/src/parser.md
+++ b/docs/src/parser.md
@@ -285,6 +285,139 @@ The parser supports date and timestamp types for parsing Unix timestamps (signed
 
 The parser creates `TypeKind::Date` or `TypeKind::QDate` variants with appropriate endianness and UTC flags. During evaluation, timestamps are formatted as strings in the format "Www Mmm DD HH:MM:SS YYYY" to match GNU file output.
 
+### Regex Type
+
+The parser supports regular expression matching through the `regex` keyword, enabling POSIX-extended regex patterns against file contents:
+
+**Type Keyword:**
+
+- `regex` - Regular expression match → `TypeKind::Regex { flags, count }`
+
+**Flag Support:**
+
+Regex rules accept three modifier flags via the `/[csl]` suffix:
+
+- `/c` - Case-insensitive matching → `RegexFlags::case_insensitive = true`
+- `/s` - Advance anchor to match-start instead of match-end → `RegexFlags::start_offset = true`
+- `/l` - Line-based counting (interpret count as line count) → `RegexFlags::line_based = true`
+
+Flags can be combined in any order (`/cl`, `/lc`, `/csl` are all equivalent). The parser also accepts interleaved flag-and-count syntax matching GNU `file` semantics: `regex/1l` and `regex/l1` both parse identically.
+
+**Optional Count Parameter:**
+
+An optional decimal count controls the scan window:
+
+- No count: scan 8192 bytes (default)
+- `/N` (no `/l`): scan at most `N` bytes, capped at 8192
+- `/Nl` (with `/l`): scan at most `N` lines, effective byte cap is `min(N * 80, 8192)`
+
+The 8192-byte hard cap matches GNU `file`'s `FILE_REGEX_MAX` constant and prevents runaway regex scans against large buffers.
+
+**Parsing Examples:**
+
+```rust
+// Plain regex (no flags, 8192-byte default scan window)
+parse_type_and_operator("regex")
+// → TypeKind::Regex { flags: RegexFlags::default(), count: None }
+
+// Case-insensitive flag
+parse_type_and_operator("regex/c")
+// → TypeKind::Regex { flags: RegexFlags { case_insensitive: true, .. }, count: None }
+
+// Line-based with explicit count
+parse_type_and_operator("regex/1l")
+// → TypeKind::Regex { flags: RegexFlags { line_based: true, .. }, count: Some(1) }
+
+// Combined flags and count (interleaved order accepted)
+parse_type_and_operator("regex/c256s")
+// → TypeKind::Regex { flags: RegexFlags { case_insensitive: true, start_offset: true, .. }, count: Some(256) }
+```
+
+**Usage in Magic Rules:**
+
+```text
+// Match lines starting with a digit
+0 regex "^[0-9]" numeric prefix
+
+// Case-insensitive JSON detection
+0 regex/c "\\{.*\"[^\"]+\"" possible JSON
+
+// Scan first line only for version string
+>1 regex/1l "version [0-9]+" version line
+```
+
+**Regex Semantics:**
+
+- Patterns are compiled with multi-line mode always enabled (matching libmagic's unconditional `REG_NEWLINE`), so `^` and `$` match at line boundaries and `.` does not match `\n`.
+- The scan window is always capped at 8192 bytes regardless of the `count` value.
+- Zero-width matches (`^`, `a*`, `\b`) are preserved as `Value::String("")` and distinguished from genuine misses.
+- Regex rules only support `Operator::Equal` and `Operator::NotEqual`; other comparison operators are rejected at evaluation time.
+
+**Features:**
+
+- ✅ `regex` keyword recognition with suffix parsing
+- ✅ Three modifier flags (`/c`, `/s`, `/l`) with arbitrary combination order
+- ✅ Optional numeric count parameter (interleaved with flags per GNU `file` semantics)
+- ✅ 8192-byte scan window cap matching `FILE_REGEX_MAX`
+- ✅ Bare `regex/` with no valid modifier is a parse error
+- ✅ `regex/0` is rejected (zero count has no valid semantics)
+- ✅ `RegexFlags` struct representation for clean flag management
+
+### Search Type
+
+The parser supports bounded literal byte sequence searching through the `search` keyword:
+
+**Type Keyword:**
+
+- `search` - Multi-byte pattern search within bounded range → `TypeKind::Search { range }`
+
+**Mandatory Range Parameter:**
+
+Search rules require a decimal range suffix specifying the scan window width in bytes:
+
+- `/N` - Scan up to `N` bytes for the literal pattern, stored as `NonZeroUsize`
+
+Per GNU `file` magic(5) specification, the range is **mandatory**. Bare `search` (no `/N` suffix) and `search/0` are both rejected at parse time.
+
+**Parsing Examples:**
+
+```rust
+// 256-byte search window
+parse_type_and_operator("search/256")
+// → TypeKind::Search { range: NonZeroUsize(256) }
+
+// Bare search is a parse error (range is mandatory)
+parse_type_and_operator("search")
+// → Err(...)
+
+// Zero-range search is rejected
+parse_type_and_operator("search/0")
+// → Err(...)
+```
+
+**Usage in Magic Rules:**
+
+```text
+// Scan up to 256 bytes for DOS MZ header
+0 search/256 "MZ" DOS executable
+
+// Look for ZIP signature within first 1024 bytes
+0 search/1024 "PK\x03\x04" ZIP archive
+```
+
+**Search Semantics:**
+
+- Unlike `TypeKind::String`, which only matches at the exact offset, `search` scans forward up to `range` bytes for the first occurrence of the literal pattern.
+- The anchor advances to the end of the matched pattern (matching libmagic's `FILE_SEARCH` behavior in `softmagic.c::moffset()`).
+- Search rules only support `Operator::Equal` and `Operator::NotEqual`; other comparison operators are rejected at evaluation time.
+
+**Features:**
+
+- ✅ `search` keyword recognition with mandatory `/N` suffix
+- ✅ `NonZeroUsize` range representation (zero-width scan unrepresentable)
+- ✅ Bare `search` and `search/0` rejected at parse time
+- ✅ Binary-safe literal matching via `memchr::memmem::find`
+
 ## Parser Design Principles
 
 ### Error Handling
@@ -395,7 +528,6 @@ match detect_format(path)? {
 ### Not Yet Implemented
 
 - **Indirect Offsets**: Pointer dereferencing patterns (e.g., `(0x3c.l)`)
-- **Regex Support**: Regular expression matching in rules
 - **Binary .mgc Format**: Compiled magic database format
 - **Strength Modifiers**: `!:strength` parsing for rule priority
 
diff --git a/src/evaluator/engine/mod.rs b/src/evaluator/engine/mod.rs
index 4acf0a3a..cc74abbe 100644
--- a/src/evaluator/engine/mod.rs
+++ b/src/evaluator/engine/mod.rs
@@ -107,24 +107,87 @@ fn evaluate_single_rule_with_anchor(
     let absolute_offset =
         offset::resolve_offset_with_context(&rule.offset, buffer, last_match_end)?;
 
-    // Step 2: Read and interpret bytes at the resolved offset according to the rule's type
-    let read_value = types::read_typed_value(buffer, absolute_offset, &rule.typ)
-        .map_err(|e| LibmagicError::EvaluationError(e.into()))?;
+    // Step 2 & 3: Read and interpret bytes at the resolved offset according
+    // to the rule's type, and compute the logical match state.
+    //
+    // Pattern-bearing types (Regex, Search) take a different path from
+    // fixed-width types because the rule's `value` operand is the *pattern*,
+    // not an expected matched value. Running those through `apply_operator`
+    // would compare matched text ("123") against the pattern literal
+    // ("[0-9]+") and produce false negatives on any regex with
+    // metacharacters. Instead, `read_pattern_match` returns `Some(v)` on a
+    // successful match (possibly zero-width) and `None` on a genuine miss;
+    // the engine translates that directly into Equal / NotEqual. Any other
+    // operator on a pattern-bearing type is a magic-file semantic bug and
+    // surfaces as a hard error -- the fallthrough to `apply_operator`
+    // previously masked this by producing nonsense ordering comparisons
+    // against the pattern source text.
+    let (matched, read_value) = match &rule.typ {
+        crate::parser::ast::TypeKind::Regex { .. }
+        | crate::parser::ast::TypeKind::Search { .. } => {
+            let match_outcome =
+                types::read_pattern_match(buffer, absolute_offset, &rule.typ, Some(&rule.value))
+                    .map_err(|e| LibmagicError::EvaluationError(e.into()))?;
+            let pattern_found = match_outcome.is_some();
+            let matched = match &rule.op {
+                crate::parser::ast::Operator::Equal => pattern_found,
+                crate::parser::ast::Operator::NotEqual => !pattern_found,
+                other => {
+                    return Err(LibmagicError::EvaluationError(
+                        types::TypeReadError::UnsupportedType {
+                            type_name: format!(
+                                "operator {other:?} is not supported for pattern-bearing type {:?}; only Equal (=) and NotEqual (!=) are allowed",
+                                rule.typ
+                            ),
+                        }
+                        .into(),
+                    ));
+                }
+            };
+            // For anchor-advance and output, present the match as a
+            // `Value::String`. A genuine miss is represented as an empty
+            // string to keep the downstream `RuleMatch.value` contract
+            // uniform; the engine already decided `matched` above so the
+            // placeholder value only affects display and
+            // `bytes_consumed_with_pattern` (which re-derives the match
+            // position from the pattern, not this value).
+            let value =
+                match_outcome.unwrap_or_else(|| crate::parser::ast::Value::String(String::new()));
+            (matched, value)
+        }
+        _ => {
+            // Value-based types: read the typed value and apply the operator
+            // against the rule's expected value.
+            let read_value = types::read_typed_value_with_pattern(
+                buffer,
+                absolute_offset,
+                &rule.typ,
+                Some(&rule.value),
+            )
+            .map_err(|e| LibmagicError::EvaluationError(e.into()))?;
 
-    // Step 3: Coerce the rule's expected value to match the type's signedness/width.
-    // `coerce_value_to_type` returns `Cow::Borrowed` on the hot path so no
-    // allocation happens for pass-through values (e.g., string matches).
-    let expected_value = types::coerce_value_to_type(&rule.value, &rule.typ);
-    let expected_ref: &crate::parser::ast::Value = expected_value.as_ref();
+            // Coerce the rule's expected value to match the type's
+            // signedness/width. `coerce_value_to_type` returns
+            // `Cow::Borrowed` on the hot path so no allocation happens for
+            // pass-through values (e.g., string matches).
+            let expected_value = types::coerce_value_to_type(&rule.value, &rule.typ);
+            let expected_ref: &crate::parser::ast::Value = expected_value.as_ref();
 
-    // Step 4: Apply the operator to compare the read value with the expected value
-    // BitwiseNot needs type-aware bit-width masking so the complement is computed
-    // at the type's natural width (e.g., byte NOT of 0x00 = 0xFF, not u64::MAX).
-    let matched = match &rule.op {
-        crate::parser::ast::Operator::BitwiseNot => {
-            operators::apply_bitwise_not_with_width(&read_value, expected_ref, rule.typ.bit_width())
+            // BitwiseNot needs type-aware bit-width masking so the
+            // complement is computed at the type's natural width (e.g.,
+            // byte NOT of 0x00 = 0xFF, not u64::MAX).
+            let matched = match &rule.op {
+                crate::parser::ast::Operator::BitwiseNot => {
+                    operators::apply_bitwise_not_with_width(
+                        &read_value,
+                        expected_ref,
+                        rule.typ.bit_width(),
+                    )
+                }
+                op => operators::apply_operator(op, &read_value, expected_ref),
+            };
+            (matched, read_value)
         }
-        op => operators::apply_operator(op, &read_value, expected_ref),
     };
     Ok(matched.then_some((absolute_offset, read_value)))
 }
@@ -280,7 +343,12 @@ pub fn evaluate_rules(
             // anchor. The anchor is updated unconditionally to the end of
             // this match -- it may move forward or backward depending on
             // where successive rules match (it is *not* a high-watermark).
-            let consumed = types::bytes_consumed(buffer, absolute_offset, &rule.typ);
+            let consumed = types::bytes_consumed_with_pattern(
+                buffer,
+                absolute_offset,
+                &rule.typ,
+                Some(&rule.value),
+            );
             let new_anchor = absolute_offset.saturating_add(consumed);
             context.set_last_match_end(new_anchor);
 
diff --git a/src/evaluator/engine/tests.rs b/src/evaluator/engine/tests.rs
index 1583b386..db928437 100644
--- a/src/evaluator/engine/tests.rs
+++ b/src/evaluator/engine/tests.rs
@@ -2384,3 +2384,249 @@ fn test_resource_exhaustion_large_buffer_completes_without_panic() {
         matches.len()
     );
 }
+
+/// A regex rule whose pattern contains metacharacters must succeed when the
+/// pattern actually matches the buffer. Prior to this fix, the engine compared
+/// the matched text (e.g., "123") against the pattern literal ("[0-9]+") via
+/// `apply_operator`, which failed for any real regex.
+#[test]
+fn test_regex_rule_with_metacharacters_matches() {
+    let rule = MagicRule {
+        offset: OffsetSpec::Absolute(0),
+        typ: TypeKind::Regex {
+            flags: crate::parser::ast::RegexFlags::default(),
+            count: None,
+        },
+        op: Operator::Equal,
+        value: Value::String("[0-9]+".to_string()),
+        message: "has digits".to_string(),
+        children: vec![],
+        level: 0,
+        strength_modifier: None,
+    };
+
+    let mut context = EvaluationContext::new(EvaluationConfig::default());
+    let matches = evaluate_single_rule(&rule, b"abc123def", &mut context).unwrap();
+    assert_eq!(matches.len(), 1);
+    assert_eq!(matches[0].message, "has digits");
+}
+
+/// A regex rule whose pattern does not match must not match, confirming that
+/// the logical-match shortcut only fires on a non-empty reader result.
+#[test]
+fn test_regex_rule_with_metacharacters_no_match() {
+    let rule = MagicRule {
+        offset: OffsetSpec::Absolute(0),
+        typ: TypeKind::Regex {
+            flags: crate::parser::ast::RegexFlags::default(),
+            count: None,
+        },
+        op: Operator::Equal,
+        value: Value::String("[0-9]+".to_string()),
+        message: "has digits".to_string(),
+        children: vec![],
+        level: 0,
+        strength_modifier: None,
+    };
+
+    let mut context = EvaluationContext::new(EvaluationConfig::default());
+    let matches = evaluate_single_rule(&rule, b"abcdef", &mut context).unwrap();
+    assert!(matches.is_empty());
+}
+
+/// A search rule with `Operator::NotEqual` succeeds only when the literal
+/// pattern is absent from the window.
+#[test]
+fn test_search_rule_not_equal_succeeds_when_pattern_absent() {
+    let rule = MagicRule {
+        offset: OffsetSpec::Absolute(0),
+        typ: TypeKind::Search {
+            range: ::std::num::NonZeroUsize::new(64).unwrap(),
+        },
+        op: Operator::NotEqual,
+        value: Value::String("needle".to_string()),
+        message: "no needle".to_string(),
+        children: vec![],
+        level: 0,
+        strength_modifier: None,
+    };
+
+    let mut context = EvaluationContext::new(EvaluationConfig::default());
+    let matches = evaluate_single_rule(&rule, b"plain haystack", &mut context).unwrap();
+    assert_eq!(matches.len(), 1);
+}
+
+/// A non-Equal/NotEqual operator on a pattern-bearing type must surface as
+/// a hard error, not silently produce an ordering comparison against the
+/// pattern source text. Pre-fix, `regex > "[0-9]+"` matched by coincidence
+/// whenever the empty "no match" sentinel happened to lexicographically
+/// exceed the pattern literal.
+#[test]
+fn test_regex_rule_with_ordering_operator_is_rejected() {
+    let rule = MagicRule {
+        offset: OffsetSpec::Absolute(0),
+        typ: TypeKind::Regex {
+            flags: crate::parser::ast::RegexFlags::default(),
+            count: None,
+        },
+        op: Operator::GreaterThan,
+        value: Value::String("[0-9]+".to_string()),
+        message: "bogus".to_string(),
+        children: vec![],
+        level: 0,
+        strength_modifier: None,
+    };
+
+    let mut context = EvaluationContext::new(EvaluationConfig::default());
+    let result = evaluate_single_rule(&rule, b"abcdef", &mut context);
+    match result {
+        Err(LibmagicError::EvaluationError(_)) => {}
+        other => panic!("expected EvaluationError for ordering operator on regex, got {other:?}"),
+    }
+}
+
+#[test]
+fn test_search_rule_with_bitwise_operator_is_rejected() {
+    let rule = MagicRule {
+        offset: OffsetSpec::Absolute(0),
+        typ: TypeKind::Search {
+            range: ::std::num::NonZeroUsize::new(32).unwrap(),
+        },
+        op: Operator::BitwiseAnd,
+        value: Value::String("needle".to_string()),
+        message: "bogus".to_string(),
+        children: vec![],
+        level: 0,
+        strength_modifier: None,
+    };
+
+    let mut context = EvaluationContext::new(EvaluationConfig::default());
+    let result = evaluate_single_rule(&rule, b"plain haystack", &mut context);
+    assert!(
+        matches!(result, Err(LibmagicError::EvaluationError(_))),
+        "expected EvaluationError for bitwise operator on search"
+    );
+}
+
+/// A child rule with `OffsetSpec::Relative(0)` after a parent regex match
+/// must resolve to `parent_absolute_offset + match_length`, so the byte the
+/// child reads is the first byte *after* the parent's match. This is the
+/// regression test GOTCHAS 2.1 warns about: if `bytes_consumed_with_pattern`
+/// returns the wrong number for `TypeKind::Regex`, the child lands at the
+/// wrong offset and either misses or matches the wrong byte.
+#[test]
+fn test_regex_parent_advances_anchor_for_relative_child() {
+    // Buffer: "abc123X" -- parent regex "abc" matches bytes 0..3, so a
+    // Relative(0) child should read byte 3 = '1' (0x31). A Relative(-1)
+    // child would read byte 2 = 'c' (0x63).
+    let child = MagicRule {
+        offset: OffsetSpec::Relative(0),
+        typ: TypeKind::Byte { signed: false },
+        op: Operator::Equal,
+        value: Value::Uint(u64::from(b'1')),
+        message: "first digit".to_string(),
+        children: vec![],
+        level: 1,
+        strength_modifier: None,
+    };
+    let parent = MagicRule {
+        offset: OffsetSpec::Absolute(0),
+        typ: TypeKind::Regex {
+            flags: crate::parser::ast::RegexFlags::default(),
+            count: None,
+        },
+        op: Operator::Equal,
+        value: Value::String("abc".to_string()),
+        message: "abc prefix".to_string(),
+        children: vec![child],
+        level: 0,
+        strength_modifier: None,
+    };
+
+    let mut context = EvaluationContext::new(EvaluationConfig::default());
+    let matches = evaluate_rules(&[parent], b"abc123X", &mut context).unwrap();
+    assert_eq!(
+        matches.len(),
+        2,
+        "expected parent + child match, got {}: {matches:?}",
+        matches.len()
+    );
+    assert_eq!(matches[0].message, "abc prefix");
+    assert_eq!(matches[1].message, "first digit");
+}
+
+/// A child rule with `OffsetSpec::Relative(0)` after a parent search match
+/// must land at `match_index + pattern.len()` — NOT at `window_end` (the
+/// pre-fix window-size advance would land on a completely different byte).
+#[test]
+fn test_search_parent_advances_anchor_to_match_end_not_window_end() {
+    // Buffer: "XXXneedleYY_ZZ" -- parent `search/14 "needle"` finds the
+    // pattern at index 3, length 6, match-end = 9. A Relative(0) child
+    // should read byte 9 = 'Y' (0x59). With the pre-fix window-size
+    // advance, the anchor would move by the full 14-byte range to index
+    // 14, which is past the end of the 14-byte buffer.
+    let child = MagicRule {
+        offset: OffsetSpec::Relative(0),
+        typ: TypeKind::Byte { signed: false },
+        op: Operator::Equal,
+        value: Value::Uint(u64::from(b'Y')),
+        message: "trailing Y".to_string(),
+        children: vec![],
+        level: 1,
+        strength_modifier: None,
+    };
+    let parent = MagicRule {
+        offset: OffsetSpec::Absolute(0),
+        typ: TypeKind::Search {
+            range: ::std::num::NonZeroUsize::new(14).unwrap(),
+        },
+        op: Operator::Equal,
+        value: Value::String("needle".to_string()),
+        message: "found needle".to_string(),
+        children: vec![child],
+        level: 0,
+        strength_modifier: None,
+    };
+
+    let mut context = EvaluationContext::new(EvaluationConfig::default());
+    let matches = evaluate_rules(&[parent], b"XXXneedleYY_ZZ", &mut context).unwrap();
+    assert_eq!(matches.len(), 2, "expected parent + child, got {matches:?}");
+    assert_eq!(matches[1].message, "trailing Y");
+}
+
+/// Companion check for a non-zero relative offset: when the parent search
+/// finds the pattern early in the window, a Relative(+N) child must still
+/// resolve against the match-end anchor. This catches a class of bugs
+/// where the anchor update uses the wrong base offset.
+#[test]
+fn test_search_parent_relative_child_at_positive_offset() {
+    // Buffer: "prefix_NEEDLE_after_stuff" -- "NEEDLE" is at index 7, len
+    // 6, match-end = 13. A Relative(1) child should read byte 14 = 'a'.
+    let child = MagicRule {
+        offset: OffsetSpec::Relative(1),
+        typ: TypeKind::Byte { signed: false },
+        op: Operator::Equal,
+        value: Value::Uint(u64::from(b'a')),
+        message: "a after".to_string(),
+        children: vec![],
+        level: 1,
+        strength_modifier: None,
+    };
+    let parent = MagicRule {
+        offset: OffsetSpec::Absolute(0),
+        typ: TypeKind::Search {
+            range: ::std::num::NonZeroUsize::new(32).unwrap(),
+        },
+        op: Operator::Equal,
+        value: Value::String("NEEDLE".to_string()),
+        message: "found".to_string(),
+        children: vec![child],
+        level: 0,
+        strength_modifier: None,
+    };
+
+    let mut context = EvaluationContext::new(EvaluationConfig::default());
+    let matches = evaluate_rules(&[parent], b"prefix_NEEDLE_after_stuff", &mut context).unwrap();
+    assert_eq!(matches.len(), 2);
+    assert_eq!(matches[1].message, "a after");
+}
diff --git a/src/evaluator/strength.rs b/src/evaluator/strength.rs
index 1d0e7b4d..5caa3d70 100644
--- a/src/evaluator/strength.rs
+++ b/src/evaluator/strength.rs
@@ -77,6 +77,20 @@ pub fn calculate_default_strength(rule: &MagicRule) -> i32 {
             // Add bonus for limited-length strings (more constrained match)
             if max_length.is_some() { base + 5 } else { base }
         }
+        // Regex matches a pattern -- treat similarly to an unbounded string.
+        // A rule with an explicit `count` is more constrained (narrower scan
+        // window) and therefore more specific.
+        TypeKind::Regex { count, .. } => {
+            if count.is_some() {
+                25
+            } else {
+                20
+            }
+        }
+        // Search is always a bounded scan (the range is mandatory), so it
+        // gets the "constrained match" bonus unconditionally. This matches
+        // the max_length bonus used for String and PString.
+        TypeKind::Search { .. } => 25,
         // 64-bit types are most specific among numerics
         TypeKind::Quad { .. } | TypeKind::Double { .. } | TypeKind::QDate { .. } => 16,
         // 32-bit types are fairly specific
diff --git a/src/evaluator/types/mod.rs b/src/evaluator/types/mod.rs
index 2f149776..f98c0d31 100644
--- a/src/evaluator/types/mod.rs
+++ b/src/evaluator/types/mod.rs
@@ -9,6 +9,8 @@
 mod date;
 mod float;
 mod numeric;
+mod regex;
+mod search;
 mod string;
 
 use crate::parser::ast::{TypeKind, Value};
@@ -19,6 +21,8 @@ use date::format_timestamp_value;
 pub use date::{read_date, read_qdate};
 pub use float::{read_double, read_float};
 pub use numeric::{read_byte, read_long, read_quad, read_short};
+pub use regex::read_regex;
+pub use search::read_search;
 pub use string::{read_pstring, read_string};
 
 /// Reads a fixed-size byte array from the buffer at the given offset.
@@ -95,6 +99,18 @@ pub enum TypeReadError {
 
 /// Reads bytes according to the specified `TypeKind`.
 ///
+/// This is the public dispatch entry point for reading types that are not
+/// pattern-bearing. It preserves the original three-argument
+/// signature used by external consumers -- fixed-width numeric, float,
+/// date, string, and pstring types need no pattern operand, so the hot
+/// path stays ergonomic.
+///
+/// For pattern-bearing types (`TypeKind::Regex`, `TypeKind::Search`) this
+/// function will return `TypeReadError::UnsupportedType` because the
+/// pattern operand is mandatory. Callers that need to evaluate regex/search
+/// rules should use [`read_typed_value_with_pattern`] and thread the rule
+/// value operand through as `pattern`.
+///
 /// # Examples
 ///
 /// ```
@@ -102,7 +118,8 @@ pub enum TypeReadError {
 /// use libmagic_rs::parser::ast::{Endianness, TypeKind, Value};
 ///
 /// let buffer = &[0x7f, 0x45, 0x4c, 0x46, 0x34, 0x12];
-/// let byte_result = read_typed_value(buffer, 0, &TypeKind::Byte { signed: false }).unwrap();
+/// let byte_result =
+///     read_typed_value(buffer, 0, &TypeKind::Byte { signed: false }).unwrap();
 /// assert_eq!(byte_result, Value::Uint(0x7f));
 ///
 /// let short_type = TypeKind::Short {
@@ -115,12 +132,59 @@ pub enum TypeReadError {
 ///
 /// # Errors
 ///
-/// Returns `TypeReadError::BufferOverrun` when the requested value extends past
-/// the buffer bounds.
+/// Returns `TypeReadError::BufferOverrun` when the requested value extends
+/// past the buffer bounds, `TypeReadError::UnsupportedType` when a
+/// pattern-bearing type is evaluated without a pattern, or
+/// `TypeReadError::InvalidPStringLength` for a malformed Pascal string
+/// length prefix.
 pub fn read_typed_value(
     buffer: &[u8],
     offset: usize,
     type_kind: &TypeKind,
+) -> Result<Value, TypeReadError> {
+    read_typed_value_with_pattern(buffer, offset, type_kind, None)
+}
+
+/// Reads bytes according to the specified `TypeKind`, threading a
+/// `pattern` operand through for pattern-bearing types (`Regex`, `Search`).
+///
+/// Callers pass the rule's value operand as `pattern` so the regex and
+/// search readers can compile/locate it against the buffer. A miss and a
+/// zero-width match both come back as `Value::String(String::new())`;
+/// the engine uses `read_pattern_match` instead to tell them apart. For
+/// fixed-width and non-pattern types (numeric, float, date, string,
+/// pstring), the `pattern` parameter is ignored; callers for those types
+/// should prefer the simpler three-argument [`read_typed_value`] wrapper.
+///
+/// # Examples
+///
+/// ```
+/// use libmagic_rs::evaluator::types::read_typed_value_with_pattern;
+/// use libmagic_rs::parser::ast::{RegexFlags, TypeKind, Value};
+///
+/// let haystack = b"abc123def";
+/// let regex_type = TypeKind::Regex {
+///     flags: RegexFlags::default(),
+///     count: None,
+/// };
+/// let pattern = Value::String("[0-9]+".to_string());
+/// let regex_result =
+///     read_typed_value_with_pattern(haystack, 0, &regex_type, Some(&pattern)).unwrap();
+/// assert_eq!(regex_result, Value::String("123".to_string()));
+/// ```
+///
+/// # Errors
+///
+/// Returns `TypeReadError::BufferOverrun` when the requested value extends
+/// past the buffer bounds, `TypeReadError::UnsupportedType` when a regex
+/// pattern fails to compile or a pattern-bearing type is evaluated without
+/// a pattern, or `TypeReadError::InvalidPStringLength` for a malformed
+/// Pascal string length prefix.
+pub fn read_typed_value_with_pattern(
+    buffer: &[u8],
+    offset: usize,
+    type_kind: &TypeKind,
+    pattern: Option<&Value>,
 ) -> Result<Value, TypeReadError> {
     match type_kind {
         TypeKind::Byte { signed } => read_byte(buffer, offset, *signed),
@@ -143,6 +207,90 @@ pub fn read_typed_value(
             *length_width,
             *length_includes_itself,
         ),
+        TypeKind::Regex { flags, count } => {
+            let pattern_str = match pattern {
+                Some(Value::String(s)) => s.as_str(),
+                _ => {
+                    return Err(TypeReadError::UnsupportedType {
+                        type_name: "regex without string pattern".to_string(),
+                    });
+                }
+            };
+            // Collapse `None` (no match) to `Value::String(String::new())`
+            // for back-compat with callers using the single-Value return
+            // shape. The engine path goes through `read_pattern_match`
+            // directly and preserves the `Option` so it can distinguish a
+            // zero-width match from a miss.
+            Ok(read_regex(buffer, offset, pattern_str, *flags, *count)?
+                .unwrap_or_else(|| Value::String(String::new())))
+        }
+        TypeKind::Search { range } => {
+            let pattern_bytes: &[u8] = match pattern {
+                Some(Value::String(s)) => s.as_bytes(),
+                Some(Value::Bytes(b)) => b.as_slice(),
+                _ => {
+                    return Err(TypeReadError::UnsupportedType {
+                        type_name: "search without string/bytes pattern".to_string(),
+                    });
+                }
+            };
+            Ok(read_search(buffer, offset, pattern_bytes, *range)?
+                .unwrap_or_else(|| Value::String(String::new())))
+        }
+    }
+}
+
+/// Engine entry point for pattern-bearing types (`Regex`, `Search`).
+///
+/// Returns `Ok(None)` on a genuine "no match" outcome and `Ok(Some(value))`
+/// on a successful match -- including zero-width matches (e.g., regex `^`,
+/// `a*`, lookaheads). This is the contract the evaluator needs to
+/// distinguish a real miss from a zero-width hit; [`read_typed_value_with_pattern`]
+/// collapses both cases to `Value::String(String::new())` for back-compat.
+///
+/// # Errors
+///
+/// Returns [`TypeReadError`] for:
+///
+/// * `BufferOverrun` when `offset >= buffer.len()`
+/// * `UnsupportedType` if `type_kind` is not pattern-bearing, if the
+///   pattern operand is missing, or if the pattern has the wrong
+///   `Value` variant for the type
+/// * `UnsupportedType` (via [`read_regex`]) if a regex pattern fails to
+///   compile
+pub(crate) fn read_pattern_match(
+    buffer: &[u8],
+    offset: usize,
+    type_kind: &TypeKind,
+    pattern: Option<&Value>,
+) -> Result<Option<Value>, TypeReadError> {
+    match type_kind {
+        TypeKind::Regex { flags, count } => {
+            let pattern_str = match pattern {
+                Some(Value::String(s)) => s.as_str(),
+                _ => {
+                    return Err(TypeReadError::UnsupportedType {
+                        type_name: "regex without string pattern".to_string(),
+                    });
+                }
+            };
+            read_regex(buffer, offset, pattern_str, *flags, *count)
+        }
+        TypeKind::Search { range } => {
+            let pattern_bytes: &[u8] = match pattern {
+                Some(Value::String(s)) => s.as_bytes(),
+                Some(Value::Bytes(b)) => b.as_slice(),
+                _ => {
+                    return Err(TypeReadError::UnsupportedType {
+                        type_name: "search without string/bytes pattern".to_string(),
+                    });
+                }
+            };
+            read_search(buffer, offset, pattern_bytes, *range)
+        }
+        _ => Err(TypeReadError::UnsupportedType {
+            type_name: format!("read_pattern_match called on non-pattern type: {type_kind:?}"),
+        }),
     }
 }
 
@@ -202,7 +350,8 @@ pub fn coerce_value_to_type<'a>(value: &'a Value, type_kind: &TypeKind) -> Cow<'
     }
 }
 
-/// Returns the anchor-advance distance for `type_kind` at `offset`.
+/// Returns the anchor-advance distance for `type_kind` at `offset`, threading
+/// the rule's value operand through for pattern-bearing types.
 ///
 /// This value is used by the evaluation engine to advance the GNU `file`
 /// "previous match" anchor for relative offset resolution. It reflects how
@@ -223,6 +372,16 @@ pub fn coerce_value_to_type<'a>(value: &'a Value, type_kind: &TypeKind) -> Cow<'
 /// it after a successful read, so the defensive paths are belt-and-braces
 /// for any future caller that breaks that invariant.
 ///
+/// For `TypeKind::Regex`, the pattern is required to re-run the match and
+/// compute the consumed bytes; it must be a `Value::String`. For
+/// `TypeKind::Search`, the pattern (`Value::String` or `Value::Bytes`) is
+/// likewise required and is forwarded to `search::search_bytes_consumed`.
+/// When the pattern is missing or has the wrong variant, a
+/// `debug_assert!` fires in dev/test builds and release builds return
+/// `0`, the same graceful-degradation value used by the other defensive
+/// paths in this module. Non-pattern types should pass
+/// `pattern: None`.
+///
 /// # Semantics
 ///
 /// - **Fixed-width types** (Byte, Short, Long, Quad, Float, Double, Date,
@@ -247,7 +406,12 @@ pub fn coerce_value_to_type<'a>(value: &'a Value, type_kind: &TypeKind) -> Cow<'
 ///   buffer length so a malicious oversized length prefix cannot poison the
 ///   anchor.
 #[must_use]
-pub(crate) fn bytes_consumed(buffer: &[u8], offset: usize, type_kind: &TypeKind) -> usize {
+pub(crate) fn bytes_consumed_with_pattern(
+    buffer: &[u8],
+    offset: usize,
+    type_kind: &TypeKind,
+    pattern: Option<&Value>,
+) -> usize {
     if let Some(bits) = type_kind.bit_width() {
         let width = (bits as usize) / 8;
         // Bounds-check the fixed-width path so a misuse (offset past end of
@@ -274,6 +438,39 @@ pub(crate) fn bytes_consumed(buffer: &[u8], offset: usize, type_kind: &TypeKind)
             *length_width,
             *length_includes_itself,
         ),
+        TypeKind::Regex { flags, count } => match pattern {
+            Some(Value::String(s)) => {
+                regex::regex_bytes_consumed(buffer, offset, s.as_str(), *flags, *count)
+            }
+            // Invariant: the engine only calls `bytes_consumed_with_pattern`
+            // after a successful `read_typed_value_with_pattern`/`read_pattern_match`,
+            // which requires `Some(Value::String(_))` for regex. If we land
+            // here the invariant is broken by a new caller and the anchor
+            // would silently stall instead of advancing. Fire a debug_assert
+            // so the mismatch is caught in dev/test builds.
+            other => {
+                debug_assert!(
+                    false,
+                    "bytes_consumed_with_pattern: TypeKind::Regex without Value::String pattern ({other:?}) -- engine invariant violated"
+                );
+                0
+            }
+        },
+        TypeKind::Search { range } => match pattern {
+            Some(Value::String(s)) => {
+                search::search_bytes_consumed(buffer, offset, s.as_bytes(), *range)
+            }
+            Some(Value::Bytes(b)) => {
+                search::search_bytes_consumed(buffer, offset, b.as_slice(), *range)
+            }
+            other => {
+                debug_assert!(
+                    false,
+                    "bytes_consumed_with_pattern: TypeKind::Search without Value::String/Bytes pattern ({other:?}) -- engine invariant violated"
+                );
+                0
+            }
+        },
         // A new variable-width TypeKind variant was added without updating
         // this match. Returning 0 here would silently corrupt the GNU `file`
         // anchor for any rule using relative offsets after a match of the
diff --git a/src/evaluator/types/regex.rs b/src/evaluator/types/regex.rs
new file mode 100644
index 00000000..28b7d58e
--- /dev/null
+++ b/src/evaluator/types/regex.rs
@@ -0,0 +1,479 @@
+// Copyright (c) 2025-2026 the libmagic-rs contributors
+// SPDX-License-Identifier: Apache-2.0
+
+//! Regular-expression matching for magic rule evaluation.
+//!
+//! Implements the `regex` `TypeKind` using `regex::bytes::RegexBuilder` so
+//! that matching is binary-safe (patterns are applied to the raw byte
+//! buffer, not a UTF-8 string). A successful match returns
+//! `Ok(Some(Value::String(...)))` -- the matched bytes with invalid UTF-8
+//! replaced via `from_utf8_lossy`. A miss returns `Ok(None)`. The `Option`
+//! is the structured "no match" signal, which lets the engine distinguish
+//! a legitimate zero-width match (e.g., `^`, `a*`, lookaheads) from a
+//! genuine miss -- both of which would otherwise collapse to
+//! `Value::String(String::new())`.
+//!
+//! ## Semantics (matching GNU `file`)
+//!
+//! * **Multi-line mode is always on.** GNU `file`'s `alloc_regex` in
+//!   `src/softmagic.c` compiles every regex with `REG_NEWLINE`
+//!   unconditionally, so `^` and `$` match at line boundaries and `.`
+//!   does not match `\n`. The `/l` flag does **not** control this; it
+//!   controls whether the scan window is measured in bytes or lines.
+//!
+//! * **Scan window is always capped at [`REGEX_MAX_BYTES`] (8192).** This
+//!   matches libmagic's `FILE_REGEX_MAX` constant. An explicit `count`
+//!   larger than 8192 is clamped. An implicit count (no user-supplied
+//!   value) uses the 8192 default directly.
+//!
+//! * **Line-based window** (`/l` flag): when `flags.line_based` is set,
+//!   `count` is a line count. The scan window extends from `offset`
+//!   through the end of the Nth line terminator, capped at 8192 bytes.
+//!   Libmagic recognizes both `\n` (LF) and `\r\n` (CR+LF) as terminators
+//!   and counts them as single lines; `compute_window` walks the window
+//!   byte by byte and also counts a lone `\r` (CR) as one terminator.
+//!
+//! * **`/s` flag** (`start_offset`): affects only the anchor advance
+//!   computed by [`regex_bytes_consumed`]. When set, the anchor moves by
+//!   `m.start()` (match-start) instead of `m.end()` (match-end), matching
+//!   libmagic's `REGEX_OFFSET_START` / `moffset()` logic.
+
+use super::TypeReadError;
+use crate::parser::ast::{REGEX_MAX_BYTES, RegexFlags, Value};
+use regex::bytes::{Regex, RegexBuilder};
+use std::num::NonZeroU32;
+
+/// Compile `pattern` with the magic-rule regex flags applied.
+///
+/// Multi-line mode is always enabled (unconditional in libmagic via
+/// `REG_NEWLINE`) and `.` does not match newlines. The `case_insensitive`
+/// flag is the only compile-time flag the magic-rule interface controls;
+/// `line_based` and `start_offset` affect window computation and anchor
+/// advance respectively, not regex compilation.
+fn build_regex(pattern: &str, case_insensitive: bool) -> Result<Regex, regex::Error> {
+    RegexBuilder::new(pattern)
+        .case_insensitive(case_insensitive)
+        .multi_line(true)
+        .dot_matches_new_line(false)
+        .build()
+}
+
+/// Compute the scan window for a regex rule at `offset`, applying the
+/// 8192-byte cap and the `/l` line-count semantics when requested.
+///
+/// Returns a slice of `buffer` starting at `offset`:
+///
+/// * **Byte mode** (`flags.line_based == false`): window length is
+///   `min(count.unwrap_or(REGEX_MAX_BYTES), REGEX_MAX_BYTES, remaining)`.
+///
+/// * **Line mode** (`flags.line_based == true`): window extends from
+///   `offset` through the end of the Nth line terminator (inclusive),
+///   where N is `count.unwrap_or(u32::MAX)`. `\r\n`, `\n`, and a lone `\r`
+///   each count as one terminator. If the Nth terminator is not found within
+///   `REGEX_MAX_BYTES`, the window is truncated to 8192 bytes. If `count`
+///   is `None` and no terminator is found at all, the window is the whole
+///   buffer tail up to the 8192-byte cap.
+fn compute_window(
+    buffer: &[u8],
+    offset: usize,
+    flags: RegexFlags,
+    count: Option<NonZeroU32>,
+) -> &[u8] {
+    let Some(remaining) = buffer.get(offset..) else {
+        return &[];
+    };
+    let byte_cap = remaining.len().min(REGEX_MAX_BYTES);
+    let capped = &remaining[..byte_cap];
+
+    if !flags.line_based {
+        let count_bytes =
+            count.map_or(REGEX_MAX_BYTES, |n| (n.get() as usize).min(REGEX_MAX_BYTES));
+        return &capped[..count_bytes.min(capped.len())];
+    }
+
+    // Line mode: walk the byte-capped slice counting `\n` (and `\r\n`
+    // pairs as one terminator), stopping after the Nth terminator.
+    let target_lines = count.map_or(u32::MAX, NonZeroU32::get);
+    let mut lines_seen: u32 = 0;
+    let mut idx = 0usize;
+    while idx < capped.len() {
+        match capped[idx] {
+            b'\r' => {
+                // Treat CR and CRLF as a single terminator.
+                let advance = if idx + 1 < capped.len() && capped[idx + 1] == b'\n' {
+                    2
+                } else {
+                    1
+                };
+                idx += advance;
+                lines_seen = lines_seen.saturating_add(1);
+            }
+            b'\n' => {
+                idx += 1;
+                lines_seen = lines_seen.saturating_add(1);
+            }
+            _ => idx += 1,
+        }
+        if lines_seen >= target_lines {
+            break;
+        }
+    }
+    &capped[..idx]
+}
+
+/// Scan `buffer` starting at `offset` for the first match of `pattern`.
+///
+/// # Arguments
+///
+/// * `buffer` - File buffer to scan
+/// * `offset` - Starting position within the buffer
+/// * `pattern` - Regex source string (from the rule's `Value::String`
+///   operand)
+/// * `flags` - Regex modifier flags parsed from the `/[csl]` suffix
+/// * `count` - Optional numeric count. Interpretation depends on
+///   `flags.line_based`; see [`compute_window`] for the details.
+///
+/// # Returns
+///
+/// * `Ok(Some(Value::String(matched_text)))` on a successful match --
+///   invalid UTF-8 in the matched bytes is replaced with U+FFFD via
+///   `from_utf8_lossy`. The matched text may legitimately be empty for
+///   zero-width matches (e.g., `^`, `a*`, or lookaheads).
+/// * `Ok(None)` when the pattern does not match anywhere in the scan
+///   window.
+///
+/// # Errors
+///
+/// * `TypeReadError::BufferOverrun` if `offset >= buffer.len()`.
+/// * `TypeReadError::UnsupportedType` if `pattern` fails to compile as a
+///   regex (the error variant is reused to avoid adding a new enum
+///   variant; the `type_name` field carries the compilation error
+///   message).
+pub fn read_regex(
+    buffer: &[u8],
+    offset: usize,
+    pattern: &str,
+    flags: RegexFlags,
+    count: Option<NonZeroU32>,
+) -> Result<Option<Value>, TypeReadError> {
+    if offset >= buffer.len() {
+        return Err(TypeReadError::BufferOverrun {
+            offset,
+            buffer_len: buffer.len(),
+        });
+    }
+
+    let regex = build_regex(pattern, flags.case_insensitive).map_err(|e| {
+        TypeReadError::UnsupportedType {
+            type_name: format!("regex compile error: {e}"),
+        }
+    })?;
+
+    let window = compute_window(buffer, offset, flags, count);
+
+    Ok(regex
+        .find(window)
+        .map(|m| Value::String(String::from_utf8_lossy(m.as_bytes()).into_owned())))
+}
+
+/// Compute how far the GNU `file` previous-match anchor moves for the
+/// first match of `pattern` in `buffer`, scanning from `offset`.
+///
+/// The pattern is compiled and matched again over the same bounded window
+/// that `read_regex` scans. With `flags.start_offset` set (the `/s`
+/// modifier) the result is `m.start()`; otherwise it is `m.end()`. This
+/// mirrors libmagic's `REGEX_OFFSET_START` / `moffset()` branch in
+/// `src/softmagic.c`.
+///
+/// Every failure yields `0`: an offset past the end of the buffer, a
+/// pattern that does not compile, or no match in the window. The first
+/// two indicate a broken engine invariant (a call that was not preceded
+/// by a successful `read_regex`), so they also trip a `debug_assert` in
+/// dev/test builds.
+///
+/// Note: a successful match therefore compiles the regex twice -- once in
+/// `read_regex` and once here. Handing the compiled `Regex` over would need
+/// a second return channel in the reader API, so the duplicate compile is
+/// kept as a deliberate simplicity-over-caching trade-off.
+#[must_use]
+pub(super) fn regex_bytes_consumed(
+    buffer: &[u8],
+    offset: usize,
+    pattern: &str,
+    flags: RegexFlags,
+    count: Option<NonZeroU32>,
+) -> usize {
+    if offset > buffer.len() {
+        debug_assert!(
+            false,
+            "regex_bytes_consumed: offset {offset} > buffer.len() {} -- engine invariant violated (called without a preceding successful read_regex)",
+            buffer.len()
+        );
+        return 0;
+    }
+    let compiled = match build_regex(pattern, flags.case_insensitive) {
+        Ok(compiled) => compiled,
+        Err(_) => {
+            debug_assert!(
+                false,
+                "regex_bytes_consumed: failed to re-compile pattern {pattern:?} -- engine invariant violated (read_regex already succeeded)"
+            );
+            return 0;
+        }
+    };
+    let haystack = compute_window(buffer, offset, flags, count);
+    match compiled.find(haystack) {
+        Some(m) if flags.start_offset => m.start(),
+        Some(m) => m.end(),
+        None => 0,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn no_flags() -> RegexFlags {
+        RegexFlags::default()
+    }
+
+    fn flags(case_insensitive: bool, start_offset: bool, line_based: bool) -> RegexFlags {
+        RegexFlags {
+            case_insensitive,
+            start_offset,
+            line_based,
+        }
+    }
+
+    #[test]
+    fn test_read_regex_basic_match() {
+        let data = b"Hello, World!";
+        let found = read_regex(data, 0, "World", no_flags(), None).unwrap();
+        assert_eq!(found, Some(Value::String(String::from("World"))));
+    }
+
+    #[test]
+    fn test_read_regex_no_match_returns_none() {
+        let data = b"Hello, World!";
+        let found = read_regex(data, 0, "xyz", no_flags(), None).unwrap();
+        assert_eq!(found, None);
+    }
+
+    #[test]
+    fn test_read_regex_case_insensitive() {
+        let data = b"Hello, World!";
+        let found = read_regex(data, 0, "world", flags(true, false, false), None).unwrap();
+        assert_eq!(found, Some(Value::String(String::from("World"))));
+    }
+
+    #[test]
+    fn test_read_regex_case_sensitive_no_match() {
+        let data = b"Hello, World!";
+        let found = read_regex(data, 0, "world", no_flags(), None).unwrap();
+        assert_eq!(found, None);
+    }
+
+    #[test]
+    fn test_read_regex_multiline_anchor_across_lines() {
+        // libmagic compiles every regex with REG_NEWLINE, so `^` and `$`
+        // match at internal line boundaries whether or not `/l` is given.
+        // Pin that behavior: with no flags set, `^second` still matches
+        // at the start of the second line of a two-line buffer.
+        let data = b"first line\nsecond line";
+        let found = read_regex(data, 0, "^second", no_flags(), None).unwrap();
+        assert_eq!(found, Some(Value::String(String::from("second"))));
+    }
+
+    #[test]
+    fn test_read_regex_dot_does_not_match_newline() {
+        // REG_NEWLINE also stops `.` at newlines, so `.+` over a
+        // multi-line buffer must end before the `\n`.
+        let data = b"first\nsecond";
+        let found = read_regex(data, 0, ".+", no_flags(), None).unwrap();
+        assert_eq!(found, Some(Value::String(String::from("first"))));
+    }
+
+    #[test]
+    fn test_read_regex_zero_width_start_anchor_matches() {
+        // `^` matches with zero width at position 0 and must come back as
+        // `Some(Value::String(""))`, not `None`. Regression guard for C3.
+        let data = b"hello";
+        let found = read_regex(data, 0, "^", no_flags(), None).unwrap();
+        assert_eq!(
+            found,
+            Some(Value::String(String::new())),
+            "^ is a legitimate zero-width match, not a miss"
+        );
+    }
+
+    #[test]
+    fn test_read_regex_zero_width_star_matches_empty() {
+        let data = b"xyz";
+        let found = read_regex(data, 0, "a*", no_flags(), None).unwrap();
+        assert_eq!(found, Some(Value::String(String::new())));
+    }
+
+    #[test]
+    fn test_read_regex_at_offset() {
+        let data = b"prefix_World!";
+        let found = read_regex(data, 7, "World", no_flags(), None).unwrap();
+        assert_eq!(found, Some(Value::String(String::from("World"))));
+    }
+
+    #[test]
+    fn test_read_regex_offset_past_end() {
+        let data = b"Hello";
+        let outcome = read_regex(data, 10, "x", no_flags(), None);
+        assert!(matches!(
+            outcome,
+            Err(TypeReadError::BufferOverrun {
+                offset: 10,
+                buffer_len: 5
+            })
+        ));
+    }
+
+    #[test]
+    fn test_read_regex_invalid_pattern() {
+        let data = b"Hello";
+        let outcome = read_regex(data, 0, "[unclosed", no_flags(), None);
+        assert!(matches!(outcome, Err(TypeReadError::UnsupportedType { .. })));
+    }
+
+    #[test]
+    fn test_read_regex_binary_safe() {
+        let data = &[0x00, 0xff, 0xfe, 0x41, 0x42, 0x43];
+        let found = read_regex(data, 0, "ABC", no_flags(), None).unwrap();
+        assert_eq!(found, Some(Value::String(String::from("ABC"))));
+    }
+
+    #[test]
+    fn test_read_regex_character_class() {
+        let data = b"abc123def";
+        let found = read_regex(data, 0, "[0-9]+", no_flags(), None).unwrap();
+        assert_eq!(found, Some(Value::String(String::from("123"))));
+    }
+
+    // ------- V1: line-based window -------
+
+    #[test]
+    fn test_read_regex_line_based_one_line_caps_scan() {
+        // `regex/1l`: the scan window ends after the first newline, so a
+        // pattern that only occurs on the second line must miss.
+        let data = b"first line\nsecond line\n";
+        let limit = NonZeroU32::new(1);
+        let found = read_regex(data, 0, "second", flags(false, false, true), limit).unwrap();
+        assert_eq!(found, None, "scan should stop after the first line");
+    }
+
+    #[test]
+    fn test_read_regex_line_based_crlf_terminator() {
+        // With a one-line window, the CRLF (`\r\n`) after "line1" ends the
+        // scan, so a pattern that only occurs on the second line must miss.
+        let data = b"line1\r\nline2\r\n";
+        let limit = NonZeroU32::new(1);
+        let found = read_regex(data, 0, "line2", flags(false, false, true), limit).unwrap();
+        assert_eq!(found, None, "CRLF should end the first line");
+    }
+
+    #[test]
+    fn test_read_regex_line_based_counts_multiple_lines() {
+        // `regex/3l` scans through the third line: a pattern on line 3
+        // matches, while a pattern on line 4 misses.
+        let data = b"line1\nline2\nline3\nline4\n";
+        let limit = NonZeroU32::new(3);
+        let third = read_regex(data, 0, "line3", flags(false, false, true), limit).unwrap();
+        assert_eq!(third, Some(Value::String(String::from("line3"))));
+
+        let fourth = read_regex(data, 0, "line4", flags(false, false, true), limit).unwrap();
+        assert_eq!(fourth, None, "line4 is beyond the 3-line window");
+    }
+
+    // ------- V5: 8192-byte default cap -------
+
+    #[test]
+    fn test_read_regex_default_window_caps_at_8192_bytes() {
+        // The buffer is longer than 8192 bytes and the pattern only occurs
+        // past that point. A regex without a count must miss because the
+        // scan window is capped at 8192 bytes (FILE_REGEX_MAX).
+        let mut data = vec![b'a'; 9000];
+        data.extend_from_slice(b"needle");
+        let found = read_regex(&data, 0, "needle", no_flags(), None).unwrap();
+        assert_eq!(
+            found, None,
+            "needle past byte 9000 must not match under the 8192 default cap"
+        );
+    }
+
+    #[test]
+    fn test_read_regex_explicit_count_larger_than_cap_still_capped() {
+        // An explicit `regex/100000` is clamped to 8192 bytes as well --
+        // the hard cap cannot be lifted from a magic file.
+        let mut data = vec![b'a'; 9000];
+        data.extend_from_slice(b"needle");
+        let limit = NonZeroU32::new(100_000);
+        let found = read_regex(&data, 0, "needle", no_flags(), limit).unwrap();
+        assert_eq!(found, None, "explicit count must still be clamped to 8192");
+    }
+
+    #[test]
+    fn test_read_regex_small_count_honored() {
+        // A small explicit count (here 10 bytes) narrows the window, so a
+        // pattern that starts past byte 10 misses.
+        let data = b"abcdefghij_needle_here";
+        let limit = NonZeroU32::new(10);
+        let found = read_regex(data, 0, "needle", no_flags(), limit).unwrap();
+        assert_eq!(found, None);
+    }
+
+    // ------- regex_bytes_consumed -------
+
+    #[test]
+    fn test_regex_bytes_consumed_match_end_by_default() {
+        let data = b"Hello, World!";
+        assert_eq!(
+            regex_bytes_consumed(data, 0, "World", no_flags(), None),
+            12
+        );
+    }
+
+    #[test]
+    fn test_regex_bytes_consumed_no_match() {
+        let data = b"Hello";
+        assert_eq!(regex_bytes_consumed(data, 0, "xyz", no_flags(), None), 0);
+    }
+
+    #[test]
+    fn test_regex_bytes_consumed_zero_width_match_returns_zero() {
+        let data = b"hello";
+        assert_eq!(regex_bytes_consumed(data, 0, "^", no_flags(), None), 0);
+    }
+
+    // ------- V2: /s flag (start_offset) -------
+
+    #[test]
+    fn test_regex_bytes_consumed_start_offset_returns_match_start() {
+        // In "abcWorld" the pattern "World" matches at index 3 and is 5
+        // bytes long. Without `/s` the anchor advances by 8 (match-end);
+        // with `/s` it advances by 3 (match-start), following libmagic's
+        // REGEX_OFFSET_START / moffset() zero-length path.
+        let data = b"abcWorld";
+        let end_advance = regex_bytes_consumed(data, 0, "World", no_flags(), None);
+        let start_advance = regex_bytes_consumed(data, 0, "World", flags(false, true, false), None);
+        assert_eq!(end_advance, 8, "default anchor advance is match-end");
+        assert_eq!(
+            start_advance, 3,
+            "/s flag advances anchor to match-start instead"
+        );
+    }
+
+    #[test]
+    fn test_regex_bytes_consumed_start_offset_no_match_returns_zero() {
+        // With `/s` set, a pattern that does not match still yields 0.
+        let data = b"Hello";
+        assert_eq!(
+            regex_bytes_consumed(data, 0, "xyz", flags(false, true, false), None),
+            0
+        );
+    }
+}
diff --git a/src/evaluator/types/search.rs b/src/evaluator/types/search.rs
new file mode 100644
index 00000000..070cb496
--- /dev/null
+++ b/src/evaluator/types/search.rs
@@ -0,0 +1,234 @@
+// Copyright (c) 2025-2026 the libmagic-rs contributors
+// SPDX-License-Identifier: Apache-2.0
+
+//! Bounded literal pattern search for magic rule evaluation.
+//!
+//! Implements the `search` `TypeKind` as a forward scan for a literal byte
+//! pattern within a bounded window. Unlike `TypeKind::String`, which only
+//! matches at the exact offset, `search` advances through the buffer looking
+//! for the first occurrence of the pattern anywhere in the window. The
+//! search window is `buffer[offset..]` capped by the optional `range`.
+
+use super::TypeReadError;
+use crate::parser::ast::Value;
+use std::num::NonZeroUsize;
+
+/// Find the first occurrence of `pattern` in a bounded window of `buffer`.
+///
+/// # Arguments
+///
+/// * `buffer` - File buffer to scan
+/// * `offset` - Position in `buffer` where the window begins
+/// * `pattern` - Literal bytes to locate (from the rule's value operand)
+/// * `range` - Window length in bytes, counted from `offset`. The window
+///   is clamped to the bytes remaining in the buffer, so it is the
+///   smaller of `range` and `buffer.len() - offset`. GNU `file`'s magic(5)
+///   makes the range mandatory, which is why the parameter is a
+///   [`NonZeroUsize`] rather than an `Option`.
+///
+/// # Returns
+///
+/// * `Ok(Some(Value::String(pattern_text)))` when the pattern occurs in
+///   the window. The text is the literal pattern itself (search locates,
+///   it does not capture), decoded with `from_utf8_lossy` so invalid
+///   UTF-8 is replaced.
+/// * `Ok(None)` when the window does not contain the pattern. `None` is
+///   the structured "no match" signal; a caller that needs the
+///   compatibility value `Value::String(String::new())` converts at the
+///   call site.
+///
+/// # Errors
+///
+/// * `TypeReadError::BufferOverrun` if `offset >= buffer.len()`.
+pub fn read_search(
+    buffer: &[u8],
+    offset: usize,
+    pattern: &[u8],
+    range: NonZeroUsize,
+) -> Result<Option<Value>, TypeReadError> {
+    let buffer_len = buffer.len();
+    if offset >= buffer_len {
+        return Err(TypeReadError::BufferOverrun { offset, buffer_len });
+    }
+
+    // Clamp the scan to `range` bytes, or to the buffer tail if that is
+    // shorter.
+    let tail = &buffer[offset..];
+    let window = &tail[..range.get().min(tail.len())];
+
+    // The reported text is the pattern itself rather than bytes copied out
+    // of `buffer`; the match position is not part of the result.
+    let hit = memchr::memmem::find(window, pattern);
+    Ok(hit.map(|_| Value::String(String::from_utf8_lossy(pattern).into_owned())))
+}
+
+/// Anchor-advance distance for a successful `search` match.
+///
+/// GNU `file` moves its previous-match anchor to the byte just past the
+/// matched pattern -- `base_offset + match_index + pattern.len()` -- not to
+/// the end of the search window. See the `FILE_SEARCH` branch of `moffset()`
+/// in `src/softmagic.c` (`vlen = m->vallen; o = ms->search.offset + vlen -
+/// offset;`), where `ms->search.offset` already includes `idx`, the match
+/// index within the window.
+///
+/// The scan is the same `memchr::memmem::find` call that [`read_search`]
+/// makes, repeated here instead of threading the match position back
+/// through the reader API; the result is `match_index + pattern.len()`.
+/// A miss, or an `offset` past the end of `buffer`, yields `0`. The engine
+/// only calls this after a successful read, so those paths are purely
+/// defensive.
+#[must_use]
+pub(super) fn search_bytes_consumed(
+    buffer: &[u8],
+    offset: usize,
+    pattern: &[u8],
+    range: NonZeroUsize,
+) -> usize {
+    let tail = match buffer.get(offset..) {
+        Some(tail) => tail,
+        None => {
+            debug_assert!(
+                false,
+                "search_bytes_consumed: offset {offset} > buffer.len() {} -- engine invariant violated (called without a preceding successful read_search)",
+                buffer.len()
+            );
+            return 0;
+        }
+    };
+    let window = &tail[..range.get().min(tail.len())];
+    let hit = memchr::memmem::find(window, pattern);
+    hit.map_or(0, |idx| idx + pattern.len())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn nz(n: usize) -> NonZeroUsize {
+        NonZeroUsize::new(n).expect("non-zero in test")
+    }
+
+    #[test]
+    fn test_read_search_basic_match() {
+        let buffer = b"Hello, World!";
+        let result = read_search(buffer, 0, b"World", nz(100)).unwrap();
+        assert_eq!(result, Some(Value::String("World".to_string())));
+    }
+
+    #[test]
+    fn test_read_search_no_match_returns_none() {
+        let buffer = b"Hello, World!";
+        let result = read_search(buffer, 0, b"xyz", nz(100)).unwrap();
+        assert_eq!(result, None);
+    }
+
+    #[test]
+    fn test_read_search_bounded_range_finds() {
+        let buffer = b"abcdefWorldxyz";
+        let result = read_search(buffer, 0, b"World", nz(14)).unwrap();
+        assert_eq!(result, Some(Value::String("World".to_string())));
+    }
+
+    #[test]
+    fn test_read_search_bounded_range_too_small() {
+        let buffer = b"abcdefWorldxyz";
+        // Range only covers "abcde" -- World is past the window
+        let result = read_search(buffer, 0, b"World", nz(5)).unwrap();
+        assert_eq!(result, None);
+    }
+
+    #[test]
+    fn test_read_search_range_larger_than_buffer() {
+        let buffer = b"Hello";
+        let result = read_search(buffer, 0, b"lo", nz(1000)).unwrap();
+        assert_eq!(result, Some(Value::String("lo".to_string())));
+    }
+
+    #[test]
+    fn test_read_search_at_offset() {
+        let buffer = b"junk_prefix_World!";
+        let result = read_search(buffer, 12, b"World", nz(100)).unwrap();
+        assert_eq!(result, Some(Value::String("World".to_string())));
+    }
+
+    #[test]
+    fn test_read_search_offset_past_end() {
+        let buffer = b"Hello";
+        let result = read_search(buffer, 10, b"x", nz(100));
+        assert!(matches!(
+            result,
+            Err(TypeReadError::BufferOverrun {
+                offset: 10,
+                buffer_len: 5
+            })
+        ));
+    }
+
+    #[test]
+    fn test_read_search_binary_pattern() {
+        let buffer = &[0x00, 0xff, 0xfe, 0xaa, 0xbb, 0xcc];
+        let result = read_search(buffer, 0, &[0xaa, 0xbb], nz(100)).unwrap();
+        // Invalid UTF-8 gets replaced with U+FFFD, but the match is still Some
+        match result {
+            Some(Value::String(s)) => assert!(!s.is_empty()),
+            other => panic!("Expected Some(Value::String), got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn test_read_search_empty_pattern_matches_at_offset() {
+        // memmem finds an empty pattern at position 0 in any non-empty
+        // window. This is a degenerate but well-defined outcome: the
+        // reader reports a match with an empty matched text. Magic files
+        // using `search` with an empty pattern are nonsensical; the
+        // grammar layer should reject them, not the reader.
+        let buffer = b"Hello";
+        let result = read_search(buffer, 0, b"", nz(100)).unwrap();
+        assert_eq!(result, Some(Value::String(String::new())));
+    }
+
+    #[test]
+    fn test_read_search_multi_char_pattern() {
+        let buffer = b"The quick brown fox jumps over the lazy dog";
+        let result = read_search(buffer, 0, b"brown", nz(50)).unwrap();
+        assert_eq!(result, Some(Value::String("brown".to_string())));
+    }
+
+    #[test]
+    fn test_search_bytes_consumed_matches_match_end_not_window_end() {
+        // GNU `file` advances the anchor past the matched pattern, not
+        // past the full search window. Regression guard for the pre-fix
+        // behavior which returned the whole window size.
+        let buffer = b"abcWorldxyz___more_data";
+        // Window size 10 (`abcWorldxy`), pattern "World" at index 3,
+        // length 5, so match-end = 3 + 5 = 8.
+        assert_eq!(
+            search_bytes_consumed(buffer, 0, b"World", nz(10)),
+            8,
+            "expected match-end (8), not window-end (10)"
+        );
+    }
+
+    #[test]
+    fn test_search_bytes_consumed_no_match_returns_zero() {
+        let buffer = b"abcdefghij";
+        assert_eq!(search_bytes_consumed(buffer, 0, b"XYZ", nz(10)), 0);
+    }
+
+    #[test]
+    fn test_search_bytes_consumed_range_caps_match() {
+        // Match exists past the window; bytes_consumed reports 0 because
+        // the scan only sees the window.
+        let buffer = b"abcdefWorldxyz";
+        // Range 5 means window is "abcde" -- no "World" inside it.
+        assert_eq!(search_bytes_consumed(buffer, 0, b"World", nz(5)), 0);
+    }
+
+    #[test]
+    fn test_search_bytes_consumed_match_at_window_end() {
+        // Pattern ends exactly at the window boundary: the window is the 6
+        // bytes `abcdef`, "def" occupies indices 3..6, so match-end = 6.
+        let buffer = b"abcdefgh_ignored";
+        assert_eq!(search_bytes_consumed(buffer, 0, b"def", nz(6)), 6);
+    }
+}
diff --git a/src/evaluator/types/tests.rs b/src/evaluator/types/tests.rs
index 5e9fa908..9c33cf30 100644
--- a/src/evaluator/types/tests.rs
+++ b/src/evaluator/types/tests.rs
@@ -801,7 +801,7 @@ fn test_bytes_consumed_fixed_width_types() {
     ];
 
     for (typ, expected) in cases {
-        let consumed = bytes_consumed(buf, 0, typ);
+        let consumed = bytes_consumed_with_pattern(buf, 0, typ, None);
         assert_eq!(
             consumed, *expected,
             "fixed-width width mismatch for {typ:?}"
@@ -814,7 +814,7 @@ fn test_bytes_consumed_string_with_nul() {
     // "MZ\0" -> matches "MZ" and consumes 3 bytes (2 + NUL).
     let buf = b"MZ\x00rest";
     let typ = TypeKind::String { max_length: None };
-    assert_eq!(bytes_consumed(buf, 0, &typ), 3);
+    assert_eq!(bytes_consumed_with_pattern(buf, 0, &typ, None), 3);
 }
 
 #[test]
@@ -822,7 +822,7 @@ fn test_bytes_consumed_string_at_offset() {
     // String starting mid-buffer.
     let buf = b"PREFIXabc\x00tail";
     let typ = TypeKind::String { max_length: None };
-    assert_eq!(bytes_consumed(buf, 6, &typ), 4); // "abc" + NUL
+    assert_eq!(bytes_consumed_with_pattern(buf, 6, &typ, None), 4); // "abc" + NUL
 }
 
 #[test]
@@ -830,7 +830,7 @@ fn test_bytes_consumed_string_no_nul_in_buffer() {
     // No NUL terminator -- consumes to end of buffer (no extra byte for NUL).
     let buf = b"NoNull";
     let typ = TypeKind::String { max_length: None };
-    assert_eq!(bytes_consumed(buf, 0, &typ), 6);
+    assert_eq!(bytes_consumed_with_pattern(buf, 0, &typ, None), 6);
 }
 
 #[test]
@@ -838,7 +838,7 @@ fn test_bytes_consumed_string_empty() {
     // Empty string at offset 0 -- just the NUL.
     let buf = b"\x00rest";
     let typ = TypeKind::String { max_length: None };
-    assert_eq!(bytes_consumed(buf, 0, &typ), 1);
+    assert_eq!(bytes_consumed_with_pattern(buf, 0, &typ, None), 1);
 }
 
 #[test]
@@ -848,7 +848,7 @@ fn test_bytes_consumed_string_max_length_caps() {
     let typ = TypeKind::String {
         max_length: Some(4),
     };
-    assert_eq!(bytes_consumed(buf, 0, &typ), 4);
+    assert_eq!(bytes_consumed_with_pattern(buf, 0, &typ, None), 4);
 }
 
 #[test]
@@ -858,7 +858,7 @@ fn test_bytes_consumed_string_max_length_finds_nul() {
     let typ = TypeKind::String {
         max_length: Some(10),
     };
-    assert_eq!(bytes_consumed(buf, 0, &typ), 6);
+    assert_eq!(bytes_consumed_with_pattern(buf, 0, &typ, None), 6);
 }
 
 #[test]
@@ -870,7 +870,7 @@ fn test_bytes_consumed_pstring_one_byte() {
         length_width: PStringLengthWidth::OneByte,
         length_includes_itself: false,
     };
-    assert_eq!(bytes_consumed(buf, 0, &typ), 6);
+    assert_eq!(bytes_consumed_with_pattern(buf, 0, &typ, None), 6);
 }
 
 #[test]
@@ -882,7 +882,7 @@ fn test_bytes_consumed_pstring_two_byte_be() {
         length_width: PStringLengthWidth::TwoByteBE,
         length_includes_itself: false,
     };
-    assert_eq!(bytes_consumed(buf, 0, &typ), 7);
+    assert_eq!(bytes_consumed_with_pattern(buf, 0, &typ, None), 7);
 }
 
 #[test]
@@ -893,7 +893,7 @@ fn test_bytes_consumed_pstring_two_byte_le() {
         length_width: PStringLengthWidth::TwoByteLE,
         length_includes_itself: false,
     };
-    assert_eq!(bytes_consumed(buf, 0, &typ), 7);
+    assert_eq!(bytes_consumed_with_pattern(buf, 0, &typ, None), 7);
 }
 
 #[test]
@@ -904,7 +904,7 @@ fn test_bytes_consumed_pstring_four_byte_be() {
         length_width: PStringLengthWidth::FourByteBE,
         length_includes_itself: false,
     };
-    assert_eq!(bytes_consumed(buf, 0, &typ), 5);
+    assert_eq!(bytes_consumed_with_pattern(buf, 0, &typ, None), 5);
 }
 
 #[test]
@@ -916,7 +916,7 @@ fn test_bytes_consumed_pstring_j_flag() {
         length_width: PStringLengthWidth::OneByte,
         length_includes_itself: true,
     };
-    assert_eq!(bytes_consumed(buf, 0, &typ), 4);
+    assert_eq!(bytes_consumed_with_pattern(buf, 0, &typ, None), 4);
 }
 
 #[test]
@@ -928,7 +928,7 @@ fn test_bytes_consumed_pstring_empty() {
         length_width: PStringLengthWidth::OneByte,
         length_includes_itself: false,
     };
-    assert_eq!(bytes_consumed(buf, 0, &typ), 1);
+    assert_eq!(bytes_consumed_with_pattern(buf, 0, &typ, None), 1);
 }
 
 #[test]
@@ -940,7 +940,7 @@ fn test_bytes_consumed_pstring_max_length_caps() {
         length_width: PStringLengthWidth::OneByte,
         length_includes_itself: false,
     };
-    assert_eq!(bytes_consumed(buf, 0, &typ), 6);
+    assert_eq!(bytes_consumed_with_pattern(buf, 0, &typ, None), 6);
 }
 
 #[test]
@@ -952,7 +952,7 @@ fn test_bytes_consumed_pstring_j_flag_underflow_multi_byte() {
         length_width: PStringLengthWidth::TwoByteBE,
         length_includes_itself: true,
     };
-    assert_eq!(bytes_consumed(buf, 0, &typ), 0);
+    assert_eq!(bytes_consumed_with_pattern(buf, 0, &typ, None), 0);
 
     // /J with FourByteLE: stored length 3, prefix width 4 -> underflow -> 0.
     let buf = b"\x03\x00\x00\x00xx";
@@ -961,7 +961,7 @@ fn test_bytes_consumed_pstring_j_flag_underflow_multi_byte() {
         length_width: PStringLengthWidth::FourByteLE,
         length_includes_itself: true,
     };
-    assert_eq!(bytes_consumed(buf, 0, &typ), 0);
+    assert_eq!(bytes_consumed_with_pattern(buf, 0, &typ, None), 0);
 }
 
 #[test]
@@ -976,7 +976,7 @@ fn test_bytes_consumed_pstring_clamps_oversized_prefix_be() {
         length_includes_itself: false,
     };
     // 4 (prefix) + min(0xFFFFFFFF, 3) = 4 + 3 = 7
-    assert_eq!(bytes_consumed(buf, 0, &typ), 7);
+    assert_eq!(bytes_consumed_with_pattern(buf, 0, &typ, None), 7);
 }
 
 #[test]
@@ -988,7 +988,7 @@ fn test_bytes_consumed_pstring_clamps_oversized_prefix_le() {
         length_includes_itself: false,
     };
     // 4 + min(0xFFFFFFFF, 5) = 9
-    assert_eq!(bytes_consumed(buf, 0, &typ), 9);
+    assert_eq!(bytes_consumed_with_pattern(buf, 0, &typ, None), 9);
 }
 
 #[test]
@@ -998,7 +998,110 @@ fn test_bytes_consumed_string_at_past_end_returns_zero() {
     // a successful read, but the path is exercised here for the contract.
     let buf = b"abc";
     let typ = TypeKind::String { max_length: None };
-    assert_eq!(bytes_consumed(buf, 10, &typ), 0);
+    assert_eq!(bytes_consumed_with_pattern(buf, 10, &typ, None), 0);
+}
+
+#[test]
+fn test_bytes_consumed_regex_with_string_pattern() {
+    // Regression guard for GOTCHAS 2.1: every variable-width variant needs
+    // its own arm in `bytes_consumed_with_pattern`, otherwise relative
+    // offsets are silently corrupted. Drive the regex dispatch path and
+    // pin the match-end byte count.
+    let haystack = b"prefix_World_suffix";
+    let needle = Value::String(String::from("World"));
+    let regex_type = TypeKind::Regex {
+        flags: crate::parser::ast::RegexFlags::default(),
+        count: None,
+    };
+
+    // "World" starts at index 7 and is 5 bytes long, so a scan that
+    // begins at offset 0 consumes 7 + 5 = 12 bytes up to the end of the
+    // match.
+    let consumed = bytes_consumed_with_pattern(haystack, 0, &regex_type, Some(&needle));
+    assert_eq!(consumed, 12);
+}
+
+#[test]
+fn test_bytes_consumed_regex_no_match_returns_zero() {
+    // "xyz" never occurs in the buffer, so the anchor must not advance.
+    let haystack = b"abcdef";
+    let needle = Value::String(String::from("xyz"));
+    let flags = crate::parser::ast::RegexFlags::default();
+    let regex_type = TypeKind::Regex { flags, count: None };
+    let consumed = bytes_consumed_with_pattern(haystack, 0, &regex_type, Some(&needle));
+    assert_eq!(consumed, 0);
+}
+
+#[test]
+fn test_bytes_consumed_regex_zero_width_match_returns_zero() {
+    // `^` matches with zero width at position 0, so match_end is 0 and
+    // the anchor stays where it is. Cross-check with the direct reader
+    // in regex.rs.
+    let haystack = b"hello";
+    let needle = Value::String(String::from("^"));
+    let flags = crate::parser::ast::RegexFlags::default();
+    let regex_type = TypeKind::Regex { flags, count: None };
+    let consumed = bytes_consumed_with_pattern(haystack, 0, &regex_type, Some(&needle));
+    assert_eq!(consumed, 0);
+}
+
+#[test]
+fn test_bytes_consumed_regex_start_offset_flag_uses_match_start() {
+    // With the `/s` flag the anchor advances to match-start rather than
+    // match-end. Regression guard for V2.
+    let haystack = b"prefix_World_suffix";
+    let needle = Value::String(String::from("World"));
+    let flags = crate::parser::ast::RegexFlags {
+        start_offset: true,
+        ..crate::parser::ast::RegexFlags::default()
+    };
+    let regex_type = TypeKind::Regex { flags, count: None };
+
+    // "World" begins at index 7, so the advance is 7, not 12.
+    let consumed = bytes_consumed_with_pattern(haystack, 0, &regex_type, Some(&needle));
+    assert_eq!(consumed, 7);
+}
+
+#[test]
+fn test_bytes_consumed_search_with_pattern_is_match_end() {
+    // Regression guard: an earlier version returned the size of the whole
+    // window instead of match-end. Per GNU `file` softmagic.c FILE_SEARCH,
+    // the anchor advances to `base + match_idx + pattern.len()`.
+    let haystack = b"abcWorld_xyz";
+    let needle = Value::String(String::from("World"));
+    let range = ::std::num::NonZeroUsize::new(10).unwrap();
+    let search_type = TypeKind::Search { range };
+
+    // "World" starts at index 3 and is 5 bytes long: match-end = 8.
+    let consumed = bytes_consumed_with_pattern(haystack, 0, &search_type, Some(&needle));
+    assert_eq!(
+        consumed, 8,
+        "expected match-end (8), not window-end (10)"
+    );
+}
+
+#[test]
+fn test_bytes_consumed_search_no_match_returns_zero() {
+    let haystack = b"abcdefghij";
+    let needle = Value::String(String::from("XYZ"));
+    let range = ::std::num::NonZeroUsize::new(10).unwrap();
+    let search_type = TypeKind::Search { range };
+    let consumed = bytes_consumed_with_pattern(haystack, 0, &search_type, Some(&needle));
+    assert_eq!(consumed, 0);
+}
+
+#[test]
+fn test_bytes_consumed_search_bytes_pattern_works() {
+    // `Value::Bytes` is the other pattern shape accepted for search. The
+    // dispatch path must take it and report the same match-end that an
+    // equivalent `Value::String` pattern would.
+    let haystack = &[0x00, 0xff, 0xde, 0xad, 0xbe, 0xef, 0x11];
+    let needle = Value::Bytes(vec![0xde, 0xad, 0xbe, 0xef]);
+    let range = ::std::num::NonZeroUsize::new(7).unwrap();
+    let search_type = TypeKind::Search { range };
+    // The pattern starts at index 2 and is 4 bytes long: match-end = 6.
+    let consumed = bytes_consumed_with_pattern(haystack, 0, &search_type, Some(&needle));
+    assert_eq!(consumed, 6);
 }
 
 #[test]
@@ -1010,11 +1113,11 @@ fn test_bytes_consumed_fixed_width_returns_zero_past_end() {
     let buf = b"abc";
     let typ = TypeKind::Byte { signed: false };
     // offset == buf.len() leaves no room for a 1-byte read.
-    assert_eq!(bytes_consumed(buf, 3, &typ), 0);
+    assert_eq!(bytes_consumed_with_pattern(buf, 3, &typ, None), 0);
     // Way past end.
-    assert_eq!(bytes_consumed(buf, 100, &typ), 0);
+    assert_eq!(bytes_consumed_with_pattern(buf, 100, &typ, None), 0);
     // Last valid index: 1-byte read fits.
-    assert_eq!(bytes_consumed(buf, 2, &typ), 1);
+    assert_eq!(bytes_consumed_with_pattern(buf, 2, &typ, None), 1);
 
     // Multi-byte fixed-width type at the boundary.
     let typ_long = TypeKind::Long {
@@ -1023,9 +1126,12 @@ fn test_bytes_consumed_fixed_width_returns_zero_past_end() {
     };
     let buf4 = b"abcd";
     // offset 0 + width 4 == buf.len() -> fits
-    assert_eq!(bytes_consumed(buf4, 0, &typ_long), 4);
+    assert_eq!(bytes_consumed_with_pattern(buf4, 0, &typ_long, None), 4);
     // offset 1 + width 4 == 5 > buf.len() -> 0
-    assert_eq!(bytes_consumed(buf4, 1, &typ_long), 0);
+    assert_eq!(bytes_consumed_with_pattern(buf4, 1, &typ_long, None), 0);
     // overflow: offset = usize::MAX, width = 4 -> checked_add returns None -> 0
-    assert_eq!(bytes_consumed(buf4, usize::MAX, &typ_long), 0);
+    assert_eq!(
+        bytes_consumed_with_pattern(buf4, usize::MAX, &typ_long, None),
+        0
+    );
 }
diff --git a/src/parser/ast.rs b/src/parser/ast.rs
index dc6e7bd4..bcef818e 100644
--- a/src/parser/ast.rs
+++ b/src/parser/ast.rs
@@ -7,6 +7,7 @@
 //! and their components, including offset specifications, type kinds, operators, and values.
 
 use serde::{Deserialize, Serialize};
+use std::num::{NonZeroU32, NonZeroUsize};
 
 /// The width of the length prefix for Pascal strings.
 ///
@@ -335,8 +336,141 @@ pub enum TypeKind {
         /// Whether the stored length includes the length field itself (`/J` flag)
         length_includes_itself: bool,
     },
+    /// Regular expression matching against file contents
+    ///
+    /// Regex rules match a POSIX-extended regular expression pattern against the
+    /// file buffer. Patterns are compiled with multi-line mode always enabled
+    /// (matching libmagic's unconditional `REG_NEWLINE`), so `^` and `$` match
+    /// at line boundaries and `.` does not match `\n`. The `flags` control
+    /// case sensitivity, anchor advance semantics, and whether `count` is
+    /// measured in bytes or lines. The scan window is always capped at
+    /// [`REGEX_MAX_BYTES`] (8192) regardless of `count`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use libmagic_rs::parser::ast::{TypeKind, RegexFlags};
+    /// use std::num::NonZeroU32;
+    ///
+    /// // Plain `regex` -- no flags, default 8192-byte scan window.
+    /// let plain = TypeKind::Regex {
+    ///     flags: RegexFlags::default(),
+    ///     count: None,
+    /// };
+    ///
+    /// // `regex/1l` -- scan the first line only (1 line, capped at 8192 bytes).
+    /// let first_line = TypeKind::Regex {
+    ///     flags: RegexFlags {
+    ///         line_based: true,
+    ///         ..RegexFlags::default()
+    ///     },
+    ///     count: NonZeroU32::new(1),
+    /// };
+    ///
+    /// // `regex/cs` -- case-insensitive, anchor advances to match-start.
+    /// let case_insensitive_start = TypeKind::Regex {
+    ///     flags: RegexFlags {
+    ///         case_insensitive: true,
+    ///         start_offset: true,
+    ///         line_based: false,
+    ///     },
+    ///     count: None,
+    /// };
+    /// ```
+    Regex {
+        /// Modifier flags from the `/[csl]` suffix.
+        flags: RegexFlags,
+        /// Optional numeric count from `regex/N[flags]`. Interpretation
+        /// depends on `flags.line_based`:
+        ///
+        /// * `None`: use the 8192-byte default scan window.
+        /// * `Some(n)` with `flags.line_based == false`: scan at most `n`
+        ///   bytes, capped at 8192.
+        /// * `Some(n)` with `flags.line_based == true`: scan at most `n`
+        ///   lines, with an effective byte cap of `min(n * 80, 8192)`.
+        ///
+        /// The 8192-byte hard cap matches GNU `file`'s `FILE_REGEX_MAX` and
+        /// prevents runaway regex scans against large buffers.
+        count: Option<NonZeroU32>,
+    },
+    /// Multi-byte pattern search within a bounded range
+    ///
+    /// Search rules look for a literal byte pattern within `range` bytes of
+    /// the offset. Unlike [`TypeKind::String`], which only matches at the
+    /// exact offset, `search` scans forward up to `range` bytes for the
+    /// first occurrence. The range is **mandatory** per GNU `file`'s
+    /// magic(5) specification and is stored as a [`NonZeroUsize`] so a
+    /// zero-range search is unrepresentable.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use libmagic_rs::parser::ast::TypeKind;
+    /// use std::num::NonZeroUsize;
+    ///
+    /// // `search/256` -- scan up to 256 bytes for the literal pattern.
+    /// let bounded = TypeKind::Search {
+    ///     range: NonZeroUsize::new(256).unwrap(),
+    /// };
+    /// ```
+    Search {
+        /// Scan window width in bytes, starting at the rule's offset.
+        range: NonZeroUsize,
+    },
 }
 
+/// Regex modifier flags parsed from the `/[csl]` suffix on a `regex` rule.
+///
+/// All flags default to `false` via [`RegexFlags::default`]. The `Default`
+/// impl is equivalent to a plain `regex` type with no suffix, which scans
+/// 8192 bytes in byte mode and advances the anchor to match-end.
+///
+/// # Examples
+///
+/// ```
+/// use libmagic_rs::parser::ast::RegexFlags;
+///
+/// let plain = RegexFlags::default();
+/// assert!(!plain.case_insensitive);
+/// assert!(!plain.start_offset);
+/// assert!(!plain.line_based);
+///
+/// let case_and_line = RegexFlags {
+///     case_insensitive: true,
+///     start_offset: false,
+///     line_based: true,
+/// };
+/// assert!(case_and_line.case_insensitive);
+/// assert!(case_and_line.line_based);
+/// ```
+#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
+pub struct RegexFlags {
+    /// `/c` -- case-insensitive matching. When `true`, ASCII letter
+    /// casing is ignored during pattern matching.
+    pub case_insensitive: bool,
+    /// `/s` -- advance the GNU `file` previous-match anchor to the start
+    /// of the matched region instead of its end. Matches libmagic's
+    /// `REGEX_OFFSET_START` flag, which zeros the length contribution in
+    /// `moffset()` for `FILE_REGEX`. Useful for chaining child rules that
+    /// need to re-match from the position where the parent regex began.
+    pub start_offset: bool,
+    /// `/l` -- measure the scan window in lines instead of bytes. When
+    /// `true`, `count` is interpreted as a line count rather than a byte
+    /// count. The effective byte window is still capped at 8192 bytes
+    /// regardless (see [`TypeKind::Regex::count`] for the details).
+    ///
+    /// Note: this flag does **not** control multi-line regex matching;
+    /// libmagic always compiles patterns with `REG_NEWLINE`, so `^`/`$`
+    /// match at line boundaries regardless of `/l`.
+    pub line_based: bool,
+}
+
+/// The hard upper bound on regex scan window size, matching GNU `file`'s
+/// `FILE_REGEX_MAX` constant in `src/file.h`. Any regex rule -- including
+/// ones with explicit counts larger than this -- is capped at this many
+/// bytes to prevent runaway scans against large buffers.
+pub const REGEX_MAX_BYTES: usize = 8192;
+
 impl TypeKind {
     /// Returns the bit width of integer types, or `None` for non-integer types (e.g., String).
     ///
@@ -360,7 +494,10 @@ impl TypeKind {
             Self::Short { .. } => Some(16),
             Self::Long { .. } | Self::Float { .. } | Self::Date { .. } => Some(32),
             Self::Quad { .. } | Self::Double { .. } | Self::QDate { .. } => Some(64),
-            Self::String { .. } | Self::PString { .. } => None,
+            Self::String { .. }
+            | Self::PString { .. }
+            | Self::Regex { .. }
+            | Self::Search { .. } => None,
         }
     }
 }
diff --git a/src/parser/codegen.rs b/src/parser/codegen.rs
index bf36be8f..b826f90c 100644
--- a/src/parser/codegen.rs
+++ b/src/parser/codegen.rs
@@ -232,6 +232,25 @@ pub fn serialize_type_kind(typ: &TypeKind) -> String {
                 length_includes_itself
             ),
         },
+        TypeKind::Regex { flags, count } => {
+            // Render `count` as source text that rebuilds the same
+            // `Option<NonZeroU32>`. The value comes from an existing
+            // `NonZeroU32`, so the emitted `unwrap()` cannot panic.
+            let count_expr = match count {
+                Some(n) => format!("Some(::std::num::NonZeroU32::new({}).unwrap())", n.get()),
+                None => "None".to_string(),
+            };
+            // Flags are plain `bool`s and format directly as `true`/`false`
+            // literals in the generated struct expression.
+            format!(
+                "TypeKind::Regex {{ flags: libmagic_rs::parser::ast::RegexFlags {{ case_insensitive: {}, start_offset: {}, line_based: {} }}, count: {count_expr} }}",
+                flags.case_insensitive, flags.start_offset, flags.line_based
+            )
+        }
+        TypeKind::Search { range } => format!(
+            "TypeKind::Search {{ range: ::std::num::NonZeroUsize::new({}).unwrap() }}",
+            range.get()
+        ),
     }
 }
 
diff --git a/src/parser/grammar/mod.rs b/src/parser/grammar/mod.rs
index 59e1316b..50c76917 100644
--- a/src/parser/grammar/mod.rs
+++ b/src/parser/grammar/mod.rs
@@ -391,6 +391,7 @@ fn parse_pstring_suffix(
 ///
 /// # Errors
 /// Returns a nom parsing error if the input doesn't match the expected format
+#[allow(clippy::too_many_lines)]
 pub fn parse_type_and_operator(input: &str) -> IResult<&str, (TypeKind, Option<Operator>)> {
     use crate::parser::ast::PStringLengthWidth;
 
@@ -410,6 +411,100 @@ pub fn parse_type_and_operator(input: &str) -> IResult<&str, (TypeKind, Option<O
         pstring_length_includes_itself = includes_j;
     }
 
+    // Handle regex suffixes: flag letters (`c`, `s`, `l`) and an optional
+    // decimal count. GNU `file`'s `parse_string_modifier` accepts flag
+    // letters and digits in any interleaved order with "last range wins"
+    // semantics; we implement the same: scan the suffix character by
+    // character, setting flag bits on letters and parsing a new numeric
+    // count on digit sequences (which overwrites any previously-seen
+    // count). This accepts both `regex/1l` and `regex/l1` as equivalent.
+    let mut regex_flags = crate::parser::ast::RegexFlags::default();
+    let mut regex_count: Option<u32> = None;
+    if type_name == "regex"
+        && let Some(suffix_rest) = input.strip_prefix('/')
+    {
+        let mut rest = suffix_rest;
+        let mut any_modifier = false;
+
+        // Scan modifier sequence. Stop at whitespace or at operator
+        // boundary characters (`=`, `!`, `<`, `>`, `&`, `^`, `~`, `x`) so
+        // forms like `regex/c=...` or `regex/l!=...` leave the operator
+        // for `parse_operator` to handle.
+        loop {
+            if let Some(next) = rest.strip_prefix('c') {
+                regex_flags.case_insensitive = true;
+                rest = next;
+                any_modifier = true;
+            } else if let Some(next) = rest.strip_prefix('s') {
+                regex_flags.start_offset = true;
+                rest = next;
+                any_modifier = true;
+            } else if let Some(next) = rest.strip_prefix('l') {
+                regex_flags.line_based = true;
+                rest = next;
+                any_modifier = true;
+            } else if rest.starts_with(|c: char| c.is_ascii_digit()) {
+                let (after_number, n) = parse_decimal_number(rest).map_err(|_| {
+                    nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
+                })?;
+                // `0` is a valid sentinel in libmagic (means "unset"), but
+                // with a dedicated 8192-byte default we don't need a
+                // sentinel. Reject 0 explicitly so callers get a clear
+                // parse error instead of a silently-dropped count.
+                let count_value = u32::try_from(n)
+                    .ok()
+                    .and_then(::std::num::NonZeroU32::new)
+                    .ok_or_else(|| {
+                        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
+                    })?;
+                regex_count = Some(count_value.get());
+                rest = after_number;
+                any_modifier = true;
+            } else {
+                match rest.chars().next() {
+                    Some(c) if c.is_whitespace() => break,
+                    None | Some('=' | '!' | '<' | '>' | '&' | '^' | '~' | 'x') => break,
+                    Some(_) => {
+                        return Err(nom::Err::Error(nom::error::Error::new(
+                            input,
+                            nom::error::ErrorKind::Tag,
+                        )));
+                    }
+                }
+            }
+        }
+
+        // A bare `regex/` with no valid modifier is a parse error.
+        if !any_modifier {
+            return Err(nom::Err::Error(nom::error::Error::new(
+                input,
+                nom::error::ErrorKind::Tag,
+            )));
+        }
+
+        input = rest;
+    }
+
+    // Handle search suffix: mandatory decimal range (e.g., `search/256`, per
+    // GNU `file` magic(5)). `search/0` fails the `NonZeroUsize` conversion;
+    // bare `search` leaves `search_range` unset and is rejected further down.
+    let mut search_range: Option<::std::num::NonZeroUsize> = None;
+    if type_name == "search"
+        && let Some(suffix_rest) = input.strip_prefix('/')
+    {
+        let (rest, n) = parse_decimal_number(suffix_rest).map_err(|_| {
+            nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
+        })?;
+        let range_value = usize::try_from(n)
+            .ok()
+            .and_then(::std::num::NonZeroUsize::new)
+            .ok_or_else(|| {
+                nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
+            })?;
+        search_range = Some(range_value);
+        input = rest;
+    }
+
     // Check for attached operator with mask (like &0xf0000000)
     // Uses unsigned parsing so full u64 masks (e.g. 0xffffffffffffffff) are supported.
     // If '&' is followed by digits/0x but the mask parse fails (overflow, etc.),
@@ -437,15 +532,35 @@ pub fn parse_type_and_operator(input: &str) -> IResult<&str, (TypeKind, Option<O
 
     let (input, _) = multispace0(input)?;
 
-    let mut type_kind = crate::parser::types::type_keyword_to_kind(type_name);
-    // Patch PString with parsed length_width and length_includes_itself
-    if let TypeKind::PString { max_length, .. } = type_kind {
-        type_kind = TypeKind::PString {
-            max_length,
-            length_width: pstring_length_width,
-            length_includes_itself: pstring_length_includes_itself,
-        };
-    }
+    // Build Regex/Search directly from the parsed suffixes; fall back to
+    // `type_keyword_to_kind` for every other type. PString still uses the
+    // patch-after-construct pattern because `type_keyword_to_kind` supplies
+    // its `max_length` default and the suffix parser only produces the
+    // length-width and `/J` flag.
+    let type_kind = match type_name {
+        "regex" => TypeKind::Regex {
+            flags: regex_flags,
+            count: regex_count.and_then(::std::num::NonZeroU32::new),
+        },
+        "search" => {
+            // Mandatory range: reject bare `search` at parse time.
+            let range = search_range.ok_or_else(|| {
+                nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Tag))
+            })?;
+            TypeKind::Search { range }
+        }
+        _ => {
+            let mut kind = crate::parser::types::type_keyword_to_kind(type_name);
+            if let TypeKind::PString { max_length, .. } = kind {
+                kind = TypeKind::PString {
+                    max_length,
+                    length_width: pstring_length_width,
+                    length_includes_itself: pstring_length_includes_itself,
+                };
+            }
+            kind
+        }
+    };
 
     Ok((input, (type_kind, attached_op)))
 }
diff --git a/src/parser/grammar/tests/mod.rs b/src/parser/grammar/tests/mod.rs
index c6a69589..dd9551ef 100644
--- a/src/parser/grammar/tests/mod.rs
+++ b/src/parser/grammar/tests/mod.rs
@@ -2275,3 +2275,172 @@ fn test_parse_type_and_operator_pstring_suffixes() {
         }
     }
 }
+
+#[test]
+fn test_parse_type_and_operator_regex_and_search_suffixes() {
+    use crate::parser::ast::{RegexFlags, TypeKind};
+    use std::num::{NonZeroU32, NonZeroUsize};
+
+    fn rx(case: bool, start: bool, line: bool, count: Option<u32>) -> TypeKind {
+        TypeKind::Regex {
+            flags: RegexFlags {
+                case_insensitive: case,
+                start_offset: start,
+                line_based: line,
+            },
+            count: count.and_then(NonZeroU32::new),
+        }
+    }
+    fn sr(n: usize) -> TypeKind {
+        TypeKind::Search {
+            range: NonZeroUsize::new(n).unwrap(),
+        }
+    }
+
+    let cases: &[(&str, TypeKind, &str)] = &[
+        ("regex", rx(false, false, false, None), ""),
+        ("regex/c", rx(true, false, false, None), ""),
+        ("regex/l", rx(false, false, true, None), ""),
+        ("regex/s", rx(false, true, false, None), ""),
+        ("regex/cl", rx(true, false, true, None), ""),
+        ("regex/lc", rx(true, false, true, None), ""),
+        ("regex/cs", rx(true, true, false, None), ""),
+        ("regex/csl", rx(true, true, true, None), ""),
+        ("regex/1l", rx(false, false, true, Some(1)), ""),
+        ("regex/l1", rx(false, false, true, Some(1)), ""),
+        ("regex/1c", rx(true, false, false, Some(1)), ""),
+        ("regex/256", rx(false, false, false, Some(256)), ""),
+        ("regex/c =", rx(true, false, false, None), "="),
+        ("search/256", sr(256), ""),
+        ("search/1", sr(1), ""),
+        ("search/256 =", sr(256), "="),
+    ];
+    for &(input, ref expected_kind, expected_rest) in cases {
+        let (rest, (kind, op)) = parse_type_and_operator(input).expect(input);
+        assert_eq!(rest, expected_rest, "rest for input: {input}");
+        assert!(op.is_none(), "operator for input: {input}");
+        assert_eq!(&kind, expected_kind, "kind for input: {input}");
+    }
+}
+
+#[test]
+fn test_parse_type_and_operator_search_requires_range() {
+    // The range is mandatory: bare `search` has no `/N` suffix at all, and
+    // `search/0` names a zero-width window that `NonZeroUsize` cannot hold.
+    for input in ["search", "search/0"] {
+        assert!(parse_type_and_operator(input).is_err(), "input: {input}");
+    }
+}
+
+#[test]
+fn test_parse_type_and_operator_regex_invalid_suffix() {
+    // Every suffix below must be rejected:
+    //   `regex/`   -- bare slash with no flags or count
+    //   `regex/z`  -- unrecognized flag letter
+    //   `regex/cz` -- valid flag followed by a non-operator character
+    //   `regex/0`  -- zero count (the parser requires a nonzero count so
+    //                 that `NonZeroU32` can express "user specified one")
+    for input in ["regex/", "regex/z", "regex/cz", "regex/0"] {
+        assert!(parse_type_and_operator(input).is_err(), "input: {input}");
+    }
+}
+
+#[test]
+fn test_parse_type_and_operator_regex_operator_adjacent() {
+    use crate::parser::ast::{Operator, RegexFlags, TypeKind};
+
+    // `regex/c=` should leave `=` for parse_operator, matching the `regex/c =`
+    // (space-separated) behavior and mirroring `search/256=`.
+    let (rest, (kind, op)) = parse_type_and_operator("regex/c=").expect("regex/c=");
+    assert_eq!(rest, "=");
+    assert!(op.is_none());
+    assert_eq!(
+        kind,
+        TypeKind::Regex {
+            flags: RegexFlags {
+                case_insensitive: true,
+                ..RegexFlags::default()
+            },
+            count: None,
+        }
+    );
+
+    // `regex/l!=` should leave `!=` for parse_operator.
+    let (rest, (kind, op)) = parse_type_and_operator("regex/l!=").expect("regex/l!=");
+    assert_eq!(rest, "!=");
+    assert!(op.is_none());
+    assert_eq!(
+        kind,
+        TypeKind::Regex {
+            flags: RegexFlags {
+                line_based: true,
+                ..RegexFlags::default()
+            },
+            count: None,
+        }
+    );
+
+    // Confirm the full pipeline parses the operator correctly through
+    // parse_type_and_operator + parse_operator chaining.
+    let (rest, (_, _)) = parse_type_and_operator("regex/c=foo").expect("regex/c=foo");
+    let (rest_after_op, op) = crate::parser::grammar::parse_operator(rest).expect("operator");
+    assert_eq!(op, Operator::Equal);
+    assert_eq!(rest_after_op, "foo");
+}
+
+#[test]
+fn test_parse_magic_rule_regex_and_search() {
+    use crate::parser::ast::RegexFlags;
+    use std::num::{NonZeroU32, NonZeroUsize};
+
+    // regex/c: case-insensitive flag
+    let input = r#"0 regex/c "hello" case-insensitive match"#;
+    let (remaining, rule) = parse_magic_rule(input).unwrap();
+    assert_eq!(remaining, "");
+    assert_eq!(rule.offset, OffsetSpec::Absolute(0));
+    assert_eq!(
+        rule.typ,
+        TypeKind::Regex {
+            flags: RegexFlags {
+                case_insensitive: true,
+                ..RegexFlags::default()
+            },
+            count: None,
+        }
+    );
+    assert_eq!(rule.op, Operator::Equal);
+    assert_eq!(rule.value, Value::String("hello".to_string()));
+    assert_eq!(rule.message, "case-insensitive match");
+
+    // search/256
+    let input = r#"0 search/256 "MZ" DOS executable"#;
+    let (remaining, rule) = parse_magic_rule(input).unwrap();
+    assert_eq!(remaining, "");
+    assert_eq!(
+        rule.typ,
+        TypeKind::Search {
+            range: NonZeroUsize::new(256).unwrap(),
+        }
+    );
+    assert_eq!(rule.op, Operator::Equal);
+    assert_eq!(rule.value, Value::String("MZ".to_string()));
+    assert_eq!(rule.message, "DOS executable");
+
+    // regex/1l: line-based with a count of 1 (mirrors regex-eol.magic
+    // syntax). The count must be carried into the AST, not discarded.
+    let input = r#">1 regex/1l "[0-9]+" version line"#;
+    let (remaining, rule) = parse_magic_rule(input).unwrap();
+    assert_eq!(remaining, "");
+    assert_eq!(rule.level, 1);
+    assert_eq!(
+        rule.typ,
+        TypeKind::Regex {
+            flags: RegexFlags {
+                line_based: true,
+                ..RegexFlags::default()
+            },
+            count: NonZeroU32::new(1),
+        }
+    );
+    assert_eq!(rule.message, "version line");
+}
diff --git a/src/parser/types.rs b/src/parser/types.rs
index fd3b64ab..a9b4538b 100644
--- a/src/parser/types.rs
+++ b/src/parser/types.rs
@@ -95,8 +95,8 @@ pub fn parse_type_keyword(input: &str) -> IResult<&str, &str> {
             tag("ledate"),
             tag("date"),
         )),
-        // String types
-        alt((tag("pstring"), tag("string"))),
+        // String types (and regex/search, which share the string-type family)
+        alt((tag("pstring"), tag("search"), tag("regex"), tag("string"))),
     ))
     .parse(input)
 }
@@ -301,6 +301,25 @@ pub fn type_keyword_to_kind(type_name: &str) -> TypeKind {
             length_includes_itself: false,
         },
 
+        // REGEX type -- suffix parsing (flags and count) handled in
+        // `parse_type_and_operator` in grammar/mod.rs, which constructs
+        // the final `TypeKind::Regex` directly. The value returned here
+        // is a bare-`regex` placeholder used only by the round-trip
+        // keyword test; grammar never observes it.
+        "regex" => TypeKind::Regex {
+            flags: crate::parser::ast::RegexFlags::default(),
+            count: None,
+        },
+
+        // SEARCH type -- range parsing handled in grammar/mod.rs, which
+        // constructs the final `TypeKind::Search` directly from the
+        // mandatory `/N` suffix. The value returned here is a placeholder
+        // with `range = 1` used only by the round-trip keyword test; a
+        // real search rule always has its range set by the grammar layer.
+        "search" => TypeKind::Search {
+            range: ::std::num::NonZeroUsize::new(1).expect("1 is nonzero"),
+        },
+
         _ => unreachable!("type_keyword_to_kind called with unknown type: {type_name}"),
     }
 }
@@ -546,7 +565,8 @@ mod tests {
             "long", "ulong", "lelong", "ulelong", "belong", "ubelong", "quad", "uquad", "lequad",
             "ulequad", "bequad", "ubequad", "float", "befloat", "lefloat", "double", "bedouble",
             "ledouble", "date", "ldate", "bedate", "beldate", "ledate", "leldate", "qdate",
-            "qldate", "beqdate", "beqldate", "leqdate", "leqldate", "pstring", "string",
+            "qldate", "beqdate", "beqldate", "leqdate", "leqldate", "pstring", "string", "regex",
+            "search",
         ];
         for keyword in keywords {
             let (rest, parsed) = parse_type_keyword(keyword).unwrap();
diff --git a/tests/evaluator_tests.rs b/tests/evaluator_tests.rs
index 649373de..2b3a21aa 100644
--- a/tests/evaluator_tests.rs
+++ b/tests/evaluator_tests.rs
@@ -495,3 +495,167 @@ fn test_evaluate_float_rule_no_match() {
         "Float equal rule should not match when value differs"
     );
 }
+
+// ============================================================
+// Third-Party Corpus: regex-eol
+// ============================================================
+
+/// Integration test for the `regex-eol.magic` corpus test from the upstream
+/// `file` project. The magic file itself uses two syntaxes that the text
+/// parser does not yet accept -- a bare unquoted `$ANSIBLE_VAULT` string
+/// value (see GOTCHAS S3.6) and `>&1` / `>>&1` relative-offset anchors (the
+/// `&+N`/`&-N` parsing TODO in AGENTS.md) -- so this test temporarily
+/// bypasses `MagicDatabase::load_from_file` and constructs the equivalent
+/// rule tree programmatically. The testfile fixture at
+/// `third_party/tests/regex-eol.testfile` is still read verbatim, so the
+/// runtime evaluation path (string match, `regex/1l` line-anchored matching,
+/// and `OffsetSpec::Relative` anchor advancement through
+/// `EvaluationContext::last_match_end`) is exercised end-to-end.
+///
+/// Once the parser learns unquoted string values and `&+N` relative offsets,
+/// this test should be rewritten to call `MagicDatabase::load_from_file`
+/// against the unmodified `regex-eol.magic` corpus file.
+#[test]
+fn test_regex_eol_corpus() {
+    let buffer = std::fs::read("third_party/tests/regex-eol.testfile")
+        .expect("failed to read regex-eol.testfile");
+
+    // Mirror of:
+    //   0     string    $ANSIBLE_VAULT     Ansible Vault text
+    //   >&1   regex/1l  [0-9]+(\.[0-9]+)+  \b, version %s
+    //   >>&1  regex/1l  [^;]+$             \b, using %s encryption
+    //
+    // Messages hardcode the captured tokens that libmagic's `%s` formatter
+    // would substitute (libmagic-rs does not yet implement format
+    // substitution), so the final description contains the literal
+    // `version`, `1.1`, and `AES256` strings. The match-value assertions
+    // below separately verify the regex engine actually captured those
+    // tokens from the buffer, so the test still fails if the regex
+    // behavior regresses.
+    //
+    // `max_length: Some(14)` caps read_string at the 14-byte target so the
+    // comparison succeeds on a buffer with no NUL terminator. `Relative(1)`
+    // on each child matches the `&+1` anchor offset (previous match end + 1,
+    // skipping the `;` separator).
+    // `regex/1l` == 1-line scan window, line_based = true, count = 1.
+    // Multi-line mode is always on so `^`/`$` match at line boundaries
+    // regardless; the `/l` flag controls only the scan window extent.
+    let one_line_regex = libmagic_rs::parser::ast::RegexFlags {
+        line_based: true,
+        ..libmagic_rs::parser::ast::RegexFlags::default()
+    };
+    let one = ::std::num::NonZeroU32::new(1);
+
+    let inner_regex = MagicRule {
+        offset: OffsetSpec::Relative(1),
+        typ: TypeKind::Regex {
+            flags: one_line_regex,
+            count: one,
+        },
+        op: Operator::Equal,
+        value: Value::String("[^;]+$".to_string()),
+        message: "\u{0008}, using AES256 encryption".to_string(),
+        children: vec![],
+        level: 2,
+        strength_modifier: None,
+    };
+
+    let version_regex = MagicRule {
+        offset: OffsetSpec::Relative(1),
+        typ: TypeKind::Regex {
+            flags: one_line_regex,
+            count: one,
+        },
+        op: Operator::Equal,
+        value: Value::String("[0-9]+(\\.[0-9]+)+".to_string()),
+        message: "\u{0008}, version 1.1".to_string(),
+        children: vec![inner_regex],
+        level: 1,
+        strength_modifier: None,
+    };
+
+    let ansible_vault = MagicRule {
+        offset: OffsetSpec::Absolute(0),
+        typ: TypeKind::String {
+            max_length: Some("$ANSIBLE_VAULT".len()),
+        },
+        op: Operator::Equal,
+        value: Value::String("$ANSIBLE_VAULT".to_string()),
+        message: "Ansible Vault text".to_string(),
+        children: vec![version_regex],
+        level: 0,
+        strength_modifier: None,
+    };
+
+    let config = EvaluationConfig::default();
+    let mut context = EvaluationContext::new(config);
+    let matches =
+        evaluate_rules(&[ansible_vault], &buffer, &mut context).expect("evaluation failed");
+
+    // All three rules must fire in order: the top-level string, the version
+    // regex, and the encryption regex.
+    assert_eq!(
+        matches.len(),
+        3,
+        "expected 3 matches (string + 2 regex), got {}: {matches:#?}",
+        matches.len()
+    );
+
+    // Verify the regex engine captured the expected tokens from the buffer.
+    // These assertions fail if regex evaluation or the relative-offset
+    // anchor advances incorrectly.
+    assert_eq!(
+        matches[0].value,
+        Value::String("$ANSIBLE_VAULT".to_string()),
+        "top-level string match should capture $ANSIBLE_VAULT"
+    );
+    if let Value::String(s) = &matches[1].value {
+        assert!(
+            s.contains("1.1"),
+            "version regex should capture '1.1', got {s:?}"
+        );
+    } else {
+        panic!(
+            "expected Value::String for version regex, got {:?}",
+            matches[1].value
+        );
+    }
+    if let Value::String(s) = &matches[2].value {
+        assert!(
+            s.contains("AES256"),
+            "encryption regex should capture 'AES256', got {s:?}"
+        );
+    } else {
+        panic!(
+            "expected Value::String for encryption regex, got {:?}",
+            matches[2].value
+        );
+    }
+
+    // Mirror `MagicDatabase::build_result` message concatenation: rules whose
+    // message starts with a backspace (`\b`) suppress the leading space.
+    let mut description = String::new();
+    for m in &matches {
+        if let Some(rest) = m.message.strip_prefix('\u{0008}') {
+            description.push_str(rest);
+        } else if description.is_empty() {
+            description.push_str(&m.message);
+        } else {
+            description.push(' ');
+            description.push_str(&m.message);
+        }
+    }
+
+    assert!(
+        description.contains("Ansible Vault"),
+        "expected 'Ansible Vault' in description, got: {description:?}"
+    );
+    assert!(
+        description.contains("version"),
+        "expected 'version' in description, got: {description:?}"
+    );
+    assert!(
+        description.contains("AES256"),
+        "expected 'AES256' in description, got: {description:?}"
+    );
+}
diff --git a/tests/property_tests.rs b/tests/property_tests.rs
index f36c70c4..ad7027d3 100644
--- a/tests/property_tests.rs
+++ b/tests/property_tests.rs
@@ -65,6 +65,25 @@ fn arb_type_kind() -> impl Strategy<Value = TypeKind> {
                 length_width: width,
                 length_includes_itself: includes_self,
             }),
+        (
+            any::<bool>(),
+            any::<bool>(),
+            any::<bool>(),
+            prop::option::of(1u32..=4096u32),
+        )
+            .prop_map(|(case_insensitive, start_offset, line_based, count)| {
+                TypeKind::Regex {
+                    flags: libmagic_rs::parser::ast::RegexFlags {
+                        case_insensitive,
+                        start_offset,
+                        line_based,
+                    },
+                    count: count.and_then(::std::num::NonZeroU32::new),
+                }
+            }),
+        (1usize..=4096usize).prop_map(|range| TypeKind::Search {
+            range: ::std::num::NonZeroUsize::new(range).unwrap(),
+        }),
     ]
 }
 
diff --git a/tests/regex_search_corpus_tests.rs b/tests/regex_search_corpus_tests.rs
new file mode 100644
index 00000000..56f66776
--- /dev/null
+++ b/tests/regex_search_corpus_tests.rs
@@ -0,0 +1,370 @@
+// Copyright (c) 2025-2026 the libmagic-rs contributors
+// SPDX-License-Identifier: Apache-2.0
+
+//! Corpus integration tests for issue #39 — regex and search types
+//!
+//! This file exercises the regex and search TypeKind variants end-to-end
+//! against the test corpus files listed as "blocked" in issue #39:
+//!
+//! * `searchbug` — exercises `search/N` against a two-match binary buffer
+//! * `json1`, `jsonlines1` — JSON text detection via regex
+//! * `cmd1` — shell script detection via regex
+//! * `gedcom` — GEDCOM genealogy file detection via regex
+//!
+//! Where a corpus file depends on magic-file features we do not yet
+//! support (`use`/`name` directives, `offset` type, the `&+N`/`&-N`
+//! parser for relative offsets), the test bypasses `parse_text_magic_file`
+//! and builds the equivalent rule tree programmatically via the AST.
+//! This pattern is documented in GOTCHAS 3.9.
+
+use libmagic_rs::evaluator::evaluate_rules;
+use libmagic_rs::parser::ast::RegexFlags;
+use libmagic_rs::{
+    EvaluationConfig, EvaluationContext, MagicRule, OffsetSpec, Operator, TypeKind, Value,
+};
+use std::num::NonZeroUsize;
+
+const CORPUS_DIR: &str = "third_party/tests";
+
+fn load_corpus_file(name: &str) -> Vec<u8> {
+    let path = [CORPUS_DIR, name].join("/"); // i.e. "{CORPUS_DIR}/{name}"
+    std::fs::read(&path).unwrap_or_else(|e| panic!("failed to read {path}: {e}"))
+}
+
+/// Evaluate `rules` against `buffer` with `EvaluationConfig::default()`
+/// and hand back every match; panics if evaluation returns an error.
+fn run_rules(rules: &[MagicRule], buffer: &[u8]) -> Vec<libmagic_rs::evaluator::RuleMatch> {
+    let mut ctx = EvaluationContext::new(EvaluationConfig::default());
+    let outcome = evaluate_rules(rules, buffer, &mut ctx);
+    outcome.expect("evaluation should not fail")
+}
+
+/// Build an `Operator::Equal` regex rule; a `count` of `Some(0)` is stored as `None`.
+fn regex_rule(
+    offset: OffsetSpec,
+    pattern: &str,
+    flags: RegexFlags,
+    count: Option<u32>,
+    message: &str,
+    children: Vec<MagicRule>,
+    level: u32,
+) -> MagicRule {
+    let count = count.and_then(std::num::NonZeroU32::new);
+    let typ = TypeKind::Regex { flags, count };
+    MagicRule {
+        offset,
+        typ,
+        op: Operator::Equal,
+        value: Value::String(pattern.to_owned()),
+        message: message.to_owned(),
+        children,
+        level,
+        strength_modifier: None,
+    }
+}
+
+/// Build an `Operator::Equal` search rule with the given `range`; panics if `range` is 0.
+fn search_rule(
+    offset: OffsetSpec,
+    pattern: &str,
+    range: usize,
+    message: &str,
+    children: Vec<MagicRule>,
+    level: u32,
+) -> MagicRule {
+    let range = NonZeroUsize::new(range).expect("range must be non-zero");
+    MagicRule {
+        offset,
+        typ: TypeKind::Search { range },
+        op: Operator::Equal,
+        value: Value::String(pattern.to_owned()),
+        message: message.to_owned(),
+        children,
+        level,
+        strength_modifier: None,
+    }
+}
+
+// =====================================================================
+// searchbug — search type hierarchical scan
+// =====================================================================
+
+/// Programmatic stand-in for `searchbug.magic`, which relies on
+/// `use`/`name`/`offset`/`&0` features we do not yet parse. The tree
+/// built here is: a `TEST` string rule at offset 0, a `search/12 "ABC"`
+/// child, and a byte grandchild at `Relative(0)` that reads the
+/// character right after the `ABC` match.
+#[test]
+fn test_searchbug_corpus_search_with_relative_child() {
+    let buffer = load_corpus_file("searchbug.testfile");
+    assert!(buffer.starts_with(b"TEST"), "corpus should begin with TEST");
+
+    // Grandchild: the byte right behind "ABC". The first ABC in the
+    // corpus is `ABC1` at offset 8, so the match ends at 11 and the
+    // byte found there is '1' (0x31).
+    let digit_after_abc = MagicRule {
+        offset: OffsetSpec::Relative(0),
+        typ: TypeKind::Byte { signed: false },
+        op: Operator::Equal,
+        value: Value::Uint(u64::from(b'1')),
+        message: String::from("followed by 1"),
+        children: Vec::new(),
+        level: 2,
+        strength_modifier: None,
+    };
+
+    // Child: search/12 "ABC", itself placed at Relative(0).
+    let abc_search = search_rule(
+        OffsetSpec::Relative(0),
+        "ABC",
+        12,
+        "found ABC",
+        vec![digit_after_abc],
+        1,
+    );
+
+    // Root: TEST header at offset 0.
+    let header = MagicRule {
+        offset: OffsetSpec::Absolute(0),
+        typ: TypeKind::String {
+            max_length: Some(4),
+        },
+        op: Operator::Equal,
+        value: Value::String(String::from("TEST")),
+        message: String::from("Testfmt"),
+        children: vec![abc_search],
+        level: 0,
+        strength_modifier: None,
+    };
+
+    let matches = run_rules(&[header], &buffer);
+
+    // One match per level, reported root-first.
+    assert_eq!(
+        matches.len(),
+        3,
+        "expected 3 matches (header + search + byte child), got {matches:#?}"
+    );
+    let messages: Vec<&str> = matches.iter().map(|hit| hit.message.as_str()).collect();
+    assert_eq!(messages, ["Testfmt", "found ABC", "followed by 1"]);
+}
+
+#[test]
+fn test_searchbug_search_anchor_advance_not_window_end() {
+    // Regression guard for the anchor after a search match: it must be
+    // the match end (8 + 3 = 11), never the end of the search window
+    // (offset 12 for a 12-byte window counted from offset 0). A child
+    // at Relative(0) that saw the window end would read byte 12, which
+    // is 'x' (0x78) rather than '1' (0x31).
+    let buffer = load_corpus_file("searchbug.testfile");
+
+    let wrong_byte = MagicRule {
+        offset: OffsetSpec::Relative(0),
+        typ: TypeKind::Byte { signed: false },
+        op: Operator::Equal,
+        value: Value::Uint(u64::from(b'x')),
+        message: String::from("window-end bug -- must NOT match"),
+        children: Vec::new(),
+        level: 2,
+        strength_modifier: None,
+    };
+
+    let abc_search = search_rule(
+        OffsetSpec::Relative(0),
+        "ABC",
+        12,
+        "found ABC",
+        vec![wrong_byte],
+        1,
+    );
+
+    let header = MagicRule {
+        offset: OffsetSpec::Absolute(0),
+        typ: TypeKind::String {
+            max_length: Some(4),
+        },
+        op: Operator::Equal,
+        value: Value::String(String::from("TEST")),
+        message: String::from("Testfmt"),
+        children: vec![abc_search],
+        level: 0,
+        strength_modifier: None,
+    };
+
+    let matches = run_rules(&[header], &buffer);
+    // Only Testfmt and found ABC may appear; the wrong_byte child must stay silent.
+    assert_eq!(
+        matches.len(),
+        2,
+        "wrong_byte should not match: {matches:#?}"
+    );
+    assert_eq!(matches[1].message, "found ABC");
+}
+
+// =====================================================================
+// json1 / jsonlines1 — JSON text detection via regex
+// =====================================================================
+
+/// JSON detection smoke test: a buffer whose first non-whitespace
+/// character is `{` or `[` is reported as a JSON document by a single
+/// top-level regex rule.
+#[test]
+fn test_json1_corpus_detected_by_regex() {
+    let buffer = load_corpus_file("json1.testfile");
+
+    // Optional leading whitespace, then an object or array opener. The
+    // test relies on `^` anchoring at the start of the buffer.
+    let opener = r"^\s*[\{\[]";
+    let json_rule = regex_rule(
+        OffsetSpec::Absolute(0),
+        opener,
+        RegexFlags::default(),
+        None,
+        "JSON text data",
+        Vec::new(),
+        0,
+    );
+
+    let matches = run_rules(&[json_rule], &buffer);
+    assert_eq!(matches.len(), 1, "json1 should match: {matches:#?}");
+    assert_eq!(matches[0].message, "JSON text data");
+}
+
+#[test]
+fn test_jsonlines1_corpus_detected_by_regex() {
+    let buffer = load_corpus_file("jsonlines1.testfile");
+
+    // Each line is an independent JSON document: reuse the opener check on the first line.
+    let jsonlines_rule = regex_rule(
+        OffsetSpec::Absolute(0),
+        r"^\s*[\{\[]",
+        RegexFlags::default(),
+        None,
+        "JSON Lines text",
+        vec![],
+        0,
+    );
+
+    let matches = run_rules(&[jsonlines_rule], &buffer);
+    assert_eq!(matches.len(), 1, "jsonlines1 should match: {matches:#?}");
+    assert_eq!(matches[0].message, "JSON Lines text");
+}
+
+// =====================================================================
+// cmd1 — shell script detection via regex
+// =====================================================================
+
+/// Shell script detection: a single regex rule at offset 0 accepts a
+/// `#!` shebang, optional blanks, then a `/`-rooted interpreter path.
+#[test]
+fn test_cmd1_corpus_detected_by_regex() {
+    let buffer = load_corpus_file("cmd1.testfile");
+
+    let shebang_pattern = r"^#![ \t]*/\S+";
+    let shebang_rule = regex_rule(
+        OffsetSpec::Absolute(0),
+        shebang_pattern,
+        RegexFlags::default(),
+        None,
+        "a shell script",
+        Vec::new(),
+        0,
+    );
+
+    let matches = run_rules(&[shebang_rule], &buffer);
+    assert!(!matches.is_empty(), "cmd1 should match: {matches:#?}");
+    assert_eq!(matches[0].message, "a shell script");
+}
+
+// =====================================================================
+// gedcom — genealogy file detection via regex
+// =====================================================================
+
+/// GEDCOM detection: the corpus file is expected to open with a
+/// `0 HEAD` record, so one line-based regex limited to a count of 1
+/// (the `regex/1l` form) is enough to recognise the format.
+#[test]
+fn test_gedcom_corpus_detected_by_line_based_regex() {
+    let buffer = load_corpus_file("gedcom.testfile");
+
+    // Equivalent of `regex/1l "^0 HEAD"`: every flag keeps its default
+    // except `line_based`, and the count of 1 supplies the `1` in
+    // `/1l`, limiting the scan to the first line.
+    let gedcom_rule = regex_rule(
+        OffsetSpec::Absolute(0),
+        r"^0 HEAD",
+        RegexFlags {
+            line_based: true,
+            ..RegexFlags::default()
+        },
+        Some(1),
+        "GEDCOM genealogy data",
+        Vec::new(),
+        0,
+    );
+
+    let matches = run_rules(&[gedcom_rule], &buffer);
+    assert_eq!(matches.len(), 1, "gedcom should match: {matches:#?}");
+    assert_eq!(matches[0].message, "GEDCOM genealogy data");
+}
+
+// =====================================================================
+// regex-eol — simplified version extraction smoke test
+// =====================================================================
+
+/// Smoke test for the simpler non-hierarchical part of the regex-eol
+/// scenario: a single line-based regex must pull a dotted version
+/// number out of the corpus file. Full hierarchical coverage lives in
+/// the `test_regex_eol_corpus` test in `tests/evaluator_tests.rs`.
+#[test]
+fn test_regex_eol_version_extraction() {
+    let buffer = load_corpus_file("regex-eol.testfile");
+
+    // Match a version number anywhere in the first line.
+    let version_rule = regex_rule(
+        OffsetSpec::Absolute(0),
+        r"[0-9]+(\.[0-9]+)+",
+        RegexFlags {
+            line_based: true,
+            ..RegexFlags::default()
+        },
+        Some(1),
+        "version found",
+        vec![],
+        0,
+    );
+
+    let matches = run_rules(&[version_rule], &buffer);
+    assert_eq!(matches.len(), 1);
+    assert_eq!(matches[0].message, "version found");
+    // The matched value must be non-empty (`all` alone is vacuously true for "").
+    match &matches[0].value {
+        Value::String(s) => assert!(
+            !s.is_empty() && s.chars().all(|c| c.is_ascii_digit() || c == '.'),
+            "matched text should be a version number, got {s:?}"
+        ),
+        other => panic!("expected Value::String, got {other:?}"),
+    }
+}
+
+// =====================================================================
+// Meta: corpus files exist
+// =====================================================================
+
+#[test]
+fn test_corpus_files_exist() {
+    // Every corpus file the tests in this file load must be present.
+    const REQUIRED: [&str; 6] = [
+        "searchbug.testfile",
+        "json1.testfile",
+        "jsonlines1.testfile",
+        "cmd1.testfile",
+        "gedcom.testfile",
+        "regex-eol.testfile",
+    ];
+    for name in REQUIRED {
+        let path = format!("{CORPUS_DIR}/{name}");
+        let present = std::path::Path::new(&path).exists();
+        assert!(present, "corpus file missing: {path}");
+    }
+}