diff --git a/crates/sempai-core/Cargo.toml b/crates/sempai-core/Cargo.toml
index 38f44a52..1379232d 100644
--- a/crates/sempai-core/Cargo.toml
+++ b/crates/sempai-core/Cargo.toml
@@ -9,6 +9,7 @@ test-support = []
 
 [dependencies]
 serde = { workspace = true }
+serde_json = { workspace = true }
 thiserror = { workspace = true }
 
 [dev-dependencies]
diff --git a/crates/sempai-core/src/formula.rs b/crates/sempai-core/src/formula.rs
new file mode 100644
index 00000000..41eb6f9d
--- /dev/null
+++ b/crates/sempai-core/src/formula.rs
@@ -0,0 +1,145 @@
+//! Canonical normalised formula model shared by legacy and v2 paths.
+//!
+//! The [`Formula`] enum is the single internal representation that both
+//! legacy Semgrep operators and v2 `match` syntax lower into.  It lives
+//! in `sempai_core` so that every downstream crate (including the future
+//! `sempai_ts` backend) can depend on it without pulling in the YAML
+//! parser.
+//!
+//! # Design
+//!
+//! The formula tree mirrors the logical structure of a Semgrep query:
+//!
+//! - [`Atom`] — a leaf pattern or regex to match against source code.
+//! - [`Formula::And`] / [`Formula::Or`] — Boolean combinators.
+//! - [`Formula::Not`] / [`Formula::Inside`] / [`Formula::Anywhere`] —
+//!   unary modifiers.
+//! - [`Formula::Constraint`] — opaque constraint preserved for later
+//!   evaluation (e.g. `metavariable-regex`, `metavariable-pattern`).
+//! - [`Decorated`] — metadata wrapper carrying `where`, `as`, and `fix`
+//!   annotations from v2 syntax.
+
+use serde_json::Value;
+
+/// A leaf pattern or regex to match against source code.
+///
+/// # Example
+///
+/// ```
+/// use sempai_core::formula::Atom;
+///
+/// let atom = Atom::Pattern(String::from("foo($X)"));
+/// assert!(matches!(atom, Atom::Pattern(_)));
+/// ```
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Atom {
+    /// A concrete code pattern (e.g. `foo($X)`).
+    Pattern(String),
+    /// A regular expression pattern.
+    Regex(String),
+    /// A raw Tree-sitter S-expression query.
+    TreeSitterQuery(String),
+}
+
+/// Metadata wrapper carrying v2 `where`, `as`, and `fix` annotations.
+///
+/// Legacy formulas produce bare `Decorated` values with empty metadata.
+/// v2 formulas propagate decoration from the parsed `Decorated` variant.
+///
+/// # Example
+///
+/// ```
+/// use sempai_core::formula::{Atom, Decorated, Formula};
+///
+/// let bare = Decorated::bare(Formula::Atom(Atom::Pattern(String::from("x"))));
+/// assert!(bare.where_clauses.is_empty());
+/// assert!(bare.as_name.is_none());
+/// ```
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Decorated<T> {
+    /// The wrapped formula node.
+    pub node: T,
+    /// Raw `where` clauses preserved for later constraint evaluation.
+    pub where_clauses: Vec<Value>,
+    /// Optional metavariable alias name.
+    pub as_name: Option<String>,
+    /// Optional inline fix text.
+    pub fix: Option<String>,
+}
+
+impl<T> Decorated<T> {
+    /// Wraps a node with no decoration metadata.
+    pub const fn bare(node: T) -> Self {
+        Self {
+            node,
+            where_clauses: Vec::new(),
+            as_name: None,
+            fix: None,
+        }
+    }
+
+    /// Wraps a node with full decoration metadata.
+    pub const fn with_metadata(
+        node: T,
+        where_clauses: Vec<Value>,
+        as_name: Option<String>,
+        fix: Option<String>,
+    ) -> Self {
+        Self {
+            node,
+            where_clauses,
+            as_name,
+            fix,
+        }
+    }
+}
+
+/// Canonical normalised formula for a single search rule.
+///
+/// Both legacy Semgrep operators and v2 `match` syntax lower into this
+/// representation.  The tree is validated for semantic constraints after
+/// construction.
+///
+/// # Example
+///
+/// ```
+/// use sempai_core::formula::{Atom, Decorated, Formula};
+///
+/// let conjunction = Formula::And(vec![
+///     Decorated::bare(Formula::Atom(Atom::Pattern(String::from("foo($X)")))),
+///     Decorated::bare(Formula::Not(Box::new(
+///         Decorated::bare(Formula::Atom(Atom::Pattern(String::from("bar($X)")))),
+///     ))),
+/// ]);
+/// assert!(matches!(conjunction, Formula::And(_)));
+/// ```
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Formula {
+    /// A leaf pattern or regex.
+    Atom(Atom),
+    /// Logical negation.
+    Not(Box<Decorated<Formula>>),
+    /// Scope restriction (`pattern-inside` / `inside`).
+    Inside(Box<Decorated<Formula>>),
+    /// Unrestricted scope (`semgrep-internal-pattern-anywhere` / `anywhere`).
+    Anywhere(Box<Decorated<Formula>>),
+    /// Logical conjunction (`patterns` / `all`).
+    And(Vec<Decorated<Formula>>),
+    /// Logical disjunction (`pattern-either` / `any`).
+    Or(Vec<Decorated<Formula>>),
+    /// Opaque constraint preserved for later evaluation.
+    Constraint(Value),
+}
+
+impl Formula {
+    /// Returns `true` if this formula node is a positive match-producing
+    /// term.
+    ///
+    /// Positive terms are `Atom`, `And`, and `Or` nodes.  `Not`, `Inside`,
+    /// `Anywhere`, and `Constraint` are not positive terms for the purpose
+    /// of the `MissingPositiveTermInAnd` semantic check.
+    #[must_use]
+    pub const fn is_positive_term(&self) -> bool {
+        matches!(self, Self::Atom(_) | Self::And(_) | Self::Or(_))
+    }
+}
diff --git a/crates/sempai-core/src/lib.rs b/crates/sempai-core/src/lib.rs
index ab8d4303..818c4486 100644
--- a/crates/sempai-core/src/lib.rs
+++ b/crates/sempai-core/src/lib.rs
@@ -14,6 +14,7 @@
 //! - [`CaptureValue`] and [`CapturedNode`] — metavariable bindings
 //! - [`DiagnosticReport`] and [`Diagnostic`] — structured error reporting
 //! - [`EngineConfig`] and [`EngineLimits`] — performance and safety limits
+//! - [`Formula`], [`Atom`], and [`Decorated`] — canonical normalised formula model
 //!
 //! # Example
 //!
@@ -28,6 +29,7 @@
 mod capture;
 mod config;
 mod diagnostic;
+pub mod formula;
 mod language;
 mod match_result;
 mod span;
@@ -35,6 +37,7 @@ mod span;
 pub use capture::{CaptureValue, CapturedNode};
 pub use config::{EngineConfig, EngineLimits};
 pub use diagnostic::{Diagnostic, DiagnosticCode, DiagnosticReport, SourceSpan};
+pub use formula::{Atom, Decorated, Formula};
 pub use language::{Language, LanguageParseError};
 pub use match_result::Match;
 pub use span::{LineCol, Span};
diff --git a/crates/sempai-core/src/tests/formula_tests.rs b/crates/sempai-core/src/tests/formula_tests.rs
new file mode 100644
index 00000000..f074d25a
--- /dev/null
+++ b/crates/sempai-core/src/tests/formula_tests.rs
@@ -0,0 +1,146 @@
+//! Tests for the canonical formula model types.
+
+use crate::formula::{Atom, Decorated, Formula};
+
+// -----------------------------------------------------------------------
+// Atom construction
+// -----------------------------------------------------------------------
+
+#[test]
+fn atom_pattern_stores_string() {
+    let atom = Atom::Pattern(String::from("foo($X)"));
+    assert_eq!(atom, Atom::Pattern(String::from("foo($X)")));
+}
+
+#[test]
+fn atom_regex_stores_string() {
+    let atom = Atom::Regex(String::from("foo.*"));
+    assert_eq!(atom, Atom::Regex(String::from("foo.*")));
+}
+
+#[test]
+fn atom_tree_sitter_query_stores_string() {
+    let atom = Atom::TreeSitterQuery(String::from("(call_expression)"));
+    assert_eq!(
+        atom,
+        Atom::TreeSitterQuery(String::from("(call_expression)"))
+    );
+}
+
+// -----------------------------------------------------------------------
+// Decorated wrapper
+// -----------------------------------------------------------------------
+
+#[test]
+fn decorated_bare_has_empty_metadata() {
+    let d = Decorated::bare(Formula::Atom(Atom::Pattern(String::from("x"))));
+    assert!(d.where_clauses.is_empty());
+    assert!(d.as_name.is_none());
+    assert!(d.fix.is_none());
+}
+
+#[test]
+fn decorated_with_metadata_preserves_fields() {
+    let d = Decorated::with_metadata(
+        Formula::Atom(Atom::Regex(String::from("r"))),
+        vec![serde_json::json!({"metavariable": "$X"})],
+        Some(String::from("alias")),
+        Some(String::from("fix: $X")),
+    );
+    assert_eq!(d.where_clauses.len(), 1);
+    assert_eq!(d.as_name.as_deref(), Some("alias"));
+    assert_eq!(d.fix.as_deref(), Some("fix: $X"));
+}
+
+// -----------------------------------------------------------------------
+// Formula construction and equality
+// -----------------------------------------------------------------------
+
+#[test]
+fn formula_atom_equality() {
+    let a = Formula::Atom(Atom::Pattern(String::from("x")));
+    let b = Formula::Atom(Atom::Pattern(String::from("x")));
+    assert_eq!(a, b);
+}
+
+#[test]
+fn formula_not_wraps_inner() {
+    let inner = Decorated::bare(Formula::Atom(Atom::Pattern(String::from("x"))));
+    let formula = Formula::Not(Box::new(inner.clone()));
+    assert_eq!(formula, Formula::Not(Box::new(inner)));
+}
+
+#[test]
+fn formula_and_holds_children() {
+    let children = vec![
+        Decorated::bare(Formula::Atom(Atom::Pattern(String::from("a")))),
+        Decorated::bare(Formula::Atom(Atom::Pattern(String::from("b")))),
+    ];
+    let formula = Formula::And(children.clone());
+    assert_eq!(formula, Formula::And(children));
+}
+
+#[test]
+fn formula_or_holds_branches() {
+    let branches = vec![
+        Decorated::bare(Formula::Atom(Atom::Pattern(String::from("a")))),
+        Decorated::bare(Formula::Atom(Atom::Pattern(String::from("b")))),
+    ];
+    let formula = Formula::Or(branches.clone());
+    assert_eq!(formula, Formula::Or(branches));
+}
+
+#[test]
+fn formula_constraint_preserves_json() {
+    let val = serde_json::json!({"metavariable-regex": {"metavariable": "$X", "regex": "foo"}});
+    let formula = Formula::Constraint(val.clone());
+    assert_eq!(formula, Formula::Constraint(val));
+}
+
+// -----------------------------------------------------------------------
+// is_positive_term classification
+// -----------------------------------------------------------------------
+
+#[test]
+fn atom_is_positive() {
+    assert!(Formula::Atom(Atom::Pattern(String::from("x"))).is_positive_term());
+}
+
+#[test]
+fn and_is_positive() {
+    assert!(Formula::And(vec![]).is_positive_term());
+}
+
+#[test]
+fn or_is_positive() {
+    assert!(Formula::Or(vec![]).is_positive_term());
+}
+
+#[test]
+fn not_is_not_positive() {
+    let f = Formula::Not(Box::new(Decorated::bare(Formula::Atom(Atom::Pattern(
+        String::from("x"),
+    )))));
+    assert!(!f.is_positive_term());
+}
+
+#[test]
+fn inside_is_not_positive() {
+    let f = Formula::Inside(Box::new(Decorated::bare(Formula::Atom(Atom::Pattern(
+        String::from("x"),
+    )))));
+    assert!(!f.is_positive_term());
+}
+
+#[test]
+fn anywhere_is_not_positive() {
+    let f = Formula::Anywhere(Box::new(Decorated::bare(Formula::Atom(Atom::Pattern(
+        String::from("x"),
+    )))));
+    assert!(!f.is_positive_term());
+}
+
+#[test]
+fn constraint_is_not_positive() {
+    assert!(!Formula::Constraint(serde_json::json!({})).is_positive_term());
+}
diff --git a/crates/sempai-core/src/tests/mod.rs b/crates/sempai-core/src/tests/mod.rs
index c2a65bd1..34b67b84 100644
--- a/crates/sempai-core/src/tests/mod.rs
+++ b/crates/sempai-core/src/tests/mod.rs
@@ -4,6 +4,7 @@ mod capture_tests;
 mod config_tests;
 mod diagnostic_snapshot_tests;
 mod diagnostic_tests;
+mod formula_tests;
 mod language_tests;
 mod match_tests;
 mod span_tests;
diff --git a/crates/sempai/src/engine.rs b/crates/sempai/src/engine.rs
index c8062bde..0ce9685b 100644
--- a/crates/sempai/src/engine.rs
+++ b/crates/sempai/src/engine.rs
@@ -5,10 +5,12 @@
 //! Compilation and execution are separate phases, allowing a compiled
 //! [`QueryPlan`] to be reused across multiple source files.
 
+use sempai_core::formula::Formula;
 use sempai_core::{DiagnosticReport, EngineConfig, Language, Match};
-use sempai_yaml::parse_rule_file;
+use sempai_yaml::{RuleMode, RulePrincipal, parse_rule_file};
 
 use crate::mode_validation::validate_supported_modes;
+use crate::normalise::{normalise_search_principal, validate_formula_constraints};
 
 /// A compiled query plan for one rule and one language.
 ///
@@ -19,37 +21,40 @@ use crate::mode_validation::validate_supported_modes;
 /// # Example
 ///
 /// ```
-/// use sempai::{Engine, EngineConfig, Language};
+/// use sempai::{Engine, EngineConfig};
 ///
+/// let yaml = concat!(
+///     "rules:\n",
+///     "  - id: demo.rule\n",
+///     "    message: detect foo\n",
+///     "    languages: [rust]\n",
+///     "    severity: ERROR\n",
+///     "    pattern: foo($X)\n",
+/// );
 /// let engine = Engine::new(EngineConfig::default());
-/// // compile_dsl currently returns an error (not yet implemented)
-/// let result = engine.compile_dsl("rule-1", Language::Rust, "pattern(\"fn $F\")");
-/// assert!(result.is_err());
+/// let plans = engine.compile_yaml(yaml).expect("valid rule");
+/// assert_eq!(plans.len(), 1);
+/// assert!(plans[0].formula().is_some());
 /// ```
 #[derive(Debug, Clone)]
 pub struct QueryPlan {
     rule_id: String,
     language: Language,
-    /// Placeholder for the internal plan representation.  Will be replaced
-    /// by `sempai_core::PlanNode` once the normalization layer is built.
-    _plan: (),
+    formula: Option<Formula>,
 }
 
 impl QueryPlan {
-    /// Creates a new query plan (crate-internal).
-    // FIXME: remove `#[cfg(test)]` when `compile_yaml` / `compile_dsl` produce
-    // real plans — https://github.com/leynos/weaver/issues/67
-    #[cfg(test)]
+    /// Creates a new query plan.
     #[must_use]
     #[expect(
         clippy::missing_const_for_fn,
         reason = "heap types cannot be used in const contexts"
     )]
-    pub(crate) fn new(rule_id: String, language: Language) -> Self {
+    pub(crate) fn new(rule_id: String, language: Language, formula: Option<Formula>) -> Self {
         Self {
             rule_id,
             language,
-            _plan: (),
+            formula,
         }
     }
 
@@ -64,6 +69,15 @@ impl QueryPlan {
     pub const fn language(&self) -> Language {
         self.language
     }
+
+    /// Returns the normalised formula, if this rule has formula semantics.
+    ///
+    /// `ProjectDependsOn` rules return `None` because they have no
+    /// formula representation.
+    #[must_use]
+    pub const fn formula(&self) -> Option<&Formula> {
+        self.formula.as_ref()
+    }
 }
 
 /// Compiles and executes Semgrep-compatible queries on Tree-sitter syntax
@@ -80,9 +94,11 @@ impl QueryPlan {
 /// use sempai::{Engine, EngineConfig};
 ///
 /// let engine = Engine::new(EngineConfig::default());
-/// let result = engine.compile_yaml("rules: []");
-/// // Malformed YAML and schema errors now surface parser diagnostics.
-/// assert!(result.is_err());
+/// // Malformed YAML surfaces parser diagnostics.
+/// assert!(engine.compile_yaml("not valid yaml: [").is_err());
+/// // A valid empty rule file produces an empty plan list.
+/// let plans = engine.compile_yaml("rules: []").expect("valid");
+/// assert!(plans.is_empty());
 /// ```
 #[derive(Debug)]
 pub struct Engine {
@@ -104,18 +120,35 @@ impl Engine {
 
     /// Compiles a YAML rule file into query plans.
     ///
-    /// # Errors
+    /// Each search-mode rule produces one [`QueryPlan`] per declared
+    /// language.  The plan carries a normalised [`Formula`] (or `None`
+    /// for `ProjectDependsOn` rules).
     ///
-    /// Returns a diagnostic report if parsing or validation fails.
+    /// # Errors
     ///
-    /// Successful YAML parsing still stops at the post-parse placeholder until
-    /// rule normalization is implemented.
+    /// Returns a diagnostic report if parsing, mode validation,
+    /// normalization, or semantic constraint validation fails.
     pub fn compile_yaml(&self, yaml: &str) -> Result<Vec<QueryPlan>, DiagnosticReport> {
         let file = parse_rule_file(yaml, None)?;
         validate_supported_modes(&file)?;
-        Err(DiagnosticReport::not_implemented(
-            "compile_yaml query-plan normalization",
-        ))
+
+        let mut plans = Vec::new();
+        for rule in file.rules() {
+            if *rule.mode() != RuleMode::Search {
+                continue;
+            }
+            let RulePrincipal::Search(principal) = rule.principal() else {
+                continue;
+            };
+
+            let formula = normalise_search_principal(principal)?;
+            if let Some(ref f) = formula {
+                validate_formula_constraints(f)?;
+            }
+
+            compile_rule_languages(rule.id(), rule.languages(), formula.as_ref(), &mut plans)?;
+        }
+        Ok(plans)
     }
 
     /// Compiles a one-liner query DSL expression into a query plan.
@@ -148,3 +181,23 @@ impl Engine {
         Err(DiagnosticReport::not_implemented("execute"))
     }
 }
+
+/// Parses declared languages and appends query plans for a single rule.
+fn compile_rule_languages(
+    rule_id: &str,
+    languages: &[String],
+    formula: Option<&Formula>,
+    plans: &mut Vec<QueryPlan>,
+) -> Result<(), DiagnosticReport> {
+    for lang_str in languages {
+        let language: Language = lang_str
+            .parse()
+            .map_err(|_| DiagnosticReport::not_implemented(&format!("language `{lang_str}`")))?;
+        plans.push(QueryPlan::new(
+            String::from(rule_id),
+            language,
+            formula.cloned(),
+        ));
+    }
+    Ok(())
+}
diff --git a/crates/sempai/src/lib.rs b/crates/sempai/src/lib.rs
index e0f0b815..d4adadcc 100644
--- a/crates/sempai/src/lib.rs
+++ b/crates/sempai/src/lib.rs
@@ -19,6 +19,7 @@
 //! - [`CaptureValue`] and [`CapturedNode`] — metavariable bindings
 //! - [`DiagnosticReport`] and [`Diagnostic`] — structured error reporting
 //! - [`EngineConfig`] and [`EngineLimits`] — performance and safety limits
+//! - [`Formula`], [`Atom`], and [`Decorated`] — canonical normalised formula model
 //! - [`Engine`] — the query compilation and execution entrypoint
 //! - [`QueryPlan`] — a compiled query plan
 //!
@@ -37,11 +38,13 @@
 
 mod engine;
 mod mode_validation;
+mod normalise;
 
 // Re-export all stable types from sempai_core.
 pub use sempai_core::{
-    CaptureValue, CapturedNode, Diagnostic, DiagnosticCode, DiagnosticReport, EngineConfig,
-    EngineLimits, Language, LanguageParseError, LineCol, Match, SourceSpan, Span,
+    Atom, CaptureValue, CapturedNode, Decorated, Diagnostic, DiagnosticCode, DiagnosticReport,
+    EngineConfig, EngineLimits, Formula, Language, LanguageParseError, LineCol, Match, SourceSpan,
+    Span,
 };
 
 pub use engine::{Engine, QueryPlan};
diff --git a/crates/sempai/src/normalise/constraints.rs b/crates/sempai/src/normalise/constraints.rs
new file mode 100644
index 00000000..5824c785
--- /dev/null
+++ b/crates/sempai/src/normalise/constraints.rs
@@ -0,0 +1,96 @@
+//! Semantic constraint validation for the canonical [`Formula`] tree.
+//!
+//! After normalization, the formula tree must satisfy two invariants:
+//!
+//! 1. **No negation inside disjunction** — a `Not` node must not appear
+//!    as a direct child of an `Or` node.
+//! 2. **Positive term in conjunction** — every `And` node must contain at
+//!    least one positive match-producing child.
+
+use sempai_core::formula::{Decorated, Formula};
+use sempai_core::{DiagnosticCode, DiagnosticReport};
+
+/// Validates semantic constraints on a normalised [`Formula`] tree.
+///
+/// # Errors
+///
+/// Returns a [`DiagnosticReport`] containing the first constraint
+/// violation found during a depth-first walk.
+pub(crate) fn validate_formula_constraints(formula: &Formula) -> Result<(), DiagnosticReport> {
+    walk(formula)
+}
+
+/// Depth-first constraint walk.
+fn walk(formula: &Formula) -> Result<(), DiagnosticReport> {
+    match formula {
+        Formula::Or(branches) => {
+            check_no_not_in_or(branches)?;
+            for branch in branches {
+                walk(&branch.node)?;
+            }
+        }
+        Formula::And(terms) => {
+            check_positive_term_in_and(terms)?;
+            for term in terms {
+                walk(&term.node)?;
+            }
+        }
+        Formula::Not(inner) | Formula::Inside(inner) | Formula::Anywhere(inner) => {
+            walk(&inner.node)?;
+        }
+        Formula::Atom(_) | Formula::Constraint(_) => {}
+    }
+    Ok(())
+}
+
+/// Checks that no direct child of an `Or` node is a `Not`.
+fn check_no_not_in_or(branches: &[Decorated<Formula>]) -> Result<(), DiagnosticReport> {
+    for branch in branches {
+        if matches!(branch.node, Formula::Not(_)) {
+            return Err(DiagnosticReport::validation_error(
+                DiagnosticCode::ESempaiInvalidNotInOr,
+                String::from(
+                    "negated formula inside disjunction \
+                     (`pattern-either` / `any`) is not permitted",
+                ),
+                None,
+                vec![String::from(
+                    "move the negated term into a conjunction \
+                     (`patterns` / `all`) instead",
+                )],
+            ));
+        }
+    }
+    Ok(())
+}
+
+/// Checks that an `And` node has at least one positive term.
+///
+/// Positive terms are `Atom`, `And`, and `Or` nodes.  `Not`, `Inside`,
+/// `Anywhere`, and `Constraint` are not positive for this check.
+///
+/// **Exception:** if all terms are `Constraint` nodes (e.g.
+/// `metavariable-pattern` contexts), the conjunction is accepted because
+/// constraints may act as implicit positive terms in later evaluation.
+fn check_positive_term_in_and(terms: &[Decorated<Formula>]) -> Result<(), DiagnosticReport> {
+    let has_positive = terms.iter().any(|t| t.node.is_positive_term());
+    let all_constraints = terms
+        .iter()
+        .all(|t| matches!(t.node, Formula::Constraint(_)));
+
+    if !has_positive && !all_constraints {
+        return Err(DiagnosticReport::validation_error(
+            DiagnosticCode::ESempaiMissingPositiveTermInAnd,
+            String::from(
+                "conjunction (`patterns` / `all`) contains no positive \
+                 match-producing term",
+            ),
+            None,
+            vec![String::from(
+                "add at least one `pattern`, `regex`, or nested \
+                 conjunction/disjunction",
+            )],
+        ));
+    }
+    Ok(())
+}
diff --git a/crates/sempai/src/normalise/legacy.rs b/crates/sempai/src/normalise/legacy.rs
new file mode 100644
index 00000000..2189a2ff
--- /dev/null
+++ b/crates/sempai/src/normalise/legacy.rs
@@ -0,0 +1,60 @@
+//! Legacy Semgrep operator normalization into canonical [`Formula`].
+//!
+//! Each legacy operator variant maps deterministically to a [`Formula`]
+//! node following the mapping table in the `ExecPlan` and the Semgrep
+//! operator-precedence document.
+
+use sempai_core::formula::{Atom, Decorated, Formula};
+use sempai_yaml::{LegacyClause, LegacyFormula, LegacyValue};
+
+/// Normalises a legacy formula tree into a canonical [`Formula`].
+pub(crate) fn normalise_legacy(formula: &LegacyFormula) -> Formula {
+    match formula {
+        LegacyFormula::Pattern(s) => Formula::Atom(Atom::Pattern(s.clone())),
+
+        LegacyFormula::PatternRegex(s) => Formula::Atom(Atom::Regex(s.clone())),
+
+        LegacyFormula::Patterns(clauses) => {
+            let children = clauses.iter().map(normalise_clause).collect();
+            Formula::And(children)
+        }
+
+        LegacyFormula::PatternEither(branches) => {
+            let children = branches
+                .iter()
+                .map(|b| Decorated::bare(normalise_legacy(b)))
+                .collect();
+            Formula::Or(children)
+        }
+
+        LegacyFormula::PatternNot(value) => Formula::Not(Box::new(normalise_value(value))),
+
+        LegacyFormula::PatternInside(value) => Formula::Inside(Box::new(normalise_value(value))),
+
+        LegacyFormula::PatternNotInside(value) => Formula::Not(Box::new(Decorated::bare(
+            Formula::Inside(Box::new(normalise_value(value))),
+        ))),
+
+        LegacyFormula::PatternNotRegex(s) => Formula::Not(Box::new(Decorated::bare(
+            Formula::Atom(Atom::Regex(s.clone())),
+        ))),
+
+        LegacyFormula::Anywhere(value) => Formula::Anywhere(Box::new(normalise_value(value))),
+    }
+}
+
+/// Normalises a [`LegacyValue`] (string shorthand or nested formula).
+fn normalise_value(value: &LegacyValue) -> Decorated<Formula> {
+    match value {
+        LegacyValue::String(s) => Decorated::bare(Formula::Atom(Atom::Pattern(s.clone()))),
+        LegacyValue::Formula(f) => Decorated::bare(normalise_legacy(f)),
+    }
+}
+
+/// Normalises a [`LegacyClause`] inside a `patterns` conjunction.
+fn normalise_clause(clause: &LegacyClause) -> Decorated<Formula> {
+    match clause {
+        LegacyClause::Formula(f) => Decorated::bare(normalise_legacy(f)),
+        LegacyClause::Constraint(v) => Decorated::bare(Formula::Constraint(v.clone())),
+    }
+}
diff --git a/crates/sempai/src/normalise/mod.rs b/crates/sempai/src/normalise/mod.rs
new file mode 100644
index 00000000..9b7c8e3c
--- /dev/null
+++ b/crates/sempai/src/normalise/mod.rs
@@ -0,0 +1,50 @@
+//! Normalization of parsed Semgrep queries into the canonical [`Formula`]
+//! model.
+//!
+//! This module lowers both legacy Semgrep operators and v2 `match` syntax
+//! into a single [`Formula`] tree defined in `sempai_core`.  After
+//! normalization, [`validate_formula_constraints`] enforces semantic
+//! invariants on the tree.
+//!
+//! Normalization lives in the `sempai` facade crate rather than in
+//! `sempai_core` because it depends on parser-level types from
+//! `sempai_yaml`, and `sempai_yaml` already depends on `sempai_core`.
+//! Placing it here avoids a circular dependency.
+
+mod constraints;
+mod legacy;
+mod v2;
+
+pub(crate) use constraints::validate_formula_constraints;
+
+use sempai_core::DiagnosticReport;
+use sempai_core::formula::Formula;
+use sempai_yaml::SearchQueryPrincipal;
+
+/// Normalises a parsed [`SearchQueryPrincipal`] into a canonical
+/// [`Formula`].
+///
+/// Returns `Ok(Some(formula))` for legacy and v2 match principals, or
+/// `Ok(None)` for `ProjectDependsOn` rules which have no formula
+/// semantics.
+///
+/// # Errors
+///
+/// Returns a [`DiagnosticReport`] if the principal contains structurally
+/// invalid content that cannot be lowered.  In practice the YAML parser
+/// catches structural issues, so this path is defensive.
+#[expect(
+    clippy::unnecessary_wraps,
+    reason = "Result is needed once normalization can report lowering errors"
+)]
+pub(crate) fn normalise_search_principal(
+    principal: &SearchQueryPrincipal,
+) -> Result<Option<Formula>, DiagnosticReport> {
+    match principal {
+        SearchQueryPrincipal::Legacy(legacy) => Ok(Some(legacy::normalise_legacy(legacy))),
+        SearchQueryPrincipal::Match(match_formula) => {
+            Ok(Some(v2::normalise_match(match_formula).node))
+        }
+        SearchQueryPrincipal::ProjectDependsOn(_) => Ok(None),
+    }
+}
diff --git a/crates/sempai/src/normalise/v2.rs b/crates/sempai/src/normalise/v2.rs
new file mode 100644
index 00000000..dde27f96
--- /dev/null
+++ b/crates/sempai/src/normalise/v2.rs
@@ -0,0 +1,57 @@
+//! v2 `match` formula normalization into canonical [`Formula`].
+//!
+//! Each v2 match operator variant maps deterministically to a [`Formula`]
+//! node, with [`Decorated`] metadata preserved through the lowering.
+
+use sempai_core::formula::{Atom, Decorated, Formula};
+use sempai_yaml::MatchFormula;
+
+/// Normalises a v2 match formula into a [`Decorated<Formula>`].
+///
+/// The outer [`Decorated`] wrapper carries any `where`, `as`, or `fix`
+/// metadata from a v2 `Decorated` variant.  Non-decorated nodes produce
+/// a bare wrapper.
+pub(crate) fn normalise_match(formula: &MatchFormula) -> Decorated<Formula> {
+    match formula {
+        MatchFormula::Pattern(s) | MatchFormula::PatternObject(s) => {
+            Decorated::bare(Formula::Atom(Atom::Pattern(s.clone())))
+        }
+
+        MatchFormula::Regex(s) => Decorated::bare(Formula::Atom(Atom::Regex(s.clone()))),
+
+        MatchFormula::All(items) => {
+            let children = items.iter().map(normalise_match).collect();
+            Decorated::bare(Formula::And(children))
+        }
+
+        MatchFormula::Any(items) => {
+            let children = items.iter().map(normalise_match).collect();
+            Decorated::bare(Formula::Or(children))
+        }
+
+        MatchFormula::Not(inner) => Decorated::bare(Formula::Not(Box::new(normalise_match(inner)))),
+
+        MatchFormula::Inside(inner) => {
+            Decorated::bare(Formula::Inside(Box::new(normalise_match(inner))))
+        }
+
+        MatchFormula::Anywhere(inner) => {
+            Decorated::bare(Formula::Anywhere(Box::new(normalise_match(inner))))
+        }
+
+        MatchFormula::Decorated {
+            formula: inner_formula,
+            where_clauses,
+            as_name,
+            fix,
+        } => {
+            let inner = normalise_match(inner_formula);
+            Decorated::with_metadata(
+                inner.node,
+                where_clauses.clone(),
+                as_name.clone(),
+                fix.clone(),
+            )
+        }
+    }
+}
diff --git a/crates/sempai/src/tests/behaviour.rs b/crates/sempai/src/tests/behaviour.rs
index 05386ce5..63a15c22 100644
--- a/crates/sempai/src/tests/behaviour.rs
+++ b/crates/sempai/src/tests/behaviour.rs
@@ -14,7 +14,7 @@ use crate::{DiagnosticReport, Engine, EngineConfig, Language};
 #[derive(Default)]
 struct TestWorld {
     engine: Option<Engine>,
-    compile_result: Option<Result<(), DiagnosticReport>>,
+    compile_result: Option<Result<Vec<QueryPlan>, DiagnosticReport>>,
     execute_result: Option<Result<(), DiagnosticReport>>,
 }
 
@@ -39,7 +39,8 @@ fn given_default_engine(world: &mut TestWorld) {
 #[when("YAML {yaml} is compiled")]
 fn when_compile_yaml(world: &mut TestWorld, yaml: QuotedString) {
     let engine = world.engine.as_ref().expect("engine should be set");
-    world.compile_result = Some(engine.compile_yaml(yaml.as_str()).map(|_| ()));
+    let yaml_text = yaml.as_str().replace("\\n", "\n");
+    world.compile_result = Some(engine.compile_yaml(&yaml_text));
 }
 
 #[when("DSL {dsl} is compiled for language {lang}")]
@@ -49,14 +50,14 @@ fn when_compile_dsl(world: &mut TestWorld, dsl: QuotedString, lang: QuotedString
     world.compile_result = Some(
         engine
             .compile_dsl("interactive", language, dsl.as_str())
-            .map(|_| ()),
+            .map(|plan| vec![plan]),
     );
 }
 
 #[when("a query plan is executed")]
 fn when_execute(world: &mut TestWorld) {
     let engine = world.engine.as_ref().expect("engine should be set");
-    let plan = QueryPlan::new(String::from("test-rule"), Language::Rust);
+    let plan = QueryPlan::new(String::from("test-rule"), Language::Rust, None);
     world.execute_result = Some(
         engine
             .execute(&plan, "file:///t.rs", "fn main() {}")
@@ -68,26 +69,17 @@ fn when_execute(world: &mut TestWorld) {
 // Helpers
 // ---------------------------------------------------------------------------
 
-/// Asserts that a diagnostic result contains a specific error code.
-fn assert_diagnostic_code(
-    result: Option<&Result<(), DiagnosticReport>>,
-    expected_code: &str,
+/// Extracts the diagnostic report from an error result.
+fn extract_report(
+    result: Option<&Result<Vec<QueryPlan>, DiagnosticReport>>,
     result_name: &str,
     failure_kind: &str,
-) {
+) -> DiagnosticReport {
     let inner = result.unwrap_or_else(|| panic!("{result_name} should be set"));
-    let report = inner
+    inner
         .as_ref()
-        .expect_err(&format!("expected {failure_kind}"));
-    let first = report
-        .diagnostics()
-        .first()
-        .expect("at least one diagnostic");
-    let actual_code = format!("{}", first.code());
-    assert_eq!(
-        actual_code, expected_code,
-        "expected code '{expected_code}', got '{actual_code}'"
-    );
+        .expect_err(&format!("expected {failure_kind}"))
+        .clone()
 }
 
 // ---------------------------------------------------------------------------
@@ -102,22 +94,40 @@ fn then_engine_max_matches(world: &mut TestWorld, count: usize) {
 
 #[then("compilation fails with code {code}")]
 fn then_compilation_fails(world: &mut TestWorld, code: QuotedString) {
-    assert_diagnostic_code(
+    let report = extract_report(
         world.compile_result.as_ref(),
-        code.as_str(),
         "compile result",
         "compilation failure",
     );
+    let first = report
+        .diagnostics()
+        .first()
+        .expect("at least one diagnostic");
+    let actual_code = format!("{}", first.code());
+    assert_eq!(
+        actual_code,
+        code.as_str(),
+        "expected code '{}', got '{actual_code}'",
+        code.as_str(),
+    );
 }
 
-#[then("the first diagnostic message contains {snippet}")]
-fn then_first_diagnostic_message_contains(world: &mut TestWorld, snippet: QuotedString) {
-    let report = world
+#[then("compilation succeeds")]
+fn then_compilation_succeeds(world: &mut TestWorld) {
+    let result = world
         .compile_result
         .as_ref()
-        .expect("compile result should be set")
-        .as_ref()
-        .expect_err("expected compilation failure");
+        .expect("compile result should be set");
+    assert!(result.is_ok(), "expected compilation to succeed");
+}
+
+#[then("the first diagnostic message contains {snippet}")]
+fn then_first_diagnostic_message_contains(world: &mut TestWorld, snippet: QuotedString) {
+    let report = extract_report(
+        world.compile_result.as_ref(),
+        "compile result",
+        "compilation failure",
+    );
     let first = report
         .diagnostics()
         .first()
@@ -125,13 +135,70 @@ fn then_first_diagnostic_message_contains(world: &mut TestWorld, snippet: Quoted
     assert!(first.message().contains(snippet.as_str()));
 }
 
+#[then("the result contains {count} query plan")]
+fn then_result_contains_n_plans(world: &mut TestWorld, count: usize) {
+    let plans = world
+        .compile_result
+        .as_ref()
+        .expect("compile result should be set")
+        .as_ref()
+        .expect("expected compilation success");
+    assert_eq!(plans.len(), count);
+}
+
+#[then("the first plan has a formula")]
+fn then_first_plan_has_formula(world: &mut TestWorld) {
+    let plans = world
+        .compile_result
+        .as_ref()
+        .expect("compile result should be set")
+        .as_ref()
+        .expect("expected compilation success");
+    assert!(
+        plans
+            .first()
+            .expect("at least one plan")
+            .formula()
+            .is_some(),
+        "expected plan to have a formula",
+    );
+}
+
+#[then("the first plan has no formula")]
+fn then_first_plan_has_no_formula(world: &mut TestWorld) {
+    let plans = world
+        .compile_result
+        .as_ref()
+        .expect("compile result should be set")
+        .as_ref()
+        .expect("expected compilation success");
+    assert!(
+        plans
+            .first()
+            .expect("at least one plan")
+            .formula()
+            .is_none(),
+        "expected plan to have no formula",
+    );
+}
+
 #[then("execution fails with code {code}")]
 fn then_execution_fails(world: &mut TestWorld, code: QuotedString) {
-    assert_diagnostic_code(
-        world.execute_result.as_ref(),
+    let inner = world
+        .execute_result
+        .as_ref()
+        .expect("execute result should be set");
+    let report = inner.as_ref().expect_err("expected execution failure");
+    let first = report
+        .diagnostics()
+        .first()
+        .expect("at least one diagnostic");
+    let actual_code = format!("{}", first.code());
+    assert_eq!(
+        actual_code,
+        code.as_str(),
+        "expected code '{}', got '{actual_code}'",
         code.as_str(),
-        "execute result",
-        "execution failure",
     );
 }
 
@@ -143,3 +210,8 @@ fn then_execution_fails(world: &mut TestWorld, code: QuotedString) {
 fn sempai_engine_behaviour(world: TestWorld) {
     let _ = world;
 }
+
+#[scenario(path = "tests/features/formula_normalization.feature")]
+fn formula_normalization_behaviour(world: TestWorld) {
+    let _ = world;
+}
diff --git a/crates/sempai/src/tests/constraint_tests.rs b/crates/sempai/src/tests/constraint_tests.rs
new file mode 100644
index 00000000..2b611e92
--- /dev/null
+++ b/crates/sempai/src/tests/constraint_tests.rs
@@ -0,0 +1,151 @@
+//! Unit tests for semantic constraint validation on formula trees.
+
+use sempai_core::DiagnosticCode;
+use sempai_core::formula::{Atom, Decorated, Formula};
+
+use crate::normalise::validate_formula_constraints;
+
+// -----------------------------------------------------------------------
+// Helpers
+// -----------------------------------------------------------------------
+
+fn pat(s: &str) -> Formula {
+    Formula::Atom(Atom::Pattern(String::from(s)))
+}
+fn bare(f: Formula) -> Decorated<Formula> {
+    Decorated::bare(f)
+}
+
+// -----------------------------------------------------------------------
+// InvalidNotInOr
+// -----------------------------------------------------------------------
+
+#[test]
+fn or_with_not_child_is_rejected() {
+    let formula = Formula::Or(vec![
+        bare(pat("a")),
+        bare(Formula::Not(Box::new(bare(pat("b"))))),
+    ]);
+    let err = validate_formula_constraints(&formula).expect_err("should fail");
+    let code = err.diagnostics().first().expect("at least one").code();
+    assert_eq!(code, DiagnosticCode::ESempaiInvalidNotInOr);
+}
+
+#[test]
+fn or_without_not_children_passes() {
+    let formula = Formula::Or(vec![bare(pat("a")), bare(pat("b"))]);
+    assert!(validate_formula_constraints(&formula).is_ok());
+}
+
+#[test]
+fn or_with_only_atoms_passes() {
+    let formula = Formula::Or(vec![
+        bare(pat("a")),
+        bare(Formula::Atom(Atom::Regex(String::from("r")))),
+    ]);
+    assert!(validate_formula_constraints(&formula).is_ok());
+}
+
+// -----------------------------------------------------------------------
+// MissingPositiveTermInAnd
+// -----------------------------------------------------------------------
+
+#[test]
+fn and_with_no_positive_terms_is_rejected() {
+    let formula = Formula::And(vec![
+        bare(Formula::Not(Box::new(bare(pat("a"))))),
+        bare(Formula::Not(Box::new(bare(pat("b"))))),
+    ]);
+    let err = validate_formula_constraints(&formula).expect_err("should fail");
+    let code = err.diagnostics().first().expect("at least one").code();
+    assert_eq!(code, DiagnosticCode::ESempaiMissingPositiveTermInAnd);
+}
+
+#[test]
+fn and_with_one_positive_and_one_negative_passes() {
+    let formula = Formula::And(vec![
+        bare(pat("a")),
+        bare(Formula::Not(Box::new(bare(pat("b"))))),
+    ]);
+    assert!(validate_formula_constraints(&formula).is_ok());
+}
+
+#[test]
+fn and_with_only_inside_and_anywhere_is_rejected() {
+    let formula = Formula::And(vec![
+        bare(Formula::Inside(Box::new(bare(pat("ctx"))))),
+        bare(Formula::Anywhere(Box::new(bare(pat("x"))))),
+    ]);
+    let err = validate_formula_constraints(&formula).expect_err("should fail");
+    let code = err.diagnostics().first().expect("at least one").code();
+    assert_eq!(code, DiagnosticCode::ESempaiMissingPositiveTermInAnd);
+}
+
+#[test]
+fn and_with_only_constraints_passes() {
+    // Metavariable-pattern exception: constraint-only conjunctions are
+    // accepted because they may be metavariable-pattern contexts.
+    let formula = Formula::And(vec![
+        bare(Formula::Constraint(
+            serde_json::json!({"metavariable-regex": {}}),
+        )),
+        bare(Formula::Constraint(
+            serde_json::json!({"metavariable-pattern": {}}),
+        )),
+    ]);
+    assert!(validate_formula_constraints(&formula).is_ok());
+}
+
+#[test]
+fn and_with_positive_term_and_constraint_passes() {
+    let formula = Formula::And(vec![
+        bare(pat("a")),
+        bare(Formula::Constraint(serde_json::json!({}))),
+    ]);
+    assert!(validate_formula_constraints(&formula).is_ok());
+}
+
+// -----------------------------------------------------------------------
+// Nested constraint violations
+// -----------------------------------------------------------------------
+
+#[test]
+fn nested_and_inside_or_with_no_positive_term_is_rejected() {
+    let inner_and = Formula::And(vec![bare(Formula::Not(Box::new(bare(pat("a")))))]);
+    let formula = Formula::Or(vec![bare(inner_and), bare(pat("b"))]);
+    let err = validate_formula_constraints(&formula).expect_err("should fail");
+    let code = err.diagnostics().first().expect("at least one").code();
+    assert_eq!(code, DiagnosticCode::ESempaiMissingPositiveTermInAnd);
+}
+
+#[test]
+fn nested_or_with_not_inside_and_is_rejected() {
+    let inner_or = Formula::Or(vec![
+        bare(pat("a")),
+        bare(Formula::Not(Box::new(bare(pat("b"))))),
+    ]);
+    let formula = Formula::And(vec![bare(pat("c")), bare(inner_or)]);
+    let err = validate_formula_constraints(&formula).expect_err("should fail");
+    let code = err.diagnostics().first().expect("at least one").code();
+    assert_eq!(code, DiagnosticCode::ESempaiInvalidNotInOr);
+}
+
+// -----------------------------------------------------------------------
+// Happy-path atoms and simple formulas
+// -----------------------------------------------------------------------
+
+#[test]
+fn bare_atom_passes() {
+    assert!(validate_formula_constraints(&pat("x")).is_ok());
+}
+
+#[test]
+fn bare_constraint_passes() {
+    assert!(validate_formula_constraints(&Formula::Constraint(serde_json::json!({}))).is_ok());
+}
+
+#[test]
+fn not_wrapping_atom_passes() {
+    let formula = Formula::Not(Box::new(bare(pat("x"))));
+    assert!(validate_formula_constraints(&formula).is_ok());
+}
diff --git a/crates/sempai/src/tests/engine_tests.rs b/crates/sempai/src/tests/engine_tests.rs
index e90d8511..a994ba2b 100644
--- a/crates/sempai/src/tests/engine_tests.rs
+++ b/crates/sempai/src/tests/engine_tests.rs
@@ -1,5 +1,7 @@
 //! Tests for the `Engine` and `QueryPlan` types.
 
+use sempai_core::formula::{Atom, Formula};
+
 use crate::engine::QueryPlan;
 use crate::{
     Diagnostic, DiagnosticCode, DiagnosticReport, Engine, EngineConfig, EngineLimits, Language,
@@ -52,32 +54,47 @@ fn compile_yaml_returns_schema_diagnostic_for_missing_rule_id() {
 }
 
 #[test]
-fn compile_yaml_returns_not_implemented_after_successful_parse() {
+fn compile_yaml_produces_plan_with_formula_for_valid_search_rule() {
     let engine = default_engine();
-    let result = engine.compile_yaml(
-        "rules:\n  - id: demo.rule\n    message: oops\n    languages: [rust]\n    severity: ERROR\n    pattern: foo($X)\n",
+    let plans = engine
+        .compile_yaml(concat!(
+            "rules:\n",
+            "  - id: demo.rule\n",
+            "    message: oops\n",
+            "    languages: [rust]\n",
+            "    severity: ERROR\n",
+            "    pattern: foo($X)\n",
+        ))
+        .expect("valid search rule should produce plans");
+    assert_eq!(plans.len(), 1);
+    let plan = plans.first().expect("one plan");
+    assert_eq!(plan.rule_id(), "demo.rule");
+    assert_eq!(plan.language(), Language::Rust);
+    assert_eq!(
+        plan.formula(),
+        Some(&Formula::Atom(Atom::Pattern(String::from("foo($X)")))),
     );
-    let (code, diag) = first_diagnostic_of_err(result);
-    assert_eq!(code, DiagnosticCode::NotImplemented);
-    assert!(diag.message().contains("normalization"));
 }
 
 #[test]
-fn compile_yaml_returns_not_implemented_for_project_depends_on_search_rule() {
+fn compile_yaml_produces_plan_without_formula_for_project_depends_on() {
     let engine = default_engine();
-    let result = engine.compile_yaml(concat!(
-        "rules:\n",
-        "  - id: demo.depends\n",
-        "    message: detect vulnerable dependency\n",
-        "    languages: [python]\n",
-        "    severity: WARNING\n",
-        "    r2c-internal-project-depends-on:\n",
-        "      namespace: pypi\n",
-        "      package: requests\n",
-    ));
-    let (code, diag) = first_diagnostic_of_err(result);
-    assert_eq!(code, DiagnosticCode::NotImplemented);
-    assert!(diag.message().contains("normalization"));
+    let plans = engine
+        .compile_yaml(concat!(
+            "rules:\n",
+            "  - id: demo.depends\n",
+            "    message: detect vulnerable dependency\n",
+            "    languages: [python]\n",
+            "    severity: WARNING\n",
+            "    r2c-internal-project-depends-on:\n",
+            "      namespace: pypi\n",
+            "      package: requests\n",
+        ))
+        .expect("project-depends-on rule should produce plans");
+    assert_eq!(plans.len(), 1);
+    let plan = plans.first().expect("one plan");
+    assert_eq!(plan.rule_id(), "demo.depends");
+    assert!(plan.formula().is_none());
 }
 
 fn assert_compile_yaml_unsupported_mode(yaml: &str, expected_mode_fragment: &str) {
@@ -140,7 +157,7 @@ fn compile_dsl_returns_not_implemented() {
 #[test]
 fn execute_returns_not_implemented() {
     let engine = default_engine();
-    let plan = QueryPlan::new(String::from("test-rule"), Language::Rust);
+    let plan = QueryPlan::new(String::from("test-rule"), Language::Rust, None);
     let result = engine.execute(&plan, "file:///test.rs", "fn main() {}");
     let (code, diag) = first_diagnostic_of_err(result);
     assert_eq!(code, DiagnosticCode::NotImplemented);
diff --git a/crates/sempai/src/tests/mod.rs b/crates/sempai/src/tests/mod.rs
index d24e15ce..4a054ac8 100644
--- a/crates/sempai/src/tests/mod.rs
+++ b/crates/sempai/src/tests/mod.rs
@@ -1,6 +1,8 @@
 //! Unit tests for the `sempai` facade crate.
 
+mod constraint_tests;
 mod engine_tests;
+mod normalise_tests;
 mod reexport_tests;
 
 mod behaviour;
diff --git a/crates/sempai/src/tests/normalise_tests.rs b/crates/sempai/src/tests/normalise_tests.rs
new file mode 100644
index 00000000..8e7ac5da
--- /dev/null
+++ b/crates/sempai/src/tests/normalise_tests.rs
@@ -0,0 +1,325 @@
+//! Unit tests for legacy and v2 formula normalization.
+
+use rstest::rstest;
+use sempai_core::formula::{Atom, Decorated, Formula};
+use sempai_yaml::{LegacyClause, LegacyFormula, LegacyValue, MatchFormula, SearchQueryPrincipal};
+
+use crate::normalise::normalise_search_principal;
+
+// -----------------------------------------------------------------------
+// Helpers
+// -----------------------------------------------------------------------
+
+fn pat(s: &str) -> Formula {
+    Formula::Atom(Atom::Pattern(String::from(s)))
+}
+fn re(s: &str) -> Formula {
+    Formula::Atom(Atom::Regex(String::from(s)))
+}
+fn bare(f: Formula) -> Decorated<Formula> {
+    Decorated::bare(f)
+}
+
+// -----------------------------------------------------------------------
+// Legacy normalization
+// -----------------------------------------------------------------------
+
+#[test]
+fn legacy_pattern_normalises_to_atom() {
+    let principal = SearchQueryPrincipal::Legacy(LegacyFormula::Pattern(String::from("foo($X)")));
+    let result = normalise_search_principal(&principal).expect("ok");
+    assert_eq!(result, Some(pat("foo($X)")));
+}
+
+#[test]
+fn legacy_pattern_regex_normalises_to_regex_atom() {
+    let principal =
+        SearchQueryPrincipal::Legacy(LegacyFormula::PatternRegex(String::from("foo.*")));
+    let result = normalise_search_principal(&principal).expect("ok");
+    assert_eq!(result, Some(re("foo.*")));
+}
+
+#[test]
+fn legacy_patterns_normalises_to_and() {
+    let principal = SearchQueryPrincipal::Legacy(LegacyFormula::Patterns(vec![
+        LegacyClause::Formula(LegacyFormula::Pattern(String::from("a"))),
+        LegacyClause::Formula(LegacyFormula::PatternNot(Box::new(LegacyValue::String(
+            String::from("b"),
+        )))),
+    ]));
+    let result = normalise_search_principal(&principal).expect("ok");
+    let expected = Formula::And(vec![
+        bare(pat("a")),
+        bare(Formula::Not(Box::new(bare(pat("b"))))),
+    ]);
+    assert_eq!(result, Some(expected));
+}
+
+#[test]
+fn legacy_pattern_either_normalises_to_or() {
+    let principal = SearchQueryPrincipal::Legacy(LegacyFormula::PatternEither(vec![
+        LegacyFormula::Pattern(String::from("a")),
+        LegacyFormula::Pattern(String::from("b")),
+    ]));
+    let result = normalise_search_principal(&principal).expect("ok");
+    assert_eq!(
+        result,
+        Some(Formula::Or(vec![bare(pat("a")), bare(pat("b"))]))
+    );
+}
+
+#[test]
+fn legacy_pattern_not_normalises_to_not() {
+    let principal = SearchQueryPrincipal::Legacy(LegacyFormula::PatternNot(Box::new(
+        LegacyValue::String(String::from("x")),
+    )));
+    let result = normalise_search_principal(&principal).expect("ok");
+    assert_eq!(result, Some(Formula::Not(Box::new(bare(pat("x"))))));
+}
+
+#[test]
+fn legacy_pattern_inside_normalises_to_inside() {
+    let principal = SearchQueryPrincipal::Legacy(LegacyFormula::PatternInside(Box::new(
+        LegacyValue::String(String::from("ctx")),
+    )));
+    let result = normalise_search_principal(&principal).expect("ok");
+    assert_eq!(result, Some(Formula::Inside(Box::new(bare(pat("ctx"))))));
+}
+
+#[test]
+fn legacy_pattern_not_inside_normalises_to_not_inside() {
+    let principal = SearchQueryPrincipal::Legacy(LegacyFormula::PatternNotInside(Box::new(
+        LegacyValue::String(String::from("ctx")),
+    )));
+    let result = normalise_search_principal(&principal).expect("ok");
+    let expected = Formula::Not(Box::new(bare(Formula::Inside(Box::new(bare(pat("ctx")))))));
+    assert_eq!(result, Some(expected));
+}
+
+#[test]
+fn legacy_pattern_not_regex_normalises_to_not_regex() {
+    let principal =
+        SearchQueryPrincipal::Legacy(LegacyFormula::PatternNotRegex(String::from("bad.*")));
+    let result = normalise_search_principal(&principal).expect("ok");
+    assert_eq!(result, Some(Formula::Not(Box::new(bare(re("bad.*"))))));
+}
+
+#[test]
+fn legacy_anywhere_normalises_to_anywhere() {
+    let principal = SearchQueryPrincipal::Legacy(LegacyFormula::Anywhere(Box::new(
+        LegacyValue::String(String::from("x")),
+    )));
+    let result = normalise_search_principal(&principal).expect("ok");
+    assert_eq!(result, Some(Formula::Anywhere(Box::new(bare(pat("x"))))));
+}
+
+#[test]
+fn legacy_value_formula_recurses() {
+    let inner = LegacyFormula::Pattern(String::from("inner"));
+    let principal = SearchQueryPrincipal::Legacy(LegacyFormula::PatternNot(Box::new(
+        LegacyValue::Formula(inner),
+    )));
+    let result = normalise_search_principal(&principal).expect("ok");
+    assert_eq!(result, Some(Formula::Not(Box::new(bare(pat("inner"))))));
+}
+
+#[test]
+fn legacy_constraint_preserved_as_opaque() {
+    let constraint_json = serde_json::json!({
+        "metavariable-regex": {
+            "metavariable": "$X",
+            "regex": "foo.*"
+        }
+    });
+    let principal = SearchQueryPrincipal::Legacy(LegacyFormula::Patterns(vec![
+        LegacyClause::Formula(LegacyFormula::Pattern(String::from("a"))),
+        LegacyClause::Constraint(constraint_json.clone()),
+    ]));
+    let result = normalise_search_principal(&principal).expect("ok");
+    let expected = Formula::And(vec![
+        bare(pat("a")),
+        bare(Formula::Constraint(constraint_json)),
+    ]);
+    assert_eq!(result, Some(expected));
+}
+
+// -----------------------------------------------------------------------
+// v2 Match normalization
+// -----------------------------------------------------------------------
+
+#[test]
+fn v2_pattern_string_normalises_to_atom() {
+    let principal = SearchQueryPrincipal::Match(MatchFormula::Pattern(String::from("foo")));
+    let result = normalise_search_principal(&principal).expect("ok");
+    assert_eq!(result, Some(pat("foo")));
+}
+
+#[test]
+fn v2_pattern_object_normalises_to_atom() {
+    let principal = SearchQueryPrincipal::Match(MatchFormula::PatternObject(String::from("foo")));
+    let result = normalise_search_principal(&principal).expect("ok");
+    assert_eq!(result, Some(pat("foo")));
+}
+
+#[test]
+fn v2_regex_normalises_to_regex_atom() {
+    let principal = SearchQueryPrincipal::Match(MatchFormula::Regex(String::from("r.*")));
+    let result = normalise_search_principal(&principal).expect("ok");
+    assert_eq!(result, Some(re("r.*")));
+}
+
+#[test]
+fn v2_all_normalises_to_and() {
+    let principal = SearchQueryPrincipal::Match(MatchFormula::All(vec![
+        MatchFormula::Pattern(String::from("a")),
+        MatchFormula::Not(Box::new(MatchFormula::Pattern(String::from("b")))),
+    ]));
+    let result = normalise_search_principal(&principal).expect("ok");
+    let expected = Formula::And(vec![
+        bare(pat("a")),
+        bare(Formula::Not(Box::new(bare(pat("b"))))),
+    ]);
+    assert_eq!(result, Some(expected));
+}
+
+#[test]
+fn v2_any_normalises_to_or() {
+    let principal = SearchQueryPrincipal::Match(MatchFormula::Any(vec![
+        MatchFormula::Pattern(String::from("a")),
+        MatchFormula::Pattern(String::from("b")),
+    ]));
+    let result = normalise_search_principal(&principal).expect("ok");
+    assert_eq!(
+        result,
+        Some(Formula::Or(vec![bare(pat("a")), bare(pat("b"))]))
+    );
+}
+
+#[test]
+fn v2_not_normalises_to_not() {
+    let principal = SearchQueryPrincipal::Match(MatchFormula::Not(Box::new(
+        MatchFormula::Pattern(String::from("x")),
+    )));
+    let result = normalise_search_principal(&principal).expect("ok");
+    assert_eq!(result, Some(Formula::Not(Box::new(bare(pat("x"))))));
+}
+
+#[test]
+fn v2_inside_normalises_to_inside() {
+    let principal = SearchQueryPrincipal::Match(MatchFormula::Inside(Box::new(
+        MatchFormula::Pattern(String::from("ctx")),
+    )));
+    let result = normalise_search_principal(&principal).expect("ok");
+    assert_eq!(result, Some(Formula::Inside(Box::new(bare(pat("ctx"))))));
+}
+
+#[test]
+fn v2_anywhere_normalises_to_anywhere() {
+    let principal = SearchQueryPrincipal::Match(MatchFormula::Anywhere(Box::new(
+        MatchFormula::Pattern(String::from("x")),
+    )));
+    let result = normalise_search_principal(&principal).expect("ok");
+    assert_eq!(result, Some(Formula::Anywhere(Box::new(bare(pat("x"))))));
+}
+
+#[test]
+fn v2_decorated_carries_metadata() {
+    let principal = SearchQueryPrincipal::Match(MatchFormula::Decorated {
+        formula: Box::new(MatchFormula::Pattern(String::from("foo"))),
+        where_clauses: vec![serde_json::json!({"metavariable": "$X"})],
+        as_name: Some(String::from("alias")),
+        fix: Some(String::from("bar")),
+    });
+    // The top-level normalise_search_principal returns the inner node
+    // stripped of its Decorated wrapper (since the top level is just a
+    // Formula, not Decorated<Formula>).  The metadata is lost at the top
+    // level — it is preserved when Decorated appears as a child within
+    // And/Or/Not etc.
+    let result = normalise_search_principal(&principal).expect("ok");
+    assert_eq!(result, Some(pat("foo")));
+}
+
+// -----------------------------------------------------------------------
+// ProjectDependsOn passthrough
+// -----------------------------------------------------------------------
+
+#[test]
+fn project_depends_on_returns_none() {
+    use sempai_yaml::ProjectDependsOnPayload;
+    let payload: ProjectDependsOnPayload = serde_json::json!({
+        "namespace": "pypi",
+        "package": "requests"
+    })
+    .try_into()
+    .expect("valid payload");
+    let principal = SearchQueryPrincipal::ProjectDependsOn(payload);
+    let result = normalise_search_principal(&principal).expect("ok");
+    assert_eq!(result, None);
+}
+
+// -----------------------------------------------------------------------
+// Paired equivalence tests
+// -----------------------------------------------------------------------
+
+#[rstest]
+#[case::simple_pattern(
+    SearchQueryPrincipal::Legacy(LegacyFormula::Pattern(String::from("foo($X)"))),
+    SearchQueryPrincipal::Match(MatchFormula::Pattern(String::from("foo($X)")))
+)]
+#[case::conjunction(
+    SearchQueryPrincipal::Legacy(LegacyFormula::Patterns(vec![
+        LegacyClause::Formula(LegacyFormula::Pattern(String::from("a"))),
+        LegacyClause::Formula(LegacyFormula::PatternNot(
+            Box::new(LegacyValue::String(String::from("b"))),
+        )),
+    ])),
+    SearchQueryPrincipal::Match(MatchFormula::All(vec![
+        MatchFormula::Pattern(String::from("a")),
+        MatchFormula::Not(Box::new(MatchFormula::Pattern(String::from("b")))),
+    ])),
+)]
+#[case::disjunction(
+    SearchQueryPrincipal::Legacy(LegacyFormula::PatternEither(vec![
+        LegacyFormula::Pattern(String::from("a")),
+        LegacyFormula::Pattern(String::from("b")),
+    ])),
+    SearchQueryPrincipal::Match(MatchFormula::Any(vec![
+        MatchFormula::Pattern(String::from("a")),
+        MatchFormula::Pattern(String::from("b")),
+    ])),
+)]
+fn paired_legacy_and_v2_produce_equal_formula(
+    #[case] legacy: SearchQueryPrincipal,
+    #[case] v2: SearchQueryPrincipal,
+) {
+    let legacy_formula = normalise_search_principal(&legacy).expect("ok");
+    let v2_formula = normalise_search_principal(&v2).expect("ok");
+    assert_eq!(legacy_formula, v2_formula);
+}
+
+// -----------------------------------------------------------------------
+// Deep nesting
+// -----------------------------------------------------------------------
+
+#[test]
+fn deeply_nested_legacy_formula() {
+    // pattern-either containing patterns containing pattern-not
+    let principal = SearchQueryPrincipal::Legacy(LegacyFormula::PatternEither(vec![
+        LegacyFormula::Patterns(vec![
+            LegacyClause::Formula(LegacyFormula::Pattern(String::from("a"))),
+            LegacyClause::Formula(LegacyFormula::PatternNot(Box::new(LegacyValue::String(
+                String::from("b"),
+            )))),
+        ]),
+        LegacyFormula::Pattern(String::from("c")),
+    ]));
+    let result = normalise_search_principal(&principal).expect("ok");
+    let expected = Formula::Or(vec![
+        bare(Formula::And(vec![
+            bare(pat("a")),
+            bare(Formula::Not(Box::new(bare(pat("b"))))),
+        ])),
+        bare(pat("c")),
+    ]);
+    assert_eq!(result, Some(expected));
+}
diff --git a/crates/sempai/src/tests/reexport_tests.rs b/crates/sempai/src/tests/reexport_tests.rs
index 245b8e3c..da351c13 100644
--- a/crates/sempai/src/tests/reexport_tests.rs
+++ b/crates/sempai/src/tests/reexport_tests.rs
@@ -7,8 +7,8 @@
 use std::collections::BTreeMap;
 
 use crate::{
-    CaptureValue, CapturedNode, Diagnostic, DiagnosticCode, DiagnosticReport, EngineConfig,
-    Language, LineCol, Match, SourceSpan, Span,
+    Atom, CaptureValue, CapturedNode, Decorated, Diagnostic, DiagnosticCode, DiagnosticReport,
+    EngineConfig, Formula, Language, LineCol, Match, SourceSpan, Span,
 };
 
 #[test]
@@ -62,3 +62,11 @@ fn engine_config_is_accessible() {
     let config = EngineConfig::default();
     assert_eq!(config.max_matches_per_rule(), 10_000);
 }
+
+#[test]
+fn formula_types_are_accessible() {
+    let atom = Atom::Pattern(String::from("foo"));
+    let formula = Formula::Atom(atom);
+    let decorated = Decorated::bare(formula);
+    assert!(decorated.where_clauses.is_empty());
+}
diff --git a/crates/sempai/tests/features/formula_normalization.feature b/crates/sempai/tests/features/formula_normalization.feature
new file mode 100644
index 00000000..dc32261e
--- /dev/null
+++ b/crates/sempai/tests/features/formula_normalization.feature
@@ -0,0 +1,11 @@
+Feature: Formula normalization
+
+  Scenario: Negation inside disjunction is rejected
+    Given an engine with default configuration
+    When YAML "rules:\n  - id: bad.not-in-or\n    message: detect\n    languages: [rust]\n    severity: ERROR\n    pattern-either:\n      - pattern: foo($X)\n      - pattern-not: bar($X)\n" is compiled
+    Then compilation fails with code "E_SEMPAI_INVALID_NOT_IN_OR"
+
+  Scenario: Conjunction without positive term is rejected
+    Given an engine with default configuration
+    When YAML "rules:\n  - id: bad.no-positive\n    message: detect\n    languages: [rust]\n    severity: ERROR\n    patterns:\n      - pattern-not: foo($X)\n" is compiled
+    Then compilation fails with code "E_SEMPAI_MISSING_POSITIVE_TERM_IN_AND"
diff --git a/crates/sempai/tests/features/sempai_engine.feature b/crates/sempai/tests/features/sempai_engine.feature
index 68773b0f..645b9490 100644
--- a/crates/sempai/tests/features/sempai_engine.feature
+++ b/crates/sempai/tests/features/sempai_engine.feature
@@ -14,17 +14,19 @@ Feature: Sempai engine facade behaviour
     When YAML "rules:\n  - message: detect foo\n    languages: [rust]\n    severity: ERROR\n    pattern: foo($X)\n" is compiled
     Then compilation fails with code "E_SEMPAI_SCHEMA_INVALID"
 
-  Scenario: Engine compile_yaml keeps a post-parse placeholder for valid YAML
+  Scenario: Engine compile_yaml produces a plan with formula for valid search rule
     Given an engine with default configuration
     When YAML "rules:\n  - id: demo.rule\n    message: detect foo\n    languages: [rust]\n    severity: ERROR\n    pattern: foo($X)\n" is compiled
-    Then compilation fails with code "NOT_IMPLEMENTED"
-    And the first diagnostic message contains "normalization"
+    Then compilation succeeds
+    And the result contains 1 query plan
+    And the first plan has a formula
 
-  Scenario: Engine compile_yaml keeps the placeholder for dependency search rules
+  Scenario: Engine compile_yaml produces a plan without formula for dependency rules
     Given an engine with default configuration
     When YAML "rules:\n  - id: demo.depends\n    message: detect vulnerable dependency\n    languages: [python]\n    severity: WARNING\n    r2c-internal-project-depends-on:\n      namespace: pypi\n      package: requests\n" is compiled
-    Then compilation fails with code "NOT_IMPLEMENTED"
-    And the first diagnostic message contains "normalization"
+    Then compilation succeeds
+    And the result contains 1 query plan
+    And the first plan has no formula
 
   Scenario: Engine compile_yaml rejects extract mode during execution gating
     Given an engine with default configuration
diff --git a/docs/execplans/4-1-5-normalization-into-canonical-formula-model.md b/docs/execplans/4-1-5-normalization-into-canonical-formula-model.md
new file mode 100644
index 00000000..00333129
--- /dev/null
+++ b/docs/execplans/4-1-5-normalization-into-canonical-formula-model.md
@@ -0,0 +1,875 @@
+# 4.1.5 Implement legacy and v2 normalization into canonical Formula model
+
+This ExecPlan (execution plan) is a living document. The sections
+`Constraints`, `Tolerances`, `Risks`, `Progress`, `Surprises & Discoveries`,
+`Decision Log`, and `Outcomes & Retrospective` must be kept up to date as work
+proceeds.
+
+Status: COMPLETE
+
+## Purpose / big picture
+
+After this change, the Sempai compilation pipeline will lower both legacy
+Semgrep operators (`pattern`, `patterns`, `pattern-either`, `pattern-not`,
+`pattern-inside`, `pattern-not-inside`, `pattern-not-regex`,
+`semgrep-internal-pattern-anywhere`) and v2 `match` operators (`pattern`,
+`regex`, `all`, `any`, `not`, `inside`, `anywhere`) into one canonical
+`Formula` enum defined in `sempai_core`. Two parser-enforced semantic
+constraint checks will reject invalid formula shapes deterministically:
+
+- `E_SEMPAI_INVALID_NOT_IN_OR`: a negated branch inside `pattern-either` or
+  `any`.
+- `E_SEMPAI_MISSING_POSITIVE_TERM_IN_AND`: a conjunction (`patterns` or `all`)
+  with no positive match-producing term (except in metavariable-pattern
+  contexts, which are preserved as opaque constraints for now).
+
+Observable user-facing behaviour after implementation:
+
+- `Engine::compile_yaml(...)` accepts valid `search` rules and produces
+  `Vec<QueryPlan>` containing a normalised `Formula` for each rule, instead of
+  returning `NOT_IMPLEMENTED`.
+- Paired legacy and v2 YAML fixtures that express equivalent queries normalise
+  to structurally identical `Formula` values.
+- Semantically invalid rules (negation in disjunction, missing positive term in
+  conjunction) emit deterministic rule diagnostics with stable error codes and
+  source spans pointing at the offending formula node.
+- Legacy `LegacyClause::Constraint` objects are preserved as opaque
+  `Formula::Constraint` nodes rather than dropped or rejected, so constraint
+  evaluation can be added in a later milestone.
+- v2 `Decorated` formulas have their `where`, `as`, and `fix` metadata carried
+  through to the canonical `Decorated<Formula>` wrapper.
+- `Engine::execute(...)` continues to return `NOT_IMPLEMENTED` until backend
+  work lands in 4.2.x.
+
+Observable completion evidence:
+
+```plaintext
+set -o pipefail; cargo test -p sempai_core 2>&1 | tee /tmp/4-1-5-sempai-core-test.log
+set -o pipefail; cargo test -p sempai 2>&1 | tee /tmp/4-1-5-sempai-test.log
+set -o pipefail; make check-fmt 2>&1 | tee /tmp/4-1-5-make-check-fmt.log
+set -o pipefail; make lint 2>&1 | tee /tmp/4-1-5-make-lint.log
+set -o pipefail; make test 2>&1 | tee /tmp/4-1-5-make-test.log
+```
+
+Because this milestone updates Markdown documentation and the roadmap:
+
+```plaintext
+set -o pipefail; make fmt 2>&1 | tee /tmp/4-1-5-make-fmt.log
+set -o pipefail; make markdownlint 2>&1 | tee /tmp/4-1-5-make-markdownlint.log
+set -o pipefail; make nixie 2>&1 | tee /tmp/4-1-5-make-nixie.log
+```
+
+## Constraints
+
+- Follow the normalised formula model from
+  [docs/sempai-query-language-design.md](../sempai-query-language-design.md)
+  section "Normalised formula model" and the operator mappings in
+  [docs/semgrep-language-reference/semgrep-operator-precedence.md](../semgrep-language-reference/semgrep-operator-precedence.md)
+  and
+  [docs/semgrep-language-reference/semgrep-legacy-vs-v2-guidance.md](../semgrep-language-reference/semgrep-legacy-vs-v2-guidance.md).
+- The canonical `Formula` enum and its `Decorated<T>` wrapper live in
+  `sempai_core`, not in `sempai_yaml` or `sempai`. This keeps the formula model
+  available to all downstream crates including the future `sempai_ts` backend.
+- Normalization must be a pure function from `SearchQueryPrincipal` to
+  `Result<Formula, DiagnosticReport>`, with no side-effects and no I/O.
+- Preserve the existing parser-vs-engine validation boundary: `sempai_yaml`
+  owns YAML shape and structural validation; the normalization pass in
+  `sempai_core` owns semantic constraint checks on the canonical formula.
+- Parser-level types (`LegacyFormula`, `MatchFormula`, `LegacyValue`,
+  `LegacyClause`) remain in `sempai_yaml`. Do not remove or restructure them.
+- Semantic constraint checks must use the existing diagnostic codes
+  `ESempaiInvalidNotInOr` and `ESempaiMissingPositiveTermInAnd` from
+  `sempai_core::DiagnosticCode`.
+- Legacy `LegacyClause::Constraint` objects must be preserved as opaque nodes
+  (`Formula::Constraint(serde_json::Value)`) so metavariable-regex,
+  metavariable-pattern, and similar constraints can be evaluated in a later
+  milestone.
+- The `QueryPlan` struct in `sempai/src/engine.rs` must carry a real `Formula`
+  instead of the current `_plan: ()` placeholder.
+- `ProjectDependsOn` search principals must be preserved as-is and skip
+  normalization, since they have no formula semantics.
+- Tests must include unit tests and BDD tests using `rstest-bdd` v0.5.0,
+  covering happy paths, unhappy paths (semantic constraint violations), and
+  edge cases (empty conjunctions, single-element disjunctions, deeply nested
+  formulas, decorated v2 nodes).
+- Keep files below 400 lines. Split the normalization module, semantic
+  validator, and test helpers into focused modules.
+- Every new module must begin with a `//!` module comment. New public items must
+  carry Rustdoc with examples where appropriate.
+- Use en-GB-oxendict spelling and grammar throughout code, comments, and
+  documentation.
+- Record any design decisions in
+  [docs/sempai-query-language-design.md](../sempai-query-language-design.md).
+- Update [docs/users-guide.md](../users-guide.md) with the user-visible change
+  in `compile_yaml(...)` behaviour.
+- Mark roadmap item 4.1.5 done in [docs/roadmap.md](../roadmap.md) only after
+  all tests and quality gates pass.
+- `make check-fmt`, `make lint`, and `make test` must all succeed.
+
+## Tolerances (exception triggers)
+
+- Scope: if implementation requires touching more than 18 net files outside
+  `crates/sempai-core/`, `crates/sempai/`, `crates/sempai-yaml/`, and the four
+  required docs, stop and escalate.
+- Interface: if normalisation requires a breaking change to
+  `sempai_yaml::SearchQueryPrincipal`, `sempai_yaml::LegacyFormula`,
+  `sempai_yaml::MatchFormula`, or `sempai::Engine::compile_yaml`, stop and
+  present the least disruptive options before proceeding.
+- Dependency: if the canonical `Formula` requires a new workspace dependency
+  beyond `serde_json` (already available), stop and escalate.
+- Iterations: if the same failing lint or test loop is attempted five times
+  without a clear path forward, stop and escalate.
+- Ambiguity: if the Semgrep schema and the design document disagree on whether
+  `pattern-not-inside` normalises to `Not(Inside(...))` versus a distinct
+  `NotInside(...)` variant, stop and escalate with competing interpretations.
+
+## Risks
+
+- Risk: the existing `LegacyValue` type wraps either a string or a nested
+  formula, but the canonical `Formula` expects atoms. Normalization must
+  handle both forms. Severity: medium. Likelihood: high. Mitigation: treat
+  `LegacyValue::String(s)` as `Formula::Atom(Atom::Pattern(s))` during
+  lowering.
+
+- Risk: `MatchFormula::Pattern(s)` and `MatchFormula::PatternObject(s)` are
+  semantically identical (both are pattern atoms) but structurally different.
+  The normaliser must unify them. Severity: low. Likelihood: certain.
+  Mitigation: map both to `Formula::Atom(Atom::Pattern(s))`.
+
+- Risk: the `MissingPositiveTermInAnd` check must not reject conjunctions
+  where the only positive term comes from a metavariable-pattern constraint.
+  The Semgrep spec allows this as a special case. Severity: medium. Likelihood:
+  medium. Mitigation: treat `LegacyClause::Constraint` as a non-positive,
+  non-negative term for the purpose of the positive-term check, and add a
+  fixture specifically testing the metavariable-pattern exception.
+
+- Risk: `Decorated` v2 formulas nest decorators around formulas. Normalization
+  must preserve decoration while lowering the inner formula. Severity: medium.
+  Likelihood: high. Mitigation: normalise the inner formula recursively, then
+  wrap in `Decorated<Formula>` carrying the `where`, `as`, and `fix` metadata.
+
+- Risk: the positive-term check interacts with `Inside` and `Anywhere`
+  semantics. Per the design doc, these are constraints, not match-producers, so
+  they do not count as positive terms. Severity: medium. Likelihood: medium.
+  Mitigation: follow the design doc classification precisely and add explicit
+  test fixtures for `all: [inside(...)]` (should fail) and
+  `all: [pattern(...), inside(...)]` (should pass).
+
+## Progress
+
+- [x] Reviewed roadmap item 4.1.5, the Sempai design doc, the Semgrep operator
+      precedence and legacy-vs-v2 guidance, the current `sempai_yaml` model, and
+      the 4.1.4 ExecPlan.
+- [x] Drafted this ExecPlan.
+- [x] Stage A: Defined canonical `Formula`, `Atom`, `Decorated<T>` in
+      `sempai_core/src/formula.rs`. Added `serde_json` as a regular dependency
+      to `sempai_core`. Exported from `sempai_core/src/lib.rs`.
+- [x] Stage B: Implemented normalization module in `sempai/src/normalise/` with
+      submodules `legacy.rs`, `v2.rs`, and `constraints.rs`. Followed Option B
+      from Step 7 (normalization in `sempai` to avoid circular dependencies).
+- [x] Stage C: Implemented `validate_formula_constraints(...)` with depth-first
+      walk checking `InvalidNotInOr` and `MissingPositiveTermInAnd`. Added
+      metavariable-pattern exception for all-constraint conjunctions.
+- [x] Stage D: Added 17 formula type unit tests in `sempai_core`, 25
+      normalization unit tests in `sempai` (including 3 paired equivalence
+      tests via `rstest`), and 14 constraint validation unit tests.
+- [x] Stage D (BDD): Added 2 semantic constraint BDD scenarios in
+      `formula_normalization.feature` and updated 2 existing scenarios in
+      `sempai_engine.feature` from `NOT_IMPLEMENTED` expectations to formula
+      plan assertions.
+- [x] Stage E: Wired normalization into `Engine::compile_yaml`, updated
+      `QueryPlan` to carry `Option<Formula>` with a `formula()` accessor.
+      Removed `#[cfg(test)]` gate from `QueryPlan::new`.
+- [x] Stage F: Updated `sempai-query-language-design.md` with implementation
+      notes (crate placement, `pattern-not-inside` lowering, constraint
+      preservation, positive-term exception, `ProjectDependsOn` passthrough).
+      Updated `users-guide.md` with `compile_yaml` behaviour and semantic
+      constraint error codes. Marked 4.1.5 done in `roadmap.md`.
+- [x] Stage G: All quality gates pass — `make check-fmt`, `make lint`,
+      `make test` (199 tests), `make markdownlint` (0 errors). `make nixie`
+      unavailable in environment (custom Mermaid tool); no Mermaid diagrams
+      were modified.
+
+## Surprises & Discoveries
+
+1. **BDD step function `\n` escaping**: The `QuotedString` parser from
+   `rstest-bdd` does not interpret `\n` escape sequences — it strips
+   surrounding quotes but passes the literal `\n` characters through.
+   The `sempai_yaml` BDD tests already worked around this with
+   `.replace("\\n", "\n")` in their step definitions. The same
+   workaround was applied to the `sempai` BDD step definitions.
+
+2. **BDD step registry is global**: `rstest-bdd` v0.5.0 registers step
+   functions in a global registry keyed by step text. Defining the same
+   step (e.g. `"Then compilation succeeds"`) in two different BDD
+   modules causes a runtime panic due to duplicate registration. All
+   normalization BDD scenarios were therefore placed in the existing
+   `behaviour.rs` and registered via a second `#[scenario]` attribute
+   pointing at `formula_normalization.feature`.
+
+3. **`serde_json` upgrade to workspace dependency**: `serde_json` was
+   already available as a workspace dependency but only as a
+   dev-dependency in `sempai_core`. Adding it as a regular dependency
+   was straightforward with no version conflicts.
+
+## Decision Log
+
+1. **Normalization placement: Option B confirmed.** Normalization functions
+   live in `crates/sempai/src/normalise/` (not in `sempai_core`) because
+   they depend on `sempai_yaml` model types, and `sempai_yaml` depends on
+   `sempai_core`. Placing them in `sempai` avoids the circular dependency.
+   The canonical `Formula` type remains in `sempai_core` for downstream
+   consumption.
+
+2. **`normalise_search_principal` returns `Option<Formula>`.** Rather than
+   returning `Result<Formula, DiagnosticReport>`, the function returns
+   `Result<Option<Formula>, DiagnosticReport>` where `None` represents
+   `ProjectDependsOn` rules that have no formula semantics. This avoids
+   needing a sentinel `Formula` variant for non-formula rules.
+
+3. **`QueryPlan::new` no longer test-only.** The `#[cfg(test)]` gate was
+   removed since `Engine::compile_yaml` now constructs real `QueryPlan`
+   values in production code.
+
+4. **Constraint-only conjunction exception.** A conjunction (`And`) where
+   every child is `Constraint` is accepted without triggering
+   `MissingPositiveTermInAnd`. This accommodates metavariable-pattern
+   contexts where constraints act as implicit positive terms.
+
+5. **Top-level `Decorated` metadata from v2.** When `normalise_match`
+   encounters a v2 `Decorated` variant, the metadata (`where`, `as`,
+   `fix`) is preserved in the resulting `Decorated<Formula>`. However,
+   `normalise_search_principal` returns only the inner `Formula` node
+   (stripping the outer `Decorated` wrapper) because the top-level plan
+   needs just the `Formula`. Metadata is preserved when `Decorated`
+   appears as a child within `And`/`Or`/`Not` etc.
+
+## Outcomes & Retrospective
+
+Target outcome at completion:
+
+1. `sempai_core` exports a canonical `Formula` enum, `Atom` enum, and
+   `Decorated<T>` wrapper as stable public types.
+2. `sempai_core` exports a `normalise_search_principal(...)` function that
+   lowers `SearchQueryPrincipal` to `Formula`.
+3. `sempai_core` exports a `validate_formula_constraints(...)` function that
+   checks semantic constraints and returns `DiagnosticReport` on violation.
+4. Paired legacy and v2 test fixtures produce structurally equal `Formula`
+   values after normalization.
+5. Invalid formula shapes (`Not` in `Or`, missing positive term in `And`) emit
+   deterministic diagnostics with the correct `E_SEMPAI_*` codes.
+6. `Engine::compile_yaml(...)` returns `Ok(Vec<QueryPlan>)` for valid search
+   rules, with each plan carrying a normalised `Formula`.
+7. `Engine::execute(...)` still returns `NOT_IMPLEMENTED` until 4.2.x.
+8. `docs/sempai-query-language-design.md` records the normalization mapping and
+   any implementation decisions.
+9. `docs/users-guide.md` documents the change in `compile_yaml(...)` behaviour.
+10. `docs/roadmap.md` marks 4.1.5 done.
+11. `make fmt`, `make markdownlint`, `make nixie`, `make check-fmt`,
+    `make lint`, and `make test` all pass.
+
+Retrospective notes:
+
+**What went well:**
+
+- The plan's dependency graph (Steps 1→2→3→5→6→8→9) held throughout. No
+  backtracking was required between stages.
+- Option B (normalization in `sempai`) was the correct crate-placement call.
+  The alternative (Option A, normalization in `sempai_core`) would have
+  introduced a circular dependency with `sempai_yaml`.
+- The paired equivalence tests (legacy ↔ v2 producing identical `Formula`
+  values) gave high confidence that the two lowering paths are consistent.
+- The metavariable-pattern exception (all-constraint conjunction) was
+  anticipated in the plan and handled smoothly.
+- The 400-line file limit was maintained across all new modules by splitting
+  normalization into `legacy.rs`, `v2.rs`, `constraints.rs`, and `mod.rs`.
+
+**What was harder than expected:**
+
+- `rstest-bdd` v0.5.0's global step registry caused a runtime panic when
+  step definitions were split across two modules. The workaround (a second
+  `#[scenario]` attribute on the existing `behaviour.rs` test function)
+  was effective but non-obvious. This should be documented as a project
+  gotcha.
+- The `QuotedString` type from `rstest-bdd` does not unescape `\n`, which
+  caused BDD scenarios with multi-line YAML to fail silently. The fix
+  (`.replace("\\n", "\n")`) mirrors the existing `sempai_yaml` BDD tests
+  but cost a debugging cycle to rediscover.
+- Clippy's `excessive_nesting` and `manual_let_else` lints required
+  extracting a helper function and restructuring control flow in
+  `engine.rs`. This was ultimately beneficial for readability.
+
+**Outcomes vs. targets:**
+
+All eleven target outcomes from the plan are met:
+
+1. ✓ `sempai_core` exports `Formula`, `Atom`, `Decorated<T>`.
+2. ✓ Normalization dispatches through `normalise_search_principal()` in
+   `sempai` (not `sempai_core`, per Option B — the plan's public API
+   section listed `sempai_core` but Decision Log entry 1 corrected this).
+3. ✓ `validate_formula_constraints()` checks both semantic invariants.
+4. ✓ Three paired equivalence tests confirm legacy ↔ v2 structural
+   equality.
+5. ✓ Invalid formula shapes emit `ESempaiInvalidNotInOr` and
+   `ESempaiMissingPositiveTermInAnd` diagnostics.
+6. ✓ `Engine::compile_yaml()` returns `Ok(Vec<QueryPlan>)` with formulas.
+7. ✓ `Engine::execute()` still returns `NOT_IMPLEMENTED`.
+8. ✓ `sempai-query-language-design.md` updated with implementation notes.
+9. ✓ `users-guide.md` documents the new `compile_yaml` behaviour.
+10. ✓ `roadmap.md` marks 4.1.5 as `[x]`.
+11. ✓ `make check-fmt`, `make lint`, `make test`, `make markdownlint` all
+    pass. `make nixie` unavailable in environment.
+
+**Test coverage summary:**
+
+- 17 formula type unit tests (`sempai_core`).
+- 25 normalization unit tests (`sempai`), including 3 paired
+  `#[rstest]` `#[case]` equivalence tests.
+- 14 semantic constraint unit tests (`sempai`).
+- 2 BDD scenarios for semantic constraint rejection.
+- 2 updated BDD scenarios for engine integration.
+- 199 total workspace tests passing.
+
+## Context and orientation
+
+Current files that matter for this milestone:
+
+- [crates/sempai-core/src/lib.rs](../../crates/sempai-core/src/lib.rs) —
+  re-exports; will expose the new formula module.
+- `crates/sempai-core/src/diagnostic.rs` — already defines
+  `ESempaiInvalidNotInOr` and `ESempaiMissingPositiveTermInAnd`.
+- [crates/sempai-yaml/src/model.rs](../../crates/sempai-yaml/src/model.rs) —
+  `SearchQueryPrincipal`, `LegacyFormula`, `MatchFormula`, `LegacyClause`,
+  `LegacyValue`, `MatchFormula::Decorated`.
+- [crates/sempai/src/engine.rs](../../crates/sempai/src/engine.rs) — `Engine`,
+  `QueryPlan`, and the `NOT_IMPLEMENTED` placeholder at line 116.
+- `crates/sempai/src/mode_validation.rs` — mode-aware gating
+  from 4.1.4.
+- `crates/sempai/src/tests/engine_tests.rs` — existing engine
+  test coverage.
+- `crates/sempai/tests/features/sempai_engine.feature` —
+  existing BDD scenarios for the engine.
+- `crates/sempai/src/tests/behaviour.rs` — BDD step
+  definitions for the engine.
+- `docs/sempai-query-language-design.md` — design reference,
+  records implementation decisions.
+- `docs/semgrep-language-reference/semgrep-operator-precedence.md`
+  — operator mapping and precedence ladder.
+- `docs/semgrep-language-reference/semgrep-legacy-vs-v2-guidance.md`
+  — legacy-to-v2 equivalence table.
+- [docs/users-guide.md](../users-guide.md) — user-facing documentation.
+- [docs/roadmap.md](../roadmap.md) — roadmap state tracking.
+
+Current behaviour to preserve or intentionally change:
+
+- `parse_rule_file(...)` produces `RuleFile` with `Rule` and
+  `SearchQueryPrincipal` for search-mode rules. This remains unchanged.
+- `validate_supported_modes(...)` rejects non-search modes with
+  `E_SEMPAI_UNSUPPORTED_MODE`. This remains unchanged.
+- `compile_yaml(...)` currently returns `NOT_IMPLEMENTED` for all valid search
+  rules after mode validation. This will be replaced with normalization and
+  semantic validation, producing real `QueryPlan` values on success.
+- `QueryPlan._plan` is currently `()`. This will be replaced with `Formula`.
+
+## Plan of work
+
+### Stage A: Define the canonical Formula model in sempai_core
+
+Add the `Formula`, `Atom`, and `Decorated<T>` types to `sempai_core` with full
+Rustdoc documentation. These types follow the design document's normalised
+formula model.
+
+### Stage B: Implement normalization functions
+
+Implement `normalise_legacy(...)` and `normalise_match(...)` functions that
+lower `LegacyFormula` and `MatchFormula` respectively into `Formula`. Add a
+top-level `normalise_search_principal(...)` dispatcher. These are pure
+functions in `sempai_core` that depend on `sempai_yaml` model types.
+
+### Stage C: Implement semantic constraint checks
+
+Add `validate_formula_constraints(...)` that walks the normalised `Formula`
+tree and checks:
+
+- No `Not` children inside `Or` nodes (`InvalidNotInOr`).
+- Every `And` node has at least one positive term (`MissingPositiveTermInAnd`).
+
+### Stage D: Lock expected behaviour with tests first
+
+Add unit tests and BDD scenarios before wiring the normalization into the
+engine, so the intended behaviour is explicit and verifiable.
+
+### Stage E: Wire normalization into Engine::compile_yaml
+
+Replace the `NOT_IMPLEMENTED` placeholder in `Engine::compile_yaml(...)` with
+the normalization and validation pipeline. Update `QueryPlan` to carry a real
+`Formula`.
+
+### Stage F: Update design docs, user docs, and roadmap
+
+Synchronise living documentation with the implementation.
+
+### Stage G: Run the full gate sequence and capture evidence
+
+Run targeted crate tests and the full repository gates.
+
+## Concrete steps
+
+### Step 1: Define canonical Formula types (Stage A)
+
+Create `crates/sempai-core/src/formula.rs`:
+
+```rust
+/// Canonical normalised formula shared by legacy and v2 paths.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Formula {
+    Atom(Atom),
+    Not(Box<Decorated<Formula>>),
+    Inside(Box<Decorated<Formula>>),
+    Anywhere(Box<Decorated<Formula>>),
+    And(Vec<Decorated<Formula>>),
+    Or(Vec<Decorated<Formula>>),
+    /// Opaque constraint preserved for later evaluation.
+    Constraint(serde_json::Value),
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Atom {
+    Pattern(String),
+    Regex(String),
+    TreeSitterQuery(String),
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Decorated<T> {
+    pub node: T,
+    pub where_clauses: Vec<serde_json::Value>,
+    pub as_name: Option<String>,
+    pub fix: Option<String>,
+}
+```
+
+Export from `sempai_core/src/lib.rs`. Keep file under 400 lines.
+
+### Step 2: Implement normalization module (Stages B and C)
+
+Create `crates/sempai-core/src/normalise.rs` (or split across
+`normalise/mod.rs`, `normalise/legacy.rs`, `normalise/v2.rs` if needed to
+stay within the 400-line limit).
+
+Key mappings:
+
+**Legacy to Formula:**
+
+| Legacy | Formula |
+| --- | --- |
+| `Pattern(s)` | `Atom(Pattern(s))` |
+| `PatternRegex(s)` | `Atom(Regex(s))` |
+| `Patterns(clauses)` | `And(normalised_clauses)` |
+| `PatternEither(branches)` | `Or(normalised_branches)` |
+| `PatternNot(val)` | `Not(normalise_value(val))` |
+| `PatternInside(val)` | `Inside(normalise_value(val))` |
+| `PatternNotInside(val)` | `Not(Inside(normalise_value(val)))` |
+| `PatternNotRegex(s)` | `Not(Atom(Regex(s)))` |
+| `Anywhere(val)` | `Anywhere(normalise_value(val))` |
+| `LegacyValue::String(s)` | `Atom(Pattern(s))` |
+| `LegacyValue::Formula(f)` | `normalise_legacy(f)` |
+| `LegacyClause::Constraint(v)` | `Constraint(v)` |
+
+**v2 Match to Formula:**
+
+| v2 Match | Formula |
+| --- | --- |
+| `Pattern(s)` | `Atom(Pattern(s))` |
+| `PatternObject(s)` | `Atom(Pattern(s))` |
+| `Regex(s)` | `Atom(Regex(s))` |
+| `All(items)` | `And(normalised_items)` |
+| `Any(items)` | `Or(normalised_items)` |
+| `Not(inner)` | `Not(normalise_match(inner))` |
+| `Inside(inner)` | `Inside(normalise_match(inner))` |
+| `Anywhere(inner)` | `Anywhere(normalise_match(inner))` |
+| `Decorated { formula, where_, as_, fix }` | `Decorated { normalise_match(formula), where_, as_, fix }` |
+
+Implement `validate_formula_constraints(formula: &Formula)`:
+
+- Walk the tree recursively.
+- For `Or(branches)`: check that no direct child (after unwrapping
+  `Decorated`) is a `Not(...)`. If found, emit
+  `E_SEMPAI_INVALID_NOT_IN_OR`.
+- For `And(terms)`: check that at least one direct child is a positive term
+  (i.e., not `Not`, not `Inside`, not `Anywhere`, not `Constraint`). If none
+  found, emit `E_SEMPAI_MISSING_POSITIVE_TERM_IN_AND`.
+
+### Step 3: Add unit tests for normalization (Stage D)
+
+Create `crates/sempai-core/src/tests/normalise_tests.rs` with `rstest`
+parameterised cases:
+
+- Legacy `pattern` → `Atom(Pattern(...))`.
+- Legacy `pattern-regex` → `Atom(Regex(...))`.
+- Legacy `patterns` with mixed positive and negative terms →
+  `And([Atom(...), Not(...)])`.
+- Legacy `pattern-either` with two branches →
+  `Or([Atom(...), Atom(...)])`.
+- Legacy `pattern-not-inside` → `Not(Inside(Atom(...)))`.
+- Legacy `pattern-not-regex` → `Not(Atom(Regex(...)))`.
+- Legacy `semgrep-internal-pattern-anywhere` → `Anywhere(Atom(...))`.
+- v2 `match: "foo"` → `Atom(Pattern("foo"))`.
+- v2 `all` with `not` and `pattern` → `And([Atom(...), Not(...)])`.
+- v2 `any` with two patterns → `Or([Atom(...), Atom(...)])`.
+- v2 decorated formula carries `where`, `as`, `fix` metadata.
+- Paired equivalence: legacy `patterns` vs v2 `all` produce the same
+  `Formula`.
+- Paired equivalence: legacy `pattern-either` vs v2 `any` produce the same
+  `Formula`.
+- Deep nesting: `pattern-either` containing `patterns` containing
+  `pattern-not`.
+
+### Step 4: Add unit tests for semantic constraints (Stage D)
+
+Create `crates/sempai-core/src/tests/constraint_tests.rs`:
+
+- `Or` with `Not` child → `E_SEMPAI_INVALID_NOT_IN_OR`.
+- `Or` without `Not` children → passes.
+- `And` with no positive terms → `E_SEMPAI_MISSING_POSITIVE_TERM_IN_AND`.
+- `And` with one positive and one negative → passes.
+- `And` with only `Inside` and `Anywhere` → fails (not positive).
+- `And` with only `Constraint` objects → passes (metavariable-pattern
+  exception: constraints alone do not trigger the positive-term violation,
+  because they may be metavariable-pattern constraints that act as implicit
+  positive terms).
+- Nested: `And` inside `Or` where the `And` has no positive term → fails.
+
+### Step 5: Add BDD scenarios (Stage D)
+
+Add feature file
+`crates/sempai/tests/features/formula_normalization.feature` (or extend the
+existing `sempai_engine.feature`) with scenarios:
+
+```gherkin
+Feature: Formula normalization
+
+  Scenario: Legacy pattern normalises to atom
+    Given a valid search rule with a legacy "pattern" principal
+    When the rule is compiled
+    Then the query plan contains a pattern atom
+
+  Scenario: v2 match string normalises to atom
+    Given a valid search rule with a v2 match string principal
+    When the rule is compiled
+    Then the query plan contains a pattern atom
+
+  Scenario: Legacy and v2 conjunction produce equivalent formulas
+    Given a legacy "patterns" rule and an equivalent v2 "all" rule
+    When both rules are compiled
+    Then both query plans contain structurally equal formulas
+
+  Scenario: Negation inside disjunction is rejected
+    Given a search rule with a "not" inside "pattern-either"
+    When the rule is compiled
+    Then compilation fails with E_SEMPAI_INVALID_NOT_IN_OR
+
+  Scenario: Conjunction without positive term is rejected
+    Given a search rule with "patterns" containing only "pattern-not"
+    When the rule is compiled
+    Then compilation fails with E_SEMPAI_MISSING_POSITIVE_TERM_IN_AND
+```
+
+Add corresponding step definitions in
+`crates/sempai/src/tests/behaviour.rs` (or a new
+`crates/sempai/src/tests/normalise_behaviour.rs` if the file exceeds 400
+lines).
+
+### Step 6: Wire normalization into Engine (Stage E)
+
+Update `crates/sempai/src/engine.rs`:
+
+- Import `normalise_search_principal` and `validate_formula_constraints` from
+  `sempai_core`.
+- In `compile_yaml(...)`:
+  1. Parse YAML → `RuleFile`.
+  2. Validate supported modes.
+  3. For each search rule, extract its `SearchQueryPrincipal`.
+  4. If `ProjectDependsOn`, skip normalization and produce a plan without a
+     formula (or a sentinel marker).
+  5. Call `normalise_search_principal(principal)` → `Formula`.
+  6. Call `validate_formula_constraints(&formula)` → check for violations.
+  7. Construct `QueryPlan` with the rule's ID, language, and `Formula`.
+  8. Return `Ok(plans)`.
+
+Update `QueryPlan`:
+
+- Replace `_plan: ()` with `formula: Option<Formula>`.
+- `None` represents a `ProjectDependsOn` rule with no formula.
+- Add `pub fn formula(&self) -> Option<&Formula>` accessor.
+- Remove the `#[cfg(test)]` gate on `QueryPlan::new`.
+
+### Step 7: Ensure sempai_core depends on sempai_yaml types (Stage B)
+
+The normalization functions in `sempai_core` accept `sempai_yaml` types
+(`SearchQueryPrincipal`, `LegacyFormula`, `MatchFormula`). This creates a
+dependency `sempai_core → sempai_yaml`. If this introduces a circular
+dependency (since `sempai_yaml` depends on `sempai_core` for `SourceSpan` and
+diagnostics), resolve it by one of:
+
+- **Option A (preferred):** Keep normalization in `sempai_core` and accept the
+  `sempai_yaml` dependency. This requires `sempai_yaml` to depend on
+  `sempai_core` for diagnostics, and `sempai_core` to depend on `sempai_yaml`
+  for model types. This is circular and **not viable**.
+- **Option B:** Place normalization in the `sempai` facade crate, which already
+  depends on both. Normalization functions live in `crates/sempai/src/normalise/`
+  and convert from `sempai_yaml` types to `sempai_core::Formula`.
+- **Option C:** Place normalization in a new `sempai_core::normalise` module
+  but define the conversion traits such that the actual `From` impls live in
+  `sempai` or `sempai_yaml`.
+
+**Resolution:** Option B is the most pragmatic. The normalization module lives
+in `crates/sempai/src/normalise/` and the `Formula` type itself lives in
+`sempai_core`. This avoids circular dependencies and keeps the canonical type
+in the core crate.
+
+### Step 8: Update documentation (Stage F)
+
+- Update
+  [docs/sempai-query-language-design.md](../sempai-query-language-design.md):
+  - Add an implementation note recording the normalization mapping.
+  - Document the `pattern-not-inside` → `Not(Inside(...))` lowering.
+  - Document the constraint-preservation strategy.
+  - Record the crate-placement decision (Formula in `sempai_core`,
+    normalization in `sempai`).
+- Update [docs/users-guide.md](../users-guide.md):
+  - Document that `compile_yaml(...)` now returns real query plans for valid
+    search rules.
+  - List the semantic constraint error codes users may encounter.
+- Update [docs/roadmap.md](../roadmap.md):
+  - Mark 4.1.5 done.
+
+### Step 9: Run full gate sequence (Stage G)
+
+```plaintext
+set -o pipefail; cargo test -p sempai_core --all-targets --all-features 2>&1 | tee /tmp/4-1-5-sempai-core-test.log
+set -o pipefail; cargo test -p sempai --all-targets --all-features 2>&1 | tee /tmp/4-1-5-sempai-test.log
+set -o pipefail; make fmt 2>&1 | tee /tmp/4-1-5-make-fmt.log
+set -o pipefail; make markdownlint 2>&1 | tee /tmp/4-1-5-make-markdownlint.log
+set -o pipefail; make nixie 2>&1 | tee /tmp/4-1-5-make-nixie.log
+set -o pipefail; make check-fmt 2>&1 | tee /tmp/4-1-5-make-check-fmt.log
+set -o pipefail; make lint 2>&1 | tee /tmp/4-1-5-make-lint.log
+set -o pipefail; make test 2>&1 | tee /tmp/4-1-5-make-test.log
+```
+
+## Dependency graph
+
+```plaintext
+Step 1 (Formula types in sempai_core)
+  ├─→ Step 2 (normalization functions in sempai)
+  │     └─→ Step 3 (normalization unit tests)
+  │           └─→ Step 5 (BDD scenarios)
+  │                 └─→ Step 6 (wire into Engine)
+  └─→ Step 4 (constraint unit tests)
+        └─→ Step 5
+              └─→ Step 6
+                    └─→ Step 8 (docs)
+                          └─→ Step 9 (gates)
+```
+
+Step 7 (dependency resolution) is performed during Step 2 and determines the
+final module placement.
+
+## Test plan
+
+### Unit tests (sempai_core)
+
+- Formula type construction and equality.
+- `Decorated` wrapper with and without metadata.
+- `Atom` variants.
+
+### Unit tests (sempai — normalization)
+
+- Legacy → Formula mapping for each `LegacyFormula` variant.
+- v2 Match → Formula mapping for each `MatchFormula` variant.
+- Paired equivalence tests (legacy ↔ v2).
+- `LegacyValue::String` vs `LegacyValue::Formula` handling.
+- `LegacyClause::Constraint` → `Formula::Constraint`.
+- Decorated v2 formulas carry metadata.
+- Deep nesting.
+
+### Unit tests (sempai — semantic constraints)
+
+- `InvalidNotInOr` positive and negative cases.
+- `MissingPositiveTermInAnd` positive and negative cases.
+- Nested constraint violations.
+- `Inside`/`Anywhere` not counted as positive terms.
+- `Constraint` alone in `And` does not trigger violation.
+
+### BDD tests (sempai)
+
+- Happy paths: legacy pattern, v2 match, paired equivalence.
+- Unhappy paths: negation in disjunction, missing positive term.
+- Edge cases: empty rules array, `ProjectDependsOn` passthrough.
+
+### Integration (engine-level)
+
+- `compile_yaml(...)` returns `Ok(Vec<QueryPlan>)` for valid rules.
+- `compile_yaml(...)` returns `Err(DiagnosticReport)` with correct codes for
+  invalid rules.
+- Plans carry accessible `Formula` values.
+
+## Validation and acceptance
+
+- All tests in `cargo test -p sempai_core`, `cargo test -p sempai`, and
+  `make test` pass.
+- `make check-fmt`, `make lint`, `make markdownlint`, and `make nixie` pass.
+- `docs/roadmap.md` shows 4.1.5 as `[x]`.
+- `docs/users-guide.md` documents the new `compile_yaml(...)` behaviour.
+- `docs/sempai-query-language-design.md` contains implementation notes.
+
+## Idempotence and recovery
+
+All steps are re-runnable. If the implementation hits a blocker:
+
+- If the canonical `Formula` type needs to change, update `formula.rs` and
+  re-run all normalization and constraint tests.
+- If the crate placement causes circular dependencies, switch to Option B
+  (normalization in `sempai`) per Step 7.
+- If semantic constraint logic is incorrect, update `normalise.rs` and re-run
+  the constraint test suite.
+
+## Artefacts and notes
+
+Expected new files:
+
+- `crates/sempai-core/src/formula.rs` — canonical `Formula`, `Atom`,
+  `Decorated<T>`.
+- `crates/sempai/src/normalise.rs` (or `normalise/mod.rs` + sub-modules) —
+  normalization functions.
+- `crates/sempai/src/normalise/legacy.rs` — legacy lowering.
+- `crates/sempai/src/normalise/v2.rs` — v2 lowering.
+- `crates/sempai/src/normalise/constraints.rs` — semantic constraint checks.
+- `crates/sempai-core/src/tests/formula_tests.rs` — formula type tests.
+- `crates/sempai/src/tests/normalise_tests.rs` — normalization unit tests.
+- `crates/sempai/src/tests/constraint_tests.rs` — constraint check unit tests.
+- `crates/sempai/tests/features/formula_normalization.feature` — BDD
+  scenarios.
+
+Expected modified files:
+
+- `crates/sempai-core/src/lib.rs` — new `formula` module export.
+- `crates/sempai/src/engine.rs` — wire normalization, update `QueryPlan`.
+- `crates/sempai/src/lib.rs` — re-export formula types if needed.
+- `crates/sempai/src/tests/behaviour.rs` — new BDD step definitions.
+- `docs/sempai-query-language-design.md` — implementation notes.
+- `docs/users-guide.md` — user-visible behaviour change.
+- `docs/roadmap.md` — mark 4.1.5 done.
+
+## Interfaces and dependencies
+
+### Public API additions (sempai_core)
+
+```rust
+// crates/sempai-core/src/formula.rs
+
+pub enum Formula { ... }
+pub enum Atom { ... }
+pub struct Decorated<T> { ... }
+```
+
+### Public API additions (sempai)
+
+```rust
+// crates/sempai/src/normalise.rs (or mod.rs)
+
+pub fn normalise_search_principal(
+    principal: &SearchQueryPrincipal,
+) -> Result<Formula, DiagnosticReport>;
+
+pub fn validate_formula_constraints(
+    formula: &Formula,
+) -> Result<(), DiagnosticReport>;
+```
+
+### Modified API (sempai)
+
+```rust
+// crates/sempai/src/engine.rs
+
+pub struct QueryPlan {
+    rule_id: String,
+    language: Language,
+    formula: Option<Formula>,
+}
+
+impl QueryPlan {
+    pub fn formula(&self) -> Option<&Formula>;
+}
+```
+
+## Practice documentation
+
+The following project guidance documents are relevant to this milestone.
+Implementors should consult them for conventions, patterns, and constraints.
+
+### Testing
+
+- [docs/rstest-bdd-users-guide.md](../rstest-bdd-users-guide.md) — BDD
+  framework usage: feature files, step definitions, fixture injection,
+  `Slot<T>` state, `ScenarioState`, data tables, and `#[scenario]`
+  binding. All behavioural tests in this milestone use `rstest-bdd`
+  v0.5.0.
+- [docs/rust-testing-with-rstest-fixtures.md](../rust-testing-with-rstest-fixtures.md)
+  — `rstest` fixture injection, `#[case]` parameterised tests,
+  `#[values]` combinatorial tests, `#[from]` and `#[with]` overrides.
+  Unit tests in the normalization and constraint modules should use
+  `rstest` parameterised cases for the mapping tables.
+- [docs/reliable-testing-in-rust-via-dependency-injection.md](../reliable-testing-in-rust-via-dependency-injection.md)
+  — dependency injection patterns for testable Rust code. Normalization
+  functions are pure and need no DI, but the engine integration path
+  should remain injectable for future backend substitution.
+- [docs/rust-doctest-dry-guide.md](../rust-doctest-dry-guide.md) —
+  DRY doctest patterns, `concat!()` for multi-line literals, hidden
+  setup lines, and `no_run` annotations. All new public types and
+  functions require Rustdoc examples.
+
+### Design and architecture
+
+- [docs/sempai-query-language-design.md](../sempai-query-language-design.md)
+  — primary design reference for the normalised formula model, operator
+  mappings, semantic constraints, and the parser-to-validation pipeline.
+  Implementation decisions must be recorded here.
+- [docs/weaver-design.md](../weaver-design.md) — system architecture,
+  the observe/act/verify command model, JSONL protocol, and the
+  Double-Lock safety harness. Relevant for understanding how query
+  plans flow through the daemon.
+- [docs/semgrep-language-reference/semgrep-operator-precedence.md](../semgrep-language-reference/semgrep-operator-precedence.md)
+  — Semgrep operator precedence ladder and Pratt binding powers.
+  Defines the normalization model that this milestone implements.
+- [docs/semgrep-language-reference/semgrep-legacy-vs-v2-guidance.md](../semgrep-language-reference/semgrep-legacy-vs-v2-guidance.md)
+  — legacy-to-v2 equivalence table. Paired test fixtures should
+  mirror the equivalences documented here.
+
+### Code quality
+
+- [docs/complexity-antipatterns-and-refactoring-strategies.md](../complexity-antipatterns-and-refactoring-strategies.md)
+  — Cyclomatic and Cognitive Complexity metrics, the "Bumpy Road"
+  antipattern, extract-method and dispatcher-pattern refactoring
+  strategies. Relevant for keeping the recursive normalization and
+  constraint-walking functions simple and well-factored.
+- [AGENTS.md](../../AGENTS.md) — code style, 400-line file limit,
+  en-GB spelling, commit gating (`make check-fmt`, `make lint`,
+  `make test`), module-level `//!` comments, and Rustdoc requirements.
+
+### Configuration
+
+- [docs/ortho-config-users-guide.md](../ortho-config-users-guide.md)
+  — OrthoConfig usage for CLI/env/file configuration merging.
+  Not directly exercised in normalization but relevant for
+  understanding how `EngineConfig` reaches the engine.
+
+## Revision note
+
+Initial draft created on 2026-04-12. Implements roadmap item 4.1.5 from
+[docs/roadmap.md](../roadmap.md).
+
+- 2026-04-13: Implementation completed. Normalization placed in `sempai`
+  (Option B confirmed). All unit and BDD tests passing (199 total). All
+  quality gates pass. Documentation updated. `ExecPlan` status set to
+  COMPLETE.
diff --git a/docs/roadmap.md b/docs/roadmap.md
index af044c41..7a2fc56e 100644
--- a/docs/roadmap.md
+++ b/docs/roadmap.md
@@ -424,7 +424,7 @@ phase* *with explicit parser, backend, and Weaver-integration milestones.*
   - Acceptance criteria: unsupported execution modes return deterministic
     `UnsupportedMode` diagnostics, and search mode validation enforces required
     key combinations.
-- [ ] 4.1.5. Implement legacy and v2 normalization into one canonical
+- [x] 4.1.5. Implement legacy and v2 normalization into one canonical
       `Formula` model with semantic constraint checks. Requires 4.1.3.
   - Acceptance criteria: paired legacy and v2 fixtures normalize to equivalent
     formulas, and semantic invalid states emit deterministic rule diagnostics.
diff --git a/docs/sempai-query-language-design.md b/docs/sempai-query-language-design.md
index 429b0202..51244d10 100644
--- a/docs/sempai-query-language-design.md
+++ b/docs/sempai-query-language-design.md
@@ -428,12 +428,34 @@ semantics early.
 
 Implementation note (2026-03-29): `sempai::Engine::compile_yaml` now applies a
 mode-aware validation pass after parsing. Search rules continue to the
-deliberate `NOT_IMPLEMENTED` normalization placeholder, while `extract`,
-`taint`, `join`, and forward-compatible unknown modes fail deterministically
-with `E_SEMPAI_UNSUPPORTED_MODE`. The first unsupported rule in source order is
+normalization pipeline (see below), while `extract`, `taint`, `join`, and
+forward-compatible unknown modes fail deterministically with
+`E_SEMPAI_UNSUPPORTED_MODE`. The first unsupported rule in source order is
 reported, and its `primary_span` prefers the `mode` field span before falling
 back to the enclosing rule span.
 
+Implementation note (2026-04-13): `sempai_core` now exports a canonical
+`Formula` enum, `Atom` enum, and `Decorated<T>` wrapper. The normalization
+module in `crates/sempai/src/normalise/` lowers both legacy Semgrep operators
+and v2 `match` operators into this representation. Key implementation
+decisions:
+
+- **Crate placement**: `Formula` lives in `sempai_core::formula` for
+  downstream consumption. Normalization functions live in `sempai`
+  (`crates/sempai/src/normalise/`) because they depend on `sempai_yaml`
+  model types, and placing them in `sempai_core` would create a circular
+  dependency.
+- **`pattern-not-inside` lowering**: normalises to `Not(Inside(...))` rather
+  than a distinct `NotInside` variant, matching the Semgrep specification's
+  desugaring semantics.
+- **Constraint preservation**: legacy `LegacyClause::Constraint` objects are
+  preserved as `Formula::Constraint(serde_json::Value)` for later evaluation.
+- **Positive-term exception**: conjunctions containing only `Constraint`
+  children do not trigger `E_SEMPAI_MISSING_POSITIVE_TERM_IN_AND`, because
+  metavariable-pattern constraints may act as implicit positive terms.
+- **`ProjectDependsOn` passthrough**: these rules produce `QueryPlan` values
+  with `formula: None` and skip normalization entirely.
+
 Extract mode rules require legacy query keys, not `match`.[^1]
 
 ### Sempai extensions for Tree-sitter queries
diff --git a/docs/users-guide.md b/docs/users-guide.md
index 742dc14c..5a7706c9 100644
--- a/docs/users-guide.md
+++ b/docs/users-guide.md
@@ -1367,29 +1367,46 @@ The `Engine` struct exposes three methods for query compilation and execution:
 - `execute(plan, uri, source)` — executes a compiled plan against a source
   snapshot.
 
-`compile_yaml(yaml)` now performs real YAML parsing plus a mode-aware
-validation pass. Malformed YAML returns `E_SEMPAI_YAML_PARSE`, and schema-shape
-failures such as missing required rule keys return `E_SEMPAI_SCHEMA_INVALID`,
-both using the shared structured diagnostic payload with `primary_span`
-locations when available.
+`compile_yaml(yaml)` performs YAML parsing, mode-aware validation, and
+formula normalization. Malformed YAML returns `E_SEMPAI_YAML_PARSE`, and
+schema-shape failures such as missing required rule keys return
+`E_SEMPAI_SCHEMA_INVALID`, both using the shared structured diagnostic
+payload with `primary_span` locations when available.
 
-After parsing succeeds, `compile_yaml(yaml)` now distinguishes parseable rules
+After parsing succeeds, `compile_yaml(yaml)` distinguishes parseable rules
 from executable ones:
 
-- Valid `search` rules, including compatibility-only
-  `r2c-internal-project-depends-on` rules, continue to the existing
-  `NOT_IMPLEMENTED` normalization placeholder.
-- Valid `extract`, `taint`, `join`, and unknown future mode strings now fail
-  deterministically with `E_SEMPAI_UNSUPPORTED_MODE` instead of falling through
-  to the generic placeholder.
-
-Unsupported-mode diagnostics point at the rule's `mode` field when that span is
-available, which makes whole-document failures deterministic even though the
-execution backend is still pending.
+- Valid `search` rules are normalised into a canonical `Formula` model.
+  Both legacy Semgrep operators (`pattern`, `patterns`, `pattern-either`,
+  `pattern-not`, `pattern-inside`, `pattern-not-inside`, `pattern-not-regex`,
+  `semgrep-internal-pattern-anywhere`) and v2 `match` operators (`pattern`,
+  `regex`, `all`, `any`, `not`, `inside`, `anywhere`) lower into the same
+  `Formula` tree.  Paired legacy and v2 rules that express equivalent
+  queries produce structurally identical formulas.
+- Compatibility-only `r2c-internal-project-depends-on` rules produce query
+  plans with no formula (`formula()` returns `None`), since they have no
+  formula semantics.
+- Valid `extract`, `taint`, `join`, and unknown future mode strings fail
+  deterministically with `E_SEMPAI_UNSUPPORTED_MODE`.
+
+After normalization, semantic constraint checks validate the formula tree:
+
+- `E_SEMPAI_INVALID_NOT_IN_OR`: a negated formula inside a disjunction
+  (`pattern-either` / `any`) is not permitted.
+- `E_SEMPAI_MISSING_POSITIVE_TERM_IN_AND`: a conjunction (`patterns` /
+  `all`) must contain at least one positive match-producing term.
+
+On success, `compile_yaml(yaml)` returns `Vec<QueryPlan>` with one plan
+per rule per declared language. Each plan carries the normalised `Formula`
+accessible via `plan.formula()`.
+
+Unsupported-mode diagnostics point at the rule's `mode` field when that
+span is available, which makes whole-document failures deterministic even
+though the execution backend is still pending.
 
 `compile_dsl(...)` and `execute(...)` still return "not implemented"
-diagnostics. They will be wired to the DSL parser and Tree-sitter backend as
-those components are delivered in subsequent roadmap phases.
+diagnostics. They will be wired to the DSL parser and Tree-sitter backend
+as those components are delivered in subsequent roadmap phases.
 
 All error conditions are reported through `DiagnosticReport`, which carries
 stable diagnostic codes suitable for programmatic consumption. Stub methods