Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/sempai-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ test-support = []

[dependencies]
serde = { workspace = true }
serde_json = { workspace = true }
thiserror = { workspace = true }

[dev-dependencies]
Expand Down
145 changes: 145 additions & 0 deletions crates/sempai-core/src/formula.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
//! Canonical normalised formula model shared by legacy and v2 paths.
//!
//! The [`Formula`] enum is the single internal representation that both
//! legacy Semgrep operators and v2 `match` syntax lower into. It lives
//! in `sempai_core` so that every downstream crate (including the future
//! `sempai_ts` backend) can depend on it without pulling in the YAML
//! parser.
//!
//! # Design
//!
//! The formula tree mirrors the logical structure of a Semgrep query:
//!
//! - [`Atom`] — a leaf pattern, regex, or raw Tree-sitter query to match against source code.
//! - [`Formula::And`] / [`Formula::Or`] — Boolean combinators.
//! - [`Formula::Not`] / [`Formula::Inside`] / [`Formula::Anywhere`] —
//! unary modifiers.
//! - [`Formula::Constraint`] — opaque constraint preserved for later
//! evaluation (e.g. `metavariable-regex`, `metavariable-pattern`).
//! - [`Decorated`] — metadata wrapper carrying `where`, `as`, and `fix`
//! annotations from v2 syntax.

use serde_json::Value;

/// Leaf node of a formula: a single matcher applied to source code.
///
/// Every variant wraps the matcher text as an owned [`String`]; the
/// variant itself records which kind of matcher the text is.
///
/// # Examples
///
/// ```
/// use sempai_core::formula::Atom;
///
/// let atom = Atom::Pattern(String::from("foo($X)"));
/// assert!(matches!(atom, Atom::Pattern(_)));
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Atom {
    /// Concrete code pattern, such as `foo($X)`.
    Pattern(String),
    /// Regular-expression pattern.
    Regex(String),
    /// Raw Tree-sitter S-expression query.
    TreeSitterQuery(String),
}

/// Wrapper that pairs a node with the v2 `where`, `as`, and `fix`
/// annotations attached to it.
///
/// Legacy formulas yield bare `Decorated` values whose metadata is empty,
/// whereas v2 formulas carry over the decoration found on the parsed
/// `Decorated` variant.
///
/// # Examples
///
/// ```
/// use sempai_core::formula::{Atom, Decorated, Formula};
///
/// let bare = Decorated::bare(Formula::Atom(Atom::Pattern(String::from("x"))));
/// assert!(bare.where_clauses.is_empty());
/// assert!(bare.as_name.is_none());
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Decorated<T> {
    /// The node being decorated.
    pub node: T,
    /// Raw `where` clauses, kept as JSON values so constraints can be
    /// evaluated later.
    pub where_clauses: Vec<Value>,
    /// Metavariable alias name, when one was given.
    pub as_name: Option<String>,
    /// Inline fix text, when one was given.
    pub fix: Option<String>,
}

impl<T> Decorated<T> {
    /// Wraps `node` with no decoration metadata.
    ///
    /// The returned value has an empty `where_clauses` list and `None` for
    /// both `as_name` and `fix`.
    #[must_use]
    pub const fn bare(node: T) -> Self {
        Self {
            node,
            where_clauses: Vec::new(),
            as_name: None,
            fix: None,
        }
    }

    /// Wraps `node` together with explicitly supplied decoration metadata.
    ///
    /// Each argument is stored unchanged in the field of the same name.
    #[must_use]
    pub const fn with_metadata(
        node: T,
        where_clauses: Vec<Value>,
        as_name: Option<String>,
        fix: Option<String>,
    ) -> Self {
        Self {
            node,
            where_clauses,
            as_name,
            fix,
        }
    }
}

/// Canonical, normalised formula describing one search rule.
///
/// Legacy Semgrep operators and v2 `match` syntax are both lowered into
/// this tree. Semantic constraints are validated on the tree after it has
/// been constructed.
///
/// # Examples
///
/// ```
/// use sempai_core::formula::{Atom, Decorated, Formula};
///
/// let conjunction = Formula::And(vec![
///     Decorated::bare(Formula::Atom(Atom::Pattern(String::from("foo($X)")))),
///     Decorated::bare(Formula::Not(Box::new(
///         Decorated::bare(Formula::Atom(Atom::Pattern(String::from("bar($X)")))),
///     ))),
/// ]);
/// assert!(matches!(conjunction, Formula::And(_)));
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Formula {
    /// Leaf matcher (see [`Atom`]).
    Atom(Atom),
    /// Logical negation of the wrapped formula.
    Not(Box<Decorated<Formula>>),
    /// Scope restriction (`pattern-inside` / `inside`).
    Inside(Box<Decorated<Formula>>),
    /// Unrestricted scope (`semgrep-internal-pattern-anywhere` / `anywhere`).
    Anywhere(Box<Decorated<Formula>>),
    /// Logical conjunction of the children (`patterns` / `all`).
    And(Vec<Decorated<Formula>>),
    /// Logical disjunction of the branches (`pattern-either` / `any`).
    Or(Vec<Decorated<Formula>>),
    /// Opaque constraint kept as raw JSON for later evaluation.
    Constraint(Value),
}

impl Formula {
    /// Reports whether this node counts as a positive, match-producing
    /// term.
    ///
    /// `Atom`, `And`, and `Or` nodes are positive. `Not`, `Inside`,
    /// `Anywhere`, and `Constraint` nodes are not, as far as the
    /// `MissingPositiveTermInAnd` semantic check is concerned.
    ///
    /// Only the outermost variant is inspected; children are not examined.
    #[must_use]
    pub const fn is_positive_term(&self) -> bool {
        match self {
            Self::Atom(_) | Self::And(_) | Self::Or(_) => true,
            Self::Not(_) | Self::Inside(_) | Self::Anywhere(_) | Self::Constraint(_) => false,
        }
    }
}
3 changes: 3 additions & 0 deletions crates/sempai-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
//! - [`CaptureValue`] and [`CapturedNode`] — metavariable bindings
//! - [`DiagnosticReport`] and [`Diagnostic`] — structured error reporting
//! - [`EngineConfig`] and [`EngineLimits`] — performance and safety limits
//! - [`Formula`], [`Atom`], and [`Decorated`] — canonical normalised formula model
//!
//! # Example
//!
Expand All @@ -28,13 +29,15 @@
mod capture;
mod config;
mod diagnostic;
pub mod formula;
mod language;
mod match_result;
mod span;

pub use capture::{CaptureValue, CapturedNode};
pub use config::{EngineConfig, EngineLimits};
pub use diagnostic::{Diagnostic, DiagnosticCode, DiagnosticReport, SourceSpan};
pub use formula::{Atom, Decorated, Formula};
pub use language::{Language, LanguageParseError};
pub use match_result::Match;
pub use span::{LineCol, Span};
Expand Down
146 changes: 146 additions & 0 deletions crates/sempai-core/src/tests/formula_tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
//! Tests for the canonical formula model types.

use crate::formula::{Atom, Decorated, Formula};

// -----------------------------------------------------------------------
// Atom construction
// -----------------------------------------------------------------------

// Two `Atom::Pattern` values built from the same text compare equal.
#[test]
fn atom_pattern_stores_string() {
    let expected = Atom::Pattern(String::from("foo($X)"));
    let actual = Atom::Pattern(String::from("foo($X)"));
    assert_eq!(actual, expected);
}

// Two `Atom::Regex` values built from the same text compare equal.
#[test]
fn atom_regex_stores_string() {
    let expected = Atom::Regex(String::from("foo.*"));
    let actual = Atom::Regex(String::from("foo.*"));
    assert_eq!(actual, expected);
}

// Two `Atom::TreeSitterQuery` values built from the same text compare equal.
#[test]
fn atom_tree_sitter_query_stores_string() {
    let expected = Atom::TreeSitterQuery(String::from("(call_expression)"));
    let actual = Atom::TreeSitterQuery(String::from("(call_expression)"));
    assert_eq!(actual, expected);
}

// -----------------------------------------------------------------------
// Decorated wrapper
// -----------------------------------------------------------------------

// `Decorated::bare` leaves every metadata field empty.
#[test]
fn decorated_bare_has_empty_metadata() {
    let Decorated {
        where_clauses,
        as_name,
        fix,
        ..
    } = Decorated::bare(Formula::Atom(Atom::Pattern(String::from("x"))));

    assert!(where_clauses.is_empty());
    assert!(as_name.is_none());
    assert!(fix.is_none());
}

// `Decorated::with_metadata` stores each supplied metadata value.
#[test]
fn decorated_with_metadata_preserves_fields() {
    let node = Formula::Atom(Atom::Regex(String::from("r")));
    let clauses = vec![serde_json::json!({"metavariable": "$X"})];
    let alias = Some(String::from("alias"));
    let fix_text = Some(String::from("fix: $X"));

    let decorated = Decorated::with_metadata(node, clauses, alias, fix_text);

    assert_eq!(decorated.where_clauses.len(), 1);
    assert_eq!(decorated.as_name.as_deref(), Some("alias"));
    assert_eq!(decorated.fix.as_deref(), Some("fix: $X"));
}

// -----------------------------------------------------------------------
// Formula construction and equality
// -----------------------------------------------------------------------

// Independently built atom formulas with the same pattern are equal.
#[test]
fn formula_atom_equality() {
    let build = || Formula::Atom(Atom::Pattern(String::from("x")));
    let first = build();
    let second = build();
    assert_eq!(first, second);
}

// `Formula::Not` built from a clone equals one built from the original.
#[test]
fn formula_not_wraps_inner() {
    let wrapped = Decorated::bare(Formula::Atom(Atom::Pattern(String::from("x"))));
    let negated = Formula::Not(Box::new(wrapped.clone()));
    let expected = Formula::Not(Box::new(wrapped));
    assert_eq!(negated, expected);
}

// `Formula::And` built from a cloned child list equals one built from the original list.
#[test]
fn formula_and_holds_children() {
    let leaf = |text: &str| Decorated::bare(Formula::Atom(Atom::Pattern(String::from(text))));
    let operands = vec![leaf("a"), leaf("b")];

    let conjunction = Formula::And(operands.clone());

    assert_eq!(conjunction, Formula::And(operands));
}

// `Formula::Or` built from a cloned branch list equals one built from the original list.
#[test]
fn formula_or_holds_branches() {
    let leaf = |text: &str| Decorated::bare(Formula::Atom(Atom::Pattern(String::from(text))));
    let alternatives = vec![leaf("a"), leaf("b")];

    let disjunction = Formula::Or(alternatives.clone());

    assert_eq!(disjunction, Formula::Or(alternatives));
}

// `Formula::Constraint` built from cloned JSON equals one built from the original JSON.
#[test]
fn formula_constraint_preserves_json() {
    let payload =
        serde_json::json!({"metavariable-regex": {"metavariable": "$X", "regex": "foo"}});
    let constraint = Formula::Constraint(payload.clone());
    let expected = Formula::Constraint(payload);
    assert_eq!(constraint, expected);
}

// -----------------------------------------------------------------------
// is_positive_term classification
// -----------------------------------------------------------------------

// An atom is classified as a positive term.
#[test]
fn atom_is_positive() {
    let leaf = Formula::Atom(Atom::Pattern(String::from("x")));
    assert!(leaf.is_positive_term());
}

// A conjunction, even an empty one, is classified as a positive term.
#[test]
fn and_is_positive() {
    let empty_conjunction = Formula::And(vec![]);
    assert!(empty_conjunction.is_positive_term());
}

// A disjunction, even an empty one, is classified as a positive term.
#[test]
fn or_is_positive() {
    let empty_disjunction = Formula::Or(vec![]);
    assert!(empty_disjunction.is_positive_term());
}

// A negation is not classified as a positive term.
#[test]
fn not_is_not_positive() {
    let operand = Decorated::bare(Formula::Atom(Atom::Pattern(String::from("x"))));
    let negation = Formula::Not(Box::new(operand));
    assert!(!negation.is_positive_term());
}

// An `Inside` scope restriction is not classified as a positive term.
#[test]
fn inside_is_not_positive() {
    let operand = Decorated::bare(Formula::Atom(Atom::Pattern(String::from("x"))));
    let scoped = Formula::Inside(Box::new(operand));
    assert!(!scoped.is_positive_term());
}

// An `Anywhere` modifier is not classified as a positive term.
#[test]
fn anywhere_is_not_positive() {
    let operand = Decorated::bare(Formula::Atom(Atom::Pattern(String::from("x"))));
    let unscoped = Formula::Anywhere(Box::new(operand));
    assert!(!unscoped.is_positive_term());
}

// An opaque constraint is not classified as a positive term.
#[test]
fn constraint_is_not_positive() {
    let opaque = Formula::Constraint(serde_json::json!({}));
    assert!(!opaque.is_positive_term());
}
1 change: 1 addition & 0 deletions crates/sempai-core/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ mod capture_tests;
mod config_tests;
mod diagnostic_snapshot_tests;
mod diagnostic_tests;
mod formula_tests;
mod language_tests;
mod match_tests;
mod span_tests;
Expand Down
Loading
Loading