From b23e50023792ed81a3080327c4bf5ac1d7489da7 Mon Sep 17 00:00:00 2001
From: Markus <markushh@uio.no>
Date: Mon, 11 May 2026 20:16:03 +0200
Subject: [PATCH 1/9] chore: Refactor benchmarks

---
 Cargo.toml                   |  25 ++--
 benches/data.rs              | 247 -----------------------------------
 benches/likelihood.rs        | 150 +++++++++++++++++++++
 benches/likelihood_matrix.rs | 247 -----------------------------------
 benches/nca.rs               | 102 ++++-----------
 benches/ode.rs               | 123 -----------------
 benches/performance.rs       |  55 --------
 benches/predictions.rs       | 199 ++++++++++++++++++++++++++++
 benches/runtime_matrix.rs    | 100 --------------
 9 files changed, 383 insertions(+), 865 deletions(-)
 delete mode 100644 benches/data.rs
 create mode 100644 benches/likelihood.rs
 delete mode 100644 benches/likelihood_matrix.rs
 delete mode 100644 benches/ode.rs
 delete mode 100644 benches/performance.rs
 create mode 100644 benches/predictions.rs
 delete mode 100644 benches/runtime_matrix.rs

diff --git a/Cargo.toml b/Cargo.toml
index 18698813..2f190b6f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,6 +6,7 @@ resolver = "2"
 name = "pharmsol"
 version = "0.26.1"
 edition = "2021"
+autobenches = false
 authors = ["Julián D. Otálvaro <juliandavid347@gmail.com>", "Markus Hovd"]
 description = "Rust library for solving analytic and ode-defined pharmacometric models."
 license = "GPL-3.0"
@@ -14,7 +15,13 @@ documentation = "https://lapkb.github.io/pharmsol/"
 [features]
 default = []
 dsl-core = []
-dsl-jit = ["dsl-core", "dep:cranelift", "dep:cranelift-jit", "dep:cranelift-module", "dep:cranelift-native"]
+dsl-jit = [
+    "dsl-core",
+    "dep:cranelift",
+    "dep:cranelift-jit",
+    "dep:cranelift-module",
+    "dep:cranelift-native",
+]
 dsl-aot = ["dsl-core"]
 dsl-aot-load = ["dsl-core", "dep:libloading"]
 dsl-wasm-compile = ["dsl-core", "dep:wasm-encoder"]
@@ -61,15 +68,7 @@ webbrowser = "1.2.1"
 bench = false
 
 [[bench]]
-name = "performance"
-harness = false
-
-[[bench]]
-name = "data"
-harness = false
-
-[[bench]]
-name = "ode"
+name = "predictions"
 harness = false
 
 [[bench]]
@@ -77,9 +76,5 @@ name = "nca"
 harness = false
 
 [[bench]]
-name = "runtime_matrix"
-harness = false
-
-[[bench]]
-name = "likelihood_matrix"
+name = "likelihood"
 harness = false
diff --git a/benches/data.rs b/benches/data.rs
deleted file mode 100644
index 1202972c..00000000
--- a/benches/data.rs
+++ /dev/null
@@ -1,247 +0,0 @@
-use criterion::{criterion_group, criterion_main, Criterion};
-use pharmsol::prelude::*;
-use pharmsol::SubjectBuilderExt; // Ensure this trait is in scope
-use std::hint::black_box;
-
-fn subject_builder_benchmark(c: &mut Criterion) {
-    // Simple subject creation
-    c.bench_function("SubjectBuilder simple", |b| {
-        b.iter(|| {
-            let subject = Subject::builder("patient1")
-                .bolus(0.0, 100.0, 0)
-                .observation(3.0, 100.0, 0)
-                .observation(4.0, 200.0, 0)
-                .observation(5.0, 300.0, 0)
-                .build();
-            black_box(subject);
-        })
-    });
-
-    // Subject with covariates
-    c.bench_function("SubjectBuilder with covariates", |b| {
-        b.iter(|| {
-            let subject = Subject::builder("patient1")
-                .bolus(0.0, 100.0, 0)
-                .observation(3.0, 100.0, 0)
-                .observation(4.0, 200.0, 0)
-                .observation(5.0, 300.0, 0)
-                .observation(12.0, 300.0, 0)
-                .covariate("age", 0.0, 30.0)
-                .covariate("weight", 0.0, 70.0)
-                .build();
-            black_box(subject);
-        })
-    });
-
-    // Multi-occasion subject
-    c.bench_function("SubjectBuilder multi-occasion", |b| {
-        b.iter(|| {
-            let subject = Subject::builder("patient1")
-                .bolus(0.0, 100.0, 0)
-                .observation(1.0, 50.0, 0)
-                .observation(6.0, 25.0, 0)
-                .covariate("age", 0.0, 30.0)
-                .reset()
-                .bolus(24.0, 120.0, 0)
-                .observation(25.0, 55.0, 0)
-                .observation(30.0, 30.0, 0)
-                .covariate("age", 24.0, 30.0)
-                .build();
-            black_box(subject);
-        })
-    });
-}
-
-fn data_expand_benchmark(c: &mut Criterion) {
-    // Create datasets for expansion testing
-    let simple_data = {
-        let subject = Subject::builder("patient1")
-            .bolus(0.0, 100.0, 0)
-            .observation(1.0, 50.0, 0)
-            .observation(6.0, 25.0, 0)
-            .observation(12.0, 15.0, 0)
-            .build();
-        Data::new(vec![subject])
-    };
-
-    let complex_data = {
-        let subjects = (1..=10)
-            .map(|i| {
-                Subject::builder(format!("patient{}", i))
-                    .bolus(0.0, 100.0 * i as f64, 0)
-                    .observation(1.0, 50.0, 0)
-                    .observation(2.0, 45.0, 1) // Different output equation
-                    .observation(6.0, 25.0, 0)
-                    .observation(24.0, 8.0, 1)
-                    .infusion(48.0, 200.0, 0, 2.0)
-                    .observation(50.0, 30.0, 0)
-                    .build()
-            })
-            .collect();
-        Data::new(subjects)
-    };
-
-    // Test different expansion intervals
-    c.bench_function("Data expand simple (1h intervals)", |b| {
-        b.iter(|| {
-            let expanded = simple_data.expand(1.0, 0.0);
-            black_box(expanded);
-        })
-    });
-
-    c.bench_function("Data expand complex (1h intervals)", |b| {
-        b.iter(|| {
-            let expanded = complex_data.expand(1.0, 0.0);
-            black_box(expanded);
-        })
-    });
-
-    c.bench_function("Data expand with additional time", |b| {
-        b.iter(|| {
-            let expanded = complex_data.expand(1.0, 24.0);
-            black_box(expanded);
-        })
-    });
-}
-
-fn dose_modification_benchmark(c: &mut Criterion) {
-    let create_dosing_data = || {
-        let subjects = (1..=5)
-            .map(|i| {
-                Subject::builder(format!("patient{}", i))
-                    .bolus(0.0, 100.0, 0)
-                    .bolus(12.0, 100.0, 0)
-                    .bolus(24.0, 100.0, 0)
-                    .infusion(48.0, 200.0, 0, 2.0)
-                    .infusion(72.0, 250.0, 0, 3.0)
-                    .observation(1.0, 50.0, 0)
-                    .observation(25.0, 45.0, 0)
-                    .observation(50.0, 30.0, 0)
-                    .build()
-            })
-            .collect();
-        Data::new(subjects)
-    };
-
-    c.bench_function("Modify all bolus doses", |b| {
-        b.iter(|| {
-            let mut data = create_dosing_data();
-            for subject in data.iter_mut() {
-                for occasion in subject.occasions_iter_mut() {
-                    for event in occasion.events_iter_mut() {
-                        if let Event::Bolus(bolus) = event {
-                            bolus.set_amount(bolus.amount() * 2.0);
-                        }
-                    }
-                }
-            }
-            black_box(data);
-        })
-    });
-
-    c.bench_function("Modify all infusion doses", |b| {
-        b.iter(|| {
-            let mut data = create_dosing_data();
-            for subject in data.iter_mut() {
-                for occasion in subject.occasions_iter_mut() {
-                    for event in occasion.events_iter_mut() {
-                        if let Event::Infusion(infusion) = event {
-                            infusion.set_amount(infusion.amount() * 1.5);
-                            infusion.set_duration(infusion.duration() * 0.8);
-                        }
-                    }
-                }
-            }
-            black_box(data);
-        })
-    });
-
-    c.bench_function("Conditional dose modification", |b| {
-        b.iter(|| {
-            let mut data = create_dosing_data();
-            for subject in data.iter_mut() {
-                for occasion in subject.occasions_iter_mut() {
-                    for event in occasion.events_iter_mut() {
-                        match event {
-                            Event::Bolus(bolus) if bolus.time() >= 24.0 => {
-                                bolus.set_amount(bolus.amount() * 1.2);
-                            }
-                            Event::Infusion(infusion) if infusion.time() >= 48.0 => {
-                                infusion.set_amount(infusion.amount() * 0.9);
-                            }
-                            _ => {}
-                        }
-                    }
-                }
-            }
-            black_box(data);
-        })
-    });
-}
-
-fn data_operations_benchmark(c: &mut Criterion) {
-    // Combined benchmark for data filtering and large dataset creation
-    let large_data = {
-        let subjects = (1..=100)
-            .map(|i| {
-                Subject::builder(format!("patient{:03}", i))
-                    .bolus(0.0, 100.0, 0)
-                    .observation(1.0, 50.0, 0)
-                    .observation(6.0, 25.0, 0)
-                    .observation(12.0, 15.0, 0)
-                    .covariate("age", 0.0, 20.0 + (i as f64 % 50.0))
-                    .covariate("weight", 0.0, 60.0 + (i as f64 % 40.0))
-                    .build()
-            })
-            .collect();
-        Data::new(subjects)
-    };
-
-    c.bench_function("Create large dataset (100 subjects)", |b| {
-        b.iter(|| {
-            let subjects: Vec<Subject> = (1..=100)
-                .map(|i| {
-                    Subject::builder(format!("patient{:03}", i))
-                        .bolus(0.0, 100.0, 0)
-                        .observation(0.5, 80.0, 0)
-                        .observation(1.0, 60.0, 0)
-                        .observation(2.0, 45.0, 0)
-                        .observation(4.0, 30.0, 0)
-                        .observation(8.0, 20.0, 0)
-                        .observation(12.0, 15.0, 0)
-                        .observation(24.0, 8.0, 0)
-                        .covariate("age", 0.0, 25.0 + (i as f64 % 50.0))
-                        .covariate("weight", 0.0, 60.0 + (i as f64 % 40.0))
-                        .build()
-                })
-                .collect();
-            let data = Data::new(subjects);
-            black_box(data);
-        })
-    });
-
-    c.bench_function("Filter include subjects", |b| {
-        b.iter(|| {
-            let include_ids: Vec<String> = (1..=20).map(|i| format!("patient{:03}", i)).collect();
-            let filtered = large_data.filter_include(&include_ids);
-            black_box(filtered);
-        })
-    });
-
-    c.bench_function("Filter exclude subjects", |b| {
-        b.iter(|| {
-            let exclude_ids: Vec<String> = (80..=100).map(|i| format!("patient{:03}", i)).collect();
-            let filtered = large_data.filter_exclude(exclude_ids);
-            black_box(filtered);
-        })
-    });
-}
-
-criterion_group!(
-    benches,
-    subject_builder_benchmark,
-    data_expand_benchmark,
-    dose_modification_benchmark,
-    data_operations_benchmark
-);
-criterion_main!(benches);
diff --git a/benches/likelihood.rs b/benches/likelihood.rs
new file mode 100644
index 00000000..93fc9e70
--- /dev/null
+++ b/benches/likelihood.rs
@@ -0,0 +1,150 @@
+use criterion::{criterion_group, criterion_main, Criterion, SamplingMode, Throughput};
+use ndarray::Array2;
+use pharmsol::prelude::simulator::{log_likelihood_batch, log_likelihood_matrix};
+use pharmsol::prelude::*;
+use pharmsol::{ResidualErrorModel, ResidualErrorModels, ODE};
+use std::hint::black_box;
+use std::time::Duration;
+
+fn benchmark_equation() -> ODE {
+    equation::ODE::new(
+        |x, p, _t, dx, _b, _rateiv, _cov| {
+            fetch_params!(p, ka, ke, _tlag, _v);
+            dx[0] = -ka * x[0];
+            dx[1] = ka * x[0] - ke * x[1];
+        },
+        |p, _t, _cov| {
+            fetch_params!(p, _ka, _ke, tlag, _v);
+            lag! {0=>tlag}
+        },
+        |_p, _t, _cov| fa! {},
+        |_p, _t, _cov, _x| {},
+        |x, p, _t, _cov, y| {
+            fetch_params!(p, _ka, _ke, _tlag, v);
+            y[0] = x[1] / v;
+        },
+    )
+    .with_nstates(2)
+    .with_nout(1)
+}
+
+fn benchmark_data(n_subjects: usize) -> Data {
+    let subjects = (0..n_subjects)
+        .map(|index| {
+            let dose = 100.0 + index as f64;
+            let offset = index as f64 * 0.02;
+            Subject::builder(format!("subject_{index}"))
+                .bolus(0.0, dose, 0)
+                .observation(0.5, 4.2 + offset, 0)
+                .observation(1.0, 5.9 + offset, 0)
+                .observation(2.0, 7.4 + offset, 0)
+                .observation(4.0, 6.8 + offset, 0)
+                .observation(8.0, 4.7 + offset, 0)
+                .observation(12.0, 3.3 + offset, 0)
+                .build()
+        })
+        .collect();
+
+    Data::new(subjects)
+}
+
+fn support_points(n_support_points: usize) -> Array2<f64> {
+    Array2::from_shape_fn((n_support_points, 4), |(row, column)| match column {
+        0 => 0.55 + row as f64 * 0.002,
+        1 => 0.08 + row as f64 * 0.0004,
+        2 => 0.05,
+        3 => 18.0 + row as f64 * 0.15,
+        _ => unreachable!(),
+    })
+}
+
+fn batch_parameters(n_subjects: usize) -> Array2<f64> {
+    Array2::from_shape_fn((n_subjects, 4), |(row, column)| match column {
+        0 => 0.60 + row as f64 * 0.001,
+        1 => 0.09 + row as f64 * 0.0002,
+        2 => 0.05,
+        3 => 20.0 + row as f64 * 0.05,
+        _ => unreachable!(),
+    })
+}
+
+fn assay_error_models() -> AssayErrorModels {
+    AssayErrorModels::new()
+        .add(
+            0,
+            AssayErrorModel::additive(ErrorPoly::new(0.0, 0.1, 0.0, 0.0), 0.0),
+        )
+        .unwrap()
+}
+
+fn residual_error_models() -> ResidualErrorModels {
+    ResidualErrorModels::new().add(0, ResidualErrorModel::constant(0.2))
+}
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let assay_error_models = assay_error_models();
+    let residual_error_models = residual_error_models();
+
+    let matrix_data = benchmark_data(64);
+    let matrix_theta = support_points(128);
+    let matrix_equation = benchmark_equation();
+    let _ = log_likelihood_matrix(
+        &matrix_equation,
+        &matrix_data,
+        &matrix_theta,
+        &assay_error_models,
+        false,
+    )
+    .unwrap();
+
+    let batch_data = benchmark_data(256);
+    let batch_theta = batch_parameters(256);
+    let batch_equation = benchmark_equation();
+    let _ = log_likelihood_batch(
+        &batch_equation,
+        &batch_data,
+        &batch_theta,
+        &residual_error_models,
+    )
+    .unwrap();
+
+    let mut matrix_group = c.benchmark_group("population/likelihood-matrix");
+
+    matrix_group.sampling_mode(SamplingMode::Flat);
+    matrix_group.sample_size(10);
+    matrix_group.measurement_time(Duration::from_secs(30));
+    matrix_group.throughput(Throughput::Elements((64 * 128) as u64));
+    matrix_group.bench_function("ode-hot-64x128", |b| {
+        b.iter(|| {
+            black_box(log_likelihood_matrix(
+                black_box(&matrix_equation),
+                black_box(&matrix_data),
+                black_box(&matrix_theta),
+                black_box(&assay_error_models),
+                false,
+            ))
+            .unwrap()
+        })
+    });
+    matrix_group.finish();
+
+    let mut batch_group = c.benchmark_group("population/likelihood-batch");
+
+    batch_group.sampling_mode(SamplingMode::Flat);
+    batch_group.throughput(Throughput::Elements(256));
+    batch_group.bench_function("ode-hot-256", |b| {
+        b.iter(|| {
+            black_box(log_likelihood_batch(
+                black_box(&batch_equation),
+                black_box(&batch_data),
+                black_box(&batch_theta),
+                black_box(&residual_error_models),
+            ))
+            .unwrap()
+        })
+    });
+    batch_group.finish();
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/benches/likelihood_matrix.rs b/benches/likelihood_matrix.rs
deleted file mode 100644
index b8f94d54..00000000
--- a/benches/likelihood_matrix.rs
+++ /dev/null
@@ -1,247 +0,0 @@
-use criterion::{
-    criterion_group, criterion_main, BenchmarkId, Criterion, SamplingMode, Throughput,
-};
-use ndarray::Array2;
-use pharmsol::prelude::simulator::{log_likelihood_batch, log_likelihood_matrix};
-use pharmsol::prelude::*;
-use pharmsol::{Cache, ResidualErrorModel, ResidualErrorModels, ODE};
-use std::hint::black_box;
-use std::time::Duration;
-
-fn example_equation() -> ODE {
-    equation::ODE::new(
-        |x, p, _t, dx, _b, _rateiv, _cov| {
-            fetch_params!(p, ka, ke, _tlag, _v);
-            dx[0] = -ka * x[0];
-            dx[1] = ka * x[0] - ke * x[1];
-        },
-        |p, _t, _cov| {
-            fetch_params!(p, _ka, _ke, tlag, _v);
-            lag! {0=>tlag}
-        },
-        |_p, _t, _cov| fa! {},
-        |_p, _t, _cov, _x| {},
-        |x, p, _t, _cov, y| {
-            fetch_params!(p, _ka, _ke, _tlag, v);
-            y[0] = x[1] / v;
-        },
-    )
-    .with_nstates(2)
-    .with_nout(1)
-}
-
-fn example_data(n_subjects: usize) -> Data {
-    let subjects = (0..n_subjects)
-        .map(|index| {
-            let dose = 100.0 + index as f64;
-            let offset = index as f64 * 0.02;
-            Subject::builder(format!("subject_{index}"))
-                .bolus(0.0, dose, 0)
-                .observation(0.5, 4.2 + offset, 0)
-                .observation(1.0, 5.9 + offset, 0)
-                .observation(2.0, 7.4 + offset, 0)
-                .observation(4.0, 6.8 + offset, 0)
-                .observation(8.0, 4.7 + offset, 0)
-                .observation(12.0, 3.3 + offset, 0)
-                .build()
-        })
-        .collect();
-
-    Data::new(subjects)
-}
-
-fn example_subject() -> Subject {
-    example_data(1)
-        .get_subject("subject_0")
-        .expect("example subject exists")
-        .clone()
-}
-
-fn example_params() -> [f64; 4] {
-    [0.60, 0.09, 0.05, 20.0]
-}
-
-fn support_points(n_support_points: usize) -> Array2<f64> {
-    Array2::from_shape_fn((n_support_points, 4), |(row, column)| match column {
-        0 => 0.55 + row as f64 * 0.002,
-        1 => 0.08 + row as f64 * 0.0004,
-        2 => 0.05,
-        3 => 18.0 + row as f64 * 0.15,
-        _ => unreachable!(),
-    })
-}
-
-fn batch_parameters(n_subjects: usize) -> Array2<f64> {
-    Array2::from_shape_fn((n_subjects, 4), |(row, column)| match column {
-        0 => 0.60 + row as f64 * 0.001,
-        1 => 0.09 + row as f64 * 0.0002,
-        2 => 0.05,
-        3 => 20.0 + row as f64 * 0.05,
-        _ => unreachable!(),
-    })
-}
-
-fn assay_error_models() -> AssayErrorModels {
-    AssayErrorModels::new()
-        .add(
-            0,
-            AssayErrorModel::additive(ErrorPoly::new(0.0, 0.1, 0.0, 0.0), 0.0),
-        )
-        .unwrap()
-}
-
-fn residual_error_models() -> ResidualErrorModels {
-    ResidualErrorModels::new().add(0, ResidualErrorModel::constant(0.2))
-}
-
-fn criterion_benchmark(c: &mut Criterion) {
-    let assay_error_models = assay_error_models();
-    let residual_error_models = residual_error_models();
-
-    let subject = example_subject();
-    let params = example_params();
-    let equation_cold = example_equation().disable_cache();
-    let equation_hot = example_equation();
-    let _ = equation_hot
-        .estimate_predictions(&subject, params.as_slice())
-        .unwrap();
-    let predictions = equation_cold
-        .estimate_predictions(&subject, params.as_slice())
-        .unwrap();
-
-    let mut breakdown_group = c.benchmark_group("ode/runtime-breakdown");
-    breakdown_group.sample_size(10);
-    breakdown_group.warm_up_time(Duration::from_millis(250));
-    breakdown_group.measurement_time(Duration::from_secs(1));
-    breakdown_group.bench_function("predict-cold", |b| {
-        b.iter(|| {
-            black_box(
-                equation_cold
-                    .estimate_predictions(&subject, params.as_slice())
-                    .unwrap(),
-            )
-        })
-    });
-    breakdown_group.bench_function("predict-hot", |b| {
-        b.iter(|| {
-            black_box(
-                equation_hot
-                    .estimate_predictions(&subject, params.as_slice())
-                    .unwrap(),
-            )
-        })
-    });
-    breakdown_group.bench_function("score-only", |b| {
-        b.iter(|| black_box(predictions.log_likelihood(&assay_error_models)).unwrap())
-    });
-    breakdown_group.bench_function("loglik-cold", |b| {
-        b.iter(|| {
-            black_box(
-                equation_cold
-                    .estimate_log_likelihood(&subject, params.as_slice(), &assay_error_models)
-                    .unwrap(),
-            )
-        })
-    });
-    breakdown_group.bench_function("loglik-hot", |b| {
-        b.iter(|| {
-            black_box(
-                equation_hot
-                    .estimate_log_likelihood(&subject, params.as_slice(), &assay_error_models)
-                    .unwrap(),
-            )
-        })
-    });
-    breakdown_group.finish();
-
-    let mut matrix_group = c.benchmark_group("likelihood/matrix");
-    matrix_group.sampling_mode(SamplingMode::Flat);
-    matrix_group.sample_size(10);
-    matrix_group.warm_up_time(Duration::from_millis(250));
-    matrix_group.measurement_time(Duration::from_secs(2));
-
-    for (n_subjects, n_support_points) in [(16usize, 32usize), (64usize, 128usize)] {
-        let case = (example_data(n_subjects), support_points(n_support_points));
-        let equation_cold = example_equation().disable_cache();
-        let equation_hot = example_equation();
-        let _ = log_likelihood_matrix(&equation_hot, &case.0, &case.1, &assay_error_models, false)
-            .unwrap();
-
-        matrix_group.throughput(Throughput::Elements((n_subjects * n_support_points) as u64));
-        matrix_group.bench_with_input(
-            BenchmarkId::new("cold", format!("{n_subjects}x{n_support_points}")),
-            &case,
-            |b, (data, theta)| {
-                b.iter(|| {
-                    black_box(log_likelihood_matrix(
-                        &equation_cold,
-                        data,
-                        theta,
-                        &assay_error_models,
-                        false,
-                    ))
-                    .unwrap()
-                })
-            },
-        );
-        matrix_group.bench_with_input(
-            BenchmarkId::new("hot", format!("{n_subjects}x{n_support_points}")),
-            &case,
-            |b, (data, theta)| {
-                b.iter(|| {
-                    black_box(log_likelihood_matrix(
-                        &equation_hot,
-                        data,
-                        theta,
-                        &assay_error_models,
-                        false,
-                    ))
-                    .unwrap()
-                })
-            },
-        );
-    }
-    matrix_group.finish();
-
-    let mut batch_group = c.benchmark_group("likelihood/batch");
-    batch_group.sample_size(10);
-    batch_group.warm_up_time(Duration::from_millis(250));
-    batch_group.measurement_time(Duration::from_secs(1));
-
-    for n_subjects in [16usize, 64usize, 256usize] {
-        let data = example_data(n_subjects);
-        let parameters = batch_parameters(n_subjects);
-        let equation_cold = example_equation().disable_cache();
-        let equation_hot = example_equation();
-        let _ = log_likelihood_batch(&equation_hot, &data, &parameters, &residual_error_models)
-            .unwrap();
-
-        batch_group.throughput(Throughput::Elements(n_subjects as u64));
-        batch_group.bench_with_input(BenchmarkId::new("cold", n_subjects), &data, |b, data| {
-            b.iter(|| {
-                black_box(log_likelihood_batch(
-                    &equation_cold,
-                    data,
-                    &parameters,
-                    &residual_error_models,
-                ))
-                .unwrap()
-            })
-        });
-        batch_group.bench_with_input(BenchmarkId::new("hot", n_subjects), &data, |b, data| {
-            b.iter(|| {
-                black_box(log_likelihood_batch(
-                    &equation_hot,
-                    data,
-                    &parameters,
-                    &residual_error_models,
-                ))
-                .unwrap()
-            })
-        });
-    }
-    batch_group.finish();
-}
-
-criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
diff --git a/benches/nca.rs b/benches/nca.rs
index d53413e1..ec7a5220 100644
--- a/benches/nca.rs
+++ b/benches/nca.rs
@@ -1,100 +1,46 @@
-use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use criterion::{criterion_group, criterion_main, Criterion, SamplingMode, Throughput};
 use pharmsol::nca::{NCAOptions, NCA};
 use pharmsol::prelude::*;
 use std::hint::black_box;
 
-/// Build a typical PK subject with 12 time points (oral dose)
-fn typical_oral_subject(id: &str) -> Subject {
-    Subject::builder(id)
+fn typical_oral_subject(id: impl Into<String>, scale: f64) -> Subject {
+    Subject::builder(id.into())
         .bolus(0.0, 100.0, 0)
         .observation(0.0, 0.0, 0)
-        .observation(0.25, 2.5, 0)
-        .observation(0.5, 5.0, 0)
-        .observation(1.0, 8.0, 0)
-        .observation(2.0, 10.0, 0)
-        .observation(4.0, 7.5, 0)
-        .observation(6.0, 5.0, 0)
-        .observation(8.0, 3.5, 0)
-        .observation(12.0, 1.5, 0)
-        .observation(16.0, 0.8, 0)
-        .observation(24.0, 0.2, 0)
-        .observation(36.0, 0.05, 0)
+        .observation(0.25, 2.5 * scale, 0)
+        .observation(0.5, 5.0 * scale, 0)
+        .observation(1.0, 8.0 * scale, 0)
+        .observation(2.0, 10.0 * scale, 0)
+        .observation(4.0, 7.5 * scale, 0)
+        .observation(6.0, 5.0 * scale, 0)
+        .observation(8.0, 3.5 * scale, 0)
+        .observation(12.0, 1.5 * scale, 0)
+        .observation(16.0, 0.8 * scale, 0)
+        .observation(24.0, 0.2 * scale, 0)
+        .observation(36.0, 0.05 * scale, 0)
         .build()
 }
 
-/// Build a population of n subjects with slight variation
 fn build_population(n: usize) -> Data {
     let subjects: Vec<Subject> = (0..n)
-        .map(|i| {
-            let scale = 1.0 + (i as f64 % 7.0) * 0.05; // slight variation
-            Subject::builder(format!("subj_{}", i))
-                .bolus(0.0, 100.0, 0)
-                .observation(0.0, 0.0, 0)
-                .observation(0.25, 2.5 * scale, 0)
-                .observation(0.5, 5.0 * scale, 0)
-                .observation(1.0, 8.0 * scale, 0)
-                .observation(2.0, 10.0 * scale, 0)
-                .observation(4.0, 7.5 * scale, 0)
-                .observation(6.0, 5.0 * scale, 0)
-                .observation(8.0, 3.5 * scale, 0)
-                .observation(12.0, 1.5 * scale, 0)
-                .observation(16.0, 0.8 * scale, 0)
-                .observation(24.0, 0.2 * scale, 0)
-                .observation(36.0, 0.05 * scale, 0)
-                .build()
-        })
+        .map(|i| typical_oral_subject(format!("subj_{i}"), 1.0 + (i as f64 % 7.0) * 0.05))
         .collect();
     Data::new(subjects)
 }
 
-fn bench_single_subject_nca(c: &mut Criterion) {
-    let subject = typical_oral_subject("bench_subj");
+fn criterion_benchmark(c: &mut Criterion) {
+    let data = build_population(128);
     let opts = NCAOptions::default();
 
-    c.bench_function("nca_single_subject", |b| {
-        b.iter(|| {
-            let result = black_box(&subject).nca(black_box(&opts));
-            let _ = black_box(result);
-        });
-    });
-}
-
-fn bench_population_nca(c: &mut Criterion) {
-    let mut group = c.benchmark_group("nca_population");
+    let mut group = c.benchmark_group("nca");
 
-    for size in [10, 100, 500] {
-        let data = build_population(size);
-        let opts = NCAOptions::default();
-
-        group.bench_with_input(BenchmarkId::from_parameter(size), &size, |b, _| {
-            b.iter(|| {
-                let results = black_box(&data).nca_all(black_box(&opts));
-                black_box(results);
-            });
-        });
-    }
-
-    group.finish();
-}
-
-fn bench_observation_metrics(c: &mut Criterion) {
-    use pharmsol::data::event::AUCMethod;
-
-    let subject = typical_oral_subject("bench_subj");
-
-    c.bench_function("nca_auc_cmax_metrics", |b| {
-        b.iter(|| {
-            let auc = black_box(&subject).auc(0, &AUCMethod::Linear);
-            let cmax = black_box(&subject).cmax(0);
-            black_box((auc, cmax));
-        });
+    group.sampling_mode(SamplingMode::Flat);
+    group.throughput(Throughput::Elements(128));
+    group.bench_function("population-128", |b| {
+        b.iter(|| black_box(black_box(&data).nca_all(black_box(&opts))))
     });
+    group.finish();
 }
 
-criterion_group!(
-    benches,
-    bench_single_subject_nca,
-    bench_population_nca,
-    bench_observation_metrics,
-);
+criterion_group!(benches, criterion_benchmark);
 criterion_main!(benches);
diff --git a/benches/ode.rs b/benches/ode.rs
deleted file mode 100644
index c677fe94..00000000
--- a/benches/ode.rs
+++ /dev/null
@@ -1,123 +0,0 @@
-use criterion::{criterion_group, criterion_main, Criterion};
-use pharmsol::*;
-use std::hint::black_box;
-
-fn example_subject() -> Subject {
-    Subject::builder("1")
-        .bolus(0.0, 100.0, 0)
-        .observation(3.0, 0.1, 0)
-        .observation(6.0, 0.4, 0)
-        .observation(12.0, 1.0, 0)
-        .observation(24.0, 1.1, 0)
-        .build()
-}
-
-fn example_subject_covariates() -> Subject {
-    Subject::builder("1")
-        .bolus(0.0, 100.0, 0)
-        .covariate("iron", 0.0, 10.0)
-        .observation(3.0, 0.1, 0)
-        .covariate("iron", 3.0, 40.0)
-        .observation(6.0, 0.4, 0)
-        .covariate("iron", 6.0, 40.0)
-        .observation(12.0, 1.0, 0)
-        .covariate("iron", 12.0, 50.0)
-        .observation(24.0, 1.1, 0)
-        .covariate("iron", 24.0, 100.0)
-        .build()
-}
-
-fn one_compartment() {
-    let subject = example_subject();
-    let ode = equation::ODE::new(
-        |x, p, _t, dx, _b, _rateiv, _cov| {
-            fetch_params!(p, ka, ke, _tlag, _v);
-            dx[0] = -ka * x[0];
-            dx[1] = ka * x[0] - ke * x[1];
-        },
-        |p, _t, _cov| {
-            fetch_params!(p, _ka, _ke, tlag, _v);
-            lag! {0=>tlag}
-        },
-        |_p, _t, _cov| fa! {},
-        |_p, _t, _cov, _x| {},
-        |x, p, _t, _cov, y| {
-            fetch_params!(p, _ka, _ke, _tlag, v);
-            y[0] = x[1] / v;
-        },
-    )
-    .with_nstates(2)
-    .with_nout(1);
-    black_box(
-        ode.estimate_predictions(&subject, &[0.3, 0.5, 0.1, 70.0])
-            .unwrap(),
-    );
-}
-
-fn one_compartment_covariates() {
-    let subject = example_subject_covariates();
-    let ode = equation::ODE::new(
-        |x, p, t, dx, _b, _rateiv, cov| {
-            fetch_cov!(cov, t, iron);
-            fetch_params!(p, ka, ke, _tlag, _v);
-
-            ke = ke * iron.powf(0.75) / 50.0;
-            dx[0] = -ka * x[0];
-            dx[1] = ka * x[0] - ke * x[1];
-        },
-        |p, _t, _cov| {
-            fetch_params!(p, _ka, _ke, tlag, _v);
-            lag! {0=>tlag}
-        },
-        |_p, _t, _cov| fa! {},
-        |_p, _t, _cov, _x| {},
-        |x, p, _t, _cov, y| {
-            fetch_params!(p, _ka, _ke, _tlag, v);
-            y[0] = x[1] / v;
-        },
-    )
-    .with_nstates(2)
-    .with_nout(1);
-    black_box(
-        ode.estimate_predictions(&subject, &[0.3, 0.5, 0.1, 70.0])
-            .unwrap(),
-    );
-}
-
-fn two_compartment() {
-    let subject = example_subject();
-    let ode = equation::ODE::new(
-        |x, p, _t, dx, _b, _rateiv, _cov| {
-            fetch_params!(p, ka, ke, k12, k21, _tlag, _v);
-            dx[0] = -ka * x[0] - k12 * x[0] + k21 * x[1];
-            dx[1] = k12 * x[0] - k21 * x[1] - ke * x[1];
-        },
-        |p, _t, _cov| {
-            fetch_params!(p, _ka, _ke, _k12, _k21, tlag, _v);
-            lag! {0=>tlag}
-        },
-        |_p, _t, _cov| fa! {},
-        |_p, _t, _cov, _x| {},
-        |x, p, _t, _cov, y| {
-            fetch_params!(p, _ka, _ke, _k12, _k21, _tlag, v);
-            y[0] = x[1] / v;
-        },
-    )
-    .with_nstates(2)
-    .with_nout(1);
-    black_box(
-        ode.estimate_predictions(&subject, &[0.3, 0.5, 0.1, 0.04, 0.08, 70.0])
-            .unwrap(),
-    );
-}
-
-fn criterion_benchmark(c: &mut Criterion) {
-    c.bench_function("one_compartment", |b| b.iter(one_compartment));
-    c.bench_function("one_compartment_covariates", |b| {
-        b.iter(one_compartment_covariates)
-    });
-    c.bench_function("two_compartment", |b| b.iter(two_compartment));
-}
-
-criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
diff --git a/benches/performance.rs b/benches/performance.rs
deleted file mode 100644
index 2ab17403..00000000
--- a/benches/performance.rs
+++ /dev/null
@@ -1,55 +0,0 @@
-use criterion::{criterion_group, criterion_main, Criterion};
-use pharmsol::*;
-use std::hint::black_box;
-
-fn example_subject() -> Subject {
-    Subject::builder("id1")
-        .bolus(0.0, 100.0, 0)
-        .repeat(2, 0.5)
-        .observation(0.5, 0.1, 0)
-        .observation(1.0, 0.4, 0)
-        .observation(2.0, 1.0, 0)
-        .observation(2.5, 1.1, 0)
-        .covariate("wt", 0.0, 80.0)
-        .covariate("wt", 1.0, 83.0)
-        .covariate("age", 0.0, 25.0)
-        .build()
-}
-
-fn readme(n: usize) {
-    let subject = example_subject();
-    let ode = equation::ODE::new(
-        |x, p, _t, dx, _b, _rateiv, _cov| {
-            // fetch_cov!(cov, t, _wt, _age);
-            fetch_params!(p, ka, ke, _tlag, _v);
-            //Struct
-            dx[0] = -ka * x[0];
-            dx[1] = ka * x[0] - ke * x[1];
-        },
-        |p, _t, _cov| {
-            fetch_params!(p, _ka, _ke, tlag, _v);
-            lag! {0=>tlag}
-        },
-        |_p, _t, _cov| fa! {},
-        |_p, _t, _cov, _x| {},
-        |x, p, _t, _cov, y| {
-            fetch_params!(p, _ka, _ke, _tlag, v);
-            y[0] = x[1] / v;
-        },
-    )
-    .with_nstates(2)
-    .with_nout(1);
-    for _ in 0..n {
-        let op = ode
-            .estimate_predictions(&subject, &[0.3, 0.5, 0.1, 70.0])
-            .unwrap();
-        black_box(op);
-    }
-}
-
-fn criterion_benchmark(c: &mut Criterion) {
-    c.bench_function("readme 20", |b| b.iter(|| readme(black_box(20))));
-}
-
-criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
diff --git a/benches/predictions.rs b/benches/predictions.rs
new file mode 100644
index 00000000..ae9e3aba
--- /dev/null
+++ b/benches/predictions.rs
@@ -0,0 +1,199 @@
+use criterion::{criterion_group, criterion_main, Criterion, SamplingMode};
+use pharmsol::prelude::*;
+use pharmsol::{Analytical, Cache, ODE};
+use std::hint::black_box;
+
+/// Oral dose with measurements in plasma
+fn oral_subject() -> Subject {
+    Subject::builder("bench_oral")
+        .bolus(0.0, 500.0, "oral")
+        .missing_observation(0.25, "plasma")
+        .missing_observation(0.5, "plasma")
+        .missing_observation(1.0, "plasma")
+        .missing_observation(2.0, "plasma")
+        .missing_observation(4.0, "plasma")
+        .missing_observation(8.0, "plasma")
+        .missing_observation(12.0, "plasma")
+        .missing_observation(24.0, "plasma")
+        .build()
+}
+
+/// Intravenous infusion with measurements in plasma
+fn iv_subject() -> Subject {
+    Subject::builder("bench_iv")
+        .infusion(0.0, 500.0, "iv", 0.5)
+        .missing_observation(0.5, "plasma")
+        .missing_observation(1.0, "plasma")
+        .missing_observation(2.0, "plasma")
+        .missing_observation(4.0, "plasma")
+        .missing_observation(8.0, "plasma")
+        .missing_observation(12.0, "plasma")
+        .missing_observation(24.0, "plasma")
+        .build()
+}
+
+fn analytical_one_cpt() -> Analytical {
+    analytical! {
+        name: "one_cmt_oral",
+        params: [ka, ke, v],
+        states: [gut, central],
+        outputs: [plasma],
+        routes: [
+            bolus(oral) -> gut,
+        ],
+        structure: one_compartment_with_absorption,
+        out: |x, _p, _t, _cov, y| {
+            y[plasma] = x[central] / v;
+        },
+    }
+}
+
+/// Two-compartment intravenous model
+fn analytical_two_cpt() -> Analytical {
+    analytical! {
+        name: "two_cmt_iv",
+        params: [ke, kcp, kpc, v],
+        states: [central, peripheral],
+        outputs: [plasma],
+        routes: [
+            infusion(iv) -> central,
+        ],
+        structure: two_compartments,
+        out: |x, _p, _t, _cov, y| {
+            y[plasma] = x[central] / v;
+        },
+    }
+}
+
+fn ode_one_cpt() -> ODE {
+    ode! {
+        name: "one_cmt_oral",
+        params: [ka, ke, v],
+        states: [gut, central],
+        outputs: [plasma],
+        routes: [
+            bolus(oral) -> gut,
+        ],
+        diffeq: |x, _p, _t, dx, _cov| {
+            dx[gut] = -ka * x[gut];
+            dx[central] = ka * x[gut] - ke * x[central];
+        },
+        out: |x, _p, _t, _cov, y| {
+            y[plasma] = x[central] / v;
+        },
+    }
+}
+
+/// Two-compartment model with infusion
+fn ode_two_cpt() -> ODE {
+    ode! {
+        name: "two_cmt_iv",
+        params: [cl, v, vp, q],
+        states: [central, peripheral],
+        outputs: [plasma],
+        routes: [
+            infusion(iv) -> central,
+        ],
+        diffeq: |x, _p, _t, dx, _cov| {
+            let ke = cl / v;
+            let kcp = q / v;
+            let kpc = q / vp;
+            dx[central] = -ke * x[central] - kcp * x[central] + kpc * x[peripheral];
+            dx[peripheral] = kcp * x[central] - kpc * x[peripheral];
+        },
+        out: |x, _p, _t, _cov, y| {
+            y[plasma] = x[central] / v;
+        },
+    }
+}
+
+fn criterion_benchmark(c: &mut Criterion) {
+    // One-compartment oral
+    let oral = oral_subject();
+    let oral_params = [1.0, 0.1, 50.0];
+    let analytical_1 = analytical_one_cpt();
+    let ode_1_cold = ode_one_cpt().disable_cache();
+    let ode_1_hot = ode_one_cpt();
+    let _ = ode_1_hot
+        .estimate_predictions(&oral, oral_params.as_slice())
+        .unwrap();
+
+    // Two-compartment with infusion
+    let iv = iv_subject();
+    let analytical_2_params = [0.1, 0.05, 0.04, 50.0];
+    let ode_2_params = [5.0, 50.0, 100.0, 10.0];
+    let analytical_2 = analytical_two_cpt();
+    let ode_2_cold = ode_two_cpt().disable_cache();
+    let ode_2_hot = ode_two_cpt();
+    let _ = ode_2_hot
+        .estimate_predictions(&iv, ode_2_params.as_slice())
+        .unwrap();
+
+    let mut group = c.benchmark_group("core/predictions");
+
+    // Per-iteration cost is in the sub-microsecond to tens-of-microseconds
+    // range, so Criterion's defaults (100 samples, 5 s measurement) collect
+    // plenty of iterations. The only override is flat sampling, which keeps
+    // per-iteration timings comparable across runs instead of using the
+    // default linear ramp.
+    group.sampling_mode(SamplingMode::Flat);
+
+    group.bench_function("analytical/one-compartment-oral", |b| {
+        b.iter(|| {
+            black_box(
+                analytical_1
+                    .estimate_predictions(black_box(&oral), black_box(oral_params.as_slice()))
+                    .unwrap(),
+            )
+        })
+    });
+    group.bench_function("ode/one-compartment-oral-cold", |b| {
+        b.iter(|| {
+            black_box(
+                ode_1_cold
+                    .estimate_predictions(black_box(&oral), black_box(oral_params.as_slice()))
+                    .unwrap(),
+            )
+        })
+    });
+    group.bench_function("ode/one-compartment-oral-hot", |b| {
+        b.iter(|| {
+            black_box(
+                ode_1_hot
+                    .estimate_predictions(black_box(&oral), black_box(oral_params.as_slice()))
+                    .unwrap(),
+            )
+        })
+    });
+    group.bench_function("analytical/two-compartment-iv", |b| {
+        b.iter(|| {
+            black_box(
+                analytical_2
+                    .estimate_predictions(black_box(&iv), black_box(analytical_2_params.as_slice()))
+                    .unwrap(),
+            )
+        })
+    });
+    group.bench_function("ode/two-compartment-iv-cold", |b| {
+        b.iter(|| {
+            black_box(
+                ode_2_cold
+                    .estimate_predictions(black_box(&iv), black_box(ode_2_params.as_slice()))
+                    .unwrap(),
+            )
+        })
+    });
+    group.bench_function("ode/two-compartment-iv-hot", |b| {
+        b.iter(|| {
+            black_box(
+                ode_2_hot
+                    .estimate_predictions(black_box(&iv), black_box(ode_2_params.as_slice()))
+                    .unwrap(),
+            )
+        })
+    });
+    group.finish();
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/benches/runtime_matrix.rs b/benches/runtime_matrix.rs
deleted file mode 100644
index 81648b8d..00000000
--- a/benches/runtime_matrix.rs
+++ /dev/null
@@ -1,100 +0,0 @@
-#[path = "../tests/support/runtime_corpus.rs"]
-mod runtime_corpus;
-
-use criterion::{criterion_group, criterion_main, Criterion};
-
-#[cfg(all(
-    feature = "dsl-jit",
-    feature = "dsl-aot",
-    feature = "dsl-aot-load",
-    feature = "dsl-wasm"
-))]
-fn runtime_matrix_benchmark(c: &mut Criterion) {
-    use std::hint::black_box;
-    use std::time::Duration;
-
-    use criterion::{BatchSize, BenchmarkId, SamplingMode};
-    use pharmsol::dsl::WasmCompileCache;
-    use runtime_corpus as corpus;
-    use runtime_corpus::{ArtifactWorkspace, CorpusCase};
-
-    let cases = [CorpusCase::Ode, CorpusCase::Analytical, CorpusCase::Sde];
-
-    let mut compile_group = c.benchmark_group("dsl/runtime-compile");
-    compile_group.sampling_mode(SamplingMode::Flat);
-    compile_group.sample_size(10);
-    compile_group.warm_up_time(Duration::from_millis(500));
-    compile_group.measurement_time(Duration::from_secs(1));
-
-    for case in cases {
-        compile_group.bench_with_input(BenchmarkId::new("jit", case.label()), &case, |b, case| {
-            b.iter(|| black_box(corpus::compile_runtime_jit_model(*case).unwrap()))
-        });
-        compile_group.bench_with_input(
-            BenchmarkId::new("native-aot", case.label()),
-            &case,
-            |b, case| {
-                b.iter_batched(
-                    || ArtifactWorkspace::new().unwrap(),
-                    |workspace| {
-                        black_box(
-                            corpus::compile_runtime_native_aot_model(*case, &workspace).unwrap(),
-                        )
-                    },
-                    BatchSize::PerIteration,
-                )
-            },
-        );
-        compile_group.bench_with_input(BenchmarkId::new("wasm", case.label()), &case, |b, case| {
-            b.iter(|| black_box(corpus::compile_runtime_wasm_model(*case).unwrap()))
-        });
-        compile_group.bench_with_input(
-            BenchmarkId::new("wasm-module", case.label()),
-            &case,
-            |b, case| b.iter(|| black_box(corpus::compile_wasm_module(*case).unwrap())),
-        );
-
-        let cache = WasmCompileCache::default();
-        corpus::compile_wasm_module_with_cache(case, &cache).unwrap();
-        compile_group.bench_function(BenchmarkId::new("wasm-module-cached", case.label()), |b| {
-            b.iter(|| black_box(corpus::compile_wasm_module_with_cache(case, &cache).unwrap()))
-        });
-    }
-    compile_group.finish();
-
-    let mut runtime_group = c.benchmark_group("dsl/runtime-predict");
-    runtime_group.sampling_mode(SamplingMode::Flat);
-    runtime_group.sample_size(10);
-    runtime_group.warm_up_time(Duration::from_millis(500));
-    runtime_group.measurement_time(Duration::from_secs(1));
-
-    for case in cases {
-        let jit = corpus::compile_runtime_jit_model(case).unwrap();
-        runtime_group.bench_function(BenchmarkId::new("jit", case.label()), |b| {
-            b.iter(|| black_box(corpus::estimate_runtime_predictions(case, &jit).unwrap()))
-        });
-
-        let aot_workspace = ArtifactWorkspace::new().unwrap();
-        let aot = corpus::compile_runtime_native_aot_model(case, &aot_workspace).unwrap();
-        runtime_group.bench_function(BenchmarkId::new("native-aot", case.label()), |b| {
-            b.iter(|| black_box(corpus::estimate_runtime_predictions(case, &aot).unwrap()))
-        });
-
-        let wasm = corpus::compile_runtime_wasm_model(case).unwrap();
-        runtime_group.bench_function(BenchmarkId::new("wasm", case.label()), |b| {
-            b.iter(|| black_box(corpus::estimate_runtime_predictions(case, &wasm).unwrap()))
-        });
-    }
-    runtime_group.finish();
-}
-
-#[cfg(not(all(
-    feature = "dsl-jit",
-    feature = "dsl-aot",
-    feature = "dsl-aot-load",
-    feature = "dsl-wasm"
-)))]
-fn runtime_matrix_benchmark(_: &mut Criterion) {}
-
-criterion_group!(benches, runtime_matrix_benchmark);
-criterion_main!(benches);

From 9e181ee01ce9000f66a2d3d821b07453fa0ff814 Mon Sep 17 00:00:00 2001
From: Markus <66058642+mhovd@users.noreply.github.com>
Date: Tue, 12 May 2026 08:56:47 +0200
Subject: [PATCH 2/9] Bring back the runtime matrix

---
 Cargo.toml                |   4 +
 benches/predictions.rs    |  88 ++++----
 benches/runtime_matrix.rs | 409 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 466 insertions(+), 35 deletions(-)
 create mode 100644 benches/runtime_matrix.rs

diff --git a/Cargo.toml b/Cargo.toml
index 2f190b6f..ae598688 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -75,6 +75,10 @@ harness = false
 name = "nca"
 harness = false
 
+[[bench]]
+name = "runtime_matrix"
+harness = false
+
 [[bench]]
 name = "likelihood"
 harness = false
diff --git a/benches/predictions.rs b/benches/predictions.rs
index ae9e3aba..54e08468 100644
--- a/benches/predictions.rs
+++ b/benches/predictions.rs
@@ -3,10 +3,10 @@ use pharmsol::prelude::*;
 use pharmsol::{Analytical, Cache, ODE};
 use std::hint::black_box;
 
-/// Oral dose with measurements in plasma
-fn oral_subject() -> Subject {
-    Subject::builder("bench_oral")
-        .bolus(0.0, 500.0, "oral")
+/// Oral acetaminophen dose with measurements in plasma
+fn acetaminophen_oral_subject() -> Subject {
+    Subject::builder("bench_acetaminophen_oral")
+        .bolus(0.0, 500.0, "acetaminophen_oral")
         .missing_observation(0.25, "plasma")
         .missing_observation(0.5, "plasma")
         .missing_observation(1.0, "plasma")
@@ -18,10 +18,10 @@ fn oral_subject() -> Subject {
         .build()
 }
 
-/// Intravenous infusion with measurements in plasma
-fn iv_subject() -> Subject {
-    Subject::builder("bench_iv")
-        .infusion(0.0, 500.0, "iv", 0.5)
+/// Intravenous acetaminophen infusion with measurements in plasma
+fn acetaminophen_iv_subject() -> Subject {
+    Subject::builder("bench_acetaminophen_iv")
+        .infusion(0.0, 500.0, "acetaminophen_iv", 0.5)
         .missing_observation(0.5, "plasma")
         .missing_observation(1.0, "plasma")
         .missing_observation(2.0, "plasma")
@@ -34,12 +34,12 @@ fn iv_subject() -> Subject {
 
 fn analytical_one_cpt() -> Analytical {
     analytical! {
-        name: "one_cmt_oral",
+        name: "one_cmt_acetaminophen_oral",
         params: [ka, ke, v],
         states: [gut, central],
         outputs: [plasma],
         routes: [
-            bolus(oral) -> gut,
+            bolus(acetaminophen_oral) -> gut,
         ],
         structure: one_compartment_with_absorption,
         out: |x, _p, _t, _cov, y| {
@@ -51,12 +51,12 @@ fn analytical_one_cpt() -> Analytical {
 /// Two-compartment intravenous model
 fn analytical_two_cpt() -> Analytical {
     analytical! {
-        name: "two_cmt_iv",
+        name: "two_cmt_acetaminophen_iv",
         params: [ke, kcp, kpc, v],
         states: [central, peripheral],
         outputs: [plasma],
         routes: [
-            infusion(iv) -> central,
+            infusion(acetaminophen_iv) -> central,
         ],
         structure: two_compartments,
         out: |x, _p, _t, _cov, y| {
@@ -67,12 +67,12 @@ fn analytical_two_cpt() -> Analytical {
 
 fn ode_one_cpt() -> ODE {
     ode! {
-        name: "one_cmt_oral",
+        name: "one_cmt_acetaminophen_oral",
         params: [ka, ke, v],
         states: [gut, central],
         outputs: [plasma],
         routes: [
-            bolus(oral) -> gut,
+            bolus(acetaminophen_oral) -> gut,
         ],
         diffeq: |x, _p, _t, dx, _cov| {
             dx[gut] = -ka * x[gut];
@@ -87,12 +87,12 @@ fn ode_one_cpt() -> ODE {
 /// Two-compartment model with infusion
 fn ode_two_cpt() -> ODE {
     ode! {
-        name: "two_cmt_iv",
+        name: "two_cmt_acetaminophen_iv",
         params: [cl, v, vp, q],
         states: [central, peripheral],
         outputs: [plasma],
         routes: [
-            infusion(iv) -> central,
+            infusion(acetaminophen_iv) -> central,
         ],
         diffeq: |x, _p, _t, dx, _cov| {
             let ke = cl / v;
@@ -108,25 +108,25 @@ fn ode_two_cpt() -> ODE {
 }
 
 fn criterion_benchmark(c: &mut Criterion) {
-    // One-compartment oral
-    let oral = oral_subject();
-    let oral_params = [1.0, 0.1, 50.0];
+    // One-compartment oral acetaminophen
+    let acetaminophen_oral = acetaminophen_oral_subject();
+    let acetaminophen_oral_params = [1.0, 0.1, 50.0];
     let analytical_1 = analytical_one_cpt();
     let ode_1_cold = ode_one_cpt().disable_cache();
     let ode_1_hot = ode_one_cpt();
     let _ = ode_1_hot
-        .estimate_predictions(&oral, oral_params.as_slice())
+        .estimate_predictions(&acetaminophen_oral, acetaminophen_oral_params.as_slice())
         .unwrap();
 
-    // Two-compartment with infusion
-    let iv = iv_subject();
+    // Two-compartment intravenous acetaminophen
+    let acetaminophen_iv = acetaminophen_iv_subject();
     let analytical_2_params = [0.1, 0.05, 0.04, 50.0];
     let ode_2_params = [5.0, 50.0, 100.0, 10.0];
     let analytical_2 = analytical_two_cpt();
     let ode_2_cold = ode_two_cpt().disable_cache();
     let ode_2_hot = ode_two_cpt();
     let _ = ode_2_hot
-        .estimate_predictions(&iv, ode_2_params.as_slice())
+        .estimate_predictions(&acetaminophen_iv, ode_2_params.as_slice())
         .unwrap();
 
     let mut group = c.benchmark_group("core/predictions");
@@ -138,56 +138,74 @@ fn criterion_benchmark(c: &mut Criterion) {
     // default linear ramp.
     group.sampling_mode(SamplingMode::Flat);
 
-    group.bench_function("analytical/one-compartment-oral", |b| {
+    group.bench_function("analytical/one-compartment-acetaminophen-oral", |b| {
         b.iter(|| {
             black_box(
                 analytical_1
-                    .estimate_predictions(black_box(&oral), black_box(oral_params.as_slice()))
+                    .estimate_predictions(
+                        black_box(&acetaminophen_oral),
+                        black_box(acetaminophen_oral_params.as_slice()),
+                    )
                     .unwrap(),
             )
         })
     });
-    group.bench_function("ode/one-compartment-oral-cold", |b| {
+    group.bench_function("ode/one-compartment-acetaminophen-oral-cold", |b| {
         b.iter(|| {
             black_box(
                 ode_1_cold
-                    .estimate_predictions(black_box(&oral), black_box(oral_params.as_slice()))
+                    .estimate_predictions(
+                        black_box(&acetaminophen_oral),
+                        black_box(acetaminophen_oral_params.as_slice()),
+                    )
                     .unwrap(),
             )
         })
     });
-    group.bench_function("ode/one-compartment-oral-hot", |b| {
+    group.bench_function("ode/one-compartment-acetaminophen-oral-hot", |b| {
         b.iter(|| {
             black_box(
                 ode_1_hot
-                    .estimate_predictions(black_box(&oral), black_box(oral_params.as_slice()))
+                    .estimate_predictions(
+                        black_box(&acetaminophen_oral),
+                        black_box(acetaminophen_oral_params.as_slice()),
+                    )
                     .unwrap(),
             )
         })
     });
-    group.bench_function("analytical/two-compartment-iv", |b| {
+    group.bench_function("analytical/two-compartment-acetaminophen-iv", |b| {
         b.iter(|| {
             black_box(
                 analytical_2
-                    .estimate_predictions(black_box(&iv), black_box(analytical_2_params.as_slice()))
+                    .estimate_predictions(
+                        black_box(&acetaminophen_iv),
+                        black_box(analytical_2_params.as_slice()),
+                    )
                     .unwrap(),
             )
         })
     });
-    group.bench_function("ode/two-compartment-iv-cold", |b| {
+    group.bench_function("ode/two-compartment-acetaminophen-iv-cold", |b| {
         b.iter(|| {
             black_box(
                 ode_2_cold
-                    .estimate_predictions(black_box(&iv), black_box(ode_2_params.as_slice()))
+                    .estimate_predictions(
+                        black_box(&acetaminophen_iv),
+                        black_box(ode_2_params.as_slice()),
+                    )
                     .unwrap(),
             )
         })
     });
-    group.bench_function("ode/two-compartment-iv-hot", |b| {
+    group.bench_function("ode/two-compartment-acetaminophen-iv-hot", |b| {
         b.iter(|| {
             black_box(
                 ode_2_hot
-                    .estimate_predictions(black_box(&iv), black_box(ode_2_params.as_slice()))
+                    .estimate_predictions(
+                        black_box(&acetaminophen_iv),
+                        black_box(ode_2_params.as_slice()),
+                    )
                     .unwrap(),
             )
         })
diff --git a/benches/runtime_matrix.rs b/benches/runtime_matrix.rs
new file mode 100644
index 00000000..3ea61e91
--- /dev/null
+++ b/benches/runtime_matrix.rs
@@ -0,0 +1,409 @@
+use criterion::{criterion_group, criterion_main, Criterion};
+
+#[cfg(all(
+    feature = "dsl-jit",
+    feature = "dsl-aot",
+    feature = "dsl-aot-load",
+    feature = "dsl-wasm"
+))]
+mod self_contained {
+    use std::error::Error;
+    use std::io;
+    use std::path::PathBuf;
+
+    use pharmsol::dsl::{
+        self, CompiledRuntimeModel, CompiledWasmModule, RuntimeCompilationTarget,
+        RuntimePredictions, WasmCompileCache,
+    };
+    use pharmsol::{Subject, SubjectBuilderExt};
+    use tempfile::{tempdir, TempDir};
+
+    const ODE_SOURCE: &str = r#"
+name = one_cmt_acetaminophen_oral_iv
+kind = ode
+
+params = ka, cl, v, tlag, f_oral
+covariates = wt@linear
+states = depot, central
+derived = cl_i, ke
+outputs = plasma
+
+bolus(acetaminophen_oral) -> depot
+infusion(acetaminophen_iv) -> central
+
+lag(acetaminophen_oral) = tlag
+fa(acetaminophen_oral) = f_oral
+
+cl_i = cl * pow(wt / 70.0, 0.75)
+ke = cl_i / v
+
+dx(depot) = -ka * depot
+dx(central) = ka * depot - ke * central
+
+out(plasma) = central / v ~ continuous()
+"#;
+
+    const ANALYTICAL_SOURCE: &str = r#"
+name = one_cmt_acetaminophen_oral
+kind = analytical
+
+params = ka, ke, v, tlag, f_oral
+states = depot, central
+outputs = plasma
+
+bolus(acetaminophen_oral) -> depot
+
+lag(acetaminophen_oral) = tlag
+fa(acetaminophen_oral) = f_oral
+
+structure = one_compartment_with_absorption
+
+out(plasma) = central / v ~ continuous()
+"#;
+
+    const SDE_SOURCE: &str = r#"
+name = acetaminophen_iv_sde
+kind = sde
+
+params = ka, ke0, kcp, kpc, vol, ske
+covariates = wt@locf
+states = depot, central, peripheral, ke_latent
+particles = 16
+outputs = plasma
+
+bolus(acetaminophen_iv) -> depot
+
+init(ke_latent) = ke0
+
+dx(depot) = -ka * depot
+dx(central) = ka * depot - (ke_latent + kcp) * central + kpc * peripheral
+dx(peripheral) = kcp * central - kpc * peripheral
+dx(ke_latent) = -ke_latent + ke0
+
+noise(ke_latent) = ske
+
+out(plasma) = central / (vol * wt) ~ continuous()
+"#;
+
+    const SDE_PARTICLE_COUNT: usize = 16;
+
+    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+    pub enum CorpusCase {
+        Ode,
+        Analytical,
+        Sde,
+    }
+
+    impl CorpusCase {
+        pub fn label(self) -> &'static str {
+            match self {
+                Self::Ode => "dsl-ode-one_cmt_acetaminophen_oral_iv",
+                Self::Analytical => "dsl-analytical-one_cmt_acetaminophen_oral",
+                Self::Sde => "dsl-sde-acetaminophen_iv_sde",
+            }
+        }
+
+        fn model_name(self) -> &'static str {
+            match self {
+                Self::Ode => "one_cmt_acetaminophen_oral_iv",
+                Self::Analytical => "one_cmt_acetaminophen_oral",
+                Self::Sde => "acetaminophen_iv_sde",
+            }
+        }
+
+        fn source(self) -> &'static str {
+            match self {
+                Self::Ode => ODE_SOURCE,
+                Self::Analytical => ANALYTICAL_SOURCE,
+                Self::Sde => SDE_SOURCE,
+            }
+        }
+
+        fn support_point(self) -> &'static [f64] {
+            match self {
+                Self::Ode => &[1.2, 5.0, 40.0, 0.5, 0.8],
+                Self::Analytical => &[1.0, 0.15, 25.0, 0.5, 0.8],
+                Self::Sde => &[1.1, 0.2, 0.12, 0.08, 15.0, 0.0],
+            }
+        }
+
+        fn runtime_subject(self, model: &CompiledRuntimeModel) -> Result<Subject, Box<dyn Error>> {
+            model
+                .info()
+                .outputs
+                .iter()
+                .find(|output| output.name == "plasma")
+                .ok_or_else(|| {
+                    io::Error::other(format!("{}: missing plasma output", self.label()))
+                })?;
+
+            let subject = match self {
+                Self::Ode => {
+                    model
+                        .info()
+                        .routes
+                        .iter()
+                        .find(|route| route.name == "acetaminophen_oral")
+                        .ok_or_else(|| {
+                            io::Error::other(format!(
+                                "{}: missing acetaminophen_oral route",
+                                self.label()
+                            ))
+                        })?;
+                    model
+                        .info()
+                        .routes
+                        .iter()
+                        .find(|route| route.name == "acetaminophen_iv")
+                        .ok_or_else(|| {
+                            io::Error::other(format!(
+                                "{}: missing acetaminophen_iv route",
+                                self.label()
+                            ))
+                        })?;
+                    Subject::builder(self.label())
+                        .covariate("wt", 0.0, 70.0)
+                        .bolus(0.0, 120.0, "acetaminophen_oral")
+                        .infusion(6.0, 60.0, "acetaminophen_iv", 2.0)
+                        .missing_observation(0.5, "plasma")
+                        .missing_observation(1.0, "plasma")
+                        .missing_observation(2.0, "plasma")
+                        .missing_observation(6.0, "plasma")
+                        .missing_observation(7.0, "plasma")
+                        .missing_observation(9.0, "plasma")
+                        .build()
+                }
+                Self::Analytical => {
+                    model
+                        .info()
+                        .routes
+                        .iter()
+                        .find(|route| route.name == "acetaminophen_oral")
+                        .ok_or_else(|| {
+                            io::Error::other(format!(
+                                "{}: missing acetaminophen_oral route",
+                                self.label()
+                            ))
+                        })?;
+                    Subject::builder(self.label())
+                        .bolus(0.0, 100.0, "acetaminophen_oral")
+                        .missing_observation(0.5, "plasma")
+                        .missing_observation(1.0, "plasma")
+                        .missing_observation(2.0, "plasma")
+                        .missing_observation(4.0, "plasma")
+                        .build()
+                }
+                Self::Sde => {
+                    model
+                        .info()
+                        .routes
+                        .iter()
+                        .find(|route| route.name == "acetaminophen_iv")
+                        .ok_or_else(|| {
+                            io::Error::other(format!(
+                                "{}: missing acetaminophen_iv route",
+                                self.label()
+                            ))
+                        })?;
+                    Subject::builder(self.label())
+                        .covariate("wt", 0.0, 70.0)
+                        .bolus(0.0, 80.0, "acetaminophen_iv")
+                        .missing_observation(0.5, "plasma")
+                        .missing_observation(1.0, "plasma")
+                        .missing_observation(2.0, "plasma")
+                        .missing_observation(4.0, "plasma")
+                        .build()
+                }
+            };
+
+            Ok(subject)
+        }
+    }
+
+    #[derive(Debug)]
+    pub struct ArtifactWorkspace {
+        tempdir: TempDir,
+    }
+
+    impl ArtifactWorkspace {
+        pub fn new() -> Result<Self, Box<dyn Error>> {
+            Ok(Self {
+                tempdir: tempdir()?,
+            })
+        }
+
+        fn aot_output(&self, stem: &str) -> PathBuf {
+            self.tempdir.path().join(format!("{stem}.pkm"))
+        }
+
+        fn build_root(&self, stem: &str) -> PathBuf {
+            self.tempdir.path().join(stem)
+        }
+    }
+
+    fn adjust_runtime_model(case: CorpusCase, model: CompiledRuntimeModel) -> CompiledRuntimeModel {
+        match (case, model) {
+            (CorpusCase::Sde, CompiledRuntimeModel::Sde(model)) => {
+                CompiledRuntimeModel::Sde(model.with_particles(SDE_PARTICLE_COUNT))
+            }
+            (_, model) => model,
+        }
+    }
+
+    pub fn compile_runtime_jit_model(
+        case: CorpusCase,
+    ) -> Result<CompiledRuntimeModel, Box<dyn Error>> {
+        Ok(adjust_runtime_model(
+            case,
+            dsl::compile_module_source_to_runtime(
+                case.source(),
+                Some(case.model_name()),
+                RuntimeCompilationTarget::Jit,
+                |_, _| {},
+            )?,
+        ))
+    }
+
+    pub fn compile_runtime_native_aot_model(
+        case: CorpusCase,
+        workspace: &ArtifactWorkspace,
+    ) -> Result<CompiledRuntimeModel, Box<dyn Error>> {
+        Ok(adjust_runtime_model(
+            case,
+            dsl::compile_module_source_to_runtime(
+                case.source(),
+                Some(case.model_name()),
+                RuntimeCompilationTarget::NativeAot(
+                    dsl::NativeAotCompileOptions::new(
+                        workspace.build_root(&format!("{}-runtime-aot-build", case.label())),
+                    )
+                    .with_output(workspace.aot_output(&format!("{}-runtime-aot", case.label()))),
+                ),
+                |_, _| {},
+            )?,
+        ))
+    }
+
+    pub fn compile_runtime_wasm_model(
+        case: CorpusCase,
+    ) -> Result<CompiledRuntimeModel, Box<dyn Error>> {
+        Ok(adjust_runtime_model(
+            case,
+            dsl::compile_module_source_to_runtime_wasm(case.source(), Some(case.model_name()))?,
+        ))
+    }
+
+    pub fn compile_wasm_module(case: CorpusCase) -> Result<CompiledWasmModule, Box<dyn Error>> {
+        Ok(dsl::compile_module_source_to_wasm_module(
+            case.source(),
+            Some(case.model_name()),
+        )?)
+    }
+
+    pub fn compile_wasm_module_with_cache(
+        case: CorpusCase,
+        cache: &WasmCompileCache,
+    ) -> Result<CompiledWasmModule, Box<dyn Error>> {
+        Ok(cache.compile_module_source_to_wasm_module(case.source(), Some(case.model_name()))?)
+    }
+
+    pub fn estimate_runtime_predictions(
+        case: CorpusCase,
+        model: &CompiledRuntimeModel,
+    ) -> Result<RuntimePredictions, Box<dyn Error>> {
+        Ok(model.estimate_predictions(&case.runtime_subject(model)?, case.support_point())?)
+    }
+}
+
+#[cfg(all(
+    feature = "dsl-jit",
+    feature = "dsl-aot",
+    feature = "dsl-aot-load",
+    feature = "dsl-wasm"
+))]
+fn runtime_matrix_benchmark(c: &mut Criterion) {
+    use std::hint::black_box;
+    use std::time::Duration;
+
+    use criterion::{BatchSize, BenchmarkId, SamplingMode};
+    use pharmsol::dsl::WasmCompileCache;
+    use self_contained as corpus;
+    use self_contained::{ArtifactWorkspace, CorpusCase};
+
+    let cases = [CorpusCase::Ode, CorpusCase::Analytical, CorpusCase::Sde];
+
+    let mut compile_group = c.benchmark_group("dsl/runtime-compile");
+    compile_group.sampling_mode(SamplingMode::Flat);
+    compile_group.sample_size(10);
+    compile_group.warm_up_time(Duration::from_millis(500));
+    compile_group.measurement_time(Duration::from_secs(1));
+
+    for case in cases {
+        compile_group.bench_with_input(BenchmarkId::new("jit", case.label()), &case, |b, case| {
+            b.iter(|| black_box(corpus::compile_runtime_jit_model(*case).unwrap()))
+        });
+        compile_group.bench_with_input(
+            BenchmarkId::new("native-aot", case.label()),
+            &case,
+            |b, case| {
+                b.iter_batched(
+                    || ArtifactWorkspace::new().unwrap(),
+                    |workspace| {
+                        black_box(
+                            corpus::compile_runtime_native_aot_model(*case, &workspace).unwrap(),
+                        )
+                    },
+                    BatchSize::PerIteration,
+                )
+            },
+        );
+        compile_group.bench_with_input(BenchmarkId::new("wasm", case.label()), &case, |b, case| {
+            b.iter(|| black_box(corpus::compile_runtime_wasm_model(*case).unwrap()))
+        });
+        compile_group.bench_with_input(
+            BenchmarkId::new("wasm-module", case.label()),
+            &case,
+            |b, case| b.iter(|| black_box(corpus::compile_wasm_module(*case).unwrap())),
+        );
+
+        let cache = WasmCompileCache::default();
+        corpus::compile_wasm_module_with_cache(case, &cache).unwrap();
+        compile_group.bench_function(BenchmarkId::new("wasm-module-cached", case.label()), |b| {
+            b.iter(|| black_box(corpus::compile_wasm_module_with_cache(case, &cache).unwrap()))
+        });
+    }
+    compile_group.finish();
+
+    let mut runtime_group = c.benchmark_group("dsl/runtime-predict");
+    runtime_group.sampling_mode(SamplingMode::Flat);
+
+    for case in cases {
+        let jit = corpus::compile_runtime_jit_model(case).unwrap();
+        runtime_group.bench_function(BenchmarkId::new("jit", case.label()), |b| {
+            b.iter(|| black_box(corpus::estimate_runtime_predictions(case, &jit).unwrap()))
+        });
+
+        let aot_workspace = ArtifactWorkspace::new().unwrap();
+        let aot = corpus::compile_runtime_native_aot_model(case, &aot_workspace).unwrap();
+        runtime_group.bench_function(BenchmarkId::new("native-aot", case.label()), |b| {
+            b.iter(|| black_box(corpus::estimate_runtime_predictions(case, &aot).unwrap()))
+        });
+
+        let wasm = corpus::compile_runtime_wasm_model(case).unwrap();
+        runtime_group.bench_function(BenchmarkId::new("wasm", case.label()), |b| {
+            b.iter(|| black_box(corpus::estimate_runtime_predictions(case, &wasm).unwrap()))
+        });
+    }
+    runtime_group.finish();
+}
+
+#[cfg(not(all(
+    feature = "dsl-jit",
+    feature = "dsl-aot",
+    feature = "dsl-aot-load",
+    feature = "dsl-wasm"
+)))]
+fn runtime_matrix_benchmark(_: &mut Criterion) {}
+
+criterion_group!(benches, runtime_matrix_benchmark);
+criterion_main!(benches);

From e18048d35d4e3f30b49fdf4cdd25d11fca6b746c Mon Sep 17 00:00:00 2001
From: Markus <66058642+mhovd@users.noreply.github.com>
Date: Tue, 12 May 2026 11:18:37 +0200
Subject: [PATCH 3/9] Update CI

To run new benchmarks behind feature flag
---
 .github/workflows/base_benchmarks.yml | 2 +-
 .github/workflows/pr_benchmarks.yml   | 2 +-
 Cargo.toml                            | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/base_benchmarks.yml b/.github/workflows/base_benchmarks.yml
index 0353e30b..9b31d3d4 100644
--- a/.github/workflows/base_benchmarks.yml
+++ b/.github/workflows/base_benchmarks.yml
@@ -28,4 +28,4 @@ jobs:
           --err \
           --adapter rust_criterion \
           --github-actions '${{ secrets.GITHUB_TOKEN }}' \
-          cargo bench
+          cargo bench --all-features
diff --git a/.github/workflows/pr_benchmarks.yml b/.github/workflows/pr_benchmarks.yml
index e71bb67b..ce40f86b 100644
--- a/.github/workflows/pr_benchmarks.yml
+++ b/.github/workflows/pr_benchmarks.yml
@@ -29,4 +29,4 @@ jobs:
           --err \
           --adapter rust_criterion \
           --github-actions '${{ secrets.GITHUB_TOKEN }}' \
-          cargo bench
+          cargo bench --all-features
diff --git a/Cargo.toml b/Cargo.toml
index ae598688..9bd72241 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -78,6 +78,7 @@ harness = false
 [[bench]]
 name = "runtime_matrix"
 harness = false
+required-features = ["dsl-jit", "dsl-aot", "dsl-aot-load", "dsl-wasm"]
 
 [[bench]]
 name = "likelihood"

From 14beba10dcf67e8aea241c0e9114fafb7d4dd1b5 Mon Sep 17 00:00:00 2001
From: Markus <66058642+mhovd@users.noreply.github.com>
Date: Tue, 12 May 2026 11:26:55 +0200
Subject: [PATCH 4/9] Update runtime_matrix.rs

---
 benches/runtime_matrix.rs | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/benches/runtime_matrix.rs b/benches/runtime_matrix.rs
index 3ea61e91..86595f16 100644
--- a/benches/runtime_matrix.rs
+++ b/benches/runtime_matrix.rs
@@ -1,11 +1,5 @@
 use criterion::{criterion_group, criterion_main, Criterion};
 
-#[cfg(all(
-    feature = "dsl-jit",
-    feature = "dsl-aot",
-    feature = "dsl-aot-load",
-    feature = "dsl-wasm"
-))]
 mod self_contained {
     use std::error::Error;
     use std::io;
@@ -315,12 +309,6 @@ out(plasma) = central / (vol * wt) ~ continuous()
     }
 }
 
-#[cfg(all(
-    feature = "dsl-jit",
-    feature = "dsl-aot",
-    feature = "dsl-aot-load",
-    feature = "dsl-wasm"
-))]
 fn runtime_matrix_benchmark(c: &mut Criterion) {
     use std::hint::black_box;
     use std::time::Duration;
@@ -397,13 +385,5 @@ fn runtime_matrix_benchmark(c: &mut Criterion) {
     runtime_group.finish();
 }
 
-#[cfg(not(all(
-    feature = "dsl-jit",
-    feature = "dsl-aot",
-    feature = "dsl-aot-load",
-    feature = "dsl-wasm"
-)))]
-fn runtime_matrix_benchmark(_: &mut Criterion) {}
-
 criterion_group!(benches, runtime_matrix_benchmark);
 criterion_main!(benches);

From 7ef1ef4fb2067d6af58b322f01684bfce7889d02 Mon Sep 17 00:00:00 2001
From: Markus <markushh@uio.no>
Date: Wed, 13 May 2026 21:05:42 +0200
Subject: [PATCH 5/9] Add bolus

---
 benches/likelihood.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/benches/likelihood.rs b/benches/likelihood.rs
index 93fc9e70..c8c90d41 100644
--- a/benches/likelihood.rs
+++ b/benches/likelihood.rs
@@ -8,9 +8,9 @@ use std::time::Duration;
 
 fn benchmark_equation() -> ODE {
     equation::ODE::new(
-        |x, p, _t, dx, _b, _rateiv, _cov| {
+        |x, p, _t, dx, b, _rateiv, _cov| {
             fetch_params!(p, ka, ke, _tlag, _v);
-            dx[0] = -ka * x[0];
+            dx[0] = -ka * x[0] + b[0];
             dx[1] = ka * x[0] - ke * x[1];
         },
         |p, _t, _cov| {

From ea895e257f362b793cc69595d7ec37001b407c72 Mon Sep 17 00:00:00 2001
From: Markus <markushh@uio.no>
Date: Thu, 14 May 2026 09:24:44 +0200
Subject: [PATCH 6/9] WIP

---
 Cargo.toml                |  12 +-
 benches/common/mod.rs     | 685 ++++++++++++++++++++++++++++++++++++++
 benches/dsl_matrix.rs     | 355 ++++++++++++++++++++
 benches/likelihood.rs     | 150 ---------
 benches/native_matrix.rs  | 590 ++++++++++++++++++++++++++++++++
 benches/nca.rs            |  46 ---
 benches/predictions.rs    | 217 ------------
 benches/runtime_matrix.rs | 389 ----------------------
 8 files changed, 1632 insertions(+), 812 deletions(-)
 create mode 100644 benches/common/mod.rs
 create mode 100644 benches/dsl_matrix.rs
 delete mode 100644 benches/likelihood.rs
 create mode 100644 benches/native_matrix.rs
 delete mode 100644 benches/nca.rs
 delete mode 100644 benches/predictions.rs
 delete mode 100644 benches/runtime_matrix.rs

diff --git a/Cargo.toml b/Cargo.toml
index 9bd72241..faa35d37 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -68,18 +68,10 @@ webbrowser = "1.2.1"
 bench = false
 
 [[bench]]
-name = "predictions"
+name = "native_matrix"
 harness = false
 
 [[bench]]
-name = "nca"
-harness = false
-
-[[bench]]
-name = "runtime_matrix"
+name = "dsl_matrix"
 harness = false
 required-features = ["dsl-jit", "dsl-aot", "dsl-aot-load", "dsl-wasm"]
-
-[[bench]]
-name = "likelihood"
-harness = false
diff --git a/benches/common/mod.rs b/benches/common/mod.rs
new file mode 100644
index 00000000..0aa74fb8
--- /dev/null
+++ b/benches/common/mod.rs
@@ -0,0 +1,685 @@
+//! Shared bench fixtures for the native + DSL benchmark suites.
+//!
+//! This module centralizes the model definitions, subjects, parameter vectors,
+//! and error-model fixtures that every backend variant in `benches/native_matrix.rs`
+//! and `benches/dsl_matrix.rs` reuses. The goal is to keep all backends
+//! (handwritten, macro, dsl-jit, dsl-aot, dsl-wasm) measuring **the same**
+//! model, subject, and parameters so the only thing that differs between
+//! cells is the engine itself.
+//!
+//! Two workloads are exposed:
+//! - [`Workload::Short`]: 1-compartment, single 100 mg PO bolus at t=0,
+//!   9 observations spread across 12 hours.
+//! - [`Workload::Repeat`]: 2-compartment, 100 mg IV bolus q12h × 10
+//!   (doses at t=0,12,…,108), 14 observations spread across 120 hours.
+//!
+//! For each workload three solver kinds (ODE, Analytical, SDE) and three
+//! authoring/backend flavors (handwritten, macro, DSL-source) are provided.
+//!
+//! Some helpers (`SDE_PARTICLES`, `matrix_data`, `support_points`) are
+//! deliberately public — both bench binaries consume them, but cargo compiles
+//! each bench separately so unused helpers would trigger dead-code lints in
+//! one binary or the other. We silence those with `#![allow(dead_code)]`.
+
+#![allow(dead_code)]
+
+use ndarray::Array2;
+use pharmsol::prelude::*;
+use pharmsol::simulator::equation::analytical::{
+    one_compartment_with_absorption, two_compartments,
+};
+use pharmsol::{
+    equation::{self, Route},
+    Analytical, ResidualErrorModel, ResidualErrorModels, ODE, SDE,
+};
+
+/// Build the `ModelMetadata` for `(workload, kind)`. Handwritten factories
+/// attach this so they resolve string route labels (`"po"`/`"iv"`) and output
+/// labels (`"plasma"`) the same way the `ode!` / `analytical!` / `sde!`
+/// macros and the DSL do.
+fn model_metadata(workload: Workload, kind: SolverKind) -> equation::ModelMetadata {
+    let name = match (workload, kind) {
+        (Workload::Short, SolverKind::Ode) => "bench_one_cpt_po_ode",
+        (Workload::Short, SolverKind::Analytical) => "bench_one_cpt_po_analytical",
+        (Workload::Short, SolverKind::Sde) => "bench_one_cpt_po_sde",
+        (Workload::Repeat, SolverKind::Ode) => "bench_two_cpt_iv_ode",
+        (Workload::Repeat, SolverKind::Analytical) => "bench_two_cpt_iv_analytical",
+        (Workload::Repeat, SolverKind::Sde) => "bench_two_cpt_iv_sde",
+    };
+    match workload {
+        Workload::Short => {
+            let params: &[&str] = match kind {
+                SolverKind::Sde => &["ka", "ke", "v", "sigma_ke"],
+                _ => &["ka", "ke", "v"],
+            };
+            equation::metadata::new(name)
+                .parameters(params.iter().copied())
+                .states(["depot", "central"])
+                .outputs(["plasma"])
+                .route(
+                    Route::bolus("po")
+                        .to_state("depot")
+                        .inject_input_to_destination(),
+                )
+        }
+        Workload::Repeat => {
+            let params: &[&str] = match kind {
+                SolverKind::Sde => &["ke", "kcp", "kpc", "v", "sigma_ke"],
+                _ => &["ke", "kcp", "kpc", "v"],
+            };
+            equation::metadata::new(name)
+                .parameters(params.iter().copied())
+                .states(["central", "peripheral"])
+                .outputs(["plasma"])
+                .route(
+                    Route::bolus("iv")
+                        .to_state("central")
+                        .inject_input_to_destination(),
+                )
+        }
+    }
+}
+
+/// SDE particle count used across every SDE bench cell. Kept low so SDE
+/// wall-clock stays comparable to ODE/Analytical at the same workload.
+pub const SDE_PARTICLES: usize = 16;
+
+/// Two scenarios that every backend variant must implement identically.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Workload {
+    /// 1-compartment, single 100 mg PO bolus, 12 h horizon.
+    Short,
+    /// 2-compartment, 100 mg IV bolus q12h × 10, 120 h horizon.
+    Repeat,
+}
+
+impl Workload {
+    pub fn label(self) -> &'static str {
+        match self {
+            Self::Short => "1cpt-12h-po",
+            Self::Repeat => "2cpt-120h-q12h",
+        }
+    }
+
+    pub fn all() -> [Workload; 2] {
+        [Workload::Short, Workload::Repeat]
+    }
+}
+
+/// Solver kinds covered by the matrix.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum SolverKind {
+    Ode,
+    Analytical,
+    Sde,
+}
+
+impl SolverKind {
+    pub fn label(self) -> &'static str {
+        match self {
+            Self::Ode => "ode",
+            Self::Analytical => "analytical",
+            Self::Sde => "sde",
+        }
+    }
+
+    pub fn all() -> [SolverKind; 3] {
+        [SolverKind::Ode, SolverKind::Analytical, SolverKind::Sde]
+    }
+}
+
+// ───────────────────────────── Subjects ──────────────────────────────
+
+/// Time grid for the short workload (1-cpt, 12 h). 9 sampling points.
+const SHORT_TIMES: &[f64] = &[0.25, 0.5, 1.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0];
+
+/// Synthetic concentration values for the short workload. These do not need to
+/// match any specific model exactly — they only feed into the likelihood
+/// calculation so that we exercise the same code path across backends.
+const SHORT_OBS: &[f64] = &[0.50, 0.90, 1.60, 2.40, 2.10, 1.50, 1.05, 0.72, 0.48];
+
+/// Time grid for the repeat workload (2-cpt, 120 h). 14 sampling points.
+const REPEAT_TIMES: &[f64] = &[
+    0.5, 2.0, 6.0, 10.0, 14.0, 24.0, 36.0, 48.0, 60.0, 72.0, 84.0, 96.0, 108.0, 120.0,
+];
+
+const REPEAT_OBS: &[f64] = &[
+    1.80, 1.45, 1.10, 0.90, 1.30, 1.60, 1.55, 1.50, 1.48, 1.45, 1.43, 1.42, 1.41, 0.95,
+];
+
+/// Build a subject for the given workload, using `missing_observation` so it
+/// can drive `estimate_predictions` benchmarks without paying for observation
+/// allocation.
+pub fn subject_for_predictions(workload: Workload) -> Subject {
+    let id = format!("{}-pred", workload.label());
+    let mut builder = Subject::builder(id);
+    match workload {
+        Workload::Short => {
+            builder = builder.bolus(0.0, 100.0, "po");
+            for &t in SHORT_TIMES {
+                builder = builder.missing_observation(t, "plasma");
+            }
+        }
+        Workload::Repeat => {
+            for dose in 0..10 {
+                let t = dose as f64 * 12.0;
+                builder = builder.bolus(t, 100.0, "iv");
+            }
+            for &t in REPEAT_TIMES {
+                builder = builder.missing_observation(t, "plasma");
+            }
+        }
+    }
+    builder.build()
+}
+
+/// Build a subject for the given workload with real observations, suitable
+/// for `estimate_log_likelihood` and `log_likelihood_matrix` benchmarks.
+pub fn subject_for_likelihood(workload: Workload) -> Subject {
+    let id = format!("{}-lik", workload.label());
+    let mut builder = Subject::builder(id);
+    match workload {
+        Workload::Short => {
+            builder = builder.bolus(0.0, 100.0, "po");
+            for (&t, &y) in SHORT_TIMES.iter().zip(SHORT_OBS.iter()) {
+                builder = builder.observation(t, y, "plasma");
+            }
+        }
+        Workload::Repeat => {
+            for dose in 0..10 {
+                let t = dose as f64 * 12.0;
+                builder = builder.bolus(t, 100.0, "iv");
+            }
+            for (&t, &y) in REPEAT_TIMES.iter().zip(REPEAT_OBS.iter()) {
+                builder = builder.observation(t, y, "plasma");
+            }
+        }
+    }
+    builder.build()
+}
+
+/// Population dataset for `log_likelihood_matrix`. `n` subjects share the
+/// same template with small per-subject perturbations on observation values
+/// (mirrors the pattern used in the original `benches/likelihood.rs`).
+pub fn matrix_data(workload: Workload, n: usize) -> Data {
+    let subjects = (0..n)
+        .map(|i| {
+            let offset = i as f64 * 0.01;
+            let id = format!("{}-{i:03}", workload.label());
+            let mut builder = Subject::builder(id);
+            match workload {
+                Workload::Short => {
+                    builder = builder.bolus(0.0, 100.0, "po");
+                    for (&t, &y) in SHORT_TIMES.iter().zip(SHORT_OBS.iter()) {
+                        builder = builder.observation(t, y + offset, "plasma");
+                    }
+                }
+                Workload::Repeat => {
+                    for dose in 0..10 {
+                        let t = dose as f64 * 12.0;
+                        builder = builder.bolus(t, 100.0, "iv");
+                    }
+                    for (&t, &y) in REPEAT_TIMES.iter().zip(REPEAT_OBS.iter()) {
+                        builder = builder.observation(t, y + offset, "plasma");
+                    }
+                }
+            }
+            builder.build()
+        })
+        .collect();
+    Data::new(subjects)
+}
+
+// ───────────────────────────── Parameters ────────────────────────────
+
+/// Reference parameter vector for the given (workload, solver) cell.
+///
+/// **Short / ODE**: `[ka, ke, v]` = `[1.0, 0.2, 50.0]`
+/// **Short / Analytical**: `[ka, ke, v]` = `[1.0, 0.2, 50.0]`
+/// **Short / SDE**: `[ka, ke, v, sigma_ke]` = `[1.0, 0.2, 50.0, 0.05]`
+/// **Repeat / ODE**: `[ke, kcp, kpc, v]` = `[0.10, 0.05, 0.04, 50.0]`
+/// **Repeat / Analytical**: `[ke, kcp, kpc, v]` = `[0.10, 0.05, 0.04, 50.0]`
+/// **Repeat / SDE**: `[ke, kcp, kpc, v, sigma_ke]` = `[0.10, 0.05, 0.04, 50.0, 0.01]`
+pub fn params(workload: Workload, kind: SolverKind) -> Vec<f64> {
+    match (workload, kind) {
+        (Workload::Short, SolverKind::Ode | SolverKind::Analytical) => vec![1.0, 0.2, 50.0],
+        (Workload::Short, SolverKind::Sde) => vec![1.0, 0.2, 50.0, 0.05],
+        (Workload::Repeat, SolverKind::Ode | SolverKind::Analytical) => {
+            vec![0.10, 0.05, 0.04, 50.0]
+        }
+        (Workload::Repeat, SolverKind::Sde) => vec![0.10, 0.05, 0.04, 50.0, 0.01],
+    }
+}
+
+/// Support-point grid for `log_likelihood_matrix`. Shape: `(n, nparams)`.
+/// Rows are small perturbations of [`params`] so every grid cell stays inside
+/// a numerically well-behaved region.
+pub fn support_points(workload: Workload, kind: SolverKind, n: usize) -> Array2<f64> {
+    let base = params(workload, kind);
+    let nparams = base.len();
+    Array2::from_shape_fn((n, nparams), |(row, col)| {
+        let p = base[col];
+        let perturbation = (row as f64) * 0.001 * p.abs().max(1e-3);
+        p + perturbation
+    })
+}
+
+// ───────────────────────────── Error models ──────────────────────────
+
+/// Assay error model for `estimate_log_likelihood` / `log_likelihood_matrix`.
+pub fn assay_error_models() -> AssayErrorModels {
+    AssayErrorModels::new()
+        .add(
+            "plasma",
+            AssayErrorModel::additive(ErrorPoly::new(0.1, 0.1, 0.0, 0.0), 0.0),
+        )
+        .expect("plasma assay error model")
+}
+
+/// Residual error model (kept for symmetry with the existing benches even
+/// though the new suite does not currently exercise `log_likelihood_batch`).
+pub fn residual_error_models() -> ResidualErrorModels {
+    // `ResidualErrorModels` indexes outputs by their dense `usize` index
+    // (not by label), so this assumes a single output equation at index 0.
+    ResidualErrorModels::new().add(0, ResidualErrorModel::constant(0.2))
+}
+
+// ───────────────────────────── Handwritten factories ─────────────────
+
+pub fn handwritten_ode(workload: Workload) -> ODE {
+    match workload {
+        Workload::Short => {
+            // 1-cpt, oral absorption. Params = [ka, ke, v].
+            ODE::new(
+                |x, p, _t, dx, b, _rateiv, _cov| {
+                    let ka = p[0];
+                    let ke = p[1];
+                    dx[0] = -ka * x[0] + b[0];
+                    dx[1] = ka * x[0] - ke * x[1];
+                },
+                |_p, _t, _cov| lag! {},
+                |_p, _t, _cov| fa! {},
+                |_p, _t, _cov, _x| {},
+                |x, p, _t, _cov, y| {
+                    let v = p[2];
+                    y[0] = x[1] / v;
+                },
+            )
+            .with_nstates(2)
+            .with_ndrugs(1)
+            .with_nout(1)
+            .with_metadata(model_metadata(workload, SolverKind::Ode))
+            .expect("short ODE metadata validates")
+        }
+        Workload::Repeat => {
+            // 2-cpt IV bolus. Params = [ke, kcp, kpc, v].
+            ODE::new(
+                |x, p, _t, dx, b, _rateiv, _cov| {
+                    let ke = p[0];
+                    let kcp = p[1];
+                    let kpc = p[2];
+                    dx[0] = -ke * x[0] - kcp * x[0] + kpc * x[1] + b[0];
+                    dx[1] = kcp * x[0] - kpc * x[1];
+                },
+                |_p, _t, _cov| lag! {},
+                |_p, _t, _cov| fa! {},
+                |_p, _t, _cov, _x| {},
+                |x, p, _t, _cov, y| {
+                    let v = p[3];
+                    y[0] = x[0] / v;
+                },
+            )
+            .with_nstates(2)
+            .with_ndrugs(1)
+            .with_nout(1)
+            .with_metadata(model_metadata(workload, SolverKind::Ode))
+            .expect("repeat ODE metadata validates")
+        }
+    }
+}
+
+pub fn handwritten_analytical(workload: Workload) -> Analytical {
+    match workload {
+        Workload::Short => Analytical::new(
+            one_compartment_with_absorption,
+            |_x, _t, _cov| {},
+            |_p, _t, _cov| lag! {},
+            |_p, _t, _cov| fa! {},
+            |_p, _t, _cov, _x| {},
+            |x, p, _t, _cov, y| {
+                let v = p[2];
+                y[0] = x[1] / v;
+            },
+        )
+        .with_nstates(2)
+        .with_ndrugs(1)
+        .with_nout(1)
+        .with_metadata(model_metadata(workload, SolverKind::Analytical))
+        .expect("short Analytical metadata validates"),
+        Workload::Repeat => Analytical::new(
+            two_compartments,
+            |_x, _t, _cov| {},
+            |_p, _t, _cov| lag! {},
+            |_p, _t, _cov| fa! {},
+            |_p, _t, _cov, _x| {},
+            |x, p, _t, _cov, y| {
+                let v = p[3];
+                y[0] = x[0] / v;
+            },
+        )
+        .with_nstates(2)
+        .with_ndrugs(1)
+        .with_nout(1)
+        .with_metadata(model_metadata(workload, SolverKind::Analytical))
+        .expect("repeat Analytical metadata validates"),
+    }
+}
+
+pub fn handwritten_sde(workload: Workload) -> SDE {
+    match workload {
+        Workload::Short => {
+            // Drift mirrors handwritten_ode short. Diffusion = sigma_ke on central.
+            // Params = [ka, ke, v, sigma_ke].
+            SDE::new(
+                |x, p, _t, dx, b, _cov| {
+                    let ka = p[0];
+                    let ke = p[1];
+                    dx[0] = -ka * x[0] + b[0];
+                    dx[1] = ka * x[0] - ke * x[1];
+                },
+                |p, sigma| {
+                    let sigma_ke = p[3];
+                    sigma[0] = 0.0;
+                    sigma[1] = sigma_ke;
+                },
+                |_p, _t, _cov| lag! {},
+                |_p, _t, _cov| fa! {},
+                |_p, _t, _cov, _x| {},
+                |x, p, _t, _cov, y| {
+                    let v = p[2];
+                    y[0] = x[1] / v;
+                },
+                SDE_PARTICLES,
+            )
+            .with_nstates(2)
+            .with_ndrugs(1)
+            .with_nout(1)
+            .with_metadata(model_metadata(workload, SolverKind::Sde))
+            .expect("short SDE metadata validates")
+        }
+        Workload::Repeat => {
+            // 2-cpt IV bolus SDE. Params = [ke, kcp, kpc, v, sigma_ke].
+            SDE::new(
+                |x, p, _t, dx, b, _cov| {
+                    let ke = p[0];
+                    let kcp = p[1];
+                    let kpc = p[2];
+                    dx[0] = -ke * x[0] - kcp * x[0] + kpc * x[1] + b[0];
+                    dx[1] = kcp * x[0] - kpc * x[1];
+                },
+                |p, sigma| {
+                    let sigma_ke = p[4];
+                    sigma[0] = sigma_ke;
+                    sigma[1] = 0.0;
+                },
+                |_p, _t, _cov| lag! {},
+                |_p, _t, _cov| fa! {},
+                |_p, _t, _cov, _x| {},
+                |x, p, _t, _cov, y| {
+                    let v = p[3];
+                    y[0] = x[0] / v;
+                },
+                SDE_PARTICLES,
+            )
+            .with_nstates(2)
+            .with_ndrugs(1)
+            .with_nout(1)
+            .with_metadata(model_metadata(workload, SolverKind::Sde))
+            .expect("repeat SDE metadata validates")
+        }
+    }
+}
+
+// ───────────────────────────── Macro factories ───────────────────────
+
+pub fn macro_ode(workload: Workload) -> ODE {
+    match workload {
+        Workload::Short => ode! {
+            name: "bench_one_cpt_po_ode",
+            params: [ka, ke, v],
+            states: [depot, central],
+            outputs: [plasma],
+            routes: [
+                bolus(po) -> depot,
+            ],
+            diffeq: |x, _p, _t, dx, _cov| {
+                dx[depot] = -ka * x[depot];
+                dx[central] = ka * x[depot] - ke * x[central];
+            },
+            out: |x, _p, _t, _cov, y| {
+                y[plasma] = x[central] / v;
+            },
+        },
+        Workload::Repeat => ode! {
+            name: "bench_two_cpt_iv_ode",
+            params: [ke, kcp, kpc, v],
+            states: [central, peripheral],
+            outputs: [plasma],
+            routes: [
+                bolus(iv) -> central,
+            ],
+            diffeq: |x, _p, _t, dx, _cov| {
+                dx[central] = -ke * x[central] - kcp * x[central] + kpc * x[peripheral];
+                dx[peripheral] = kcp * x[central] - kpc * x[peripheral];
+            },
+            out: |x, _p, _t, _cov, y| {
+                y[plasma] = x[central] / v;
+            },
+        },
+    }
+}
+
+pub fn macro_analytical(workload: Workload) -> Analytical {
+    match workload {
+        Workload::Short => analytical! {
+            name: "bench_one_cpt_po_analytical",
+            params: [ka, ke, v],
+            states: [depot, central],
+            outputs: [plasma],
+            routes: [
+                bolus(po) -> depot,
+            ],
+            structure: one_compartment_with_absorption,
+            out: |x, _p, _t, _cov, y| {
+                y[plasma] = x[central] / v;
+            },
+        },
+        Workload::Repeat => analytical! {
+            name: "bench_two_cpt_iv_analytical",
+            params: [ke, kcp, kpc, v],
+            states: [central, peripheral],
+            outputs: [plasma],
+            routes: [
+                bolus(iv) -> central,
+            ],
+            structure: two_compartments,
+            out: |x, _p, _t, _cov, y| {
+                y[plasma] = x[central] / v;
+            },
+        },
+    }
+}
+
+pub fn macro_sde(workload: Workload) -> SDE {
+    match workload {
+        Workload::Short => sde! {
+            name: "bench_one_cpt_po_sde",
+            params: [ka, ke, v, sigma_ke],
+            states: [depot, central],
+            outputs: [plasma],
+            particles: 16,
+            routes: [
+                bolus(po) -> depot,
+            ],
+            drift: |x, _p, _t, dx, _cov| {
+                dx[depot] = -ka * x[depot];
+                dx[central] = ka * x[depot] - ke * x[central];
+            },
+            diffusion: |_p, sigma| {
+                sigma[central] = sigma_ke;
+            },
+            out: |x, _p, _t, _cov, y| {
+                y[plasma] = x[central] / v;
+            },
+        },
+        Workload::Repeat => sde! {
+            name: "bench_two_cpt_iv_sde",
+            params: [ke, kcp, kpc, v, sigma_ke],
+            states: [central, peripheral],
+            outputs: [plasma],
+            particles: 16,
+            routes: [
+                bolus(iv) -> central,
+            ],
+            drift: |x, _p, _t, dx, _cov| {
+                dx[central] = -ke * x[central] - kcp * x[central] + kpc * x[peripheral];
+                dx[peripheral] = kcp * x[central] - kpc * x[peripheral];
+            },
+            diffusion: |_p, sigma| {
+                sigma[central] = sigma_ke;
+            },
+            out: |x, _p, _t, _cov, y| {
+                y[plasma] = x[central] / v;
+            },
+        },
+    }
+}
+
+// ───────────────────────────── DSL source strings ────────────────────
+
+/// DSL source for `(workload, kind)`. Consumed by `benches/dsl_matrix.rs`
+/// via `compile_module_source_to_runtime` (JIT/AOT) and
+/// `compile_module_source_to_runtime_wasm` (WASM).
+pub fn dsl_source(workload: Workload, kind: SolverKind) -> &'static str {
+    match (workload, kind) {
+        (Workload::Short, SolverKind::Ode) => SHORT_ODE_DSL,
+        (Workload::Short, SolverKind::Analytical) => SHORT_ANALYTICAL_DSL,
+        (Workload::Short, SolverKind::Sde) => SHORT_SDE_DSL,
+        (Workload::Repeat, SolverKind::Ode) => REPEAT_ODE_DSL,
+        (Workload::Repeat, SolverKind::Analytical) => REPEAT_ANALYTICAL_DSL,
+        (Workload::Repeat, SolverKind::Sde) => REPEAT_SDE_DSL,
+    }
+}
+
+/// DSL `name = ...` field for `(workload, kind)`. Used by `Some(model_name)`
+/// arguments on the DSL compile entrypoints.
+pub fn dsl_model_name(workload: Workload, kind: SolverKind) -> &'static str {
+    match (workload, kind) {
+        (Workload::Short, SolverKind::Ode) => "bench_one_cpt_po_ode",
+        (Workload::Short, SolverKind::Analytical) => "bench_one_cpt_po_analytical",
+        (Workload::Short, SolverKind::Sde) => "bench_one_cpt_po_sde",
+        (Workload::Repeat, SolverKind::Ode) => "bench_two_cpt_iv_ode",
+        (Workload::Repeat, SolverKind::Analytical) => "bench_two_cpt_iv_analytical",
+        (Workload::Repeat, SolverKind::Sde) => "bench_two_cpt_iv_sde",
+    }
+}
+
+const SHORT_ODE_DSL: &str = r#"
+name = bench_one_cpt_po_ode
+kind = ode
+
+params = ka, ke, v
+states = depot, central
+outputs = plasma
+
+bolus(po) -> depot
+
+dx(depot) = -ka * depot
+dx(central) = ka * depot - ke * central
+
+out(plasma) = central / v ~ continuous()
+"#;
+
+const SHORT_ANALYTICAL_DSL: &str = r#"
+name = bench_one_cpt_po_analytical
+kind = analytical
+
+params = ka, ke, v
+states = depot, central
+outputs = plasma
+
+bolus(po) -> depot
+
+structure = one_compartment_with_absorption
+
+out(plasma) = central / v ~ continuous()
+"#;
+
+const SHORT_SDE_DSL: &str = r#"
+name = bench_one_cpt_po_sde
+kind = sde
+
+params = ka, ke, v, sigma_ke
+states = depot, central
+particles = 16
+outputs = plasma
+
+bolus(po) -> depot
+
+dx(depot) = -ka * depot
+dx(central) = ka * depot - ke * central
+
+noise(central) = sigma_ke
+
+out(plasma) = central / v ~ continuous()
+"#;
+
+const REPEAT_ODE_DSL: &str = r#"
+name = bench_two_cpt_iv_ode
+kind = ode
+
+params = ke, kcp, kpc, v
+states = central, peripheral
+outputs = plasma
+
+bolus(iv) -> central
+
+dx(central) = -ke * central - kcp * central + kpc * peripheral
+dx(peripheral) = kcp * central - kpc * peripheral
+
+out(plasma) = central / v ~ continuous()
+"#;
+
+const REPEAT_ANALYTICAL_DSL: &str = r#"
+name = bench_two_cpt_iv_analytical
+kind = analytical
+
+params = ke, kcp, kpc, v
+states = central, peripheral
+outputs = plasma
+
+bolus(iv) -> central
+
+structure = two_compartments
+
+out(plasma) = central / v ~ continuous()
+"#;
+
+const REPEAT_SDE_DSL: &str = r#"
+name = bench_two_cpt_iv_sde
+kind = sde
+
+params = ke, kcp, kpc, v, sigma_ke
+states = central, peripheral
+particles = 16
+outputs = plasma
+
+bolus(iv) -> central
+
+dx(central) = -ke * central - kcp * central + kpc * peripheral
+dx(peripheral) = kcp * central - kpc * peripheral
+
+noise(central) = sigma_ke
+
+out(plasma) = central / v ~ continuous()
+"#;
diff --git a/benches/dsl_matrix.rs b/benches/dsl_matrix.rs
new file mode 100644
index 00000000..753ecb84
--- /dev/null
+++ b/benches/dsl_matrix.rs
@@ -0,0 +1,355 @@
+//! DSL backend benchmark matrix (feature-gated).
+//!
+//! Covers the three DSL backends — JIT, native AoT, WASM — across the same
+//! two workloads and three solver kinds as `native_matrix.rs`.
+//!
+//! Coverage notes:
+//! - Predictions: every (workload, kind, backend) cell.
+//! - Log-likelihood + likelihood-matrix: **ODE only** across all three
+//!   backends. The DSL ODE runtime model is `RuntimeOdeModel = NativeOdeModel`
+//!   (see `src/dsl/runtime.rs:117`), and `NativeOdeModel` implements both the
+//!   `Equation` and `Cache` traits. `NativeAnalyticalModel` and
+//!   `NativeSdeModel` do not, so analytical/SDE backends remain
+//!   predictions-only on this bench surface.
+//! - Hot/cold cache: ODE only, via `.disable_cache()` on the extracted
+//!   `NativeOdeModel`. Analytical/SDE skip the cache axis.
+//! - Compile-time: one bench per (workload, kind, backend) that measures the
+//!   `source -> CompiledRuntimeModel` pipeline end-to-end.
+//!
+//! Bench id format:
+//! - `dsl/compile`               → `{workload}/{kind}/{backend}`
+//! - `dsl/predictions`           → `{workload}/{kind}/{backend}/{cache}` (ODE)
+//!                                 `{workload}/{kind}/{backend}` (Ana/SDE)
+//! - `dsl/log-likelihood`        → `{workload}/ode/{backend}/{cache}`
+//! - `dsl/likelihood-matrix`     → `{workload}/ode/{backend}`
+
+use std::hint::black_box;
+use std::path::PathBuf;
+use std::time::Duration;
+
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, SamplingMode};
+use tempfile::TempDir;
+
+use pharmsol::dsl::{
+    compile_module_source_to_runtime, CompiledRuntimeModel, NativeAotCompileOptions, NativeOdeModel,
+    RuntimeCompilationTarget,
+};
+use pharmsol::prelude::*;
+use pharmsol::Cache;
+
+mod common;
+use common::{
+    assay_error_models, dsl_model_name, dsl_source, matrix_data, params, subject_for_likelihood,
+    subject_for_predictions, support_points, SolverKind, Workload,
+};
+
+const MATRIX_N_SUBJECTS: usize = 32;
+const MATRIX_N_SUPPORT: usize = 64;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum Backend {
+    Jit,
+    Aot,
+    Wasm,
+}
+
+impl Backend {
+    fn label(self) -> &'static str {
+        match self {
+            Self::Jit => "dsl-jit",
+            Self::Aot => "dsl-aot",
+            Self::Wasm => "dsl-wasm",
+        }
+    }
+
+    fn all() -> [Backend; 3] {
+        [Backend::Jit, Backend::Aot, Backend::Wasm]
+    }
+}
+
+#[derive(Debug, Clone, Copy)]
+enum CacheState {
+    Hot,
+    Cold,
+}
+
+impl CacheState {
+    fn label(self) -> &'static str {
+        match self {
+            Self::Hot => "hot",
+            Self::Cold => "cold",
+        }
+    }
+
+    fn all() -> [CacheState; 2] {
+        [CacheState::Hot, CacheState::Cold]
+    }
+}
+
+/// Per-bench AoT workspace. One `TempDir` for the whole bench binary; each
+/// compile call uses a fresh subdirectory so artifacts don't clash.
+struct AotWorkspace {
+    root: TempDir,
+    counter: std::cell::Cell<usize>,
+}
+
+impl AotWorkspace {
+    fn new() -> Self {
+        Self {
+            root: tempfile::Builder::new()
+                .prefix("pharmsol-bench-dsl-aot-")
+                .tempdir()
+                .expect("create AoT workspace tempdir"),
+            counter: std::cell::Cell::new(0),
+        }
+    }
+
+    fn fresh(&self, stem: &str) -> PathBuf {
+        let n = self.counter.get();
+        self.counter.set(n + 1);
+        self.root.path().join(format!("{stem}-{n:04}"))
+    }
+}
+
+/// Compile a (workload, kind) DSL model with the chosen backend, returning the
+/// full `CompiledRuntimeModel`.
+fn compile_runtime(
+    workload: Workload,
+    kind: SolverKind,
+    backend: Backend,
+    aot: &AotWorkspace,
+) -> CompiledRuntimeModel {
+    let source = dsl_source(workload, kind);
+    let name = dsl_model_name(workload, kind);
+    let target = match backend {
+        Backend::Jit => RuntimeCompilationTarget::Jit,
+        Backend::Aot => {
+            let dir = aot.fresh(&format!("{}-{}", workload.label(), kind.label()));
+            RuntimeCompilationTarget::NativeAot(NativeAotCompileOptions::new(dir))
+        }
+        Backend::Wasm => RuntimeCompilationTarget::Wasm,
+    };
+    compile_module_source_to_runtime(source, Some(name), target, |_, _| {})
+        .unwrap_or_else(|e| panic!("compile {} via {} failed: {e:?}", name, backend.label()))
+}
+
+/// Compile and extract the inner ODE model so the bench can drive `Equation`
+/// directly (and toggle the cache).
+fn compile_ode(workload: Workload, backend: Backend, aot: &AotWorkspace) -> NativeOdeModel {
+    match compile_runtime(workload, SolverKind::Ode, backend, aot) {
+        CompiledRuntimeModel::Ode(model) => model,
+        other => panic!(
+            "expected Ode model for {}, got {:?}",
+            workload.label(),
+            other.backend()
+        ),
+    }
+}
+
+// ───────────────────────────── compile group ─────────────────────────
+
+fn compile_group(c: &mut Criterion) {
+    let mut group = c.benchmark_group("dsl/compile");
+    group.sampling_mode(SamplingMode::Flat);
+    // Each compile is expensive (especially AoT — full rustc invocation).
+    group.sample_size(10);
+    group.measurement_time(Duration::from_secs(30));
+
+    let aot = AotWorkspace::new();
+
+    for workload in Workload::all() {
+        for kind in SolverKind::all() {
+            for backend in Backend::all() {
+                let bench_id = BenchmarkId::from_parameter(format!(
+                    "{}/{}/{}",
+                    workload.label(),
+                    kind.label(),
+                    backend.label()
+                ));
+                group.bench_function(bench_id, |b| {
+                    b.iter(|| {
+                        black_box(compile_runtime(
+                            black_box(workload),
+                            black_box(kind),
+                            black_box(backend),
+                            &aot,
+                        ));
+                    });
+                });
+            }
+        }
+    }
+
+    group.finish();
+}
+
+// ───────────────────────────── predictions group ─────────────────────
+
+fn predictions_group(c: &mut Criterion) {
+    let mut group = c.benchmark_group("dsl/predictions");
+    group.sampling_mode(SamplingMode::Flat);
+
+    let aot = AotWorkspace::new();
+
+    for workload in Workload::all() {
+        let subject = subject_for_predictions(workload);
+        for kind in SolverKind::all() {
+            let theta = params(workload, kind);
+            for backend in Backend::all() {
+                match kind {
+                    SolverKind::Ode => {
+                        // ODE supports hot/cold via NativeOdeModel.disable_cache().
+                        for cache in CacheState::all() {
+                            let model = match cache {
+                                CacheState::Hot => compile_ode(workload, backend, &aot),
+                                CacheState::Cold => {
+                                    compile_ode(workload, backend, &aot).disable_cache()
+                                }
+                            };
+                            let bench_id = BenchmarkId::from_parameter(format!(
+                                "{}/{}/{}/{}",
+                                workload.label(),
+                                kind.label(),
+                                backend.label(),
+                                cache.label()
+                            ));
+                            group.bench_function(bench_id, |b| {
+                                b.iter(|| {
+                                    black_box(
+                                        model
+                                            .estimate_predictions(
+                                                black_box(&subject),
+                                                black_box(&theta),
+                                            )
+                                            .unwrap(),
+                                    )
+                                });
+                            });
+                        }
+                    }
+                    SolverKind::Analytical | SolverKind::Sde => {
+                        // Analytical / SDE: no cache axis. Drive predictions
+                        // through the enum-level `estimate_predictions`.
+                        let model = compile_runtime(workload, kind, backend, &aot);
+                        let bench_id = BenchmarkId::from_parameter(format!(
+                            "{}/{}/{}",
+                            workload.label(),
+                            kind.label(),
+                            backend.label()
+                        ));
+                        group.bench_function(bench_id, |b| {
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_predictions(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        });
+                    }
+                }
+            }
+        }
+    }
+
+    group.finish();
+}
+
+// ───────────────────────────── log-likelihood group ──────────────────
+
+fn log_likelihood_group(c: &mut Criterion) {
+    let mut group = c.benchmark_group("dsl/log-likelihood");
+    group.sampling_mode(SamplingMode::Flat);
+
+    let aot = AotWorkspace::new();
+    let error_models = assay_error_models();
+
+    for workload in Workload::all() {
+        let subject = subject_for_likelihood(workload);
+        let theta = params(workload, SolverKind::Ode);
+        for backend in Backend::all() {
+            for cache in CacheState::all() {
+                let model = match cache {
+                    CacheState::Hot => compile_ode(workload, backend, &aot),
+                    CacheState::Cold => compile_ode(workload, backend, &aot).disable_cache(),
+                };
+                let bench_id = BenchmarkId::from_parameter(format!(
+                    "{}/ode/{}/{}",
+                    workload.label(),
+                    backend.label(),
+                    cache.label()
+                ));
+                group.bench_function(bench_id, |b| {
+                    b.iter(|| {
+                        black_box(
+                            model
+                                .estimate_log_likelihood(
+                                    black_box(&subject),
+                                    black_box(&theta),
+                                    black_box(&error_models),
+                                )
+                                .unwrap(),
+                        )
+                    });
+                });
+            }
+        }
+    }
+
+    group.finish();
+}
+
+// ───────────────────────────── likelihood-matrix group ───────────────
+
+fn likelihood_matrix_group(c: &mut Criterion) {
+    use pharmsol::prelude::simulator::log_likelihood_matrix;
+
+    let mut group = c.benchmark_group("dsl/likelihood-matrix");
+    group.sampling_mode(SamplingMode::Flat);
+    group.sample_size(10);
+    group.measurement_time(Duration::from_secs(20));
+
+    let aot = AotWorkspace::new();
+    let error_models = assay_error_models();
+
+    for workload in Workload::all() {
+        let data = matrix_data(workload, MATRIX_N_SUBJECTS);
+        let theta = support_points(workload, SolverKind::Ode, MATRIX_N_SUPPORT);
+        for backend in Backend::all() {
+            let model = compile_ode(workload, backend, &aot);
+            let bench_id = BenchmarkId::from_parameter(format!(
+                "{}/ode/{}",
+                workload.label(),
+                backend.label()
+            ));
+            group.bench_function(bench_id, |b| {
+                b.iter(|| {
+                    black_box(
+                        log_likelihood_matrix(
+                            black_box(&model),
+                            black_box(&data),
+                            black_box(&theta),
+                            black_box(&error_models),
+                            false,
+                        )
+                        .unwrap(),
+                    )
+                });
+            });
+        }
+    }
+
+    group.finish();
+}
+
+criterion_group!(
+    dsl_matrix,
+    compile_group,
+    predictions_group,
+    log_likelihood_group,
+    likelihood_matrix_group
+);
+criterion_main!(dsl_matrix);
diff --git a/benches/likelihood.rs b/benches/likelihood.rs
deleted file mode 100644
index c8c90d41..00000000
--- a/benches/likelihood.rs
+++ /dev/null
@@ -1,150 +0,0 @@
-use criterion::{criterion_group, criterion_main, Criterion, SamplingMode, Throughput};
-use ndarray::Array2;
-use pharmsol::prelude::simulator::{log_likelihood_batch, log_likelihood_matrix};
-use pharmsol::prelude::*;
-use pharmsol::{ResidualErrorModel, ResidualErrorModels, ODE};
-use std::hint::black_box;
-use std::time::Duration;
-
-fn benchmark_equation() -> ODE {
-    equation::ODE::new(
-        |x, p, _t, dx, b, _rateiv, _cov| {
-            fetch_params!(p, ka, ke, _tlag, _v);
-            dx[0] = -ka * x[0] + b[0];
-            dx[1] = ka * x[0] - ke * x[1];
-        },
-        |p, _t, _cov| {
-            fetch_params!(p, _ka, _ke, tlag, _v);
-            lag! {0=>tlag}
-        },
-        |_p, _t, _cov| fa! {},
-        |_p, _t, _cov, _x| {},
-        |x, p, _t, _cov, y| {
-            fetch_params!(p, _ka, _ke, _tlag, v);
-            y[0] = x[1] / v;
-        },
-    )
-    .with_nstates(2)
-    .with_nout(1)
-}
-
-fn benchmark_data(n_subjects: usize) -> Data {
-    let subjects = (0..n_subjects)
-        .map(|index| {
-            let dose = 100.0 + index as f64;
-            let offset = index as f64 * 0.02;
-            Subject::builder(format!("subject_{index}"))
-                .bolus(0.0, dose, 0)
-                .observation(0.5, 4.2 + offset, 0)
-                .observation(1.0, 5.9 + offset, 0)
-                .observation(2.0, 7.4 + offset, 0)
-                .observation(4.0, 6.8 + offset, 0)
-                .observation(8.0, 4.7 + offset, 0)
-                .observation(12.0, 3.3 + offset, 0)
-                .build()
-        })
-        .collect();
-
-    Data::new(subjects)
-}
-
-fn support_points(n_support_points: usize) -> Array2<f64> {
-    Array2::from_shape_fn((n_support_points, 4), |(row, column)| match column {
-        0 => 0.55 + row as f64 * 0.002,
-        1 => 0.08 + row as f64 * 0.0004,
-        2 => 0.05,
-        3 => 18.0 + row as f64 * 0.15,
-        _ => unreachable!(),
-    })
-}
-
-fn batch_parameters(n_subjects: usize) -> Array2<f64> {
-    Array2::from_shape_fn((n_subjects, 4), |(row, column)| match column {
-        0 => 0.60 + row as f64 * 0.001,
-        1 => 0.09 + row as f64 * 0.0002,
-        2 => 0.05,
-        3 => 20.0 + row as f64 * 0.05,
-        _ => unreachable!(),
-    })
-}
-
-fn assay_error_models() -> AssayErrorModels {
-    AssayErrorModels::new()
-        .add(
-            0,
-            AssayErrorModel::additive(ErrorPoly::new(0.0, 0.1, 0.0, 0.0), 0.0),
-        )
-        .unwrap()
-}
-
-fn residual_error_models() -> ResidualErrorModels {
-    ResidualErrorModels::new().add(0, ResidualErrorModel::constant(0.2))
-}
-
-fn criterion_benchmark(c: &mut Criterion) {
-    let assay_error_models = assay_error_models();
-    let residual_error_models = residual_error_models();
-
-    let matrix_data = benchmark_data(64);
-    let matrix_theta = support_points(128);
-    let matrix_equation = benchmark_equation();
-    let _ = log_likelihood_matrix(
-        &matrix_equation,
-        &matrix_data,
-        &matrix_theta,
-        &assay_error_models,
-        false,
-    )
-    .unwrap();
-
-    let batch_data = benchmark_data(256);
-    let batch_theta = batch_parameters(256);
-    let batch_equation = benchmark_equation();
-    let _ = log_likelihood_batch(
-        &batch_equation,
-        &batch_data,
-        &batch_theta,
-        &residual_error_models,
-    )
-    .unwrap();
-
-    let mut matrix_group = c.benchmark_group("population/likelihood-matrix");
-
-    matrix_group.sampling_mode(SamplingMode::Flat);
-    matrix_group.sample_size(10);
-    matrix_group.measurement_time(Duration::from_secs(30));
-    matrix_group.throughput(Throughput::Elements((64 * 128) as u64));
-    matrix_group.bench_function("ode-hot-64x128", |b| {
-        b.iter(|| {
-            black_box(log_likelihood_matrix(
-                black_box(&matrix_equation),
-                black_box(&matrix_data),
-                black_box(&matrix_theta),
-                black_box(&assay_error_models),
-                false,
-            ))
-            .unwrap()
-        })
-    });
-    matrix_group.finish();
-
-    let mut batch_group = c.benchmark_group("population/likelihood-batch");
-
-    batch_group.sampling_mode(SamplingMode::Flat);
-    batch_group.throughput(Throughput::Elements(256));
-    batch_group.bench_function("ode-hot-256", |b| {
-        b.iter(|| {
-            black_box(log_likelihood_batch(
-                black_box(&batch_equation),
-                black_box(&batch_data),
-                black_box(&batch_theta),
-                black_box(&residual_error_models),
-            ))
-            .unwrap()
-        })
-    });
-    batch_group.finish();
-}
-
-criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
diff --git a/benches/native_matrix.rs b/benches/native_matrix.rs
new file mode 100644
index 00000000..b0e063eb
--- /dev/null
+++ b/benches/native_matrix.rs
@@ -0,0 +1,590 @@
+//! Native (Rust-authored) benchmark matrix.
+//!
+//! Covers handwritten and macro-authored models across all three solver kinds
+//! (ODE, Analytical, SDE), both workloads (short 1-cpt PO; long 2-cpt IV q12h),
+//! and the three measured methods: `estimate_predictions`,
+//! `estimate_log_likelihood`, and `log_likelihood_matrix`.
+//!
+//! This bench compiles with **default features only** so it produces a stable
+//! regression signal on CI (which runs plain `cargo bench`). DSL backends
+//! live in `dsl_matrix.rs` behind feature gates.
+//!
+//! Benchmark id format:
+//! - `native/predictions`     → `{workload}/{solver}/{authoring}/{cache}`
+//! - `native/log-likelihood`  → `{workload}/{solver}/{authoring}/{cache}`
+//! - `native/likelihood-matrix` → `{workload}/{solver}/{authoring}` (matrix is
+//!   the unit of work; per-cell cache toggling is not meaningful here).
+//!
+//! `cache` is `hot` or `cold` (cold = `.disable_cache()` on a fresh handle).
+//! `authoring` is `handwritten` or `macro`.
+
+use std::hint::black_box;
+use std::time::Duration;
+
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, SamplingMode};
+use pharmsol::prelude::*;
+use pharmsol::Cache;
+
+mod common;
+use common::{
+    assay_error_models, handwritten_analytical, handwritten_ode, handwritten_sde, macro_analytical,
+    macro_ode, macro_sde, matrix_data, params, subject_for_likelihood, subject_for_predictions,
+    support_points, SolverKind, Workload,
+};
+
+/// Size of the population dataset (rows) for `log_likelihood_matrix`.
+const MATRIX_N_SUBJECTS: usize = 32;
+/// Size of the parameter grid (columns) for `log_likelihood_matrix`.
+const MATRIX_N_SUPPORT: usize = 64;
+
+#[derive(Debug, Clone, Copy)]
+enum Authoring {
+    Handwritten,
+    Macro,
+}
+
+impl Authoring {
+    fn label(self) -> &'static str {
+        match self {
+            Self::Handwritten => "handwritten",
+            Self::Macro => "macro",
+        }
+    }
+
+    fn all() -> [Authoring; 2] {
+        [Authoring::Handwritten, Authoring::Macro]
+    }
+}
+
+#[derive(Debug, Clone, Copy)]
+enum CacheState {
+    Hot,
+    Cold,
+}
+
+impl CacheState {
+    fn label(self) -> &'static str {
+        match self {
+            Self::Hot => "hot",
+            Self::Cold => "cold",
+        }
+    }
+
+    fn all() -> [CacheState; 2] {
+        [CacheState::Hot, CacheState::Cold]
+    }
+}
+
+/// Helper that materializes the correct concrete handwritten/macro factory
+/// then dispatches `estimate_predictions` on it. The `match` over solver kind
+/// stays inside each closure so the chosen branch is a fresh handle every
+/// time we configure a new (cold) instance, and we don't pay any virtual
+/// dispatch cost (each model type has its own concrete `Equation` impl).
+fn id(workload: Workload, kind: SolverKind, authoring: Authoring, cache: CacheState) -> String {
+    format!(
+        "{}/{}/{}/{}",
+        workload.label(),
+        kind.label(),
+        authoring.label(),
+        cache.label()
+    )
+}
+
+fn predictions_group(c: &mut Criterion) {
+    let mut group = c.benchmark_group("native/predictions");
+    group.sampling_mode(SamplingMode::Flat);
+
+    for workload in Workload::all() {
+        let subject = subject_for_predictions(workload);
+        for kind in SolverKind::all() {
+            let theta = params(workload, kind);
+            for authoring in Authoring::all() {
+                for cache in CacheState::all() {
+                    let bench_id =
+                        BenchmarkId::from_parameter(id(workload, kind, authoring, cache));
+                    group.bench_function(bench_id, |b| match (kind, authoring, cache) {
+                        (SolverKind::Ode, Authoring::Handwritten, CacheState::Hot) => {
+                            let model = handwritten_ode(workload);
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_predictions(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Ode, Authoring::Handwritten, CacheState::Cold) => {
+                            let model = handwritten_ode(workload).disable_cache();
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_predictions(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Ode, Authoring::Macro, CacheState::Hot) => {
+                            let model = macro_ode(workload);
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_predictions(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Ode, Authoring::Macro, CacheState::Cold) => {
+                            let model = macro_ode(workload).disable_cache();
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_predictions(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Analytical, Authoring::Handwritten, CacheState::Hot) => {
+                            let model = handwritten_analytical(workload);
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_predictions(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Analytical, Authoring::Handwritten, CacheState::Cold) => {
+                            let model = handwritten_analytical(workload).disable_cache();
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_predictions(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Analytical, Authoring::Macro, CacheState::Hot) => {
+                            let model = macro_analytical(workload);
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_predictions(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Analytical, Authoring::Macro, CacheState::Cold) => {
+                            let model = macro_analytical(workload).disable_cache();
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_predictions(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Sde, Authoring::Handwritten, CacheState::Hot) => {
+                            let model = handwritten_sde(workload);
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_predictions(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Sde, Authoring::Handwritten, CacheState::Cold) => {
+                            let model = handwritten_sde(workload).disable_cache();
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_predictions(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Sde, Authoring::Macro, CacheState::Hot) => {
+                            let model = macro_sde(workload);
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_predictions(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Sde, Authoring::Macro, CacheState::Cold) => {
+                            let model = macro_sde(workload).disable_cache();
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_predictions(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                    });
+                }
+            }
+        }
+    }
+
+    group.finish();
+}
+
+fn log_likelihood_group(c: &mut Criterion) {
+    let mut group = c.benchmark_group("native/log-likelihood");
+    group.sampling_mode(SamplingMode::Flat);
+    let error_models = assay_error_models();
+
+    for workload in Workload::all() {
+        let subject = subject_for_likelihood(workload);
+        for kind in SolverKind::all() {
+            let theta = params(workload, kind);
+            for authoring in Authoring::all() {
+                for cache in CacheState::all() {
+                    let bench_id =
+                        BenchmarkId::from_parameter(id(workload, kind, authoring, cache));
+                    group.bench_function(bench_id, |b| match (kind, authoring, cache) {
+                        (SolverKind::Ode, Authoring::Handwritten, CacheState::Hot) => {
+                            let model = handwritten_ode(workload);
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_log_likelihood(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                            black_box(&error_models),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Ode, Authoring::Handwritten, CacheState::Cold) => {
+                            let model = handwritten_ode(workload).disable_cache();
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_log_likelihood(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                            black_box(&error_models),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Ode, Authoring::Macro, CacheState::Hot) => {
+                            let model = macro_ode(workload);
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_log_likelihood(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                            black_box(&error_models),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Ode, Authoring::Macro, CacheState::Cold) => {
+                            let model = macro_ode(workload).disable_cache();
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_log_likelihood(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                            black_box(&error_models),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Analytical, Authoring::Handwritten, CacheState::Hot) => {
+                            let model = handwritten_analytical(workload);
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_log_likelihood(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                            black_box(&error_models),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Analytical, Authoring::Handwritten, CacheState::Cold) => {
+                            let model = handwritten_analytical(workload).disable_cache();
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_log_likelihood(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                            black_box(&error_models),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Analytical, Authoring::Macro, CacheState::Hot) => {
+                            let model = macro_analytical(workload);
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_log_likelihood(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                            black_box(&error_models),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Analytical, Authoring::Macro, CacheState::Cold) => {
+                            let model = macro_analytical(workload).disable_cache();
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_log_likelihood(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                            black_box(&error_models),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Sde, Authoring::Handwritten, CacheState::Hot) => {
+                            let model = handwritten_sde(workload);
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_log_likelihood(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                            black_box(&error_models),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Sde, Authoring::Handwritten, CacheState::Cold) => {
+                            let model = handwritten_sde(workload).disable_cache();
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_log_likelihood(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                            black_box(&error_models),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Sde, Authoring::Macro, CacheState::Hot) => {
+                            let model = macro_sde(workload);
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_log_likelihood(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                            black_box(&error_models),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                        (SolverKind::Sde, Authoring::Macro, CacheState::Cold) => {
+                            let model = macro_sde(workload).disable_cache();
+                            b.iter(|| {
+                                black_box(
+                                    model
+                                        .estimate_log_likelihood(
+                                            black_box(&subject),
+                                            black_box(&theta),
+                                            black_box(&error_models),
+                                        )
+                                        .unwrap(),
+                                )
+                            });
+                        }
+                    });
+                }
+            }
+        }
+    }
+
+    group.finish();
+}
+
+fn likelihood_matrix_group(c: &mut Criterion) {
+    use pharmsol::prelude::simulator::log_likelihood_matrix;
+
+    let mut group = c.benchmark_group("native/likelihood-matrix");
+    group.sampling_mode(SamplingMode::Flat);
+    // Each matrix iteration is multi-second under any solver — small sample
+    // budgets keep wall-clock manageable while still producing a useful signal.
+    group.sample_size(10);
+    group.measurement_time(Duration::from_secs(20));
+
+    let error_models = assay_error_models();
+
+    for workload in Workload::all() {
+        let data = matrix_data(workload, MATRIX_N_SUBJECTS);
+        for kind in SolverKind::all() {
+            let theta = support_points(workload, kind, MATRIX_N_SUPPORT);
+            for authoring in Authoring::all() {
+                let bench_id = BenchmarkId::from_parameter(format!(
+                    "{}/{}/{}",
+                    workload.label(),
+                    kind.label(),
+                    authoring.label()
+                ));
+                group.bench_function(bench_id, |b| match (kind, authoring) {
+                    (SolverKind::Ode, Authoring::Handwritten) => {
+                        let model = handwritten_ode(workload);
+                        b.iter(|| {
+                            black_box(
+                                log_likelihood_matrix(
+                                    black_box(&model),
+                                    black_box(&data),
+                                    black_box(&theta),
+                                    black_box(&error_models),
+                                    false,
+                                )
+                                .unwrap(),
+                            )
+                        });
+                    }
+                    (SolverKind::Ode, Authoring::Macro) => {
+                        let model = macro_ode(workload);
+                        b.iter(|| {
+                            black_box(
+                                log_likelihood_matrix(
+                                    black_box(&model),
+                                    black_box(&data),
+                                    black_box(&theta),
+                                    black_box(&error_models),
+                                    false,
+                                )
+                                .unwrap(),
+                            )
+                        });
+                    }
+                    (SolverKind::Analytical, Authoring::Handwritten) => {
+                        let model = handwritten_analytical(workload);
+                        b.iter(|| {
+                            black_box(
+                                log_likelihood_matrix(
+                                    black_box(&model),
+                                    black_box(&data),
+                                    black_box(&theta),
+                                    black_box(&error_models),
+                                    false,
+                                )
+                                .unwrap(),
+                            )
+                        });
+                    }
+                    (SolverKind::Analytical, Authoring::Macro) => {
+                        let model = macro_analytical(workload);
+                        b.iter(|| {
+                            black_box(
+                                log_likelihood_matrix(
+                                    black_box(&model),
+                                    black_box(&data),
+                                    black_box(&theta),
+                                    black_box(&error_models),
+                                    false,
+                                )
+                                .unwrap(),
+                            )
+                        });
+                    }
+                    (SolverKind::Sde, Authoring::Handwritten) => {
+                        let model = handwritten_sde(workload);
+                        b.iter(|| {
+                            black_box(
+                                log_likelihood_matrix(
+                                    black_box(&model),
+                                    black_box(&data),
+                                    black_box(&theta),
+                                    black_box(&error_models),
+                                    false,
+                                )
+                                .unwrap(),
+                            )
+                        });
+                    }
+                    (SolverKind::Sde, Authoring::Macro) => {
+                        let model = macro_sde(workload);
+                        b.iter(|| {
+                            black_box(
+                                log_likelihood_matrix(
+                                    black_box(&model),
+                                    black_box(&data),
+                                    black_box(&theta),
+                                    black_box(&error_models),
+                                    false,
+                                )
+                                .unwrap(),
+                            )
+                        });
+                    }
+                });
+            }
+        }
+    }
+
+    group.finish();
+}
+
+criterion_group!(
+    native_matrix,
+    predictions_group,
+    log_likelihood_group,
+    likelihood_matrix_group
+);
+criterion_main!(native_matrix);
diff --git a/benches/nca.rs b/benches/nca.rs
deleted file mode 100644
index ec7a5220..00000000
--- a/benches/nca.rs
+++ /dev/null
@@ -1,46 +0,0 @@
-use criterion::{criterion_group, criterion_main, Criterion, SamplingMode, Throughput};
-use pharmsol::nca::{NCAOptions, NCA};
-use pharmsol::prelude::*;
-use std::hint::black_box;
-
-fn typical_oral_subject(id: impl Into<String>, scale: f64) -> Subject {
-    Subject::builder(id.into())
-        .bolus(0.0, 100.0, 0)
-        .observation(0.0, 0.0, 0)
-        .observation(0.25, 2.5 * scale, 0)
-        .observation(0.5, 5.0 * scale, 0)
-        .observation(1.0, 8.0 * scale, 0)
-        .observation(2.0, 10.0 * scale, 0)
-        .observation(4.0, 7.5 * scale, 0)
-        .observation(6.0, 5.0 * scale, 0)
-        .observation(8.0, 3.5 * scale, 0)
-        .observation(12.0, 1.5 * scale, 0)
-        .observation(16.0, 0.8 * scale, 0)
-        .observation(24.0, 0.2 * scale, 0)
-        .observation(36.0, 0.05 * scale, 0)
-        .build()
-}
-
-fn build_population(n: usize) -> Data {
-    let subjects: Vec<Subject> = (0..n)
-        .map(|i| typical_oral_subject(format!("subj_{i}"), 1.0 + (i as f64 % 7.0) * 0.05))
-        .collect();
-    Data::new(subjects)
-}
-
-fn criterion_benchmark(c: &mut Criterion) {
-    let data = build_population(128);
-    let opts = NCAOptions::default();
-
-    let mut group = c.benchmark_group("nca");
-
-    group.sampling_mode(SamplingMode::Flat);
-    group.throughput(Throughput::Elements(128));
-    group.bench_function("population-128", |b| {
-        b.iter(|| black_box(black_box(&data).nca_all(black_box(&opts))))
-    });
-    group.finish();
-}
-
-criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
diff --git a/benches/predictions.rs b/benches/predictions.rs
deleted file mode 100644
index 54e08468..00000000
--- a/benches/predictions.rs
+++ /dev/null
@@ -1,217 +0,0 @@
-use criterion::{criterion_group, criterion_main, Criterion, SamplingMode};
-use pharmsol::prelude::*;
-use pharmsol::{Analytical, Cache, ODE};
-use std::hint::black_box;
-
-/// Oral acetaminophen dose with measurements in plasma
-fn acetaminophen_oral_subject() -> Subject {
-    Subject::builder("bench_acetaminophen_oral")
-        .bolus(0.0, 500.0, "acetaminophen_oral")
-        .missing_observation(0.25, "plasma")
-        .missing_observation(0.5, "plasma")
-        .missing_observation(1.0, "plasma")
-        .missing_observation(2.0, "plasma")
-        .missing_observation(4.0, "plasma")
-        .missing_observation(8.0, "plasma")
-        .missing_observation(12.0, "plasma")
-        .missing_observation(24.0, "plasma")
-        .build()
-}
-
-/// Intravenous acetaminophen infusion with measurements in plasma
-fn acetaminophen_iv_subject() -> Subject {
-    Subject::builder("bench_acetaminophen_iv")
-        .infusion(0.0, 500.0, "acetaminophen_iv", 0.5)
-        .missing_observation(0.5, "plasma")
-        .missing_observation(1.0, "plasma")
-        .missing_observation(2.0, "plasma")
-        .missing_observation(4.0, "plasma")
-        .missing_observation(8.0, "plasma")
-        .missing_observation(12.0, "plasma")
-        .missing_observation(24.0, "plasma")
-        .build()
-}
-
-fn analytical_one_cpt() -> Analytical {
-    analytical! {
-        name: "one_cmt_acetaminophen_oral",
-        params: [ka, ke, v],
-        states: [gut, central],
-        outputs: [plasma],
-        routes: [
-            bolus(acetaminophen_oral) -> gut,
-        ],
-        structure: one_compartment_with_absorption,
-        out: |x, _p, _t, _cov, y| {
-            y[plasma] = x[central] / v;
-        },
-    }
-}
-
-/// Two-compartment intravenous model
-fn analytical_two_cpt() -> Analytical {
-    analytical! {
-        name: "two_cmt_acetaminophen_iv",
-        params: [ke, kcp, kpc, v],
-        states: [central, peripheral],
-        outputs: [plasma],
-        routes: [
-            infusion(acetaminophen_iv) -> central,
-        ],
-        structure: two_compartments,
-        out: |x, _p, _t, _cov, y| {
-            y[plasma] = x[central] / v;
-        },
-    }
-}
-
-fn ode_one_cpt() -> ODE {
-    ode! {
-        name: "one_cmt_acetaminophen_oral",
-        params: [ka, ke, v],
-        states: [gut, central],
-        outputs: [plasma],
-        routes: [
-            bolus(acetaminophen_oral) -> gut,
-        ],
-        diffeq: |x, _p, _t, dx, _cov| {
-            dx[gut] = -ka * x[gut];
-            dx[central] = ka * x[gut] - ke * x[central];
-        },
-        out: |x, _p, _t, _cov, y| {
-            y[plasma] = x[central] / v;
-        },
-    }
-}
-
-/// Two-compartment model with infusion
-fn ode_two_cpt() -> ODE {
-    ode! {
-        name: "two_cmt_acetaminophen_iv",
-        params: [cl, v, vp, q],
-        states: [central, peripheral],
-        outputs: [plasma],
-        routes: [
-            infusion(acetaminophen_iv) -> central,
-        ],
-        diffeq: |x, _p, _t, dx, _cov| {
-            let ke = cl / v;
-            let kcp = q / v;
-            let kpc = q / vp;
-            dx[central] = -ke * x[central] - kcp * x[central] + kpc * x[peripheral];
-            dx[peripheral] = kcp * x[central] - kpc * x[peripheral];
-        },
-        out: |x, _p, _t, _cov, y| {
-            y[plasma] = x[central] / v;
-        },
-    }
-}
-
-fn criterion_benchmark(c: &mut Criterion) {
-    // One-compartment oral acetaminophen
-    let acetaminophen_oral = acetaminophen_oral_subject();
-    let acetaminophen_oral_params = [1.0, 0.1, 50.0];
-    let analytical_1 = analytical_one_cpt();
-    let ode_1_cold = ode_one_cpt().disable_cache();
-    let ode_1_hot = ode_one_cpt();
-    let _ = ode_1_hot
-        .estimate_predictions(&acetaminophen_oral, acetaminophen_oral_params.as_slice())
-        .unwrap();
-
-    // Two-compartment intravenous acetaminophen
-    let acetaminophen_iv = acetaminophen_iv_subject();
-    let analytical_2_params = [0.1, 0.05, 0.04, 50.0];
-    let ode_2_params = [5.0, 50.0, 100.0, 10.0];
-    let analytical_2 = analytical_two_cpt();
-    let ode_2_cold = ode_two_cpt().disable_cache();
-    let ode_2_hot = ode_two_cpt();
-    let _ = ode_2_hot
-        .estimate_predictions(&acetaminophen_iv, ode_2_params.as_slice())
-        .unwrap();
-
-    let mut group = c.benchmark_group("core/predictions");
-
-    // Per-iteration cost is in the sub-microsecond to tens-of-microseconds
-    // range, so Criterion's defaults (100 samples, 5 s measurement) collect
-    // plenty of iterations. The only override is flat sampling, which keeps
-    // per-iteration timings comparable across runs instead of using the
-    // default linear ramp.
-    group.sampling_mode(SamplingMode::Flat);
-
-    group.bench_function("analytical/one-compartment-acetaminophen-oral", |b| {
-        b.iter(|| {
-            black_box(
-                analytical_1
-                    .estimate_predictions(
-                        black_box(&acetaminophen_oral),
-                        black_box(acetaminophen_oral_params.as_slice()),
-                    )
-                    .unwrap(),
-            )
-        })
-    });
-    group.bench_function("ode/one-compartment-acetaminophen-oral-cold", |b| {
-        b.iter(|| {
-            black_box(
-                ode_1_cold
-                    .estimate_predictions(
-                        black_box(&acetaminophen_oral),
-                        black_box(acetaminophen_oral_params.as_slice()),
-                    )
-                    .unwrap(),
-            )
-        })
-    });
-    group.bench_function("ode/one-compartment-acetaminophen-oral-hot", |b| {
-        b.iter(|| {
-            black_box(
-                ode_1_hot
-                    .estimate_predictions(
-                        black_box(&acetaminophen_oral),
-                        black_box(acetaminophen_oral_params.as_slice()),
-                    )
-                    .unwrap(),
-            )
-        })
-    });
-    group.bench_function("analytical/two-compartment-acetaminophen-iv", |b| {
-        b.iter(|| {
-            black_box(
-                analytical_2
-                    .estimate_predictions(
-                        black_box(&acetaminophen_iv),
-                        black_box(analytical_2_params.as_slice()),
-                    )
-                    .unwrap(),
-            )
-        })
-    });
-    group.bench_function("ode/two-compartment-acetaminophen-iv-cold", |b| {
-        b.iter(|| {
-            black_box(
-                ode_2_cold
-                    .estimate_predictions(
-                        black_box(&acetaminophen_iv),
-                        black_box(ode_2_params.as_slice()),
-                    )
-                    .unwrap(),
-            )
-        })
-    });
-    group.bench_function("ode/two-compartment-acetaminophen-iv-hot", |b| {
-        b.iter(|| {
-            black_box(
-                ode_2_hot
-                    .estimate_predictions(
-                        black_box(&acetaminophen_iv),
-                        black_box(ode_2_params.as_slice()),
-                    )
-                    .unwrap(),
-            )
-        })
-    });
-    group.finish();
-}
-
-criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
diff --git a/benches/runtime_matrix.rs b/benches/runtime_matrix.rs
deleted file mode 100644
index 86595f16..00000000
--- a/benches/runtime_matrix.rs
+++ /dev/null
@@ -1,389 +0,0 @@
-use criterion::{criterion_group, criterion_main, Criterion};
-
-mod self_contained {
-    use std::error::Error;
-    use std::io;
-    use std::path::PathBuf;
-
-    use pharmsol::dsl::{
-        self, CompiledRuntimeModel, CompiledWasmModule, RuntimeCompilationTarget,
-        RuntimePredictions, WasmCompileCache,
-    };
-    use pharmsol::{Subject, SubjectBuilderExt};
-    use tempfile::{tempdir, TempDir};
-
-    const ODE_SOURCE: &str = r#"
-name = one_cmt_acetaminophen_oral_iv
-kind = ode
-
-params = ka, cl, v, tlag, f_oral
-covariates = wt@linear
-states = depot, central
-derived = cl_i, ke
-outputs = plasma
-
-bolus(acetaminophen_oral) -> depot
-infusion(acetaminophen_iv) -> central
-
-lag(acetaminophen_oral) = tlag
-fa(acetaminophen_oral) = f_oral
-
-cl_i = cl * pow(wt / 70.0, 0.75)
-ke = cl_i / v
-
-dx(depot) = -ka * depot
-dx(central) = ka * depot - ke * central
-
-out(plasma) = central / v ~ continuous()
-"#;
-
-    const ANALYTICAL_SOURCE: &str = r#"
-name = one_cmt_acetaminophen_oral
-kind = analytical
-
-params = ka, ke, v, tlag, f_oral
-states = depot, central
-outputs = plasma
-
-bolus(acetaminophen_oral) -> depot
-
-lag(acetaminophen_oral) = tlag
-fa(acetaminophen_oral) = f_oral
-
-structure = one_compartment_with_absorption
-
-out(plasma) = central / v ~ continuous()
-"#;
-
-    const SDE_SOURCE: &str = r#"
-name = acetaminophen_iv_sde
-kind = sde
-
-params = ka, ke0, kcp, kpc, vol, ske
-covariates = wt@locf
-states = depot, central, peripheral, ke_latent
-particles = 16
-outputs = plasma
-
-bolus(acetaminophen_iv) -> depot
-
-init(ke_latent) = ke0
-
-dx(depot) = -ka * depot
-dx(central) = ka * depot - (ke_latent + kcp) * central + kpc * peripheral
-dx(peripheral) = kcp * central - kpc * peripheral
-dx(ke_latent) = -ke_latent + ke0
-
-noise(ke_latent) = ske
-
-out(plasma) = central / (vol * wt) ~ continuous()
-"#;
-
-    const SDE_PARTICLE_COUNT: usize = 16;
-
-    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
-    pub enum CorpusCase {
-        Ode,
-        Analytical,
-        Sde,
-    }
-
-    impl CorpusCase {
-        pub fn label(self) -> &'static str {
-            match self {
-                Self::Ode => "dsl-ode-one_cmt_acetaminophen_oral_iv",
-                Self::Analytical => "dsl-analytical-one_cmt_acetaminophen_oral",
-                Self::Sde => "dsl-sde-acetaminophen_iv_sde",
-            }
-        }
-
-        fn model_name(self) -> &'static str {
-            match self {
-                Self::Ode => "one_cmt_acetaminophen_oral_iv",
-                Self::Analytical => "one_cmt_acetaminophen_oral",
-                Self::Sde => "acetaminophen_iv_sde",
-            }
-        }
-
-        fn source(self) -> &'static str {
-            match self {
-                Self::Ode => ODE_SOURCE,
-                Self::Analytical => ANALYTICAL_SOURCE,
-                Self::Sde => SDE_SOURCE,
-            }
-        }
-
-        fn support_point(self) -> &'static [f64] {
-            match self {
-                Self::Ode => &[1.2, 5.0, 40.0, 0.5, 0.8],
-                Self::Analytical => &[1.0, 0.15, 25.0, 0.5, 0.8],
-                Self::Sde => &[1.1, 0.2, 0.12, 0.08, 15.0, 0.0],
-            }
-        }
-
-        fn runtime_subject(self, model: &CompiledRuntimeModel) -> Result<Subject, Box<dyn Error>> {
-            model
-                .info()
-                .outputs
-                .iter()
-                .find(|output| output.name == "plasma")
-                .ok_or_else(|| {
-                    io::Error::other(format!("{}: missing plasma output", self.label()))
-                })?;
-
-            let subject = match self {
-                Self::Ode => {
-                    model
-                        .info()
-                        .routes
-                        .iter()
-                        .find(|route| route.name == "acetaminophen_oral")
-                        .ok_or_else(|| {
-                            io::Error::other(format!(
-                                "{}: missing acetaminophen_oral route",
-                                self.label()
-                            ))
-                        })?;
-                    model
-                        .info()
-                        .routes
-                        .iter()
-                        .find(|route| route.name == "acetaminophen_iv")
-                        .ok_or_else(|| {
-                            io::Error::other(format!(
-                                "{}: missing acetaminophen_iv route",
-                                self.label()
-                            ))
-                        })?;
-                    Subject::builder(self.label())
-                        .covariate("wt", 0.0, 70.0)
-                        .bolus(0.0, 120.0, "acetaminophen_oral")
-                        .infusion(6.0, 60.0, "acetaminophen_iv", 2.0)
-                        .missing_observation(0.5, "plasma")
-                        .missing_observation(1.0, "plasma")
-                        .missing_observation(2.0, "plasma")
-                        .missing_observation(6.0, "plasma")
-                        .missing_observation(7.0, "plasma")
-                        .missing_observation(9.0, "plasma")
-                        .build()
-                }
-                Self::Analytical => {
-                    model
-                        .info()
-                        .routes
-                        .iter()
-                        .find(|route| route.name == "acetaminophen_oral")
-                        .ok_or_else(|| {
-                            io::Error::other(format!(
-                                "{}: missing acetaminophen_oral route",
-                                self.label()
-                            ))
-                        })?;
-                    Subject::builder(self.label())
-                        .bolus(0.0, 100.0, "acetaminophen_oral")
-                        .missing_observation(0.5, "plasma")
-                        .missing_observation(1.0, "plasma")
-                        .missing_observation(2.0, "plasma")
-                        .missing_observation(4.0, "plasma")
-                        .build()
-                }
-                Self::Sde => {
-                    model
-                        .info()
-                        .routes
-                        .iter()
-                        .find(|route| route.name == "acetaminophen_iv")
-                        .ok_or_else(|| {
-                            io::Error::other(format!(
-                                "{}: missing acetaminophen_iv route",
-                                self.label()
-                            ))
-                        })?;
-                    Subject::builder(self.label())
-                        .covariate("wt", 0.0, 70.0)
-                        .bolus(0.0, 80.0, "acetaminophen_iv")
-                        .missing_observation(0.5, "plasma")
-                        .missing_observation(1.0, "plasma")
-                        .missing_observation(2.0, "plasma")
-                        .missing_observation(4.0, "plasma")
-                        .build()
-                }
-            };
-
-            Ok(subject)
-        }
-    }
-
-    #[derive(Debug)]
-    pub struct ArtifactWorkspace {
-        tempdir: TempDir,
-    }
-
-    impl ArtifactWorkspace {
-        pub fn new() -> Result<Self, Box<dyn Error>> {
-            Ok(Self {
-                tempdir: tempdir()?,
-            })
-        }
-
-        fn aot_output(&self, stem: &str) -> PathBuf {
-            self.tempdir.path().join(format!("{stem}.pkm"))
-        }
-
-        fn build_root(&self, stem: &str) -> PathBuf {
-            self.tempdir.path().join(stem)
-        }
-    }
-
-    fn adjust_runtime_model(case: CorpusCase, model: CompiledRuntimeModel) -> CompiledRuntimeModel {
-        match (case, model) {
-            (CorpusCase::Sde, CompiledRuntimeModel::Sde(model)) => {
-                CompiledRuntimeModel::Sde(model.with_particles(SDE_PARTICLE_COUNT))
-            }
-            (_, model) => model,
-        }
-    }
-
-    pub fn compile_runtime_jit_model(
-        case: CorpusCase,
-    ) -> Result<CompiledRuntimeModel, Box<dyn Error>> {
-        Ok(adjust_runtime_model(
-            case,
-            dsl::compile_module_source_to_runtime(
-                case.source(),
-                Some(case.model_name()),
-                RuntimeCompilationTarget::Jit,
-                |_, _| {},
-            )?,
-        ))
-    }
-
-    pub fn compile_runtime_native_aot_model(
-        case: CorpusCase,
-        workspace: &ArtifactWorkspace,
-    ) -> Result<CompiledRuntimeModel, Box<dyn Error>> {
-        Ok(adjust_runtime_model(
-            case,
-            dsl::compile_module_source_to_runtime(
-                case.source(),
-                Some(case.model_name()),
-                RuntimeCompilationTarget::NativeAot(
-                    dsl::NativeAotCompileOptions::new(
-                        workspace.build_root(&format!("{}-runtime-aot-build", case.label())),
-                    )
-                    .with_output(workspace.aot_output(&format!("{}-runtime-aot", case.label()))),
-                ),
-                |_, _| {},
-            )?,
-        ))
-    }
-
-    pub fn compile_runtime_wasm_model(
-        case: CorpusCase,
-    ) -> Result<CompiledRuntimeModel, Box<dyn Error>> {
-        Ok(adjust_runtime_model(
-            case,
-            dsl::compile_module_source_to_runtime_wasm(case.source(), Some(case.model_name()))?,
-        ))
-    }
-
-    pub fn compile_wasm_module(case: CorpusCase) -> Result<CompiledWasmModule, Box<dyn Error>> {
-        Ok(dsl::compile_module_source_to_wasm_module(
-            case.source(),
-            Some(case.model_name()),
-        )?)
-    }
-
-    pub fn compile_wasm_module_with_cache(
-        case: CorpusCase,
-        cache: &WasmCompileCache,
-    ) -> Result<CompiledWasmModule, Box<dyn Error>> {
-        Ok(cache.compile_module_source_to_wasm_module(case.source(), Some(case.model_name()))?)
-    }
-
-    pub fn estimate_runtime_predictions(
-        case: CorpusCase,
-        model: &CompiledRuntimeModel,
-    ) -> Result<RuntimePredictions, Box<dyn Error>> {
-        Ok(model.estimate_predictions(&case.runtime_subject(model)?, case.support_point())?)
-    }
-}
-
-fn runtime_matrix_benchmark(c: &mut Criterion) {
-    use std::hint::black_box;
-    use std::time::Duration;
-
-    use criterion::{BatchSize, BenchmarkId, SamplingMode};
-    use pharmsol::dsl::WasmCompileCache;
-    use self_contained as corpus;
-    use self_contained::{ArtifactWorkspace, CorpusCase};
-
-    let cases = [CorpusCase::Ode, CorpusCase::Analytical, CorpusCase::Sde];
-
-    let mut compile_group = c.benchmark_group("dsl/runtime-compile");
-    compile_group.sampling_mode(SamplingMode::Flat);
-    compile_group.sample_size(10);
-    compile_group.warm_up_time(Duration::from_millis(500));
-    compile_group.measurement_time(Duration::from_secs(1));
-
-    for case in cases {
-        compile_group.bench_with_input(BenchmarkId::new("jit", case.label()), &case, |b, case| {
-            b.iter(|| black_box(corpus::compile_runtime_jit_model(*case).unwrap()))
-        });
-        compile_group.bench_with_input(
-            BenchmarkId::new("native-aot", case.label()),
-            &case,
-            |b, case| {
-                b.iter_batched(
-                    || ArtifactWorkspace::new().unwrap(),
-                    |workspace| {
-                        black_box(
-                            corpus::compile_runtime_native_aot_model(*case, &workspace).unwrap(),
-                        )
-                    },
-                    BatchSize::PerIteration,
-                )
-            },
-        );
-        compile_group.bench_with_input(BenchmarkId::new("wasm", case.label()), &case, |b, case| {
-            b.iter(|| black_box(corpus::compile_runtime_wasm_model(*case).unwrap()))
-        });
-        compile_group.bench_with_input(
-            BenchmarkId::new("wasm-module", case.label()),
-            &case,
-            |b, case| b.iter(|| black_box(corpus::compile_wasm_module(*case).unwrap())),
-        );
-
-        let cache = WasmCompileCache::default();
-        corpus::compile_wasm_module_with_cache(case, &cache).unwrap();
-        compile_group.bench_function(BenchmarkId::new("wasm-module-cached", case.label()), |b| {
-            b.iter(|| black_box(corpus::compile_wasm_module_with_cache(case, &cache).unwrap()))
-        });
-    }
-    compile_group.finish();
-
-    let mut runtime_group = c.benchmark_group("dsl/runtime-predict");
-    runtime_group.sampling_mode(SamplingMode::Flat);
-
-    for case in cases {
-        let jit = corpus::compile_runtime_jit_model(case).unwrap();
-        runtime_group.bench_function(BenchmarkId::new("jit", case.label()), |b| {
-            b.iter(|| black_box(corpus::estimate_runtime_predictions(case, &jit).unwrap()))
-        });
-
-        let aot_workspace = ArtifactWorkspace::new().unwrap();
-        let aot = corpus::compile_runtime_native_aot_model(case, &aot_workspace).unwrap();
-        runtime_group.bench_function(BenchmarkId::new("native-aot", case.label()), |b| {
-            b.iter(|| black_box(corpus::estimate_runtime_predictions(case, &aot).unwrap()))
-        });
-
-        let wasm = corpus::compile_runtime_wasm_model(case).unwrap();
-        runtime_group.bench_function(BenchmarkId::new("wasm", case.label()), |b| {
-            b.iter(|| black_box(corpus::estimate_runtime_predictions(case, &wasm).unwrap()))
-        });
-    }
-    runtime_group.finish();
-}
-
-criterion_group!(benches, runtime_matrix_benchmark);
-criterion_main!(benches);

From 206ef69c81171bec2b861de19e4c64f0e580c44e Mon Sep 17 00:00:00 2001
From: Markus <markushh@uio.no>
Date: Thu, 14 May 2026 09:44:09 +0200
Subject: [PATCH 7/9] Cleanup

---
 benches/common/mod.rs    |  90 +++-------
 benches/dsl_matrix.rs    | 340 +++++++++++++++++++++++------------
 benches/native_matrix.rs |  34 +---
 src/dsl/native.rs        | 370 ++++++++++++++++++++++++++++++++++++++-
 4 files changed, 629 insertions(+), 205 deletions(-)

diff --git a/benches/common/mod.rs b/benches/common/mod.rs
index 0aa74fb8..6f8923aa 100644
--- a/benches/common/mod.rs
+++ b/benches/common/mod.rs
@@ -1,25 +1,13 @@
-//! Shared bench fixtures for the native + DSL benchmark suites.
+//! Shared bench fixtures (workloads, subjects, params, error models, model factories)
+//! used by both `native_matrix.rs` and `dsl_matrix.rs`. Every backend measures the
+//! same model + subject + params so only the engine varies between cells.
 //!
-//! This module centralizes the model definitions, subjects, parameter vectors,
-//! and error-model fixtures that every backend variant in `benches/native_matrix.rs`
-//! and `benches/dsl_matrix.rs` reuses. The goal is to keep all backends
-//! (handwritten, macro, dsl-jit, dsl-aot, dsl-wasm) measuring **the same**
-//! model, subject, and parameters so the only thing that differs between
-//! cells is the engine itself.
+//! Workloads:
+//! - [`Workload::Short`]: 1-cpt, 100 mg PO at t=0, 9 obs over 12 h.
+//! - [`Workload::Repeat`]: 2-cpt, 100 mg IV q12h × 10, 14 obs over 120 h.
 //!
-//! Two workloads are exposed:
-//! - [`Workload::Short`]: 1-compartment, single 100 mg PO bolus at t=0,
-//!   9 observations spread across 12 hours.
-//! - [`Workload::Repeat`]: 2-compartment, 100 mg IV bolus q12h × 10
-//!   (doses at t=0,12,…,108), 14 observations spread across 120 hours.
-//!
-//! For each workload three solver kinds (ODE, Analytical, SDE) and three
-//! authoring/backend flavors (handwritten, macro, DSL-source) are provided.
-//!
-//! Some helpers (`SDE_PARTICLES`, `matrix_data`, `support_points`) are
-//! deliberately public — both bench binaries consume them, but cargo compiles
-//! each bench separately so unused helpers would trigger dead-code lints in
-//! one binary or the other. We silence those with `#![allow(dead_code)]`.
+//! `#![allow(dead_code)]` since each bench binary compiles separately and may
+//! not use every helper.
 
 #![allow(dead_code)]
 
@@ -33,10 +21,7 @@ use pharmsol::{
     Analytical, ResidualErrorModel, ResidualErrorModels, ODE, SDE,
 };
 
-/// Build the `ModelMetadata` for `(workload, kind)`. Handwritten factories
-/// attach this so they resolve string route labels (`"po"`/`"iv"`) and output
-/// labels (`"plasma"`) the same way the `ode!` / `analytical!` / `sde!`
-/// macros and the DSL do.
+/// `ModelMetadata` for handwritten factories so route/output labels resolve like the macro/DSL paths.
 fn model_metadata(workload: Workload, kind: SolverKind) -> equation::ModelMetadata {
     let name = match (workload, kind) {
         (Workload::Short, SolverKind::Ode) => "bench_one_cpt_po_ode",
@@ -80,16 +65,14 @@ fn model_metadata(workload: Workload, kind: SolverKind) -> equation::ModelMetada
     }
 }
 
-/// SDE particle count used across every SDE bench cell. Kept low so SDE
-/// wall-clock stays comparable to ODE/Analytical at the same workload.
+/// SDE particle count. Kept low so SDE wall-clock stays in the same ballpark as ODE/Analytical.
 pub const SDE_PARTICLES: usize = 16;
 
-/// Two scenarios that every backend variant must implement identically.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum Workload {
-    /// 1-compartment, single 100 mg PO bolus, 12 h horizon.
+    /// 1-cpt, 100 mg PO, 12 h.
     Short,
-    /// 2-compartment, 100 mg IV bolus q12h × 10, 120 h horizon.
+    /// 2-cpt, 100 mg IV q12h × 10, 120 h.
     Repeat,
 }
 
@@ -106,7 +89,7 @@ impl Workload {
     }
 }
 
-/// Solver kinds covered by the matrix.
+/// Solver kinds.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum SolverKind {
     Ode,
@@ -130,15 +113,13 @@ impl SolverKind {
 
 // ───────────────────────────── Subjects ──────────────────────────────
 
-/// Time grid for the short workload (1-cpt, 12 h). 9 sampling points.
+/// 9 sampling points for the short workload.
 const SHORT_TIMES: &[f64] = &[0.25, 0.5, 1.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0];
 
-/// Synthetic concentration values for the short workload. These do not need to
-/// match any specific model exactly — they only feed into the likelihood
-/// calculation so that we exercise the same code path across backends.
+/// Synthetic plasma concentrations — only need to exercise the likelihood path, not match any model.
 const SHORT_OBS: &[f64] = &[0.50, 0.90, 1.60, 2.40, 2.10, 1.50, 1.05, 0.72, 0.48];
 
-/// Time grid for the repeat workload (2-cpt, 120 h). 14 sampling points.
+/// 14 sampling points for the repeat workload.
 const REPEAT_TIMES: &[f64] = &[
     0.5, 2.0, 6.0, 10.0, 14.0, 24.0, 36.0, 48.0, 60.0, 72.0, 84.0, 96.0, 108.0, 120.0,
 ];
@@ -147,9 +128,7 @@ const REPEAT_OBS: &[f64] = &[
     1.80, 1.45, 1.10, 0.90, 1.30, 1.60, 1.55, 1.50, 1.48, 1.45, 1.43, 1.42, 1.41, 0.95,
 ];
 
-/// Build a subject for the given workload, using `missing_observation` so it
-/// can drive `estimate_predictions` benchmarks without paying for observation
-/// allocation.
+/// Subject with `missing_observation` slots — used for prediction benches.
 pub fn subject_for_predictions(workload: Workload) -> Subject {
     let id = format!("{}-pred", workload.label());
     let mut builder = Subject::builder(id);
@@ -173,8 +152,7 @@ pub fn subject_for_predictions(workload: Workload) -> Subject {
     builder.build()
 }
 
-/// Build a subject for the given workload with real observations, suitable
-/// for `estimate_log_likelihood` and `log_likelihood_matrix` benchmarks.
+/// Subject with real observations — used for log-likelihood / matrix benches.
 pub fn subject_for_likelihood(workload: Workload) -> Subject {
     let id = format!("{}-lik", workload.label());
     let mut builder = Subject::builder(id);
@@ -198,9 +176,7 @@ pub fn subject_for_likelihood(workload: Workload) -> Subject {
     builder.build()
 }
 
-/// Population dataset for `log_likelihood_matrix`. `n` subjects share the
-/// same template with small per-subject perturbations on observation values
-/// (mirrors the pattern used in the original `benches/likelihood.rs`).
+/// Population dataset for `log_likelihood_matrix`: `n` subjects, same template with small obs perturbations.
 pub fn matrix_data(workload: Workload, n: usize) -> Data {
     let subjects = (0..n)
         .map(|i| {
@@ -232,14 +208,9 @@ pub fn matrix_data(workload: Workload, n: usize) -> Data {
 
 // ───────────────────────────── Parameters ────────────────────────────
 
-/// Reference parameter vector for the given (workload, solver) cell.
-///
-/// **Short / ODE**: `[ka, ke, v]` = `[1.0, 0.2, 50.0]`
-/// **Short / Analytical**: `[ka, ke, v]` = `[1.0, 0.2, 50.0]`
-/// **Short / SDE**: `[ka, ke, v, sigma_ke]` = `[1.0, 0.2, 50.0, 0.05]`
-/// **Repeat / ODE**: `[ke, kcp, kpc, v]` = `[0.10, 0.05, 0.04, 50.0]`
-/// **Repeat / Analytical**: `[ke, kcp, kpc, v]` = `[0.10, 0.05, 0.04, 50.0]`
-/// **Repeat / SDE**: `[ke, kcp, kpc, v, sigma_ke]` = `[0.10, 0.05, 0.04, 50.0, 0.01]`
+/// Reference parameter vector per `(workload, kind)`:
+/// Short ODE/Analytical `[ka, ke, v]`, Short SDE adds `sigma_ke`;
+/// Repeat ODE/Analytical `[ke, kcp, kpc, v]`, Repeat SDE adds `sigma_ke`.
 pub fn params(workload: Workload, kind: SolverKind) -> Vec<f64> {
     match (workload, kind) {
         (Workload::Short, SolverKind::Ode | SolverKind::Analytical) => vec![1.0, 0.2, 50.0],
@@ -251,9 +222,8 @@ pub fn params(workload: Workload, kind: SolverKind) -> Vec<f64> {
     }
 }
 
-/// Support-point grid for `log_likelihood_matrix`. Shape: `(n, nparams)`.
-/// Rows are small perturbations of [`params`] so every grid cell stays inside
-/// a numerically well-behaved region.
+/// Support-point grid for `log_likelihood_matrix`, shape `(n, nparams)`.
+/// Rows are small perturbations of [`params`].
 pub fn support_points(workload: Workload, kind: SolverKind, n: usize) -> Array2<f64> {
     let base = params(workload, kind);
     let nparams = base.len();
@@ -276,11 +246,8 @@ pub fn assay_error_models() -> AssayErrorModels {
         .expect("plasma assay error model")
 }
 
-/// Residual error model (kept for symmetry with the existing benches even
-/// though the new suite does not currently exercise `log_likelihood_batch`).
+/// Residual error model. Indexes outputs by dense `usize` — assumes a single output at index 0.
 pub fn residual_error_models() -> ResidualErrorModels {
-    // `ResidualErrorModels` indexes outputs by their dense `usize` index
-    // (not by label), so this assumes a single output equation at index 0.
     ResidualErrorModels::new().add(0, ResidualErrorModel::constant(0.2))
 }
 
@@ -557,9 +524,7 @@ pub fn macro_sde(workload: Workload) -> SDE {
 
 // ───────────────────────────── DSL source strings ────────────────────
 
-/// DSL source for `(workload, kind)`. Consumed by `benches/dsl_matrix.rs`
-/// via `compile_module_source_to_runtime` (JIT/AOT) and
-/// `compile_module_source_to_runtime_wasm` (WASM).
+/// DSL source for `(workload, kind)`. Compiled via `compile_module_source_to_runtime`.
 pub fn dsl_source(workload: Workload, kind: SolverKind) -> &'static str {
     match (workload, kind) {
         (Workload::Short, SolverKind::Ode) => SHORT_ODE_DSL,
@@ -571,8 +536,7 @@ pub fn dsl_source(workload: Workload, kind: SolverKind) -> &'static str {
     }
 }
 
-/// DSL `name = ...` field for `(workload, kind)`. Used by `Some(model_name)`
-/// arguments on the DSL compile entrypoints.
+/// DSL `name = ...` field for `(workload, kind)`.
 pub fn dsl_model_name(workload: Workload, kind: SolverKind) -> &'static str {
     match (workload, kind) {
         (Workload::Short, SolverKind::Ode) => "bench_one_cpt_po_ode",
diff --git a/benches/dsl_matrix.rs b/benches/dsl_matrix.rs
index 753ecb84..c5975f70 100644
--- a/benches/dsl_matrix.rs
+++ b/benches/dsl_matrix.rs
@@ -1,27 +1,11 @@
-//! DSL backend benchmark matrix (feature-gated).
+//! DSL bench matrix (feature-gated): JIT, native AoT, WASM across all workloads + solvers.
+//! Mirrors `native_matrix.rs` but compiles models from DSL source.
 //!
-//! Covers the three DSL backends — JIT, native AoT, WASM — across the same
-//! two workloads and three solver kinds as `native_matrix.rs`.
-//!
-//! Coverage notes:
-//! - Predictions: every (workload, kind, backend) cell.
-//! - Log-likelihood + likelihood-matrix: **ODE only** across all three
-//!   backends. The DSL ODE runtime model is `RuntimeOdeModel = NativeOdeModel`
-//!   (see `src/dsl/runtime.rs:117`), and `NativeOdeModel` implements both the
-//!   `Equation` and `Cache` traits. `NativeAnalyticalModel` and
-//!   `NativeSdeModel` do not, so analytical/SDE backends remain
-//!   predictions-only on this bench surface.
-//! - Hot/cold cache: ODE only, via `.disable_cache()` on the extracted
-//!   `NativeOdeModel`. Analytical/SDE skip the cache axis.
-//! - Compile-time: one bench per (workload, kind, backend) that measures the
-//!   `source -> CompiledRuntimeModel` pipeline end-to-end.
-//!
-//! Bench id format:
-//! - `dsl/compile`               → `{workload}/{kind}/{backend}`
-//! - `dsl/predictions`           → `{workload}/{kind}/{backend}/{cache}` (ODE)
-//!                                 `{workload}/{kind}/{backend}` (Ana/SDE)
-//! - `dsl/log-likelihood`        → `{workload}/ode/{backend}/{cache}`
-//! - `dsl/likelihood-matrix`     → `{workload}/ode/{backend}`
+//! IDs:
+//! - `dsl/compile`           → `{workload}/{kind}/{backend}`
+//! - `dsl/predictions`       → `{workload}/{kind}/{backend}/{cache}`
+//! - `dsl/log-likelihood`    → `{workload}/{kind}/{backend}/{cache}`
+//! - `dsl/likelihood-matrix` → `{workload}/{kind}/{backend}`
 
 use std::hint::black_box;
 use std::path::PathBuf;
@@ -31,8 +15,8 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Samplin
 use tempfile::TempDir;
 
 use pharmsol::dsl::{
-    compile_module_source_to_runtime, CompiledRuntimeModel, NativeAotCompileOptions, NativeOdeModel,
-    RuntimeCompilationTarget,
+    compile_module_source_to_runtime, CompiledRuntimeModel, NativeAnalyticalModel,
+    NativeAotCompileOptions, NativeOdeModel, NativeSdeModel, RuntimeCompilationTarget,
 };
 use pharmsol::prelude::*;
 use pharmsol::Cache;
@@ -86,8 +70,7 @@ impl CacheState {
     }
 }
 
-/// Per-bench AoT workspace. One `TempDir` for the whole bench binary; each
-/// compile call uses a fresh subdirectory so artifacts don't clash.
+/// One `TempDir` shared across the bench binary; each compile gets a fresh subdir.
 struct AotWorkspace {
     root: TempDir,
     counter: std::cell::Cell<usize>,
@@ -111,8 +94,7 @@ impl AotWorkspace {
     }
 }
 
-/// Compile a (workload, kind) DSL model with the chosen backend, returning the
-/// full `CompiledRuntimeModel`.
+/// Compile `(workload, kind)` with `backend` and return the full `CompiledRuntimeModel`.
 fn compile_runtime(
     workload: Workload,
     kind: SolverKind,
@@ -133,8 +115,6 @@ fn compile_runtime(
         .unwrap_or_else(|e| panic!("compile {} via {} failed: {e:?}", name, backend.label()))
 }
 
-/// Compile and extract the inner ODE model so the bench can drive `Equation`
-/// directly (and toggle the cache).
 fn compile_ode(workload: Workload, backend: Backend, aot: &AotWorkspace) -> NativeOdeModel {
     match compile_runtime(workload, SolverKind::Ode, backend, aot) {
         CompiledRuntimeModel::Ode(model) => model,
@@ -146,12 +126,38 @@ fn compile_ode(workload: Workload, backend: Backend, aot: &AotWorkspace) -> Nati
     }
 }
 
+fn compile_analytical(
+    workload: Workload,
+    backend: Backend,
+    aot: &AotWorkspace,
+) -> NativeAnalyticalModel {
+    match compile_runtime(workload, SolverKind::Analytical, backend, aot) {
+        CompiledRuntimeModel::Analytical(model) => model,
+        other => panic!(
+            "expected Analytical model for {}, got {:?}",
+            workload.label(),
+            other.backend()
+        ),
+    }
+}
+
+fn compile_sde(workload: Workload, backend: Backend, aot: &AotWorkspace) -> NativeSdeModel {
+    match compile_runtime(workload, SolverKind::Sde, backend, aot) {
+        CompiledRuntimeModel::Sde(model) => model,
+        other => panic!(
+            "expected Sde model for {}, got {:?}",
+            workload.label(),
+            other.backend()
+        ),
+    }
+}
+
 // ───────────────────────────── compile group ─────────────────────────
 
 fn compile_group(c: &mut Criterion) {
     let mut group = c.benchmark_group("dsl/compile");
     group.sampling_mode(SamplingMode::Flat);
-    // Each compile is expensive (especially AoT — full rustc invocation).
+    // AoT especially is expensive — full rustc invocation per iteration.
     group.sample_size(10);
     group.measurement_time(Duration::from_secs(30));
 
@@ -196,23 +202,22 @@ fn predictions_group(c: &mut Criterion) {
         for kind in SolverKind::all() {
             let theta = params(workload, kind);
             for backend in Backend::all() {
-                match kind {
-                    SolverKind::Ode => {
-                        // ODE supports hot/cold via NativeOdeModel.disable_cache().
-                        for cache in CacheState::all() {
+                for cache in CacheState::all() {
+                    let bench_id = BenchmarkId::from_parameter(format!(
+                        "{}/{}/{}/{}",
+                        workload.label(),
+                        kind.label(),
+                        backend.label(),
+                        cache.label()
+                    ));
+                    match kind {
+                        SolverKind::Ode => {
                             let model = match cache {
                                 CacheState::Hot => compile_ode(workload, backend, &aot),
                                 CacheState::Cold => {
                                     compile_ode(workload, backend, &aot).disable_cache()
                                 }
                             };
-                            let bench_id = BenchmarkId::from_parameter(format!(
-                                "{}/{}/{}/{}",
-                                workload.label(),
-                                kind.label(),
-                                backend.label(),
-                                cache.label()
-                            ));
                             group.bench_function(bench_id, |b| {
                                 b.iter(|| {
                                     black_box(
@@ -226,29 +231,46 @@ fn predictions_group(c: &mut Criterion) {
                                 });
                             });
                         }
-                    }
-                    SolverKind::Analytical | SolverKind::Sde => {
-                        // Analytical / SDE: no cache axis. Drive predictions
-                        // through the enum-level `estimate_predictions`.
-                        let model = compile_runtime(workload, kind, backend, &aot);
-                        let bench_id = BenchmarkId::from_parameter(format!(
-                            "{}/{}/{}",
-                            workload.label(),
-                            kind.label(),
-                            backend.label()
-                        ));
-                        group.bench_function(bench_id, |b| {
-                            b.iter(|| {
-                                black_box(
-                                    model
-                                        .estimate_predictions(
-                                            black_box(&subject),
-                                            black_box(&theta),
-                                        )
-                                        .unwrap(),
-                                )
+                        SolverKind::Analytical => {
+                            let model = match cache {
+                                CacheState::Hot => compile_analytical(workload, backend, &aot),
+                                CacheState::Cold => {
+                                    compile_analytical(workload, backend, &aot).disable_cache()
+                                }
+                            };
+                            group.bench_function(bench_id, |b| {
+                                b.iter(|| {
+                                    black_box(
+                                        model
+                                            .estimate_predictions(
+                                                black_box(&subject),
+                                                black_box(&theta),
+                                            )
+                                            .unwrap(),
+                                    )
+                                });
                             });
-                        });
+                        }
+                        SolverKind::Sde => {
+                            let model = match cache {
+                                CacheState::Hot => compile_sde(workload, backend, &aot),
+                                CacheState::Cold => {
+                                    compile_sde(workload, backend, &aot).disable_cache()
+                                }
+                            };
+                            group.bench_function(bench_id, |b| {
+                                b.iter(|| {
+                                    black_box(
+                                        model
+                                            .estimate_predictions(
+                                                black_box(&subject),
+                                                black_box(&theta),
+                                            )
+                                            .unwrap(),
+                                    )
+                                });
+                            });
+                        }
                     }
                 }
             }
@@ -269,32 +291,83 @@ fn log_likelihood_group(c: &mut Criterion) {
 
     for workload in Workload::all() {
         let subject = subject_for_likelihood(workload);
-        let theta = params(workload, SolverKind::Ode);
-        for backend in Backend::all() {
-            for cache in CacheState::all() {
-                let model = match cache {
-                    CacheState::Hot => compile_ode(workload, backend, &aot),
-                    CacheState::Cold => compile_ode(workload, backend, &aot).disable_cache(),
-                };
-                let bench_id = BenchmarkId::from_parameter(format!(
-                    "{}/ode/{}/{}",
-                    workload.label(),
-                    backend.label(),
-                    cache.label()
-                ));
-                group.bench_function(bench_id, |b| {
-                    b.iter(|| {
-                        black_box(
-                            model
-                                .estimate_log_likelihood(
-                                    black_box(&subject),
-                                    black_box(&theta),
-                                    black_box(&error_models),
-                                )
-                                .unwrap(),
-                        )
-                    });
-                });
+        for kind in SolverKind::all() {
+            let theta = params(workload, kind);
+            for backend in Backend::all() {
+                for cache in CacheState::all() {
+                    let bench_id = BenchmarkId::from_parameter(format!(
+                        "{}/{}/{}/{}",
+                        workload.label(),
+                        kind.label(),
+                        backend.label(),
+                        cache.label()
+                    ));
+                    match kind {
+                        SolverKind::Ode => {
+                            let model = match cache {
+                                CacheState::Hot => compile_ode(workload, backend, &aot),
+                                CacheState::Cold => {
+                                    compile_ode(workload, backend, &aot).disable_cache()
+                                }
+                            };
+                            group.bench_function(bench_id, |b| {
+                                b.iter(|| {
+                                    black_box(
+                                        model
+                                            .estimate_log_likelihood(
+                                                black_box(&subject),
+                                                black_box(&theta),
+                                                black_box(&error_models),
+                                            )
+                                            .unwrap(),
+                                    )
+                                });
+                            });
+                        }
+                        SolverKind::Analytical => {
+                            let model = match cache {
+                                CacheState::Hot => compile_analytical(workload, backend, &aot),
+                                CacheState::Cold => {
+                                    compile_analytical(workload, backend, &aot).disable_cache()
+                                }
+                            };
+                            group.bench_function(bench_id, |b| {
+                                b.iter(|| {
+                                    black_box(
+                                        model
+                                            .estimate_log_likelihood(
+                                                black_box(&subject),
+                                                black_box(&theta),
+                                                black_box(&error_models),
+                                            )
+                                            .unwrap(),
+                                    )
+                                });
+                            });
+                        }
+                        SolverKind::Sde => {
+                            let model = match cache {
+                                CacheState::Hot => compile_sde(workload, backend, &aot),
+                                CacheState::Cold => {
+                                    compile_sde(workload, backend, &aot).disable_cache()
+                                }
+                            };
+                            group.bench_function(bench_id, |b| {
+                                b.iter(|| {
+                                    black_box(
+                                        model
+                                            .estimate_log_likelihood(
+                                                black_box(&subject),
+                                                black_box(&theta),
+                                                black_box(&error_models),
+                                            )
+                                            .unwrap(),
+                                    )
+                                });
+                            });
+                        }
+                    }
+                }
             }
         }
     }
@@ -317,28 +390,69 @@ fn likelihood_matrix_group(c: &mut Criterion) {
 
     for workload in Workload::all() {
         let data = matrix_data(workload, MATRIX_N_SUBJECTS);
-        let theta = support_points(workload, SolverKind::Ode, MATRIX_N_SUPPORT);
-        for backend in Backend::all() {
-            let model = compile_ode(workload, backend, &aot);
-            let bench_id = BenchmarkId::from_parameter(format!(
-                "{}/ode/{}",
-                workload.label(),
-                backend.label()
-            ));
-            group.bench_function(bench_id, |b| {
-                b.iter(|| {
-                    black_box(
-                        log_likelihood_matrix(
-                            black_box(&model),
-                            black_box(&data),
-                            black_box(&theta),
-                            black_box(&error_models),
-                            false,
-                        )
-                        .unwrap(),
-                    )
-                });
-            });
+        for kind in SolverKind::all() {
+            let theta = support_points(workload, kind, MATRIX_N_SUPPORT);
+            for backend in Backend::all() {
+                let bench_id = BenchmarkId::from_parameter(format!(
+                    "{}/{}/{}",
+                    workload.label(),
+                    kind.label(),
+                    backend.label()
+                ));
+                match kind {
+                    SolverKind::Ode => {
+                        let model = compile_ode(workload, backend, &aot);
+                        group.bench_function(bench_id, |b| {
+                            b.iter(|| {
+                                black_box(
+                                    log_likelihood_matrix(
+                                        black_box(&model),
+                                        black_box(&data),
+                                        black_box(&theta),
+                                        black_box(&error_models),
+                                        false,
+                                    )
+                                    .unwrap(),
+                                )
+                            });
+                        });
+                    }
+                    SolverKind::Analytical => {
+                        let model = compile_analytical(workload, backend, &aot);
+                        group.bench_function(bench_id, |b| {
+                            b.iter(|| {
+                                black_box(
+                                    log_likelihood_matrix(
+                                        black_box(&model),
+                                        black_box(&data),
+                                        black_box(&theta),
+                                        black_box(&error_models),
+                                        false,
+                                    )
+                                    .unwrap(),
+                                )
+                            });
+                        });
+                    }
+                    SolverKind::Sde => {
+                        let model = compile_sde(workload, backend, &aot);
+                        group.bench_function(bench_id, |b| {
+                            b.iter(|| {
+                                black_box(
+                                    log_likelihood_matrix(
+                                        black_box(&model),
+                                        black_box(&data),
+                                        black_box(&theta),
+                                        black_box(&error_models),
+                                        false,
+                                    )
+                                    .unwrap(),
+                                )
+                            });
+                        });
+                    }
+                }
+            }
         }
     }
 
diff --git a/benches/native_matrix.rs b/benches/native_matrix.rs
index b0e063eb..4ad12e39 100644
--- a/benches/native_matrix.rs
+++ b/benches/native_matrix.rs
@@ -1,22 +1,10 @@
-//! Native (Rust-authored) benchmark matrix.
+//! Native bench matrix: handwritten + macro models, all solvers, both workloads.
+//! Default features only — this is the CI regression signal. DSL backends are in `dsl_matrix.rs`.
 //!
-//! Covers handwritten and macro-authored models across all three solver kinds
-//! (ODE, Analytical, SDE), both workloads (short 1-cpt PO; long 2-cpt IV q12h),
-//! and the three measured methods: `estimate_predictions`,
-//! `estimate_log_likelihood`, and `log_likelihood_matrix`.
-//!
-//! This bench compiles with **default features only** so it produces a stable
-//! regression signal on CI (which runs plain `cargo bench`). DSL backends
-//! live in `dsl_matrix.rs` behind feature gates.
-//!
-//! Benchmark id format:
-//! - `native/predictions`     → `{workload}/{solver}/{authoring}/{cache}`
-//! - `native/log-likelihood`  → `{workload}/{solver}/{authoring}/{cache}`
-//! - `native/likelihood-matrix` → `{workload}/{solver}/{authoring}` (matrix is
-//!   the unit of work; per-cell cache toggling is not meaningful here).
-//!
-//! `cache` is `hot` or `cold` (cold = `.disable_cache()` on a fresh handle).
-//! `authoring` is `handwritten` or `macro`.
+//! IDs:
+//! - `native/predictions`       → `{workload}/{solver}/{authoring}/{cache}`
+//! - `native/log-likelihood`    → `{workload}/{solver}/{authoring}/{cache}`
+//! - `native/likelihood-matrix` → `{workload}/{solver}/{authoring}`
 
 use std::hint::black_box;
 use std::time::Duration;
@@ -32,9 +20,7 @@ use common::{
     support_points, SolverKind, Workload,
 };
 
-/// Size of the population dataset (rows) for `log_likelihood_matrix`.
 const MATRIX_N_SUBJECTS: usize = 32;
-/// Size of the parameter grid (columns) for `log_likelihood_matrix`.
 const MATRIX_N_SUPPORT: usize = 64;
 
 #[derive(Debug, Clone, Copy)]
@@ -75,11 +61,6 @@ impl CacheState {
     }
 }
 
-/// Helper that materializes the correct concrete handwritten/macro factory
-/// then dispatches `estimate_predictions` on it. The `match` over solver kind
-/// stays inside each closure so the chosen branch is a fresh handle every
-/// time we configure a new (cold) instance, and we don't pay any virtual
-/// dispatch cost (each model type has its own concrete `Equation` impl).
 fn id(workload: Workload, kind: SolverKind, authoring: Authoring, cache: CacheState) -> String {
     format!(
         "{}/{}/{}/{}",
@@ -464,8 +445,7 @@ fn likelihood_matrix_group(c: &mut Criterion) {
 
     let mut group = c.benchmark_group("native/likelihood-matrix");
     group.sampling_mode(SamplingMode::Flat);
-    // Each matrix iteration is multi-second under any solver — small sample
-    // budgets keep wall-clock manageable while still producing a useful signal.
+    // One matrix iteration is multi-second; small sample budget keeps wall-clock sane.
     group.sample_size(10);
     group.measurement_time(Duration::from_secs(20));
 
diff --git a/src/dsl/native.rs b/src/dsl/native.rs
index 7197084e..7c2e327c 100644
--- a/src/dsl/native.rs
+++ b/src/dsl/native.rs
@@ -24,11 +24,11 @@ use crate::{
     data::error_model::AssayErrorModels,
     data::{Covariates, Infusion, InputLabel, OutputLabel},
     simulator::{
-        cache::{PredictionCache, DEFAULT_CACHE_SIZE},
+        cache::{PredictionCache, SdeLikelihoodCache, DEFAULT_CACHE_SIZE},
         equation::{
             ode::{closure_helpers::PMProblem, ExplicitRkTableau, OdeSolver, SdirkTableau},
             sde::simulate_sde_event_with,
-            EqnKind, Equation, EquationPriv, EquationTypes,
+            EqnKind, Equation, EquationPriv, EquationTypes, Predictions,
         },
         likelihood::{Prediction, SubjectPredictions},
         Fa, Lag, M, T, V,
@@ -745,12 +745,14 @@ pub struct NativeOdeModel {
 #[derive(Clone, Debug)]
 pub struct NativeAnalyticalModel {
     shared: Arc<SharedNativeModel>,
+    cache: Option<PredictionCache>,
 }
 
 #[derive(Clone, Debug)]
 pub struct NativeSdeModel {
     shared: Arc<SharedNativeModel>,
     nparticles: usize,
+    cache: Option<SdeLikelihoodCache>,
 }
 
 #[derive(Clone, Debug)]
@@ -1224,6 +1226,7 @@ impl NativeAnalyticalModel {
     pub(crate) fn new(info: NativeModelInfo, artifact: impl RuntimeArtifact + 'static) -> Self {
         Self {
             shared: Arc::new(SharedNativeModel::new(info, artifact)),
+            cache: Some(PredictionCache::new(DEFAULT_CACHE_SIZE)),
         }
     }
 
@@ -1377,12 +1380,189 @@ impl NativeAnalyticalModel {
     }
 }
 
+#[inline(always)]
+fn runtime_analytical_predictions(
+    model: &NativeAnalyticalModel,
+    subject: &Subject,
+    support_point: &[f64],
+) -> Result<SubjectPredictions, PharmsolError> {
+    if let Some(cache) = &model.cache {
+        let key = (
+            subject.hash(),
+            crate::simulator::equation::spphash(support_point),
+        );
+        if let Some(cached) = cache.get(&key) {
+            return Ok(cached);
+        }
+
+        let result = model.estimate_predictions(subject, support_point)?;
+        cache.insert(key, result.clone());
+        Ok(result)
+    } else {
+        model.estimate_predictions(subject, support_point)
+    }
+}
+
+impl crate::simulator::equation::Cache for NativeAnalyticalModel {
+    fn with_cache_capacity(mut self, size: u64) -> Self {
+        self.cache = Some(PredictionCache::new(size));
+        self
+    }
+
+    fn enable_cache(mut self) -> Self {
+        self.cache = Some(PredictionCache::new(DEFAULT_CACHE_SIZE));
+        self
+    }
+
+    fn clear_cache(&self) {
+        if let Some(cache) = &self.cache {
+            cache.invalidate_all();
+        }
+    }
+
+    fn disable_cache(mut self) -> Self {
+        self.cache = None;
+        self
+    }
+}
+
+impl EquationTypes for NativeAnalyticalModel {
+    type S = V;
+    type P = SubjectPredictions;
+}
+
+impl EquationPriv for NativeAnalyticalModel {
+    fn lag(&self) -> &Lag {
+        &(runtime_no_lag as Lag)
+    }
+
+    fn fa(&self) -> &Fa {
+        &(runtime_no_fa as Fa)
+    }
+
+    fn get_nstates(&self) -> usize {
+        self.shared.info.state_len
+    }
+
+    fn get_ndrugs(&self) -> usize {
+        self.shared.info.route_len
+    }
+
+    fn get_nouteqs(&self) -> usize {
+        self.shared.info.output_len
+    }
+
+    fn metadata(&self) -> Option<&crate::ValidatedModelMetadata> {
+        None
+    }
+
+    fn solve(
+        &self,
+        _state: &mut Self::S,
+        _support_point: &[f64],
+        _covariates: &Covariates,
+        _infusions: &[Infusion],
+        _start_time: f64,
+        _end_time: f64,
+    ) -> Result<(), PharmsolError> {
+        unimplemented!("solve is not used for runtime analytical models")
+    }
+
+    fn process_observation(
+        &self,
+        _support_point: &[f64],
+        _observation: &Observation,
+        _error_models: Option<&AssayErrorModels>,
+        _time: f64,
+        _covariates: &Covariates,
+        _x: &mut Self::S,
+        _likelihood: &mut Vec<f64>,
+        _output: &mut Self::P,
+    ) -> Result<(), PharmsolError> {
+        unimplemented!("process_observation is not used for runtime analytical models")
+    }
+
+    fn initial_state(
+        &self,
+        _support_point: &[f64],
+        _covariates: &Covariates,
+        _occasion_index: usize,
+    ) -> Self::S {
+        V::zeros(self.shared.info.state_len, NalgebraContext)
+    }
+}
+
+impl Equation for NativeAnalyticalModel {
+    fn estimate_likelihood(
+        &self,
+        subject: &Subject,
+        support_point: &[f64],
+        error_models: &AssayErrorModels,
+    ) -> Result<f64, PharmsolError> {
+        Ok(self
+            .estimate_log_likelihood(subject, support_point, error_models)?
+            .exp())
+    }
+
+    fn estimate_log_likelihood(
+        &self,
+        subject: &Subject,
+        support_point: &[f64],
+        error_models: &AssayErrorModels,
+    ) -> Result<f64, PharmsolError> {
+        let bound_error_models = self.bind_error_models(error_models)?;
+        let predictions = runtime_analytical_predictions(self, subject, support_point)?;
+        predictions.log_likelihood(&bound_error_models)
+    }
+
+    fn kind() -> EqnKind {
+        EqnKind::Analytical
+    }
+
+    fn assay_error_models(&self) -> AssayErrorModels {
+        AssayErrorModels::with_output_names(
+            self.info()
+                .outputs
+                .iter()
+                .map(|output| output.name.as_str()),
+        )
+    }
+
+    fn estimate_predictions(
+        &self,
+        subject: &Subject,
+        support_point: &[f64],
+    ) -> Result<Self::P, PharmsolError> {
+        runtime_analytical_predictions(self, subject, support_point)
+    }
+
+    fn simulate_subject(
+        &self,
+        subject: &Subject,
+        support_point: &[f64],
+        error_models: Option<&AssayErrorModels>,
+    ) -> Result<(Self::P, Option<f64>), PharmsolError> {
+        let bound_error_models = match error_models {
+            Some(error_models) => Some(self.bind_error_models(error_models)?),
+            None => None,
+        };
+
+        let predictions = runtime_analytical_predictions(self, subject, support_point)?;
+        let likelihood = match bound_error_models.as_ref() {
+            Some(error_models) => Some(predictions.log_likelihood(error_models)?.exp()),
+            None => None,
+        };
+        Ok((predictions, likelihood))
+    }
+}
+
 impl NativeSdeModel {
     pub(crate) fn new(info: NativeModelInfo, artifact: impl RuntimeArtifact + 'static) -> Self {
         let nparticles = info.particles.unwrap_or(1);
         Self {
             shared: Arc::new(SharedNativeModel::new(info, artifact)),
             nparticles,
+            cache: Some(SdeLikelihoodCache::new(DEFAULT_CACHE_SIZE)),
         }
     }
 
@@ -1633,6 +1813,192 @@ impl NativeSdeModel {
     }
 }
 
+#[inline(always)]
+fn runtime_sde_log_likelihood(
+    model: &NativeSdeModel,
+    subject: &Subject,
+    support_point: &[f64],
+    error_models: &AssayErrorModels,
+) -> Result<f64, PharmsolError> {
+    if let Some(cache) = &model.cache {
+        let key = (
+            subject.hash(),
+            crate::simulator::equation::spphash(support_point),
+            error_models.hash(),
+        );
+        if let Some(cached) = cache.get(&key) {
+            return Ok(cached);
+        }
+
+        let predictions = model.estimate_predictions(subject, support_point)?;
+        let log_lik = predictions.log_likelihood(error_models)?;
+        cache.insert(key, log_lik);
+        Ok(log_lik)
+    } else {
+        let predictions = model.estimate_predictions(subject, support_point)?;
+        predictions.log_likelihood(error_models)
+    }
+}
+
+impl crate::simulator::equation::Cache for NativeSdeModel {
+    fn with_cache_capacity(mut self, size: u64) -> Self {
+        self.cache = Some(SdeLikelihoodCache::new(size));
+        self
+    }
+
+    fn enable_cache(mut self) -> Self {
+        self.cache = Some(SdeLikelihoodCache::new(DEFAULT_CACHE_SIZE));
+        self
+    }
+
+    fn clear_cache(&self) {
+        if let Some(cache) = &self.cache {
+            cache.invalidate_all();
+        }
+    }
+
+    fn disable_cache(mut self) -> Self {
+        self.cache = None;
+        self
+    }
+}
+
+impl EquationTypes for NativeSdeModel {
+    type S = Vec<DVector<f64>>;
+    type P = Array2<Prediction>;
+}
+
+impl EquationPriv for NativeSdeModel {
+    fn lag(&self) -> &Lag {
+        &(runtime_no_lag as Lag)
+    }
+
+    fn fa(&self) -> &Fa {
+        &(runtime_no_fa as Fa)
+    }
+
+    fn get_nstates(&self) -> usize {
+        self.shared.info.state_len
+    }
+
+    fn get_ndrugs(&self) -> usize {
+        self.shared.info.route_len
+    }
+
+    fn get_nouteqs(&self) -> usize {
+        self.shared.info.output_len
+    }
+
+    fn nparticles(&self) -> usize {
+        self.nparticles
+    }
+
+    fn is_sde(&self) -> bool {
+        true
+    }
+
+    fn metadata(&self) -> Option<&crate::ValidatedModelMetadata> {
+        None
+    }
+
+    fn solve(
+        &self,
+        _state: &mut Self::S,
+        _support_point: &[f64],
+        _covariates: &Covariates,
+        _infusions: &[Infusion],
+        _start_time: f64,
+        _end_time: f64,
+    ) -> Result<(), PharmsolError> {
+        unimplemented!("solve is not used for runtime SDE models")
+    }
+
+    fn process_observation(
+        &self,
+        _support_point: &[f64],
+        _observation: &Observation,
+        _error_models: Option<&AssayErrorModels>,
+        _time: f64,
+        _covariates: &Covariates,
+        _x: &mut Self::S,
+        _likelihood: &mut Vec<f64>,
+        _output: &mut Self::P,
+    ) -> Result<(), PharmsolError> {
+        unimplemented!("process_observation is not used for runtime SDE models")
+    }
+
+    fn initial_state(
+        &self,
+        _support_point: &[f64],
+        _covariates: &Covariates,
+        _occasion_index: usize,
+    ) -> Self::S {
+        vec![DVector::zeros(self.shared.info.state_len); self.nparticles]
+    }
+}
+
+impl Equation for NativeSdeModel {
+    fn estimate_likelihood(
+        &self,
+        subject: &Subject,
+        support_point: &[f64],
+        error_models: &AssayErrorModels,
+    ) -> Result<f64, PharmsolError> {
+        let log_lik = self.estimate_log_likelihood(subject, support_point, error_models)?;
+        Ok(log_lik.exp())
+    }
+
+    fn estimate_log_likelihood(
+        &self,
+        subject: &Subject,
+        support_point: &[f64],
+        error_models: &AssayErrorModels,
+    ) -> Result<f64, PharmsolError> {
+        let bound_error_models = self.bind_error_models(error_models)?;
+        runtime_sde_log_likelihood(self, subject, support_point, &bound_error_models)
+    }
+
+    fn kind() -> EqnKind {
+        EqnKind::SDE
+    }
+
+    fn assay_error_models(&self) -> AssayErrorModels {
+        AssayErrorModels::with_output_names(
+            self.info()
+                .outputs
+                .iter()
+                .map(|output| output.name.as_str()),
+        )
+    }
+
+    fn estimate_predictions(
+        &self,
+        subject: &Subject,
+        support_point: &[f64],
+    ) -> Result<Self::P, PharmsolError> {
+        NativeSdeModel::estimate_predictions(self, subject, support_point)
+    }
+
+    fn simulate_subject(
+        &self,
+        subject: &Subject,
+        support_point: &[f64],
+        error_models: Option<&AssayErrorModels>,
+    ) -> Result<(Self::P, Option<f64>), PharmsolError> {
+        let bound_error_models = match error_models {
+            Some(error_models) => Some(self.bind_error_models(error_models)?),
+            None => None,
+        };
+
+        let predictions = NativeSdeModel::estimate_predictions(self, subject, support_point)?;
+        let likelihood = match bound_error_models.as_ref() {
+            Some(error_models) => Some(predictions.log_likelihood(error_models)?.exp()),
+            None => None,
+        };
+        Ok((predictions, likelihood))
+    }
+}
+
 fn active_route_inputs(infusions: &[Infusion], time: f64, route_len: usize) -> Vec<f64> {
     let mut values = vec![0.0; route_len];
     for infusion in infusions {

From 180ab0971cf52bca6ea4c8550e1101a696a45708 Mon Sep 17 00:00:00 2001
From: Markus <markushh@uio.no>
Date: Thu, 14 May 2026 10:43:07 +0200
Subject: [PATCH 8/9] Disable SDEs, WASM, and AOT

---
 benches/common/mod.rs | 5 +++--
 benches/dsl_matrix.rs | 6 ++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/benches/common/mod.rs b/benches/common/mod.rs
index 6f8923aa..997e6e9c 100644
--- a/benches/common/mod.rs
+++ b/benches/common/mod.rs
@@ -106,8 +106,9 @@ impl SolverKind {
         }
     }
 
-    pub fn all() -> [SolverKind; 3] {
-        [SolverKind::Ode, SolverKind::Analytical, SolverKind::Sde]
+    // SDE bench cells temporarily disabled — too slow for the current matrix.
+    pub fn all() -> [SolverKind; 2] {
+        [SolverKind::Ode, SolverKind::Analytical]
     }
 }
 
diff --git a/benches/dsl_matrix.rs b/benches/dsl_matrix.rs
index c5975f70..2d913a35 100644
--- a/benches/dsl_matrix.rs
+++ b/benches/dsl_matrix.rs
@@ -31,6 +31,7 @@ const MATRIX_N_SUBJECTS: usize = 32;
 const MATRIX_N_SUPPORT: usize = 64;
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[allow(dead_code)] // Aot/Wasm temporarily disabled in `Backend::all`
 enum Backend {
     Jit,
     Aot,
@@ -46,8 +47,9 @@ impl Backend {
         }
     }
 
-    fn all() -> [Backend; 3] {
-        [Backend::Jit, Backend::Aot, Backend::Wasm]
+    // AoT and WASM backends temporarily disabled — too slow for the current matrix.
+    fn all() -> [Backend; 1] {
+        [Backend::Jit]
     }
 }
 

From b973b108de026c424dcb330269627aad5ce1379c Mon Sep 17 00:00:00 2001
From: Markus <markushh@uio.no>
Date: Thu, 14 May 2026 10:50:02 +0200
Subject: [PATCH 9/9] Fix memory issue

---
 benches/dsl_matrix.rs | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/benches/dsl_matrix.rs b/benches/dsl_matrix.rs
index 2d913a35..50072072 100644
--- a/benches/dsl_matrix.rs
+++ b/benches/dsl_matrix.rs
@@ -157,11 +157,19 @@ fn compile_sde(workload: Workload, backend: Backend, aot: &AotWorkspace) -> Nati
 // ───────────────────────────── compile group ─────────────────────────
 
 fn compile_group(c: &mut Criterion) {
+    use std::time::Instant;
+
     let mut group = c.benchmark_group("dsl/compile");
     group.sampling_mode(SamplingMode::Flat);
-    // AoT especially is expensive — full rustc invocation per iteration.
     group.sample_size(10);
-    group.measurement_time(Duration::from_secs(30));
+    group.measurement_time(Duration::from_secs(5));
+    // Each compile leaks an executable mmap (JIT) or runs rustc (AoT). Without
+    // a cap, a fast JIT compile (~60 µs) lets Criterion request hundreds of
+    // thousands of iterations per cell and exhausts the runner's executable
+    // memory pool / `vm.max_map_count`. We hard-cap real iterations per
+    // Criterion batch to `MAX_ITERS_PER_BATCH` and scale the reported elapsed
+    // time linearly so per-iteration timings stay accurate.
+    const MAX_ITERS_PER_BATCH: u64 = 25;
 
     let aot = AotWorkspace::new();
 
@@ -175,13 +183,19 @@ fn compile_group(c: &mut Criterion) {
                     backend.label()
                 ));
                 group.bench_function(bench_id, |b| {
-                    b.iter(|| {
-                        black_box(compile_runtime(
-                            black_box(workload),
-                            black_box(kind),
-                            black_box(backend),
-                            &aot,
-                        ));
+                    b.iter_custom(|iters| {
+                        let actual = iters.min(MAX_ITERS_PER_BATCH).max(1);
+                        let start = Instant::now();
+                        for _ in 0..actual {
+                            black_box(compile_runtime(
+                                black_box(workload),
+                                black_box(kind),
+                                black_box(backend),
+                                &aot,
+                            ));
+                        }
+                        let elapsed = start.elapsed();
+                        elapsed.mul_f64(iters as f64 / actual as f64)
                     });
                 });
             }