diff --git a/docs/index.md b/docs/index.md
index 3b1f228..0a6a9bc 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -36,6 +36,8 @@ Key guides include embedded [marimo](https://marimo.io) notebooks — run code,
 | `gds-games` | `ogs` | Typed DSL for compositional game theory (Open Games) |
 | `gds-software` | `gds_software` | Software architecture DSL (DFD, state machine, C4, ERD, etc.) |
 | `gds-business` | `gds_business` | Business dynamics DSL (CLD, supply chain, value stream map) |
+| `gds-sim` | `gds_sim` | Simulation engine — Model, Simulation, Results |
+| `gds-psuu` | `gds_psuu` | Parameter space search under uncertainty for gds-sim |
 | `gds-examples` | — | Tutorial models demonstrating framework features |
 
 ## Architecture
@@ -51,6 +53,10 @@ gds-software   ←  software architecture DSL (depends on gds-framework)
 gds-business   ←  business dynamics DSL (depends on gds-framework)
     ↑
 gds-examples   ←  tutorials (depends on gds-framework + gds-viz)
+
+gds-sim        ←  simulation engine (standalone — no gds-framework dep)
+    ↑
+gds-psuu       ←  parameter search under uncertainty (depends on gds-sim)
 ```
 
 ## License
diff --git a/docs/psuu/api/evaluation.md b/docs/psuu/api/evaluation.md
new file mode 100644
index 0000000..22f5a48
--- /dev/null
+++ b/docs/psuu/api/evaluation.md
@@ -0,0 +1,8 @@
+# gds_psuu.evaluation
+
+Evaluation bridge between parameter points and gds-sim.
+
+::: gds_psuu.evaluation
+    options:
+      show_source: true
+      members_order: source
diff --git a/docs/psuu/api/init.md b/docs/psuu/api/init.md
new file mode 100644
index 0000000..6bdb0fb
--- /dev/null
+++ b/docs/psuu/api/init.md
@@ -0,0 +1,8 @@
+# gds_psuu
+
+Public API -- top-level exports.
+
+::: gds_psuu
+    options:
+      show_submodules: false
+      members: false
diff --git a/docs/psuu/api/kpi.md b/docs/psuu/api/kpi.md
new file mode 100644
index 0000000..63b4178
--- /dev/null
+++ b/docs/psuu/api/kpi.md
@@ -0,0 +1,8 @@
+# gds_psuu.kpi
+
+KPI wrapper and legacy helper functions.
+
+::: gds_psuu.kpi
+    options:
+      show_source: true
+      members_order: source
diff --git a/docs/psuu/api/metric.md b/docs/psuu/api/metric.md
new file mode 100644
index 0000000..9a3ccbf
--- /dev/null
+++ b/docs/psuu/api/metric.md
@@ -0,0 +1,8 @@
+# gds_psuu.metric
+
+Metric and Aggregation primitives for composable KPI construction.
+
+::: gds_psuu.metric
+    options:
+      show_source: true
+      members_order: source
diff --git a/docs/psuu/api/optimizers.md b/docs/psuu/api/optimizers.md
new file mode 100644
index 0000000..ffc727e
--- /dev/null
+++ b/docs/psuu/api/optimizers.md
@@ -0,0 +1,31 @@
+# gds_psuu.optimizers
+
+Search strategy implementations.
+
+## Base
+
+::: gds_psuu.optimizers.base
+    options:
+      show_source: true
+      members_order: source
+
+## Grid Search
+
+::: gds_psuu.optimizers.grid
+    options:
+      show_source: true
+      members_order: source
+
+## Random Search
+
+::: gds_psuu.optimizers.random
+    options:
+      show_source: true
+      members_order: source
+
+## Bayesian (optional)
+
+::: gds_psuu.optimizers.bayesian
+    options:
+      show_source: true
+      members_order: source
diff --git a/docs/psuu/api/results.md b/docs/psuu/api/results.md
new file mode 100644
index 0000000..c4f8231
--- /dev/null
+++ b/docs/psuu/api/results.md
@@ -0,0 +1,8 @@
+# gds_psuu.results
+
+Sweep results and summary types.
+
+::: gds_psuu.results
+    options:
+      show_source: true
+      members_order: source
diff --git a/docs/psuu/api/space.md b/docs/psuu/api/space.md
new file mode 100644
index 0000000..d697c61
--- /dev/null
+++ b/docs/psuu/api/space.md
@@ -0,0 +1,8 @@
+# gds_psuu.space
+
+Parameter space definitions for search.
+
+::: gds_psuu.space
+    options:
+      show_source: true
+      members_order: source
diff --git a/docs/psuu/api/sweep.md b/docs/psuu/api/sweep.md
new file mode 100644
index 0000000..6c49ff6
--- /dev/null
+++ b/docs/psuu/api/sweep.md
@@ -0,0 +1,8 @@
+# gds_psuu.sweep
+
+Sweep orchestrator -- the main entry point for parameter search.
+
+::: gds_psuu.sweep
+    options:
+      show_source: true
+      members_order: source
diff --git a/docs/psuu/getting-started.md b/docs/psuu/getting-started.md
new file mode 100644
index 0000000..55c4728
--- /dev/null
+++ b/docs/psuu/getting-started.md
@@ -0,0 +1,182 @@
+# Getting Started
+
+## Installation
+
+```bash
+uv add gds-psuu
+# or: pip install gds-psuu
+```
+
+For Bayesian optimization (optional):
+
+```bash
+uv add "gds-psuu[bayesian]"
+# or: pip install gds-psuu[bayesian]
+```
+
+For development (monorepo):
+
+```bash
+git clone https://github.com/BlockScience/gds-core.git
+cd gds-core
+uv sync --all-packages
+```
+
+## Your First Parameter Sweep
+
+Define a `gds-sim` model, then sweep a parameter to find the best value:
+
+```python
+from gds_sim import Model, StateUpdateBlock
+from gds_psuu import (
+    KPI,
+    Continuous,
+    GridSearchOptimizer,
+    ParameterSpace,
+    Sweep,
+    final_value,
+    mean_agg,
+)
+
+
+# 1. Define a growth model
+def growth_policy(state, params, **kw):
+    return {"delta": state["population"] * params["growth_rate"]}
+
+
+def update_pop(state, params, *, signal=None, **kw):
+    return ("population", state["population"] + signal["delta"])
+
+
+model = Model(
+    initial_state={"population": 100.0},
+    state_update_blocks=[
+        StateUpdateBlock(
+            policies={"growth": growth_policy},
+            variables={"population": update_pop},
+        )
+    ],
+)
+
+# 2. Define what to search
+space = ParameterSpace(
+    params={"growth_rate": Continuous(min_val=0.01, max_val=0.2)}
+)
+
+# 3. Define what to measure
+kpis = [
+    KPI(
+        name="avg_final_pop",
+        metric=final_value("population"),    # per-run: final value
+        aggregation=mean_agg,                # cross-run: mean
+    ),
+]
+
+# 4. Run the sweep
+sweep = Sweep(
+    model=model,
+    space=space,
+    kpis=kpis,
+    optimizer=GridSearchOptimizer(n_steps=5),
+    timesteps=10,
+    runs=3,  # 3 Monte Carlo runs per parameter point
+)
+results = sweep.run()
+
+# 5. Inspect results
+best = results.best("avg_final_pop")
+print(f"Best growth_rate: {best.params['growth_rate']:.3f}")
+print(f"Best avg final pop: {best.scores['avg_final_pop']:.1f}")
+```
+
+## Composable KPIs
+
+The key design is the **Metric + Aggregation = KPI** pattern:
+
+```python
+from gds_psuu import (
+    KPI,
+    final_value,
+    trajectory_mean,
+    max_value,
+    mean_agg,
+    std_agg,
+    percentile_agg,
+    probability_above,
+)
+
+# Mean of final population across runs
+avg_final = KPI(name="avg_pop", metric=final_value("population"), aggregation=mean_agg)
+
+# Standard deviation of final population (measures uncertainty)
+std_final = KPI(name="std_pop", metric=final_value("population"), aggregation=std_agg)
+
+# 90th percentile of trajectory means
+p90_mean = KPI(name="p90_mean", metric=trajectory_mean("population"), aggregation=percentile_agg(90))
+
+# Probability that max population exceeds 500
+risk = KPI(name="boom_risk", metric=max_value("population"), aggregation=probability_above(500.0))
+```
+
+**Metric** extracts a scalar from each run. **Aggregation** reduces the per-run values to a single score.
+
+If no aggregation is specified, `mean_agg` is used by default:
+
+```python
+# These are equivalent:
+KPI(name="avg_pop", metric=final_value("population"))
+KPI(name="avg_pop", metric=final_value("population"), aggregation=mean_agg)
+```
+
+## Per-Run Distributions
+
+Metric-based KPIs track the full distribution across Monte Carlo runs:
+
+```python
+results = sweep.run()
+
+for ev in results.evaluations:
+    dist = ev.distributions["avg_final_pop"]
+    print(f"  params={ev.params}, per_run={dist}")
+    # e.g. per_run=[265.3, 265.3, 265.3] for deterministic model
+```
+
+## Multiple Optimizers
+
+```python
+from gds_psuu import GridSearchOptimizer, RandomSearchOptimizer
+
+# Exhaustive grid (good for 1-2 dimensions)
+grid = GridSearchOptimizer(n_steps=10)  # 10 points per continuous dim
+
+# Random sampling (good for higher dimensions)
+rand = RandomSearchOptimizer(n_samples=50, seed=42)
+```
+
+For Bayesian optimization (requires `gds-psuu[bayesian]`):
+
+```python
+from gds_psuu.optimizers.bayesian import BayesianOptimizer
+
+bayes = BayesianOptimizer(n_calls=30, target_kpi="avg_final_pop", seed=42)
+```
+
+## Legacy KPI Support
+
+The older `fn`-based KPI interface still works:
+
+```python
+from gds_psuu import KPI, final_state_mean
+
+# Legacy style (backwards compatible)
+kpi = KPI(name="pop", fn=lambda r: final_state_mean(r, "population"))
+```
+
+Legacy KPIs don't track per-run distributions -- use metric-based KPIs for full Monte Carlo awareness.
+
+## Next Steps
+
+- [Concepts](guide/concepts.md) -- Metric, Aggregation, KPI, and the full conceptual hierarchy
+- [Parameter Spaces](guide/spaces.md) -- dimensions, validation, and grid generation
+- [Optimizers](guide/optimizers.md) -- grid, random, and Bayesian search strategies
+- [API Reference](api/init.md) -- complete auto-generated API docs
diff --git a/docs/psuu/guide/concepts.md b/docs/psuu/guide/concepts.md
new file mode 100644
index 0000000..d78e811
--- /dev/null
+++ b/docs/psuu/guide/concepts.md
@@ -0,0 +1,215 @@
+# Concepts
+
+This page explains the core abstractions in `gds-psuu` and how they compose.
+
+## The Hierarchy
+
+```
+Parameter Point  ->  Simulation  ->  Results  ->  Metric  ->  Aggregation  ->  KPI
+```
+
+Each layer transforms data from the previous one. The sweep loop orchestrates the full pipeline across many parameter points.
+
+---
+
+## Parameter Space
+
+A `ParameterSpace` defines what to search. Each dimension has a name and a type:
+
+| Dimension | Description | Grid behavior |
+|-----------|-------------|---------------|
+| `Continuous(min_val, max_val)` | Real-valued range | `n_steps` evenly spaced points |
+| `Integer(min_val, max_val)` | Integer range (inclusive) | All integers in range |
+| `Discrete(values=(...))` | Explicit set of values | All values |
+
+```python
+from gds_psuu import Continuous, Integer, Discrete, ParameterSpace
+
+space = ParameterSpace(params={
+    "learning_rate": Continuous(min_val=0.001, max_val=0.1),
+    "hidden_layers": Integer(min_val=1, max_val=5),
+    "activation": Discrete(values=("relu", "tanh", "sigmoid")),
+})
+```
+
+Validation enforces `min_val < max_val` and at least one parameter.
+
+---
+
+## Metric
+
+A `Metric` extracts a **single scalar from one simulation run**. It receives the full `Results` object and a run ID.
+
+```python
+from gds_psuu import Metric
+
+# Built-in factories
+from gds_psuu import final_value, trajectory_mean, max_value, min_value
+
+final_value("population")      # value at last timestep
+trajectory_mean("population")  # mean over all timesteps
+max_value("population")        # maximum over all timesteps
+min_value("population")        # minimum over all timesteps
+```
+
+Custom metrics:
+
+```python
+Metric(
+    name="range",
+    fn=lambda results, run: (
+        max_value("x").fn(results, run) - min_value("x").fn(results, run)
+    ),
+)
+```
+
+The `MetricFn` signature is `(Results, int) -> float` where the int is the run ID.
+
+---
+
+## Aggregation
+
+An `Aggregation` reduces a **list of per-run values into a single scalar**. It operates on `list[float]` and returns `float`.
+
+```python
+from gds_psuu import mean_agg, std_agg, percentile_agg, probability_above, probability_below
+
+mean_agg                    # arithmetic mean
+std_agg                     # sample standard deviation
+percentile_agg(50)          # median (50th percentile)
+percentile_agg(95)          # 95th percentile
+probability_above(100.0)    # fraction of runs > 100
+probability_below(0.0)      # fraction of runs < 0 (risk measure)
+```
+
+Custom aggregations:
+
+```python
+from gds_psuu import Aggregation
+
+cv_agg = Aggregation(
+    name="cv",
+    fn=lambda vals: (
+        (sum((x - sum(vals)/len(vals))**2 for x in vals) / (len(vals)-1))**0.5
+        / (sum(vals)/len(vals))
+        if len(vals) > 1 and sum(vals) != 0 else 0.0
+    ),
+)
+```
+
+---
+
+## KPI
+
+A `KPI` composes a Metric and an Aggregation into a named score:
+
+```python
+from gds_psuu import KPI, final_value, std_agg
+
+kpi = KPI(
+    name="uncertainty",
+    metric=final_value("population"),  # per-run: final value
+    aggregation=std_agg,               # cross-run: standard deviation
+)
+```
+
+If `aggregation` is omitted, `mean_agg` is used by default.
+
+### Per-run access
+
+Metric-based KPIs expose the full distribution:
+
+```python
+results = simulation_results  # from gds-sim
+per_run_values = kpi.per_run(results)  # [val_run1, val_run2, ...]
+aggregated = kpi.compute(results)       # single float
+```
+
+### Legacy KPIs
+
+The older `fn`-based interface operates on the full `Results` at once:
+
+```python
+from gds_psuu import KPI, final_state_mean
+
+kpi = KPI(name="pop", fn=lambda r: final_state_mean(r, "population"))
+```
+
+Legacy KPIs cannot use `per_run()` and don't produce distributions. Prefer metric-based KPIs for new code.
+
+---
+
+## Evaluator
+
+The `Evaluator` bridges parameter points to scored KPIs:
+
+1. Takes a parameter point `{"growth_rate": 0.05}`
+2. Injects params into the `gds-sim` Model
+3. Runs N Monte Carlo simulations
+4. Computes each KPI on the results
+5. Returns `EvaluationResult` with scores and distributions
+
+```python
+from gds_psuu import Evaluator
+
+evaluator = Evaluator(
+    base_model=model,
+    kpis=[kpi1, kpi2],
+    timesteps=100,
+    runs=10,
+)
+result = evaluator.evaluate({"growth_rate": 0.05})
+# result.scores == {"kpi1_name": 42.0, "kpi2_name": 3.14}
+# result.distributions == {"kpi1_name": [per-run values...]}
+```
+
+---
+
+## Optimizer
+
+An `Optimizer` implements the suggest/observe loop:
+
+| Optimizer | Strategy | When to use |
+|-----------|----------|-------------|
+| `GridSearchOptimizer(n_steps)` | Exhaustive cartesian product | 1-2 dimensions, need full coverage |
+| `RandomSearchOptimizer(n_samples, seed)` | Uniform random sampling | Higher dimensions, exploration |
+| `BayesianOptimizer(n_calls, target_kpi)` | Gaussian process surrogate | Expensive evaluations, optimization |
+
+All optimizers implement the same interface:
+
+```python
+optimizer.setup(space, kpi_names)
+while not optimizer.is_exhausted():
+    params = optimizer.suggest()
+    # ... evaluate ...
+    optimizer.observe(params, scores)
+```
+
+---
+
+## Sweep
+
+`Sweep` is the top-level orchestrator that connects everything:
+
+```python
+from gds_psuu import Sweep
+
+sweep = Sweep(
+    model=model,
+    space=space,
+    kpis=kpis,
+    optimizer=optimizer,
+    timesteps=100,
+    runs=10,
+)
+results = sweep.run()
+```
+
+### SweepResults
+
+```python
+results.evaluations       # list[EvaluationResult] -- all evaluations
+results.summaries         # list[EvaluationSummary] -- params + scores only
+results.best("kpi_name")  # best evaluation for a KPI
+results.to_dataframe()    # pandas DataFrame (requires pandas)
+```
diff --git a/docs/psuu/guide/optimizers.md b/docs/psuu/guide/optimizers.md
new file mode 100644
index 0000000..751220b
--- /dev/null
+++ b/docs/psuu/guide/optimizers.md
@@ -0,0 +1,105 @@
+# Optimizers
+
+All optimizers implement the same `Optimizer` interface: `setup()`, `suggest()`, `observe()`, `is_exhausted()`. The `Sweep` class drives this loop automatically.
+
+## GridSearchOptimizer
+
+Exhaustive evaluation of every point in a regular grid.
+
+```python
+from gds_psuu import GridSearchOptimizer
+
+optimizer = GridSearchOptimizer(n_steps=10)
+```
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `n_steps` | `int` | `5` | Points per continuous dimension |
+
+**Behavior:** Generates the full cartesian product via `ParameterSpace.grid_points()`, then evaluates each point exactly once. Does not adapt based on observed scores.
+
+**When to use:** Small parameter spaces (1-2 dimensions), need complete coverage, want reproducible results.
+
+**Total evaluations:** `n_steps^(n_continuous) * product(integer_ranges) * product(discrete_sizes)`
+
+---
+
+## RandomSearchOptimizer
+
+Uniform random sampling across the parameter space.
+
+```python
+from gds_psuu import RandomSearchOptimizer
+
+optimizer = RandomSearchOptimizer(n_samples=50, seed=42)
+```
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `n_samples` | `int` | `20` | Total parameter points to sample |
+| `seed` | `int \| None` | `None` | Random seed for reproducibility |
+
+**Behavior:** Samples each dimension independently -- `uniform(min, max)` for Continuous, `randint(min, max)` for Integer, `choice(values)` for Discrete. Does not adapt based on observed scores.
+
+**When to use:** Higher-dimensional spaces where grid search is infeasible, initial exploration before Bayesian optimization.
+
+---
+
+## BayesianOptimizer
+
+Gaussian process surrogate model that learns from past evaluations.
+
+!!! note "Optional dependency"
+    Requires `scikit-optimize`. Install with: `uv add "gds-psuu[bayesian]"`
+
+```python
+from gds_psuu.optimizers.bayesian import BayesianOptimizer
+
+optimizer = BayesianOptimizer(
+    n_calls=30,
+    target_kpi="avg_final_pop",
+    maximize=True,
+    seed=42,
+)
+```
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `n_calls` | `int` | `20` | Total evaluations (initial + optimization) |
+| `target_kpi` | `str \| None` | `None` | KPI to optimize (defaults to first) |
+| `maximize` | `bool` | `True` | Maximize (True) or minimize (False) |
+| `seed` | `int \| None` | `None` | Random seed |
+
+**Behavior:** Starts with random exploration (`min(5, n_calls)` initial points), then uses a Gaussian process surrogate to balance exploration and exploitation. Optimizes a single target KPI.
+
+**When to use:** Expensive simulations where you want to find the optimum with fewer evaluations. Works best with continuous parameters.
+
+---
+
+## Custom Optimizers
+
+Subclass `Optimizer` to implement your own search strategy:
+
+```python
+from gds_psuu.optimizers.base import Optimizer
+from gds_psuu.space import ParameterSpace
+from gds_psuu.types import KPIScores, ParamPoint
+
+
+class MyOptimizer(Optimizer):
+    def setup(self, space: ParameterSpace, kpi_names: list[str]) -> None:
+        # Initialize search state
+        ...
+
+    def suggest(self) -> ParamPoint:
+        # Return next parameter point to evaluate
+        ...
+
+    def observe(self, params: ParamPoint, scores: KPIScores) -> None:
+        # Learn from evaluation results
+        ...
+
+    def is_exhausted(self) -> bool:
+        # Return True when search is complete
+        ...
+```
diff --git a/docs/psuu/guide/spaces.md b/docs/psuu/guide/spaces.md
new file mode 100644
index 0000000..963fc6f
--- /dev/null
+++ b/docs/psuu/guide/spaces.md
@@ -0,0 +1,102 @@
+# Parameter Spaces
+
+## Dimension Types
+
+### Continuous
+
+A real-valued range with inclusive bounds.
+
+```python
+from gds_psuu import Continuous
+
+dim = Continuous(min_val=0.0, max_val=1.0)
+```
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `min_val` | `float` | Lower bound (inclusive) |
+| `max_val` | `float` | Upper bound (inclusive) |
+
+Validation: `min_val < max_val`, both must be finite.
+
+Grid behavior: `n_steps` evenly spaced points from `min_val` to `max_val`.
+
+---
+
+### Integer
+
+An integer range with inclusive bounds.
+
+```python
+from gds_psuu import Integer
+
+dim = Integer(min_val=1, max_val=10)
+```
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `min_val` | `int` | Lower bound (inclusive) |
+| `max_val` | `int` | Upper bound (inclusive) |
+
+Validation: `min_val < max_val`.
+
+Grid behavior: all integers from `min_val` to `max_val` (ignores `n_steps`).
+
+---
+
+### Discrete
+
+An explicit set of allowed values (any hashable type).
+
+```python
+from gds_psuu import Discrete
+
+dim = Discrete(values=("adam", "sgd", "rmsprop"))
+```
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `values` | `tuple[Any, ...]` | Allowed values |
+
+Validation: at least 1 value.
+
+Grid behavior: all values (ignores `n_steps`).
+
+---
+
+## ParameterSpace
+
+Combines dimensions into a named parameter space:
+
+```python
+from gds_psuu import Continuous, Integer, Discrete, ParameterSpace
+
+space = ParameterSpace(params={
+    "learning_rate": Continuous(min_val=0.001, max_val=0.1),
+    "batch_size": Integer(min_val=16, max_val=128),
+    "optimizer": Discrete(values=("adam", "sgd")),
+})
+```
+
+### Grid Generation
+
+```python
+points = space.grid_points(n_steps=5)
+# Returns list of dicts, e.g.:
+# [
+#     {"learning_rate": 0.001, "batch_size": 16, "optimizer": "adam"},
+#     {"learning_rate": 0.001, "batch_size": 16, "optimizer": "sgd"},
+#     ...
+# ]
+```
+
+The total number of grid points is the cartesian product:
+`n_steps * (max_int - min_int + 1) * len(discrete_values)`
+
+For the example above: `5 * 113 * 2 = 1130` points.
+
+### Properties
+
+| Property | Returns | Description |
+|----------|---------|-------------|
+| `dimension_names` | `list[str]` | Ordered list of parameter names |
diff --git a/docs/psuu/index.md b/docs/psuu/index.md
new file mode 100644
index 0000000..ba6e652
--- /dev/null
+++ b/docs/psuu/index.md
@@ -0,0 +1,92 @@
+# gds-psuu
+
+[![PyPI](https://img.shields.io/pypi/v/gds-psuu)](https://pypi.org/project/gds-psuu/)
+[![Python](https://img.shields.io/pypi/pyversions/gds-psuu)](https://pypi.org/project/gds-psuu/)
+[![License](https://img.shields.io/github/license/BlockScience/gds-core)](https://github.com/BlockScience/gds-core/blob/main/LICENSE)
+
+**Parameter space search under uncertainty** -- explore, evaluate, and optimize simulation parameters with Monte Carlo awareness.
+
+## What is this?
+
+`gds-psuu` bridges `gds-sim` simulations with systematic parameter exploration. It provides:
+
+- **Parameter spaces** -- `Continuous`, `Integer`, and `Discrete` dimensions with validation
+- **Composable KPIs** -- `Metric` (per-run scalar) + `Aggregation` (cross-run reducer) = `KPI`
+- **3 search strategies** -- Grid, Random, and Bayesian (optuna) optimizers
+- **Monte Carlo awareness** -- per-run distributions tracked alongside aggregated scores
+- **Zero mandatory dependencies** beyond `gds-sim` and `pydantic`
+
+## Architecture
+
+```
+gds-sim (pip install gds-sim)
+|
+|  Simulation engine: Model, StateUpdateBlock,
+|  Simulation, Results (columnar storage).
+|
++-- gds-psuu (pip install gds-psuu)
+    |
+    |  Parameter search: ParameterSpace, Metric, Aggregation,
+    |  KPI, Evaluator, Sweep, Optimizer.
+    |
+    +-- Your application
+        |
+        |  Concrete models, parameter studies,
+        |  sensitivity analysis, optimization.
+```
+
+## Conceptual Hierarchy
+
+The package follows a clear hierarchy from parameters to optimization:
+
+```
+Parameter Point          {"growth_rate": 0.05}
+    |
+    v
+Simulation              Model + timesteps + N runs
+    |
+    v
+Results                 Columnar data (timestep, substep, run, state vars)
+    |
+    v
+Metric (per-run)        final_value("pop") -> scalar per run
+    |
+    v
+Aggregation (cross-run) mean_agg, std_agg, probability_above(...)
+    |
+    v
+KPI (composed)          KPI(metric=..., aggregation=...) -> single score
+    |
+    v
+Sweep                   Optimizer drives suggest/evaluate/observe loop
+    |
+    v
+SweepResults            All evaluations + best() selection
+```
+
+## How the Sweep Loop Works
+
+```
+Optimizer.suggest()  -->  Evaluator.evaluate(params)  -->  Optimizer.observe(scores)
+       ^                          |                              |
+       |                   gds-sim Simulation                    |
+       +------------------------ repeat --------------------------+
+```
+
+1. The **Optimizer** suggests a parameter point
+2. The **Evaluator** injects params into a `gds-sim` Model, runs N Monte Carlo simulations
+3. Each **KPI** extracts a per-run **Metric**, then **Aggregates** across runs into a single score
+4. The **Optimizer** observes the scores and decides what to try next
+
+## Quick Start
+
+```bash
+uv add gds-psuu
+# or: pip install gds-psuu
+```
+
+See [Getting Started](getting-started.md) for a full walkthrough.
+
+## Credits
+
+Built on [gds-sim](../sim/index.md) by [BlockScience](https://block.science).
diff --git a/mkdocs.yml b/mkdocs.yml
index 1f921a9..c8a9910 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -109,6 +109,11 @@ plugins:
           - guides/dsl-roadmap.md
           - guides/research-boundaries.md
           - guides/view-stratification.md
+        PSUU (gds-psuu):
+          - {psuu/index.md: "Parameter space search under uncertainty for gds-sim"}
+          - {psuu/getting-started.md: "Installation + first parameter sweep"}
+          - psuu/guide/*.md
+          - psuu/api/*.md
         Ecosystem:
           - {framework/ecosystem.md: "All packages, their imports, and dependency relationships"}
 
@@ -311,6 +316,22 @@ nav:
               - gds_software.dependency.compile: software/api/dep-compile.md
               - gds_software.dependency.checks: software/api/dep-checks.md
               - gds_software.verification: software/api/verification.md
+      - PSUU:
+          - Overview: psuu/index.md
+          - Getting Started: psuu/getting-started.md
+          - User Guide:
+              - Concepts: psuu/guide/concepts.md
+              - Parameter Spaces: psuu/guide/spaces.md
+              - Optimizers: psuu/guide/optimizers.md
+          - API Reference:
+              - gds_psuu: psuu/api/init.md
+              - gds_psuu.metric: psuu/api/metric.md
+              - gds_psuu.kpi: psuu/api/kpi.md
+              - gds_psuu.evaluation: psuu/api/evaluation.md
+              - gds_psuu.space: psuu/api/space.md
+              - gds_psuu.optimizers: psuu/api/optimizers.md
+              - gds_psuu.sweep: psuu/api/sweep.md
+              - gds_psuu.results: psuu/api/results.md
   - Design & Research:
       - Layer 0 Milestone: guides/architecture-milestone-layer0.md
       - DSL Roadmap: guides/dsl-roadmap.md
diff --git a/packages/gds-psuu/gds_psuu/__init__.py b/packages/gds-psuu/gds_psuu/__init__.py
index f93f9c7..beb4d17 100644
--- a/packages/gds-psuu/gds_psuu/__init__.py
+++ b/packages/gds-psuu/gds_psuu/__init__.py
@@ -5,6 +5,19 @@
 from gds_psuu.errors import PsuuError, PsuuSearchError, PsuuValidationError
 from gds_psuu.evaluation import EvaluationResult, Evaluator
 from gds_psuu.kpi import KPI, final_state_mean, final_state_std, time_average
+from gds_psuu.metric import (
+    Aggregation,
+    Metric,
+    final_value,
+    max_value,
+    mean_agg,
+    min_value,
+    percentile_agg,
+    probability_above,
+    probability_below,
+    std_agg,
+    trajectory_mean,
+)
 from gds_psuu.objective import Objective, SingleKPI, WeightedSum
 from gds_psuu.optimizers.base import Optimizer
 from gds_psuu.optimizers.bayesian import BayesianOptimizer
@@ -27,9 +40,11 @@
     ParameterSpace,
 )
 from gds_psuu.sweep import Sweep
-from gds_psuu.types import KPIFn, KPIScores, ParamPoint
+from gds_psuu.types import AggregationFn, KPIFn, KPIScores, MetricFn, ParamPoint
 
 __all__ = [
+    "Aggregation",
+    "AggregationFn",
     "Analyzer",
     "BayesianOptimizer",
     "Constraint",
@@ -45,6 +60,8 @@
     "KPIFn",
     "KPIScores",
     "LinearConstraint",
+    "Metric",
+    "MetricFn",
     "MorrisAnalyzer",
     "OATAnalyzer",
     "Objective",
@@ -62,5 +79,14 @@
     "WeightedSum",
     "final_state_mean",
     "final_state_std",
+    "final_value",
+    "max_value",
+    "mean_agg",
+    "min_value",
+    "percentile_agg",
+    "probability_above",
+    "probability_below",
+    "std_agg",
     "time_average",
+    "trajectory_mean",
 ]
diff --git a/packages/gds-psuu/gds_psuu/evaluation.py b/packages/gds-psuu/gds_psuu/evaluation.py
index 06fd5ad..4c1bcc0 100644
--- a/packages/gds-psuu/gds_psuu/evaluation.py
+++ b/packages/gds-psuu/gds_psuu/evaluation.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Any
 
 from gds_sim import Model, Results, Simulation
@@ -20,6 +20,8 @@ class EvaluationResult:
     scores: KPIScores
     results: Results
     run_count: int
+    distributions: dict[str, list[float]] = field(default_factory=dict)
+    """Per-run metric values for metric-based KPIs."""
 
 
 class Evaluator(BaseModel):
@@ -36,7 +38,8 @@ def evaluate(self, params: ParamPoint) -> EvaluationResult:
         """Evaluate a single parameter point.
 
         Injects params as singleton lists into the model, runs the simulation,
-        and computes KPI scores.
+        and computes KPI scores. For metric-based KPIs, also records per-run
+        distributions.
         """
         # Build params dict: each value as a singleton list for gds-sim
         sim_params: dict[str, list[Any]] = {k: [v] for k, v in params.items()}
@@ -51,11 +54,18 @@ def evaluate(self, params: ParamPoint) -> EvaluationResult:
         sim = Simulation(model=model, timesteps=self.timesteps, runs=self.runs)
         results = sim.run()
 
-        scores: KPIScores = {kpi.name: kpi.fn(results) for kpi in self.kpis}
+        scores: KPIScores = {}
+        distributions: dict[str, list[float]] = {}
+
+        for kpi in self.kpis:
+            scores[kpi.name] = kpi.compute(results)
+            if kpi.metric is not None:
+                distributions[kpi.name] = kpi.per_run(results)
 
         return EvaluationResult(
             params=params,
             scores=scores,
             results=results,
             run_count=self.runs,
+            distributions=distributions,
         )
diff --git a/packages/gds-psuu/gds_psuu/kpi.py b/packages/gds-psuu/gds_psuu/kpi.py
index 45393c6..b805ec1 100644
--- a/packages/gds-psuu/gds_psuu/kpi.py
+++ b/packages/gds-psuu/gds_psuu/kpi.py
@@ -2,10 +2,17 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Self
 
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, model_validator
 
+from gds_psuu.errors import PsuuValidationError
+from gds_psuu.metric import (
+    Aggregation,
+    Metric,
+    _extract_run_ids,
+    mean_agg,
+)
 from gds_psuu.types import KPIFn  # noqa: TC001
 
 if TYPE_CHECKING:
@@ -13,12 +20,58 @@
 
 
 class KPI(BaseModel):
-    """Named KPI backed by a scoring function."""
+    """Named KPI backed by either a legacy fn or a Metric + Aggregation pair.
+
+    Legacy usage (backwards compatible)::
+
+        KPI(name="avg_pop", fn=lambda r: final_state_mean(r, "population"))
+
+    Composable usage::
+
+        KPI(name="avg_pop", metric=final_value("population"), aggregation=mean_agg)
+    """
 
     model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)
 
     name: str
-    fn: KPIFn
+    fn: KPIFn | None = None
+    metric: Metric | None = None
+    aggregation: Aggregation | None = None
+
+    @model_validator(mode="after")
+    def _validate_specification(self) -> Self:
+        has_fn = self.fn is not None
+        has_metric = self.metric is not None
+        if not has_fn and not has_metric:
+            raise PsuuValidationError("KPI must have either 'fn' or 'metric' specified")
+        if has_fn and has_metric:
+            raise PsuuValidationError(
+                "KPI cannot have both 'fn' and 'metric' specified"
+            )
+        return self
+
+    def compute(self, results: Results) -> float:
+        """Compute the aggregated KPI score from results."""
+        if self.fn is not None:
+            return self.fn(results)
+        assert self.metric is not None
+        agg = self.aggregation or mean_agg
+        per_run = self.per_run(results)
+        return agg.fn(per_run)
+
+    def per_run(self, results: Results) -> list[float]:
+        """Compute per-run metric values. Only available for metric-based KPIs."""
+        if self.metric is None:
+            raise PsuuValidationError(
+                "per_run() requires a metric-based KPI, not a legacy fn-based KPI"
+            )
+        run_ids = _extract_run_ids(results)
+        return [self.metric.fn(results, r) for r in run_ids]
+
+
+# ---------------------------------------------------------------------------
+# Legacy helper functions (backwards compatible)
+# ---------------------------------------------------------------------------
 
 
 def final_state_mean(results: Results, key: str) -> float:
diff --git a/packages/gds-psuu/gds_psuu/metric.py b/packages/gds-psuu/gds_psuu/metric.py
new file mode 100644
index 0000000..5242075
--- /dev/null
+++ b/packages/gds-psuu/gds_psuu/metric.py
@@ -0,0 +1,214 @@
+"""Metric and Aggregation primitives for composable KPI construction."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from pydantic import BaseModel, ConfigDict
+
+from gds_psuu.types import AggregationFn, MetricFn  # noqa: TC001
+
+if TYPE_CHECKING:
+    from gds_sim import Results
+
+
+class Metric(BaseModel):
+    """Per-run scalar extracted from simulation output."""
+
+    model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)
+
+    name: str
+    fn: MetricFn
+
+
+class Aggregation(BaseModel):
+    """Combines per-run metric values across Monte Carlo runs."""
+
+    model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)
+
+    name: str
+    fn: AggregationFn
+
+
+# ---------------------------------------------------------------------------
+# Built-in metric factories
+# ---------------------------------------------------------------------------
+
+
+def _extract_run_ids(results: Results) -> list[int]:
+    """Get unique run IDs from results."""
+    cols = results._trimmed_columns()
+    runs = cols["run"]
+    seen: set[int] = set()
+    ordered: list[int] = []
+    for r in runs:
+        if r not in seen:
+            seen.add(r)
+            ordered.append(r)
+    return ordered
+
+
+def _final_value_fn(results: Results, run: int, key: str) -> float:
+    """Extract the final-timestep value for a specific run."""
+    cols = results._trimmed_columns()
+    timesteps = cols["timestep"]
+    substeps = cols["substep"]
+    runs_col = cols["run"]
+    values = cols[key]
+    n = results._size
+
+    max_t = 0
+    for i in range(n):
+        if runs_col[i] == run:
+            t = timesteps[i]
+            if t > max_t:
+                max_t = t
+
+    max_s = 0
+    for i in range(n):
+        if runs_col[i] == run and timesteps[i] == max_t:
+            s = substeps[i]
+            if s > max_s:
+                max_s = s
+
+    for i in range(n):
+        if runs_col[i] == run and timesteps[i] == max_t and substeps[i] == max_s:
+            return float(values[i])
+
+    return 0.0
+
+
+def final_value(key: str) -> Metric:
+    """Metric: value of a state variable at the final timestep of a run."""
+    return Metric(
+        name=f"final_{key}",
+        fn=lambda results, run, _key=key: _final_value_fn(results, run, _key),
+    )
+
+
+def _trajectory_mean_fn(results: Results, run: int, key: str) -> float:
+    """Mean of a state variable across all timesteps for a specific run."""
+    cols = results._trimmed_columns()
+    runs_col = cols["run"]
+    values = cols[key]
+    n = results._size
+
+    total = 0.0
+    count = 0
+    for i in range(n):
+        if runs_col[i] == run:
+            total += float(values[i])
+            count += 1
+
+    return total / count if count else 0.0
+
+
+def trajectory_mean(key: str) -> Metric:
+    """Metric: mean of a state variable over time for a single run."""
+    return Metric(
+        name=f"mean_{key}",
+        fn=lambda results, run, _key=key: _trajectory_mean_fn(results, run, _key),
+    )
+
+
+def _max_value_fn(results: Results, run: int, key: str) -> float:
+    """Max of a state variable across all timesteps for a specific run."""
+    cols = results._trimmed_columns()
+    runs_col = cols["run"]
+    values = cols[key]
+    n = results._size
+
+    max_val = float("-inf")
+    for i in range(n):
+        if runs_col[i] == run:
+            v = float(values[i])
+            if v > max_val:
+                max_val = v
+    return max_val if max_val != float("-inf") else 0.0
+
+
+def max_value(key: str) -> Metric:
+    """Metric: maximum value of a state variable within a single run."""
+    return Metric(
+        name=f"max_{key}",
+        fn=lambda results, run, _key=key: _max_value_fn(results, run, _key),
+    )
+
+
+def _min_value_fn(results: Results, run: int, key: str) -> float:
+    """Min of a state variable across all timesteps for a specific run."""
+    cols = results._trimmed_columns()
+    runs_col = cols["run"]
+    values = cols[key]
+    n = results._size
+
+    min_val = float("inf")
+    for i in range(n):
+        if runs_col[i] == run:
+            v = float(values[i])
+            if v < min_val:
+                min_val = v
+    return min_val if min_val != float("inf") else 0.0
+
+
+def min_value(key: str) -> Metric:
+    """Metric: minimum value of a state variable within a single run."""
+    return Metric(
+        name=f"min_{key}",
+        fn=lambda results, run, _key=key: _min_value_fn(results, run, _key),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Built-in aggregations
+# ---------------------------------------------------------------------------
+
+mean_agg = Aggregation(
+    name="mean",
+    fn=lambda vals: sum(vals) / len(vals) if vals else 0.0,
+)
+
+std_agg = Aggregation(
+    name="std",
+    fn=lambda vals: (
+        (sum((x - sum(vals) / len(vals)) ** 2 for x in vals) / (len(vals) - 1)) ** 0.5
+        if len(vals) > 1
+        else 0.0
+    ),
+)
+
+
+def percentile_agg(p: float) -> Aggregation:
+    """Aggregation: p-th percentile across runs."""
+
+    def _fn(vals: list[float]) -> float:
+        if not vals:
+            return 0.0
+        s = sorted(vals)
+        k = (p / 100.0) * (len(s) - 1)
+        lo = int(k)
+        hi = min(lo + 1, len(s) - 1)
+        frac = k - lo
+        return s[lo] + frac * (s[hi] - s[lo])
+
+    return Aggregation(name=f"p{p}", fn=_fn)
+
+
+def probability_above(threshold: float) -> Aggregation:
+    """Aggregation: fraction of runs where metric exceeds threshold."""
+    return Aggregation(
+        name=f"P(>{threshold})",
+        fn=lambda vals: (
+            sum(1 for v in vals if v > threshold) / len(vals) if vals else 0.0
+        ),
+    )
+
+
+def probability_below(threshold: float) -> Aggregation:
+    """Aggregation: fraction of runs where metric is below threshold."""
+    return Aggregation(
+        name=f"P(<{threshold})",
+        fn=lambda vals: (
+            sum(1 for v in vals if v < threshold) / len(vals) if vals else 0.0
+        ),
+    )
diff --git a/packages/gds-psuu/gds_psuu/types.py b/packages/gds-psuu/gds_psuu/types.py
index 8b97744..d7e8ec2 100644
--- a/packages/gds-psuu/gds_psuu/types.py
+++ b/packages/gds-psuu/gds_psuu/types.py
@@ -13,5 +13,11 @@
 KPIFn = Callable[[Results], float]
 """Computes a scalar KPI score from simulation results (all Monte Carlo runs)."""
 
+MetricFn = Callable[[Results, int], float]
+"""Computes a scalar from a single run. Args: results, run_number."""
+
+AggregationFn = Callable[[list[float]], float]
+"""Combines per-run metric values into a single scalar."""
+
 KPIScores = dict[str, float]
 """Maps KPI names to their computed scalar scores."""
diff --git a/packages/gds-psuu/tests/test_metric.py b/packages/gds-psuu/tests/test_metric.py
new file mode 100644
index 0000000..122a77f
--- /dev/null
+++ b/packages/gds-psuu/tests/test_metric.py
@@ -0,0 +1,284 @@
+"""Tests for Metric/Aggregation primitives and composable KPIs."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from gds_sim import Results
+
+from gds_psuu import (
+    KPI,
+    Continuous,
+    Evaluator,
+    GridSearchOptimizer,
+    ParameterSpace,
+    PsuuValidationError,
+    Sweep,
+    final_state_mean,
+    final_value,
+    max_value,
+    mean_agg,
+    min_value,
+    percentile_agg,
+    probability_above,
+    probability_below,
+    std_agg,
+    trajectory_mean,
+)
+
+if TYPE_CHECKING:
+    from gds_sim import Model
+
+
+def _make_results(values: list[list[float]], key: str = "x") -> Results:
+    """Create Results with multiple runs. Each inner list is one run's trajectory."""
+    r = Results(state_keys=[key])
+    for run_id, trajectory in enumerate(values, start=1):
+        for t, val in enumerate(trajectory):
+            r.append(
+                {key: val},
+                timestep=t,
+                substep=1,
+                run=run_id,
+                subset=0,
+            )
+    return r
+
+
+class TestMetricFactories:
+    def test_final_value(self) -> None:
+        results = _make_results([[10.0, 20.0, 30.0]], key="pop")
+        m = final_value("pop")
+        assert m.fn(results, 1) == 30.0
+
+    def test_final_value_multi_run(self) -> None:
+        results = _make_results([[10.0, 20.0], [100.0, 200.0]], key="pop")
+        m = final_value("pop")
+        assert m.fn(results, 1) == 20.0
+        assert m.fn(results, 2) == 200.0
+
+    def test_trajectory_mean(self) -> None:
+        results = _make_results([[10.0, 20.0, 30.0]], key="x")
+        m = trajectory_mean("x")
+        assert m.fn(results, 1) == 20.0
+
+    def test_max_value(self) -> None:
+        results = _make_results([[5.0, 100.0, 3.0]], key="x")
+        m = max_value("x")
+        assert m.fn(results, 1) == 100.0
+
+    def test_min_value(self) -> None:
+        results = _make_results([[5.0, 1.0, 3.0]], key="x")
+        m = min_value("x")
+        assert m.fn(results, 1) == 1.0
+
+    def test_metric_name(self) -> None:
+        assert final_value("pop").name == "final_pop"
+        assert trajectory_mean("x").name == "mean_x"
+        assert max_value("y").name == "max_y"
+        assert min_value("z").name == "min_z"
+
+
+class TestAggregations:
+    def test_mean_agg(self) -> None:
+        assert mean_agg.fn([10.0, 20.0, 30.0]) == 20.0
+
+    def test_mean_agg_empty(self) -> None:
+        assert mean_agg.fn([]) == 0.0
+
+    def test_std_agg(self) -> None:
+        vals = [10.0, 20.0, 30.0]
+        result = std_agg.fn(vals)
+        assert result == pytest.approx(10.0)
+
+    def test_std_agg_single(self) -> None:
+        assert std_agg.fn([5.0]) == 0.0
+
+    def test_percentile_agg(self) -> None:
+        agg = percentile_agg(50)
+        assert agg.fn([1.0, 2.0, 3.0, 4.0, 5.0]) == 3.0
+
+    def test_percentile_agg_boundary(self) -> None:
+        assert percentile_agg(0).fn([10.0, 20.0, 30.0]) == 10.0
+        assert percentile_agg(100).fn([10.0, 20.0, 30.0]) == 30.0
+
+    def test_percentile_agg_empty(self) -> None:
+        assert percentile_agg(50).fn([]) == 0.0
+
+    def test_probability_above(self) -> None:
+        agg = probability_above(15.0)
+        assert agg.fn([10.0, 20.0, 30.0]) == pytest.approx(2 / 3)
+
+    def test_probability_above_empty(self) -> None:
+        assert probability_above(0).fn([]) == 0.0
+
+    def test_probability_below(self) -> None:
+        agg = probability_below(25.0)
+        assert agg.fn([10.0, 20.0, 30.0]) == pytest.approx(2 / 3)
+
+    def test_probability_below_empty(self) -> None:
+        assert probability_below(0).fn([]) == 0.0
+
+
+class TestComposableKPI:
+    def test_metric_based_kpi(self) -> None:
+        results = _make_results([[10.0, 20.0], [10.0, 40.0], [10.0, 60.0]], key="pop")
+        kpi = KPI(name="avg_final", metric=final_value("pop"), aggregation=mean_agg)
+        assert kpi.compute(results) == 40.0
+
+    def test_metric_defaults_to_mean(self) -> None:
+        results = _make_results([[10.0, 20.0], [10.0, 40.0]], key="pop")
+        kpi = KPI(name="avg_final", metric=final_value("pop"))
+        assert kpi.compute(results) == 30.0  # mean of [20, 40]
+
+    def test_metric_with_percentile(self) -> None:
+        results = _make_results(
+            [[0.0, v] for v in [10.0, 20.0, 30.0, 40.0, 50.0]], key="x"
+        )
+        kpi = KPI(
+            name="p50_x",
+            metric=final_value("x"),
+            aggregation=percentile_agg(50),
+        )
+        assert kpi.compute(results) == 30.0
+
+    def test_metric_with_probability(self) -> None:
+        results = _make_results([[0.0, v] for v in [10.0, 20.0, 30.0]], key="x")
+        kpi = KPI(
+            name="risk",
+            metric=final_value("x"),
+            aggregation=probability_below(25.0),
+        )
+        assert kpi.compute(results) == pytest.approx(2 / 3)
+
+    def test_per_run(self) -> None:
+        results = _make_results([[10.0, 20.0], [10.0, 40.0], [10.0, 60.0]], key="pop")
+        kpi = KPI(name="final_pop", metric=final_value("pop"))
+        per_run = kpi.per_run(results)
+        assert per_run == [20.0, 40.0, 60.0]
+
+    def test_per_run_on_fn_kpi_raises(self) -> None:
+        kpi = KPI(name="legacy", fn=lambda r: 0.0)
+        results = Results(state_keys=[])
+        with pytest.raises(PsuuValidationError, match="metric-based"):
+            kpi.per_run(results)
+
+
+class TestLegacyKPI:
+    def test_fn_based_still_works(self) -> None:
+        results = _make_results([[10.0, 20.0, 30.0]], key="pop")
+        kpi = KPI(
+            name="legacy",
+            fn=lambda r: final_state_mean(r, "pop"),
+        )
+        assert kpi.compute(results) == 30.0
+
+    def test_validation_neither(self) -> None:
+        with pytest.raises(
+            (PsuuValidationError, ValueError),
+            match="either 'fn' or 'metric'",
+        ):
+            KPI(name="bad")
+
+    def test_validation_both(self) -> None:
+        with pytest.raises(
+            (PsuuValidationError, ValueError),
+            match="cannot have both",
+        ):
+            KPI(
+                name="bad",
+                fn=lambda r: 0.0,
+                metric=final_value("x"),
+            )
+
+
+class TestEvaluatorDistributions:
+    def test_distributions_populated(self, simple_model: Model) -> None:
+        evaluator = Evaluator(
+            base_model=simple_model,
+            kpis=[
+                KPI(name="final_pop", metric=final_value("population")),
+            ],
+            timesteps=5,
+            runs=3,
+        )
+        result = evaluator.evaluate({"growth_rate": 0.05})
+        assert "final_pop" in result.distributions
+        assert len(result.distributions["final_pop"]) == 3
+
+    def test_distributions_empty_for_legacy_kpi(self, simple_model: Model) -> None:
+        evaluator = Evaluator(
+            base_model=simple_model,
+            kpis=[
+                KPI(
+                    name="legacy_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                ),
+            ],
+            timesteps=5,
+            runs=1,
+        )
+        result = evaluator.evaluate({"growth_rate": 0.05})
+        assert "legacy_pop" not in result.distributions
+
+    def test_mixed_kpis(self, simple_model: Model) -> None:
+        evaluator = Evaluator(
+            base_model=simple_model,
+            kpis=[
+                KPI(
+                    name="legacy",
+                    fn=lambda r: final_state_mean(r, "population"),
+                ),
+                KPI(name="composable", metric=final_value("population")),
+            ],
+            timesteps=5,
+            runs=2,
+        )
+        result = evaluator.evaluate({"growth_rate": 0.05})
+        assert "legacy" not in result.distributions
+        assert "composable" in result.distributions
+        assert len(result.distributions["composable"]) == 2
+
+
+class TestSweepWithComposableKPI:
+    def test_sweep_composable(self, simple_model: Model) -> None:
+        sweep = Sweep(
+            model=simple_model,
+            space=ParameterSpace(
+                params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)}
+            ),
+            kpis=[
+                KPI(name="avg_pop", metric=final_value("population")),
+            ],
+            optimizer=GridSearchOptimizer(n_steps=3),
+            timesteps=5,
+            runs=2,
+        )
+        results = sweep.run()
+        assert len(results.evaluations) == 3
+        # Each evaluation should have distributions
+        for ev in results.evaluations:
+            assert "avg_pop" in ev.distributions
+            assert len(ev.distributions["avg_pop"]) == 2
+
+    def test_sweep_legacy_still_works(self, simple_model: Model) -> None:
+        sweep = Sweep(
+            model=simple_model,
+            space=ParameterSpace(
+                params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)}
+            ),
+            kpis=[
+                KPI(
+                    name="final_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                ),
+            ],
+            optimizer=GridSearchOptimizer(n_steps=2),
+            timesteps=5,
+            runs=1,
+        )
+        results = sweep.run()
+        assert len(results.evaluations) == 2
+        best = results.best("final_pop")
+        assert best.params["growth_rate"] == 0.1