diff --git a/docs/index.md b/docs/index.md index 3b1f228..0a6a9bc 100644 --- a/docs/index.md +++ b/docs/index.md @@ -36,6 +36,8 @@ Key guides include embedded [marimo](https://marimo.io) notebooks — run code, | `gds-games` | `ogs` | Typed DSL for compositional game theory (Open Games) | | `gds-software` | `gds_software` | Software architecture DSL (DFD, state machine, C4, ERD, etc.) | | `gds-business` | `gds_business` | Business dynamics DSL (CLD, supply chain, value stream map) | +| `gds-sim` | `gds_sim` | Simulation engine — Model, Simulation, Results | +| `gds-psuu` | `gds_psuu` | Parameter space search under uncertainty for gds-sim | | `gds-examples` | — | Tutorial models demonstrating framework features | ## Architecture @@ -51,6 +53,10 @@ gds-software ← software architecture DSL (depends on gds-framework) gds-business ← business dynamics DSL (depends on gds-framework) ↑ gds-examples ← tutorials (depends on gds-framework + gds-viz) + +gds-sim ← simulation engine (standalone — no gds-framework dep) + ↑ +gds-psuu ← parameter search under uncertainty (depends on gds-sim) ``` ## License diff --git a/docs/psuu/api/evaluation.md b/docs/psuu/api/evaluation.md new file mode 100644 index 0000000..22f5a48 --- /dev/null +++ b/docs/psuu/api/evaluation.md @@ -0,0 +1,8 @@ +# gds_psuu.evaluation + +Evaluation bridge between parameter points and gds-sim. + +::: gds_psuu.evaluation + options: + show_source: true + members_order: source diff --git a/docs/psuu/api/init.md b/docs/psuu/api/init.md new file mode 100644 index 0000000..6bdb0fb --- /dev/null +++ b/docs/psuu/api/init.md @@ -0,0 +1,8 @@ +# gds_psuu + +Public API -- top-level exports. + +::: gds_psuu + options: + show_submodules: false + members: false diff --git a/docs/psuu/api/kpi.md b/docs/psuu/api/kpi.md new file mode 100644 index 0000000..63b4178 --- /dev/null +++ b/docs/psuu/api/kpi.md @@ -0,0 +1,8 @@ +# gds_psuu.kpi + +KPI wrapper and legacy helper functions. + +::: gds_psuu.kpi + options: + show_source: true + members_order: source diff --git a/docs/psuu/api/metric.md b/docs/psuu/api/metric.md new file mode 100644 index 0000000..9a3ccbf --- /dev/null +++ b/docs/psuu/api/metric.md @@ -0,0 +1,8 @@ +# gds_psuu.metric + +Metric and Aggregation primitives for composable KPI construction. + +::: gds_psuu.metric + options: + show_source: true + members_order: source diff --git a/docs/psuu/api/optimizers.md b/docs/psuu/api/optimizers.md new file mode 100644 index 0000000..ffc727e --- /dev/null +++ b/docs/psuu/api/optimizers.md @@ -0,0 +1,31 @@ +# gds_psuu.optimizers + +Search strategy implementations. + +## Base + +::: gds_psuu.optimizers.base + options: + show_source: true + members_order: source + +## Grid Search + +::: gds_psuu.optimizers.grid + options: + show_source: true + members_order: source + +## Random Search + +::: gds_psuu.optimizers.random + options: + show_source: true + members_order: source + +## Bayesian (optional) + +::: gds_psuu.optimizers.bayesian + options: + show_source: true + members_order: source diff --git a/docs/psuu/api/results.md b/docs/psuu/api/results.md new file mode 100644 index 0000000..c4f8231 --- /dev/null +++ b/docs/psuu/api/results.md @@ -0,0 +1,8 @@ +# gds_psuu.results + +Sweep results and summary types. + +::: gds_psuu.results + options: + show_source: true + members_order: source diff --git a/docs/psuu/api/space.md b/docs/psuu/api/space.md new file mode 100644 index 0000000..d697c61 --- /dev/null +++ b/docs/psuu/api/space.md @@ -0,0 +1,8 @@ +# gds_psuu.space + +Parameter space definitions for search. + +::: gds_psuu.space + options: + show_source: true + members_order: source diff --git a/docs/psuu/api/sweep.md b/docs/psuu/api/sweep.md new file mode 100644 index 0000000..6c49ff6 --- /dev/null +++ b/docs/psuu/api/sweep.md @@ -0,0 +1,8 @@ +# gds_psuu.sweep + +Sweep orchestrator -- the main entry point for parameter search. + +::: gds_psuu.sweep + options: + show_source: true + members_order: source diff --git a/docs/psuu/getting-started.md b/docs/psuu/getting-started.md new file mode 100644 index 0000000..55c4728 --- /dev/null +++ b/docs/psuu/getting-started.md @@ -0,0 +1,182 @@ +# Getting Started + +## Installation + +```bash +uv add gds-psuu +# or: pip install gds-psuu +``` + +For Bayesian optimization (optional): + +```bash +uv add "gds-psuu[bayesian]" +# or: pip install gds-psuu[bayesian] +``` + +For development (monorepo): + +```bash +git clone https://github.com/BlockScience/gds-core.git +cd gds-core +uv sync --all-packages +``` + +## Your First Parameter Sweep + +Define a `gds-sim` model, then sweep a parameter to find the best value: + +```python +from gds_sim import Model, StateUpdateBlock +from gds_psuu import ( + KPI, + Continuous, + GridSearchOptimizer, + ParameterSpace, + Sweep, + final_value, + mean_agg, +) + + +# 1. Define a growth model +def growth_policy(state, params, **kw): + return {"delta": state["population"] * params["growth_rate"]} + + +def update_pop(state, params, *, signal=None, **kw): + return ("population", state["population"] + signal["delta"]) + + +model = Model( + initial_state={"population": 100.0}, + state_update_blocks=[ + StateUpdateBlock( + policies={"growth": growth_policy}, + variables={"population": update_pop}, + ) + ], +) + +# 2. Define what to search +space = ParameterSpace( + params={"growth_rate": Continuous(min_val=0.01, max_val=0.2)} +) + +# 3. Define what to measure +kpis = [ + KPI( + name="avg_final_pop", + metric=final_value("population"), # per-run: final value + aggregation=mean_agg, # cross-run: mean + ), +] + +# 4. Run the sweep +sweep = Sweep( + model=model, + space=space, + kpis=kpis, + optimizer=GridSearchOptimizer(n_steps=5), + timesteps=10, + runs=3, # 3 Monte Carlo runs per parameter point +) +results = sweep.run() + +# 5. Inspect results +best = results.best("avg_final_pop") +print(f"Best growth_rate: {best.params['growth_rate']:.3f}") +print(f"Best avg final pop: {best.scores['avg_final_pop']:.1f}") +``` + +## Composable KPIs + +The key design is the **Metric + Aggregation = KPI** pattern: + +```python +from gds_psuu import ( + KPI, + final_value, + trajectory_mean, + max_value, + mean_agg, + std_agg, + percentile_agg, + probability_above, +) + +# Mean of final population across runs +avg_final = KPI(name="avg_pop", metric=final_value("population"), aggregation=mean_agg) + +# Standard deviation of final population (measures uncertainty) +std_final = KPI(name="std_pop", metric=final_value("population"), aggregation=std_agg) + +# 90th percentile of trajectory means +p90_mean = KPI(name="p90_mean", metric=trajectory_mean("population"), aggregation=percentile_agg(90)) + +# Probability that max population exceeds 500 +risk = KPI(name="boom_risk", metric=max_value("population"), aggregation=probability_above(500.0)) +``` + +**Metric** extracts a scalar from each run. **Aggregation** reduces the per-run values to a single score. + +If no aggregation is specified, `mean_agg` is used by default: + +```python +# These are equivalent: +KPI(name="avg_pop", metric=final_value("population")) +KPI(name="avg_pop", metric=final_value("population"), aggregation=mean_agg) +``` + +## Per-Run Distributions + +Metric-based KPIs track the full distribution across Monte Carlo runs: + +```python +results = sweep.run() + +for ev in results.evaluations: + dist = ev.distributions["avg_final_pop"] + print(f" params={ev.params}, per_run={dist}") + # e.g. per_run=[265.3, 265.3, 265.3] for deterministic model +``` + +## Multiple Optimizers + +```python +from gds_psuu import GridSearchOptimizer, RandomSearchOptimizer + +# Exhaustive grid (good for 1-2 dimensions) +grid = GridSearchOptimizer(n_steps=10) # 10 points per continuous dim + +# Random sampling (good for higher dimensions) +rand = RandomSearchOptimizer(n_samples=50, seed=42) +``` + +For Bayesian optimization (requires `gds-psuu[bayesian]`): + +```python +from gds_psuu.optimizers.bayesian import BayesianOptimizer + +bayes = BayesianOptimizer(n_calls=30, target_kpi="avg_final_pop", seed=42) +``` + +## Legacy KPI Support + +The older `fn`-based KPI interface still works: + +```python +from gds_psuu import KPI, final_state_mean + +# Legacy style (backwards compatible) +kpi = KPI(name="pop", fn=lambda r: final_state_mean(r, "population")) +``` + +Legacy KPIs don't track per-run distributions -- use metric-based KPIs for full Monte Carlo awareness. + +## Next Steps + +- [Concepts](guide/concepts.md) -- Metric, Aggregation, KPI, and the full conceptual hierarchy +- [Parameter Spaces](guide/spaces.md) -- dimensions, validation, and grid generation +- [Optimizers](guide/optimizers.md) -- grid, random, and Bayesian search strategies +- [API Reference](api/init.md) -- complete auto-generated API docs diff --git a/docs/psuu/guide/concepts.md b/docs/psuu/guide/concepts.md new file mode 100644 index 0000000..d78e811 --- /dev/null +++ b/docs/psuu/guide/concepts.md @@ -0,0 +1,215 @@ +# Concepts + +This page explains the core abstractions in `gds-psuu` and how they compose. + +## The Hierarchy + +``` +Parameter Point -> Simulation -> Results -> Metric -> Aggregation -> KPI +``` + +Each layer transforms data from the previous one. The sweep loop orchestrates the full pipeline across many parameter points. + +--- + +## Parameter Space + +A `ParameterSpace` defines what to search. Each dimension has a name and a type: + +| Dimension | Description | Grid behavior | +|-----------|-------------|---------------| +| `Continuous(min_val, max_val)` | Real-valued range | `n_steps` evenly spaced points | +| `Integer(min_val, max_val)` | Integer range (inclusive) | All integers in range | +| `Discrete(values=(...))` | Explicit set of values | All values | + +```python +from gds_psuu import Continuous, Integer, Discrete, ParameterSpace + +space = ParameterSpace(params={ + "learning_rate": Continuous(min_val=0.001, max_val=0.1), + "hidden_layers": Integer(min_val=1, max_val=5), + "activation": Discrete(values=("relu", "tanh", "sigmoid")), +}) +``` + +Validation enforces `min_val < max_val` and at least one parameter. + +--- + +## Metric + +A `Metric` extracts a **single scalar from one simulation run**. It receives the full `Results` object and a run ID. + +```python +from gds_psuu import Metric + +# Built-in factories +from gds_psuu import final_value, trajectory_mean, max_value, min_value + +final_value("population") # value at last timestep +trajectory_mean("population") # mean over all timesteps +max_value("population") # maximum over all timesteps +min_value("population") # minimum over all timesteps +``` + +Custom metrics: + +```python +Metric( + name="range", + fn=lambda results, run: ( + max_value("x").fn(results, run) - min_value("x").fn(results, run) + ), +) +``` + +The `MetricFn` signature is `(Results, int) -> float` where the int is the run ID. + +--- + +## Aggregation + +An `Aggregation` reduces a **list of per-run values into a single scalar**. It operates on `list[float]` and returns `float`. + +```python +from gds_psuu import mean_agg, std_agg, percentile_agg, probability_above, probability_below + +mean_agg # arithmetic mean +std_agg # sample standard deviation +percentile_agg(50) # median (50th percentile) +percentile_agg(95) # 95th percentile +probability_above(100.0) # fraction of runs > 100 +probability_below(0.0) # fraction of runs < 0 (risk measure) +``` + +Custom aggregations: + +```python +from gds_psuu import Aggregation + +cv_agg = Aggregation( + name="cv", + fn=lambda vals: ( + (sum((x - sum(vals)/len(vals))**2 for x in vals) / (len(vals)-1))**0.5 + / (sum(vals)/len(vals)) + if len(vals) > 1 and sum(vals) != 0 else 0.0 + ), +) +``` + +--- + +## KPI + +A `KPI` composes a Metric and an Aggregation into a named score: + +```python +from gds_psuu import KPI, final_value, std_agg + +kpi = KPI( + name="uncertainty", + metric=final_value("population"), # per-run: final value + aggregation=std_agg, # cross-run: standard deviation +) +``` + +If `aggregation` is omitted, `mean_agg` is used by default. + +### Per-run access + +Metric-based KPIs expose the full distribution: + +```python +results = simulation_results # from gds-sim +per_run_values = kpi.per_run(results) # [val_run1, val_run2, ...] +aggregated = kpi.compute(results) # single float +``` + +### Legacy KPIs + +The older `fn`-based interface operates on the full `Results` at once: + +```python +from gds_psuu import KPI, final_state_mean + +kpi = KPI(name="pop", fn=lambda r: final_state_mean(r, "population")) +``` + +Legacy KPIs cannot use `per_run()` and don't produce distributions. Prefer metric-based KPIs for new code. + +--- + +## Evaluator + +The `Evaluator` bridges parameter points to scored KPIs: + +1. Takes a parameter point `{"growth_rate": 0.05}` +2. Injects params into the `gds-sim` Model +3. Runs N Monte Carlo simulations +4. Computes each KPI on the results +5. Returns `EvaluationResult` with scores and distributions + +```python +from gds_psuu import Evaluator + +evaluator = Evaluator( + base_model=model, + kpis=[kpi1, kpi2], + timesteps=100, + runs=10, +) +result = evaluator.evaluate({"growth_rate": 0.05}) +# result.scores == {"kpi1_name": 42.0, "kpi2_name": 3.14} +# result.distributions == {"kpi1_name": [per-run values...]} +``` + +--- + +## Optimizer + +An `Optimizer` implements the suggest/observe loop: + +| Optimizer | Strategy | When to use | +|-----------|----------|-------------| +| `GridSearchOptimizer(n_steps)` | Exhaustive cartesian product | 1-2 dimensions, need full coverage | +| `RandomSearchOptimizer(n_samples, seed)` | Uniform random sampling | Higher dimensions, exploration | +| `BayesianOptimizer(n_calls, target_kpi)` | Gaussian process surrogate | Expensive evaluations, optimization | + +All optimizers implement the same interface: + +```python +optimizer.setup(space, kpi_names) +while not optimizer.is_exhausted(): + params = optimizer.suggest() + # ... evaluate ... + optimizer.observe(params, scores) +``` + +--- + +## Sweep + +`Sweep` is the top-level orchestrator that connects everything: + +```python +from gds_psuu import Sweep + +sweep = Sweep( + model=model, + space=space, + kpis=kpis, + optimizer=optimizer, + timesteps=100, + runs=10, +) +results = sweep.run() +``` + +### SweepResults + +```python +results.evaluations # list[EvaluationResult] -- all evaluations +results.summaries # list[EvaluationSummary] -- params + scores only +results.best("kpi_name") # best evaluation for a KPI +results.to_dataframe() # pandas DataFrame (requires pandas) +``` diff --git a/docs/psuu/guide/optimizers.md b/docs/psuu/guide/optimizers.md new file mode 100644 index 0000000..751220b --- /dev/null +++ b/docs/psuu/guide/optimizers.md @@ -0,0 +1,105 @@ +# Optimizers + +All optimizers implement the same `Optimizer` interface: `setup()`, `suggest()`, `observe()`, `is_exhausted()`. The `Sweep` class drives this loop automatically. + +## GridSearchOptimizer + +Exhaustive evaluation of every point in a regular grid. + +```python +from gds_psuu import GridSearchOptimizer + +optimizer = GridSearchOptimizer(n_steps=10) +``` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `n_steps` | `int` | `5` | Points per continuous dimension | + +**Behavior:** Generates the full cartesian product via `ParameterSpace.grid_points()`, then evaluates each point exactly once. Does not adapt based on observed scores. + +**When to use:** Small parameter spaces (1-2 dimensions), need complete coverage, want reproducible results. + +**Total evaluations:** `n_steps^(n_continuous) * product(integer_ranges) * product(discrete_sizes)` + +--- + +## RandomSearchOptimizer + +Uniform random sampling across the parameter space. + +```python +from gds_psuu import RandomSearchOptimizer + +optimizer = RandomSearchOptimizer(n_samples=50, seed=42) +``` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `n_samples` | `int` | `20` | Total parameter points to sample | +| `seed` | `int \| None` | `None` | Random seed for reproducibility | + +**Behavior:** Samples each dimension independently -- `uniform(min, max)` for Continuous, `randint(min, max)` for Integer, `choice(values)` for Discrete. Does not adapt based on observed scores. + +**When to use:** Higher-dimensional spaces where grid search is infeasible, initial exploration before Bayesian optimization. + +--- + +## BayesianOptimizer + +Gaussian process surrogate model that learns from past evaluations. + +!!! note "Optional dependency" + Requires `scikit-optimize`. Install with: `uv add "gds-psuu[bayesian]"` + +```python +from gds_psuu.optimizers.bayesian import BayesianOptimizer + +optimizer = BayesianOptimizer( + n_calls=30, + target_kpi="avg_final_pop", + maximize=True, + seed=42, +) +``` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `n_calls` | `int` | `20` | Total evaluations (initial + optimization) | +| `target_kpi` | `str \| None` | `None` | KPI to optimize (defaults to first) | +| `maximize` | `bool` | `True` | Maximize (True) or minimize (False) | +| `seed` | `int \| None` | `None` | Random seed | + +**Behavior:** Starts with random exploration (`min(5, n_calls)` initial points), then uses a Gaussian process surrogate to balance exploration and exploitation. Optimizes a single target KPI. + +**When to use:** Expensive simulations where you want to find the optimum with fewer evaluations. Works best with continuous parameters. + +--- + +## Custom Optimizers + +Subclass `Optimizer` to implement your own search strategy: + +```python +from gds_psuu.optimizers.base import Optimizer +from gds_psuu.space import ParameterSpace +from gds_psuu.types import KPIScores, ParamPoint + + +class MyOptimizer(Optimizer): + def setup(self, space: ParameterSpace, kpi_names: list[str]) -> None: + # Initialize search state + ... + + def suggest(self) -> ParamPoint: + # Return next parameter point to evaluate + ... + + def observe(self, params: ParamPoint, scores: KPIScores) -> None: + # Learn from evaluation results + ... + + def is_exhausted(self) -> bool: + # Return True when search is complete + ... +``` diff --git a/docs/psuu/guide/spaces.md b/docs/psuu/guide/spaces.md new file mode 100644 index 0000000..963fc6f --- /dev/null +++ b/docs/psuu/guide/spaces.md @@ -0,0 +1,102 @@ +# Parameter Spaces + +## Dimension Types + +### Continuous + +A real-valued range with inclusive bounds. + +```python +from gds_psuu import Continuous + +dim = Continuous(min_val=0.0, max_val=1.0) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `min_val` | `float` | Lower bound (inclusive) | +| `max_val` | `float` | Upper bound (inclusive) | + +Validation: `min_val < max_val`, both must be finite. + +Grid behavior: `n_steps` evenly spaced points from `min_val` to `max_val`. + +--- + +### Integer + +An integer range with inclusive bounds. + +```python +from gds_psuu import Integer + +dim = Integer(min_val=1, max_val=10) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `min_val` | `int` | Lower bound (inclusive) | +| `max_val` | `int` | Upper bound (inclusive) | + +Validation: `min_val < max_val`. + +Grid behavior: all integers from `min_val` to `max_val` (ignores `n_steps`). + +--- + +### Discrete + +An explicit set of allowed values (any hashable type). + +```python +from gds_psuu import Discrete + +dim = Discrete(values=("adam", "sgd", "rmsprop")) +``` + +| Field | Type | Description | +|-------|------|-------------| +| `values` | `tuple[Any, ...]` | Allowed values | + +Validation: at least 1 value. + +Grid behavior: all values (ignores `n_steps`). + +--- + +## ParameterSpace + +Combines dimensions into a named parameter space: + +```python +from gds_psuu import Continuous, Integer, Discrete, ParameterSpace + +space = ParameterSpace(params={ + "learning_rate": Continuous(min_val=0.001, max_val=0.1), + "batch_size": Integer(min_val=16, max_val=128), + "optimizer": Discrete(values=("adam", "sgd")), +}) +``` + +### Grid Generation + +```python +points = space.grid_points(n_steps=5) +# Returns list of dicts, e.g.: +# [ +# {"learning_rate": 0.001, "batch_size": 16, "optimizer": "adam"}, +# {"learning_rate": 0.001, "batch_size": 16, "optimizer": "sgd"}, +# ... +# ] +``` + +The total number of grid points is the cartesian product: +`n_steps * (max_int - min_int + 1) * len(discrete_values)` + +For the example above: `5 * 113 * 2 = 1130` points. + +### Properties + +| Property | Returns | Description | +|----------|---------|-------------| +| `dimension_names` | `list[str]` | Ordered list of parameter names | diff --git a/docs/psuu/index.md b/docs/psuu/index.md new file mode 100644 index 0000000..ba6e652 --- /dev/null +++ b/docs/psuu/index.md @@ -0,0 +1,92 @@ +# gds-psuu + +[![PyPI](https://img.shields.io/pypi/v/gds-psuu)](https://pypi.org/project/gds-psuu/) +[![Python](https://img.shields.io/pypi/pyversions/gds-psuu)](https://pypi.org/project/gds-psuu/) +[![License](https://img.shields.io/github/license/BlockScience/gds-core)](https://github.com/BlockScience/gds-core/blob/main/LICENSE) + +**Parameter space search under uncertainty** -- explore, evaluate, and optimize simulation parameters with Monte Carlo awareness. + +## What is this? + +`gds-psuu` bridges `gds-sim` simulations with systematic parameter exploration. It provides: + +- **Parameter spaces** -- `Continuous`, `Integer`, and `Discrete` dimensions with validation +- **Composable KPIs** -- `Metric` (per-run scalar) + `Aggregation` (cross-run reducer) = `KPI` +- **3 search strategies** -- Grid, Random, and Bayesian (optuna) optimizers +- **Monte Carlo awareness** -- per-run distributions tracked alongside aggregated scores +- **Zero mandatory dependencies** beyond `gds-sim` and `pydantic` + +## Architecture + +``` +gds-sim (pip install gds-sim) +| +| Simulation engine: Model, StateUpdateBlock, +| Simulation, Results (columnar storage). +| ++-- gds-psuu (pip install gds-psuu) + | + | Parameter search: ParameterSpace, Metric, Aggregation, + | KPI, Evaluator, Sweep, Optimizer. + | + +-- Your application + | + | Concrete models, parameter studies, + | sensitivity analysis, optimization. +``` + +## Conceptual Hierarchy + +The package follows a clear hierarchy from parameters to optimization: + +``` +Parameter Point {"growth_rate": 0.05} + | + v +Simulation Model + timesteps + N runs + | + v +Results Columnar data (timestep, substep, run, state vars) + | + v +Metric (per-run) final_value("pop") -> scalar per run + | + v +Aggregation (cross-run) mean_agg, std_agg, probability_above(...) + | + v +KPI (composed) KPI(metric=..., aggregation=...) -> single score + | + v +Sweep Optimizer drives suggest/evaluate/observe loop + | + v +SweepResults All evaluations + best() selection +``` + +## How the Sweep Loop Works + +``` +Optimizer.suggest() --> Evaluator.evaluate(params) --> Optimizer.observe(scores) + ^ | | + | gds-sim Simulation | + +------------------------ repeat --------------------------+ +``` + +1. The **Optimizer** suggests a parameter point +2. The **Evaluator** injects params into a `gds-sim` Model, runs N Monte Carlo simulations +3. Each **KPI** extracts a per-run **Metric**, then **Aggregates** across runs into a single score +4. The **Optimizer** observes the scores and decides what to try next + +## Quick Start + +```bash +uv add gds-psuu +# or: pip install gds-psuu +``` + +See [Getting Started](getting-started.md) for a full walkthrough. + +## Credits + +Built on [gds-sim](../sim/index.md) by [BlockScience](https://block.science). diff --git a/mkdocs.yml b/mkdocs.yml index 1f921a9..c8a9910 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -109,6 +109,11 @@ plugins: - guides/dsl-roadmap.md - guides/research-boundaries.md - guides/view-stratification.md + PSUU (gds-psuu): + - {psuu/index.md: "Parameter space search under uncertainty for gds-sim"} + - {psuu/getting-started.md: "Installation + first parameter sweep"} + - psuu/guide/*.md + - psuu/api/*.md Ecosystem: - {framework/ecosystem.md: "All packages, their imports, and dependency relationships"} @@ -311,6 +316,22 @@ nav: - gds_software.dependency.compile: software/api/dep-compile.md - gds_software.dependency.checks: software/api/dep-checks.md - gds_software.verification: software/api/verification.md + - PSUU: + - Overview: psuu/index.md + - Getting Started: psuu/getting-started.md + - User Guide: + - Concepts: psuu/guide/concepts.md + - Parameter Spaces: psuu/guide/spaces.md + - Optimizers: psuu/guide/optimizers.md + - API Reference: + - gds_psuu: psuu/api/init.md + - gds_psuu.metric: psuu/api/metric.md + - gds_psuu.kpi: psuu/api/kpi.md + - gds_psuu.evaluation: psuu/api/evaluation.md + - gds_psuu.space: psuu/api/space.md + - gds_psuu.optimizers: psuu/api/optimizers.md + - gds_psuu.sweep: psuu/api/sweep.md + - gds_psuu.results: psuu/api/results.md - Design & Research: - Layer 0 Milestone: guides/architecture-milestone-layer0.md - DSL Roadmap: guides/dsl-roadmap.md diff --git a/packages/gds-psuu/gds_psuu/__init__.py b/packages/gds-psuu/gds_psuu/__init__.py index f93f9c7..beb4d17 100644 --- a/packages/gds-psuu/gds_psuu/__init__.py +++ b/packages/gds-psuu/gds_psuu/__init__.py @@ -5,6 +5,19 @@ from gds_psuu.errors import PsuuError, PsuuSearchError, PsuuValidationError from gds_psuu.evaluation import EvaluationResult, Evaluator from gds_psuu.kpi import KPI, final_state_mean, final_state_std, time_average +from gds_psuu.metric import ( + Aggregation, + Metric, + final_value, + max_value, + mean_agg, + min_value, + percentile_agg, + probability_above, + probability_below, + std_agg, + trajectory_mean, +) from gds_psuu.objective import Objective, SingleKPI, WeightedSum from gds_psuu.optimizers.base import Optimizer from gds_psuu.optimizers.bayesian import BayesianOptimizer @@ -27,9 +40,11 @@ ParameterSpace, ) from gds_psuu.sweep import Sweep -from gds_psuu.types import KPIFn, KPIScores, ParamPoint +from gds_psuu.types import AggregationFn, KPIFn, KPIScores, MetricFn, ParamPoint __all__ = [ + "Aggregation", + "AggregationFn", "Analyzer", "BayesianOptimizer", "Constraint", @@ -45,6 +60,8 @@ "KPIFn", "KPIScores", "LinearConstraint", + "Metric", + "MetricFn", "MorrisAnalyzer", "OATAnalyzer", "Objective", @@ -62,5 +79,14 @@ "WeightedSum", "final_state_mean", "final_state_std", + "final_value", + "max_value", + "mean_agg", + "min_value", + "percentile_agg", + "probability_above", + "probability_below", + "std_agg", "time_average", + "trajectory_mean", ] diff --git a/packages/gds-psuu/gds_psuu/evaluation.py b/packages/gds-psuu/gds_psuu/evaluation.py index 06fd5ad..4c1bcc0 100644 --- a/packages/gds-psuu/gds_psuu/evaluation.py +++ b/packages/gds-psuu/gds_psuu/evaluation.py @@ -2,7 +2,7 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any from gds_sim import Model, Results, Simulation @@ -20,6 +20,8 @@ class EvaluationResult: scores: KPIScores results: Results run_count: int + distributions: dict[str, list[float]] = field(default_factory=dict) + """Per-run metric values for metric-based KPIs.""" class Evaluator(BaseModel): @@ -36,7 +38,8 @@ def evaluate(self, params: ParamPoint) -> EvaluationResult: """Evaluate a single parameter point. Injects params as singleton lists into the model, runs the simulation, - and computes KPI scores. + and computes KPI scores. For metric-based KPIs, also records per-run + distributions. """ # Build params dict: each value as a singleton list for gds-sim sim_params: dict[str, list[Any]] = {k: [v] for k, v in params.items()} @@ -51,11 +54,18 @@ def evaluate(self, params: ParamPoint) -> EvaluationResult: sim = Simulation(model=model, timesteps=self.timesteps, runs=self.runs) results = sim.run() - scores: KPIScores = {kpi.name: kpi.fn(results) for kpi in self.kpis} + scores: KPIScores = {} + distributions: dict[str, list[float]] = {} + + for kpi in self.kpis: + scores[kpi.name] = kpi.compute(results) + if kpi.metric is not None: + distributions[kpi.name] = kpi.per_run(results) return EvaluationResult( params=params, scores=scores, results=results, run_count=self.runs, + distributions=distributions, ) diff --git a/packages/gds-psuu/gds_psuu/kpi.py b/packages/gds-psuu/gds_psuu/kpi.py index 45393c6..b805ec1 100644 --- a/packages/gds-psuu/gds_psuu/kpi.py +++ b/packages/gds-psuu/gds_psuu/kpi.py @@ -2,10 +2,17 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Self -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, model_validator +from gds_psuu.errors import PsuuValidationError +from gds_psuu.metric import ( + Aggregation, + Metric, + _extract_run_ids, + mean_agg, +) from gds_psuu.types import KPIFn # noqa: TC001 if TYPE_CHECKING: @@ -13,12 +20,58 @@ class KPI(BaseModel): - """Named KPI backed by a scoring function.""" + """Named KPI backed by either a legacy fn or a Metric + Aggregation pair. + + Legacy usage (backwards compatible):: + + KPI(name="avg_pop", fn=lambda r: final_state_mean(r, "population")) + + Composable usage:: + + KPI(name="avg_pop", metric=final_value("population"), aggregation=mean_agg) + """ model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True) name: str - fn: KPIFn + fn: KPIFn | None = None + metric: Metric | None = None + aggregation: Aggregation | None = None + + @model_validator(mode="after") + def _validate_specification(self) -> Self: + has_fn = self.fn is not None + has_metric = self.metric is not None + if not has_fn and not has_metric: + raise PsuuValidationError("KPI must have either 'fn' or 'metric' specified") + if has_fn and has_metric: + raise PsuuValidationError( + "KPI cannot have both 'fn' and 'metric' specified" + ) + return self + + def compute(self, results: Results) -> float: + """Compute the aggregated KPI score from results.""" + if self.fn is not None: + return self.fn(results) + assert self.metric is not None + agg = self.aggregation or mean_agg + per_run = self.per_run(results) + return agg.fn(per_run) + + def per_run(self, results: Results) -> list[float]: + """Compute per-run metric values. Only available for metric-based KPIs.""" + if self.metric is None: + raise PsuuValidationError( + "per_run() requires a metric-based KPI, not a legacy fn-based KPI" + ) + run_ids = _extract_run_ids(results) + return [self.metric.fn(results, r) for r in run_ids] + + +# --------------------------------------------------------------------------- +# Legacy helper functions (backwards compatible) +# --------------------------------------------------------------------------- def final_state_mean(results: Results, key: str) -> float: diff --git a/packages/gds-psuu/gds_psuu/metric.py b/packages/gds-psuu/gds_psuu/metric.py new file mode 100644 index 0000000..5242075 --- /dev/null +++ b/packages/gds-psuu/gds_psuu/metric.py @@ -0,0 +1,214 @@ +"""Metric and Aggregation primitives for composable KPI construction.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pydantic import BaseModel, ConfigDict + +from gds_psuu.types import AggregationFn, MetricFn # noqa: TC001 + +if TYPE_CHECKING: + from gds_sim import Results + + +class Metric(BaseModel): + """Per-run scalar extracted from simulation output.""" + + model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True) + + name: str + fn: MetricFn + + +class Aggregation(BaseModel): + """Combines per-run metric values across Monte Carlo runs.""" + + model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True) + + name: str + fn: AggregationFn + + +# --------------------------------------------------------------------------- +# Built-in metric factories +# --------------------------------------------------------------------------- + + +def _extract_run_ids(results: Results) -> list[int]: + """Get unique run IDs from results.""" + cols = results._trimmed_columns() + runs = cols["run"] + seen: set[int] = set() + ordered: list[int] = [] + for r in runs: + if r not in seen: + seen.add(r) + ordered.append(r) + return ordered + + +def _final_value_fn(results: Results, run: int, key: str) -> float: + """Extract the final-timestep value for a specific run.""" + cols = results._trimmed_columns() + timesteps = cols["timestep"] + substeps = cols["substep"] + runs_col = cols["run"] + values = cols[key] + n = results._size + + max_t = 0 + for i in range(n): + if runs_col[i] == run: + t = timesteps[i] + if t > max_t: + max_t = t + + max_s = 0 + for i in range(n): + if runs_col[i] == run and timesteps[i] == max_t: + s = substeps[i] + if s > max_s: + max_s = s + + for i in range(n): + if runs_col[i] == run and timesteps[i] == max_t and substeps[i] == max_s: + return float(values[i]) + + return 0.0 + + +def final_value(key: str) -> Metric: + """Metric: value of a state variable at the final timestep of a run.""" + return Metric( + name=f"final_{key}", + fn=lambda results, run, _key=key: _final_value_fn(results, run, _key), + ) + + +def _trajectory_mean_fn(results: Results, run: int, key: str) -> float: + """Mean of a state variable across all timesteps for a specific run.""" + cols = results._trimmed_columns() + runs_col = cols["run"] + values = cols[key] + n = results._size + + total = 0.0 + count = 0 + for i in range(n): + if runs_col[i] == run: + total += float(values[i]) + count += 1 + + return total / count if count else 0.0 + + +def trajectory_mean(key: str) -> Metric: + """Metric: mean of a state variable over time for a single run.""" + return Metric( + name=f"mean_{key}", + fn=lambda results, run, _key=key: _trajectory_mean_fn(results, run, _key), + ) + + +def _max_value_fn(results: Results, run: int, key: str) -> float: + """Max of a state variable across all timesteps for a specific run.""" + cols = results._trimmed_columns() + runs_col = cols["run"] + values = cols[key] + n = results._size + + max_val = float("-inf") + for i in range(n): + if runs_col[i] == run: + v = float(values[i]) + if v > max_val: + max_val = v + return max_val if max_val != float("-inf") else 0.0 + + +def max_value(key: str) -> Metric: + """Metric: maximum value of a state variable within a single run.""" + return Metric( + name=f"max_{key}", + fn=lambda results, run, _key=key: _max_value_fn(results, run, _key), + ) + + +def _min_value_fn(results: Results, run: int, key: str) -> float: + """Min of a state variable across all timesteps for a specific run.""" + cols = results._trimmed_columns() + runs_col = cols["run"] + values = cols[key] + n = results._size + + min_val = float("inf") + for i in range(n): + if runs_col[i] == run: + v = float(values[i]) + if v < min_val: + min_val = v + return min_val if min_val != float("inf") else 0.0 + + +def min_value(key: str) -> Metric: + """Metric: minimum value of a state variable within a single run.""" + return Metric( + name=f"min_{key}", + fn=lambda results, run, _key=key: _min_value_fn(results, run, _key), + ) + + +# --------------------------------------------------------------------------- +# Built-in aggregations +# --------------------------------------------------------------------------- + +mean_agg = Aggregation( + name="mean", + fn=lambda vals: sum(vals) / len(vals) if vals else 0.0, +) + +std_agg = Aggregation( + name="std", + fn=lambda vals: ( + (sum((x - sum(vals) / len(vals)) ** 2 for x in vals) / (len(vals) - 1)) ** 0.5 + if len(vals) > 1 + else 0.0 + ), +) + + +def percentile_agg(p: float) -> Aggregation: + """Aggregation: p-th percentile across runs.""" + + def _fn(vals: list[float]) -> float: + if not vals: + return 0.0 + s = sorted(vals) + k = (p / 100.0) * (len(s) - 1) + lo = int(k) + hi = min(lo + 1, len(s) - 1) + frac = k - lo + return s[lo] + frac * (s[hi] - s[lo]) + + return Aggregation(name=f"p{p}", fn=_fn) + + +def probability_above(threshold: float) -> Aggregation: + """Aggregation: fraction of runs where metric exceeds threshold.""" + return Aggregation( + name=f"P(>{threshold})", + fn=lambda vals: ( + sum(1 for v in vals if v > threshold) / len(vals) if vals else 0.0 + ), + ) + + +def probability_below(threshold: float) -> Aggregation: + """Aggregation: fraction of runs where metric is below threshold.""" + return Aggregation( + name=f"P(<{threshold})", + fn=lambda vals: ( + sum(1 for v in vals if v < threshold) / len(vals) if vals else 0.0 + ), + ) diff --git a/packages/gds-psuu/gds_psuu/types.py b/packages/gds-psuu/gds_psuu/types.py index 8b97744..d7e8ec2 100644 --- a/packages/gds-psuu/gds_psuu/types.py +++ b/packages/gds-psuu/gds_psuu/types.py @@ -13,5 +13,11 @@ KPIFn = Callable[[Results], float] """Computes a scalar KPI score from simulation results (all Monte Carlo runs).""" +MetricFn = Callable[[Results, int], float] +"""Computes a scalar from a single run. Args: results, run_number.""" + +AggregationFn = Callable[[list[float]], float] +"""Combines per-run metric values into a single scalar.""" + KPIScores = dict[str, float] """Maps KPI names to their computed scalar scores.""" diff --git a/packages/gds-psuu/tests/test_metric.py b/packages/gds-psuu/tests/test_metric.py new file mode 100644 index 0000000..122a77f --- /dev/null +++ b/packages/gds-psuu/tests/test_metric.py @@ -0,0 +1,284 @@ +"""Tests for Metric/Aggregation primitives and composable KPIs.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest +from gds_sim import Results + +from gds_psuu import ( + KPI, + Continuous, + Evaluator, + GridSearchOptimizer, + ParameterSpace, + PsuuValidationError, + Sweep, + final_state_mean, + final_value, + max_value, + mean_agg, + min_value, + percentile_agg, + probability_above, + probability_below, + std_agg, + trajectory_mean, +) + +if TYPE_CHECKING: + from gds_sim import Model + + +def _make_results(values: list[list[float]], key: str = "x") -> Results: + """Create Results with multiple runs. Each inner list is one run's trajectory.""" + r = Results(state_keys=[key]) + for run_id, trajectory in enumerate(values, start=1): + for t, val in enumerate(trajectory): + r.append( + {key: val}, + timestep=t, + substep=1, + run=run_id, + subset=0, + ) + return r + + +class TestMetricFactories: + def test_final_value(self) -> None: + results = _make_results([[10.0, 20.0, 30.0]], key="pop") + m = final_value("pop") + assert m.fn(results, 1) == 30.0 + + def test_final_value_multi_run(self) -> None: + results = _make_results([[10.0, 20.0], [100.0, 200.0]], key="pop") + m = final_value("pop") + assert m.fn(results, 1) == 20.0 + assert m.fn(results, 2) == 200.0 + + def test_trajectory_mean(self) -> None: + results = _make_results([[10.0, 20.0, 30.0]], key="x") + m = trajectory_mean("x") + assert m.fn(results, 1) == 20.0 + + def test_max_value(self) -> None: + results = _make_results([[5.0, 100.0, 3.0]], key="x") + m = max_value("x") + assert m.fn(results, 1) == 100.0 + + def test_min_value(self) -> None: + results = _make_results([[5.0, 1.0, 3.0]], key="x") + m = min_value("x") + assert m.fn(results, 1) == 1.0 + + def test_metric_name(self) -> None: + assert final_value("pop").name == "final_pop" + assert trajectory_mean("x").name == "mean_x" + assert max_value("y").name == "max_y" + assert min_value("z").name == "min_z" + + +class TestAggregations: + def test_mean_agg(self) -> None: + assert mean_agg.fn([10.0, 20.0, 30.0]) == 20.0 + + def test_mean_agg_empty(self) -> None: + assert mean_agg.fn([]) == 0.0 + + def test_std_agg(self) -> None: + vals = [10.0, 20.0, 30.0] + result = std_agg.fn(vals) + assert result == pytest.approx(10.0) + + def test_std_agg_single(self) -> None: + assert std_agg.fn([5.0]) == 0.0 + + def test_percentile_agg(self) -> None: + agg = percentile_agg(50) + assert agg.fn([1.0, 2.0, 3.0, 4.0, 5.0]) == 3.0 + + def test_percentile_agg_boundary(self) -> None: + assert percentile_agg(0).fn([10.0, 20.0, 30.0]) == 10.0 + assert percentile_agg(100).fn([10.0, 20.0, 30.0]) == 30.0 + + def test_percentile_agg_empty(self) -> None: + assert percentile_agg(50).fn([]) == 0.0 + + def test_probability_above(self) -> None: + agg = probability_above(15.0) + assert agg.fn([10.0, 20.0, 30.0]) == pytest.approx(2 / 3) + + def test_probability_above_empty(self) -> None: + assert probability_above(0).fn([]) == 0.0 + + def test_probability_below(self) -> None: + agg = probability_below(25.0) + assert agg.fn([10.0, 20.0, 30.0]) == pytest.approx(2 / 3) + + def test_probability_below_empty(self) -> None: + assert probability_below(0).fn([]) == 0.0 + + +class TestComposableKPI: + def test_metric_based_kpi(self) -> None: + results = _make_results([[10.0, 20.0], [10.0, 40.0], [10.0, 60.0]], key="pop") + kpi = KPI(name="avg_final", metric=final_value("pop"), aggregation=mean_agg) + assert kpi.compute(results) == 40.0 + + def test_metric_defaults_to_mean(self) -> None: + results = _make_results([[10.0, 20.0], [10.0, 40.0]], key="pop") + kpi = KPI(name="avg_final", metric=final_value("pop")) + assert kpi.compute(results) == 30.0 # mean of [20, 40] + + def test_metric_with_percentile(self) -> None: + results = _make_results( + [[0.0, v] for v in [10.0, 20.0, 30.0, 40.0, 50.0]], key="x" + ) + kpi = KPI( + name="p50_x", + metric=final_value("x"), + aggregation=percentile_agg(50), + ) + assert kpi.compute(results) == 30.0 + + def test_metric_with_probability(self) -> None: + results = _make_results([[0.0, v] for v in [10.0, 20.0, 30.0]], key="x") + kpi = KPI( + name="risk", + metric=final_value("x"), + aggregation=probability_below(25.0), + ) + assert kpi.compute(results) == pytest.approx(2 / 3) + + def test_per_run(self) -> None: + results = _make_results([[10.0, 20.0], [10.0, 40.0], [10.0, 60.0]], key="pop") + kpi = KPI(name="final_pop", metric=final_value("pop")) + per_run = kpi.per_run(results) + assert per_run == [20.0, 40.0, 60.0] + + def test_per_run_on_fn_kpi_raises(self) -> None: + kpi = KPI(name="legacy", fn=lambda r: 0.0) + results = Results(state_keys=[]) + with pytest.raises(PsuuValidationError, match="metric-based"): + kpi.per_run(results) + + +class TestLegacyKPI: + def test_fn_based_still_works(self) -> None: + results = _make_results([[10.0, 20.0, 30.0]], key="pop") + kpi = KPI( + name="legacy", + fn=lambda r: final_state_mean(r, "pop"), + ) + assert kpi.compute(results) == 30.0 + + def test_validation_neither(self) -> None: + with pytest.raises( + (PsuuValidationError, ValueError), + match="either 'fn' or 'metric'", + ): + KPI(name="bad") + + def test_validation_both(self) -> None: + with pytest.raises( + (PsuuValidationError, ValueError), + match="cannot have both", + ): + KPI( + name="bad", + fn=lambda r: 0.0, + metric=final_value("x"), + ) + + +class TestEvaluatorDistributions: + def test_distributions_populated(self, simple_model: Model) -> None: + evaluator = Evaluator( + base_model=simple_model, + kpis=[ + KPI(name="final_pop", metric=final_value("population")), + ], + timesteps=5, + runs=3, + ) + result = evaluator.evaluate({"growth_rate": 0.05}) + assert "final_pop" in result.distributions + assert len(result.distributions["final_pop"]) == 3 + + def test_distributions_empty_for_legacy_kpi(self, simple_model: Model) -> None: + evaluator = Evaluator( + base_model=simple_model, + kpis=[ + KPI( + name="legacy_pop", + fn=lambda r: final_state_mean(r, "population"), + ), + ], + timesteps=5, + runs=1, + ) + result = evaluator.evaluate({"growth_rate": 0.05}) + assert "legacy_pop" not in result.distributions + + def test_mixed_kpis(self, simple_model: Model) -> None: + evaluator = Evaluator( + base_model=simple_model, + kpis=[ + KPI( + name="legacy", + fn=lambda r: final_state_mean(r, "population"), + ), + KPI(name="composable", metric=final_value("population")), + ], + timesteps=5, + runs=2, + ) + result = evaluator.evaluate({"growth_rate": 0.05}) + assert "legacy" not in result.distributions + assert "composable" in result.distributions + assert len(result.distributions["composable"]) == 2 + + +class TestSweepWithComposableKPI: + def test_sweep_composable(self, simple_model: Model) -> None: + sweep = Sweep( + model=simple_model, + space=ParameterSpace( + params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)} + ), + kpis=[ + KPI(name="avg_pop", metric=final_value("population")), + ], + optimizer=GridSearchOptimizer(n_steps=3), + timesteps=5, + runs=2, + ) + results = sweep.run() + assert len(results.evaluations) == 3 + # Each evaluation should have distributions + for ev in results.evaluations: + assert "avg_pop" in ev.distributions + assert len(ev.distributions["avg_pop"]) == 2 + + def test_sweep_legacy_still_works(self, simple_model: Model) -> None: + sweep = Sweep( + model=simple_model, + space=ParameterSpace( + params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)} + ), + kpis=[ + KPI( + name="final_pop", + fn=lambda r: final_state_mean(r, "population"), + ), + ], + optimizer=GridSearchOptimizer(n_steps=2), + timesteps=5, + runs=1, + ) + results = sweep.run() + assert len(results.evaluations) == 2 + best = results.best("final_pop") + assert best.params["growth_rate"] == 0.1