class Objective(BaseModel, ABC):
    """Abstract strategy that collapses a ``KPIScores`` mapping into one float.

    Instances are frozen pydantic models. Concrete subclasses implement
    :meth:`score`; ``SweepResults.best_by_objective`` selects the evaluation
    with the highest score, so larger values mean "better".
    """

    model_config = ConfigDict(frozen=True)

    @abstractmethod
    def score(self, kpi_scores: KPIScores) -> float:
        """Return the scalar objective value for ``kpi_scores``."""


class SingleKPI(Objective):
    """Objective that follows exactly one KPI."""

    # Key looked up in the KPI score mapping.
    name: str
    # When False the KPI value is negated so that "higher is better" holds.
    maximize: bool = True

    def score(self, kpi_scores: KPIScores) -> float:
        """Return the KPI named ``name``, sign-flipped when minimizing.

        A missing ``name`` is not handled here; the lookup error propagates.
        """
        raw = kpi_scores[self.name]
        if self.maximize:
            return raw
        return -raw


class WeightedSum(Objective):
    """Linear combination of several KPIs.

    Give a KPI a negative weight to minimize it.
    """

    # Maps KPI name -> multiplier applied to that KPI's score.
    weights: dict[str, float]

    @model_validator(mode="after")
    def _validate_nonempty(self) -> Self:
        """Reject an empty ``weights`` mapping at construction time."""
        if self.weights:
            return self
        raise PsuuValidationError("WeightedSum must have at least 1 weight")

    def score(self, kpi_scores: KPIScores) -> float:
        """Return the sum of ``weight * kpi_scores[name]`` over all weights.

        Every key in ``weights`` is looked up in ``kpi_scores``; a missing
        key is not handled here and the lookup error propagates.
        """
        total = 0
        for kpi_name, weight in self.weights.items():
            total += weight * kpi_scores[kpi_name]
        return total
try:
    import optuna

    _HAS_OPTUNA = True
except ImportError:  # pragma: no cover
    _HAS_OPTUNA = False


class BayesianOptimizer(Optimizer):
    """Bayesian optimization using optuna's TPE sampler.

    Requires ``optuna``. Install with::

        uv add gds-psuu[bayesian]

    Optimizes a single target KPI (by default the first one registered).

    The optimizer drives an optuna study through its ask-and-tell
    interface: :meth:`suggest` asks the study for a trial and samples one
    value per dimension, :meth:`observe` tells the study the target KPI's
    score for that trial. At most one trial is pending at a time.
    """

    def __init__(
        self,
        n_trials: int = 20,
        target_kpi: str | None = None,
        maximize: bool = True,
        seed: int | None = None,
    ) -> None:
        """Store the search configuration; no study exists until ``setup()``.

        Args:
            n_trials: Number of observed trials after which
                :meth:`is_exhausted` reports True.
            target_kpi: Name of the KPI to optimize. ``None`` means "use the
                first KPI name passed to :meth:`setup`".
            maximize: Optimization direction handed to the optuna study.
            seed: Seed forwarded to the TPE sampler.

        Raises:
            ImportError: If ``optuna`` could not be imported.
        """
        if not _HAS_OPTUNA:  # pragma: no cover
            raise ImportError(
                "optuna is required for BayesianOptimizer. "
                "Install with: uv add gds-psuu[bayesian]"
            )
        self._n_trials = n_trials
        self._target_kpi = target_kpi
        self._maximize = maximize
        self._seed = seed
        self._study: Any = None
        self._space: ParameterSpace | None = None
        self._param_names: list[str] = []
        self._count: int = 0
        self._current_trial: Any = None

    def setup(self, space: ParameterSpace, kpi_names: list[str]) -> None:
        """Bind the optimizer to ``space`` and start a fresh optuna study.

        Args:
            space: Parameter space whose dimensions will be sampled.
            kpi_names: Names of the KPIs that will appear in observed scores.

        Raises:
            PsuuSearchError: If the target KPI is not in ``kpi_names``, or
                if ``space`` contains a dimension that is not ``Continuous``,
                ``Integer`` or ``Discrete``.
        """
        if self._target_kpi is None:
            self._target_kpi = kpi_names[0]
        elif self._target_kpi not in kpi_names:
            raise PsuuSearchError(
                f"Target KPI '{self._target_kpi}' not found in {kpi_names}"
            )

        # Fail fast: suggest() can only sample these three dimension kinds,
        # and an unsampled dimension would silently vanish from every point.
        unsupported = [
            name
            for name, dim in space.params.items()
            if not isinstance(dim, (Continuous, Integer, Discrete))
        ]
        if unsupported:
            raise PsuuSearchError(
                f"BayesianOptimizer cannot sample dimensions {unsupported}: "
                "only Continuous, Integer and Discrete are supported"
            )

        self._space = space
        self._param_names = space.dimension_names

        sampler = optuna.samplers.TPESampler(seed=self._seed)
        direction = "maximize" if self._maximize else "minimize"
        # NOTE: this adjusts optuna's logger verbosity, not just this study's.
        optuna.logging.set_verbosity(optuna.logging.WARNING)
        self._study = optuna.create_study(
            direction=direction,
            sampler=sampler,
        )
        self._count = 0
        # Drop any trial left pending by an earlier run so it can never be
        # reported to the new study.
        self._current_trial = None

    def suggest(self) -> ParamPoint:
        """Ask the study for a new trial and return its sampled parameters.

        Returns:
            A mapping with one sampled value for every dimension in the
            space given to :meth:`setup`.

        Raises:
            PsuuSearchError: If a dimension has an unsupported type.
        """
        assert self._study is not None, "Call setup() before suggest()"
        assert self._space is not None

        trial = self._study.ask()
        point: ParamPoint = {}
        for name, dim in self._space.params.items():
            if isinstance(dim, Continuous):
                point[name] = trial.suggest_float(name, dim.min_val, dim.max_val)
            elif isinstance(dim, Integer):
                point[name] = trial.suggest_int(name, dim.min_val, dim.max_val)
            elif isinstance(dim, Discrete):
                point[name] = trial.suggest_categorical(name, list(dim.values))
            else:
                # Backstop for a space changed after setup(); never return a
                # point that is missing one of its dimensions.
                raise PsuuSearchError(
                    f"Unsupported dimension type for '{name}': "
                    f"{type(dim).__name__}"
                )
        # Only remember the trial once a complete point has been produced.
        self._current_trial = trial
        return point

    def observe(self, params: ParamPoint, scores: KPIScores) -> None:
        """Report the target KPI's score for the pending trial.

        ``params`` is not read here: the pending trial object obtained in
        :meth:`suggest` is what gets reported to the study.

        Args:
            params: The point previously returned by :meth:`suggest`.
            scores: KPI scores for that point; only the target KPI is used.
        """
        assert self._study is not None
        assert self._target_kpi is not None
        assert self._current_trial is not None
        value = scores[self._target_kpi]
        # The study was created with the right direction, so no sign flip.
        self._study.tell(self._current_trial, value)
        self._current_trial = None
        self._count += 1

    def is_exhausted(self) -> bool:
        """Return True once ``n_trials`` trials have been observed."""
        return self._count >= self._n_trials
def best_by_objective(self, objective: Objective) -> EvaluationSummary:
    """Return the evaluation with the best objective score.

    The objective reduces multiple KPI scores to a single scalar.
    Higher is better. An evaluation whose objective score is NaN ranks
    below every evaluation with a real score, so it is only returned when
    all scores are NaN. Ties go to the earliest evaluation.

    Args:
        objective: Object whose ``score(kpi_scores)`` yields the scalar
            used for ranking.

    Returns:
        An ``EvaluationSummary`` holding the params and scores of the
        winning evaluation.

    Raises:
        ValueError: If there are no evaluations.
    """
    import math

    if not self.evaluations:
        raise ValueError("No evaluations to search")

    def _rank(evaluation: EvaluationResult) -> tuple[bool, float]:
        value = objective.score(evaluation.scores)
        # Every comparison against NaN is False, so a bare max() would keep
        # whichever NaN it met first. The leading flag sorts NaN last.
        return (not math.isnan(value), value)

    best_eval = max(self.evaluations, key=_rank)
    return EvaluationSummary(params=best_eval.params, scores=best_eval.scores)
def _make_eval(params: dict, scores: dict) -> EvaluationResult:
    """Build an ``EvaluationResult`` with the given params/scores.

    The simulation payload is an empty ``Results`` and ``run_count`` is 1;
    the tests below only read ``params`` and ``scores``.
    """
    return EvaluationResult(
        params=params,
        scores=scores,
        results=Results(state_keys=[]),
        run_count=1,
    )


class TestSingleKPI:
    """``SingleKPI.score`` returns the named KPI, negated when minimizing."""

    def test_maximize(self) -> None:
        """Default direction returns the KPI value unchanged."""
        obj = SingleKPI(name="profit")
        assert obj.score({"profit": 100.0, "risk": 5.0}) == 100.0

    def test_minimize(self) -> None:
        """``maximize=False`` flips the sign of the KPI value."""
        obj = SingleKPI(name="risk", maximize=False)
        assert obj.score({"profit": 100.0, "risk": 5.0}) == -5.0


class TestWeightedSum:
    """``WeightedSum`` scoring and its non-empty validation."""

    def test_basic(self) -> None:
        """Two weights, one negative, combine linearly."""
        obj = WeightedSum(weights={"profit": 0.7, "risk": -0.3})
        score = obj.score({"profit": 100.0, "risk": 10.0})
        assert score == pytest.approx(0.7 * 100 + (-0.3) * 10)

    def test_single_weight(self) -> None:
        """A single weight simply scales its KPI."""
        obj = WeightedSum(weights={"kpi": 2.0})
        assert obj.score({"kpi": 50.0}) == 100.0

    def test_empty_weights_rejected(self) -> None:
        """An empty weight mapping fails at construction.

        Either exception type is accepted, as long as the message matches.
        """
        with pytest.raises(
            (PsuuValidationError, ValueError),
            match="at least 1 weight",
        ):
            WeightedSum(weights={})


class TestObjectiveProtocol:
    """The ``Objective`` base class itself is not usable."""

    def test_is_abstract(self) -> None:
        """Instantiating the abstract base raises ``TypeError``."""
        with pytest.raises(TypeError):
            Objective()  # type: ignore[abstract]


class TestBestByObjective:
    """``SweepResults.best_by_objective`` picks the highest-scoring evaluation."""

    def test_best_weighted_sum(self) -> None:
        """profit - risk is largest for the second evaluation."""
        evals = [
            _make_eval({"x": 1}, {"profit": 100.0, "risk": 50.0}),
            _make_eval({"x": 2}, {"profit": 80.0, "risk": 10.0}),
            _make_eval({"x": 3}, {"profit": 90.0, "risk": 30.0}),
        ]
        sr = SweepResults(
            evaluations=evals,
            kpi_names=["profit", "risk"],
            optimizer_name="test",
        )
        obj = WeightedSum(weights={"profit": 1.0, "risk": -1.0})
        # Scores: 50, 70, 60 → best is x=2
        best = sr.best_by_objective(obj)
        assert best.params == {"x": 2}

    def test_best_single_kpi(self) -> None:
        """With a single maximized KPI the larger value wins."""
        evals = [
            _make_eval({"x": 1}, {"kpi": 10.0}),
            _make_eval({"x": 2}, {"kpi": 30.0}),
        ]
        sr = SweepResults(
            evaluations=evals,
            kpi_names=["kpi"],
            optimizer_name="test",
        )
        best = sr.best_by_objective(SingleKPI(name="kpi"))
        assert best.params == {"x": 2}

    def test_best_by_objective_empty(self) -> None:
        """No evaluations at all is reported as a ``ValueError``."""
        sr = SweepResults(
            evaluations=[],
            kpi_names=["kpi"],
            optimizer_name="test",
        )
        with pytest.raises(ValueError, match="No evaluations"):
            sr.best_by_objective(SingleKPI(name="kpi"))


class TestSweepWithObjective:
    """``Sweep`` accepts an optional ``objective`` and still runs without one.

    ``simple_model`` is a fixture defined outside this file (presumably in
    conftest — not visible here).
    """

    def test_sweep_with_objective(self, simple_model: Model) -> None:
        """A sweep configured with an objective runs all three grid points."""
        sweep = Sweep(
            model=simple_model,
            space=ParameterSpace(
                params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)}
            ),
            kpis=[
                KPI(
                    name="final_pop",
                    fn=lambda r: final_state_mean(r, "population"),
                )
            ],
            objective=SingleKPI(name="final_pop"),
            optimizer=GridSearchOptimizer(n_steps=3),
            timesteps=5,
            runs=1,
        )
        results = sweep.run()
        assert len(results.evaluations) == 3

    def test_sweep_without_objective_backwards_compat(
        self, simple_model: Model
    ) -> None:
        """Omitting ``objective`` keeps the pre-existing workflow working."""
        sweep = Sweep(
            model=simple_model,
            space=ParameterSpace(
                params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)}
            ),
            kpis=[
                KPI(
                    name="final_pop",
                    fn=lambda r: final_state_mean(r, "population"),
                )
            ],
            optimizer=GridSearchOptimizer(n_steps=2),
            timesteps=5,
            runs=1,
        )
        results = sweep.run()
        assert len(results.evaluations) == 2
        # Old best() still works
        best = results.best("final_pop")
        assert "growth_rate" in best.params


# optuna is an optional extra; detect it once so the Bayesian tests can be
# skipped instead of failing when it is absent.
try:
    import optuna  # noqa: F401

    _has_optuna = True
except ImportError:
    _has_optuna = False


@pytest.mark.skipif(not _has_optuna, reason="optuna not installed")
class TestBayesianOptimizer:
    """Tests for ``BayesianOptimizer``; skipped when optuna is missing.

    ``BayesianOptimizer`` is imported inside each test rather than at the
    top of the file.
    """

    def test_bayesian_sweep(self, simple_model: Model) -> None:
        """A seeded 5-trial sweep yields exactly five evaluations."""
        from gds_psuu import BayesianOptimizer

        sweep = Sweep(
            model=simple_model,
            space=ParameterSpace(
                params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)}
            ),
            kpis=[
                KPI(
                    name="final_pop",
                    fn=lambda r: final_state_mean(r, "population"),
                )
            ],
            optimizer=BayesianOptimizer(n_trials=5, target_kpi="final_pop", seed=42),
            timesteps=5,
            runs=1,
        )
        results = sweep.run()
        assert len(results.evaluations) == 5

    def test_bayesian_minimize(self, simple_model: Model) -> None:
        """``maximize=False`` with no explicit target also runs five trials.

        Only the evaluation count is checked; the search direction is not.
        """
        from gds_psuu import BayesianOptimizer

        sweep = Sweep(
            model=simple_model,
            space=ParameterSpace(
                params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)}
            ),
            kpis=[
                KPI(
                    name="final_pop",
                    fn=lambda r: final_state_mean(r, "population"),
                )
            ],
            optimizer=BayesianOptimizer(n_trials=5, maximize=False, seed=42),
            timesteps=5,
            runs=1,
        )
        results = sweep.run()
        assert len(results.evaluations) == 5

    def test_bayesian_bad_target_kpi(self) -> None:
        """``setup`` rejects a target KPI that is not among the KPI names."""
        from gds_psuu import BayesianOptimizer
        from gds_psuu.errors import PsuuSearchError

        opt = BayesianOptimizer(n_trials=5, target_kpi="nonexistent")
        space = ParameterSpace(params={"x": Continuous(min_val=0, max_val=1)})
        with pytest.raises(PsuuSearchError, match="not found"):
            opt.setup(space, ["kpi_a"])

    def test_bayesian_defaults_to_first_kpi(self) -> None:
        """With no target given, ``setup`` selects the first KPI name."""
        from gds_psuu import BayesianOptimizer

        opt = BayesianOptimizer(n_trials=5, seed=0)
        space = ParameterSpace(params={"x": Continuous(min_val=0, max_val=1)})
        opt.setup(space, ["alpha", "beta"])
        # Reads a private attribute: there is no public accessor in view.
        assert opt._target_kpi == "alpha"