Multi-Model Cost Optimizer for Agentic Workloads
AgentCostOps helps you minimize LLM costs in production agentic systems through intelligent model routing, semantic caching, budget enforcement, and spending analytics.
- ModelRouter — Rule-based and classifier-based routing to select the optimal model per request
- SemanticCache — TF-IDF/embedding-based response caching to avoid redundant LLM calls
- BudgetManager — Per-project spending tracking with limits, alerts, and throttling
- CostAnalyzer — Spending reports and actionable optimization opportunities
- PlanAndExecute — Two-phase orchestration pattern using cheap models for execution
pip install agentcostops
# With optional extras:
pip install agentcostops[embeddings,redis]

from agentcostops.router import ModelConfig, RuleBasedRouter
from agentcostops.cache import SemanticCache
from agentcostops.budget import BudgetManager
from agentcostops.analytics import CostAnalyzer
# 1. Configure models
router = RuleBasedRouter(models={
"frontier": ModelConfig(
name="gpt-4o",
cost_per_1k_input=0.005,
cost_per_1k_output=0.015,
capabilities=["complex_reasoning", "code_generation"],
),
"mid_tier": ModelConfig(
name="gpt-4o-mini",
cost_per_1k_input=0.00015,
cost_per_1k_output=0.0006,
capabilities=["summarization", "extraction"],
),
})
# 2. Route a request
result = router.route(task="Summarize this document", quality_requirement="standard")
print(result.selected_model) # "mid_tier"
print(f"Est. cost: ${result.estimated_cost:.6f}")
# 3. Cache responses semantically
cache = SemanticCache(similarity_threshold=0.92)
response = cache.get_or_generate(
prompt="What is the return policy?",
generator=lambda p: call_your_llm(p),
cost_per_call=0.01,
)
print(response.hit) # False on first call, True on similar follow-ups
# 4. Track budget
budget = BudgetManager(limits={"daily": 50.0, "monthly": 1000.0, "per_request": 0.10})
budget.record(project="my_agent", cost=0.023, model="gpt-4o-mini", tokens_in=500, tokens_out=200)
report = budget.report(project="my_agent")
print(f"Month spend: ${report.month_spend:.4f}")
# 5. Analyze opportunities
analyzer = CostAnalyzer(budget)
insights = analyzer.analyze(project="my_agent", period_days=7)
for opp in insights.opportunities:
    print(f"[{opp.priority.upper()}] {opp.category}: {opp.description}")

Apache 2.0 — see LICENSE.