diff --git a/src/conductor/engine/pricing.py b/src/conductor/engine/pricing.py index 6fcc6cd..675d72f 100644 --- a/src/conductor/engine/pricing.py +++ b/src/conductor/engine/pricing.py @@ -11,49 +11,66 @@ @dataclass(frozen=True) class ModelPricing: - """Pricing per million tokens for a model. + """Pricing and metadata per model. Attributes: input_per_mtok: Cost per million input tokens (USD). output_per_mtok: Cost per million output tokens (USD). cache_read_per_mtok: Cost per million cache read tokens (USD). cache_write_per_mtok: Cost per million cache write tokens (USD). + context_window: Context window size in tokens, or None if unknown. """ input_per_mtok: float output_per_mtok: float cache_read_per_mtok: float = 0.0 cache_write_per_mtok: float = 0.0 + context_window: int | None = None -# Default pricing table (January 2026) -# Sources: OpenAI pricing page, Anthropic pricing page +# Default model table (pricing + context window metadata) +# Sources: OpenAI pricing page, Anthropic pricing page, provider docs DEFAULT_PRICING: dict[str, ModelPricing] = { # OpenAI / Copilot models - "gpt-4-turbo": ModelPricing(input_per_mtok=10.00, output_per_mtok=30.00), - "gpt-4o": ModelPricing(input_per_mtok=2.50, output_per_mtok=10.00), - "gpt-4o-mini": ModelPricing(input_per_mtok=0.15, output_per_mtok=0.60), - "gpt-4.1-mini": ModelPricing(input_per_mtok=0.15, output_per_mtok=0.60), # Alias - "gpt-4": ModelPricing(input_per_mtok=30.00, output_per_mtok=60.00), - "gpt-3.5-turbo": ModelPricing(input_per_mtok=0.50, output_per_mtok=1.50), + "gpt-4-turbo": ModelPricing( + input_per_mtok=10.00, output_per_mtok=30.00, context_window=128_000 + ), + "gpt-4o": ModelPricing(input_per_mtok=2.50, output_per_mtok=10.00, context_window=128_000), + "gpt-4o-mini": ModelPricing(input_per_mtok=0.15, output_per_mtok=0.60, context_window=128_000), + "gpt-4.1": ModelPricing(input_per_mtok=2.00, output_per_mtok=8.00, context_window=1_047_576), + "gpt-4.1-mini": ModelPricing( + input_per_mtok=0.15, output_per_mtok=0.60, context_window=1_047_576 + ), + "gpt-4": ModelPricing(input_per_mtok=30.00, output_per_mtok=60.00, context_window=8_192), + "gpt-3.5-turbo": ModelPricing(input_per_mtok=0.50, output_per_mtok=1.50, context_window=16_385), + "gpt-5.2": ModelPricing(input_per_mtok=2.00, output_per_mtok=8.00, context_window=400_000), + "gpt-5.1": ModelPricing(input_per_mtok=2.00, output_per_mtok=8.00, context_window=400_000), + # O-series + "o1": ModelPricing(input_per_mtok=15.00, output_per_mtok=60.00, context_window=200_000), + "o1-mini": ModelPricing(input_per_mtok=3.00, output_per_mtok=12.00, context_window=128_000), + "o1-preview": ModelPricing(input_per_mtok=15.00, output_per_mtok=60.00, context_window=128_000), + "o3-mini": ModelPricing(input_per_mtok=1.10, output_per_mtok=4.40, context_window=200_000), # Claude 4.5 Series (newest) "claude-opus-4-5": ModelPricing( input_per_mtok=5.00, output_per_mtok=25.00, cache_read_per_mtok=0.50, cache_write_per_mtok=6.25, + context_window=200_000, ), "claude-sonnet-4-5": ModelPricing( input_per_mtok=3.00, output_per_mtok=15.00, cache_read_per_mtok=0.30, cache_write_per_mtok=3.75, + context_window=200_000, ), "claude-haiku-4-5": ModelPricing( input_per_mtok=1.00, output_per_mtok=5.00, cache_read_per_mtok=0.10, cache_write_per_mtok=1.25, + context_window=200_000, ), # Short aliases for Claude 4.5 Series (used in workflow files) "opus-4.5": ModelPricing( @@ -61,18 +78,43 @@ class ModelPricing: output_per_mtok=25.00, cache_read_per_mtok=0.50, cache_write_per_mtok=6.25, + context_window=200_000, ), "sonnet-4.5": ModelPricing( input_per_mtok=3.00, output_per_mtok=15.00, cache_read_per_mtok=0.30, cache_write_per_mtok=3.75, + context_window=200_000, ), "haiku-4.5": ModelPricing( input_per_mtok=1.00, output_per_mtok=5.00, cache_read_per_mtok=0.10, cache_write_per_mtok=1.25, + context_window=200_000, + ), + # Claude 4.6 Series + "claude-opus-4.6": ModelPricing( + input_per_mtok=5.00, + output_per_mtok=25.00, + cache_read_per_mtok=0.50, + cache_write_per_mtok=6.25, + context_window=1_000_000, + ), + "claude-opus-4.6-1m": ModelPricing( + input_per_mtok=5.00, + output_per_mtok=25.00, + cache_read_per_mtok=0.50, + cache_write_per_mtok=6.25, + context_window=1_000_000, + ), + "claude-sonnet-4.6": ModelPricing( + input_per_mtok=3.00, + output_per_mtok=15.00, + cache_read_per_mtok=0.30, + cache_write_per_mtok=3.75, + context_window=1_000_000, ), # Claude 4 Series "claude-opus-4": ModelPricing( @@ -80,57 +122,91 @@ class ModelPricing: output_per_mtok=75.00, cache_read_per_mtok=1.50, cache_write_per_mtok=18.75, + context_window=200_000, ), "claude-sonnet-4": ModelPricing( input_per_mtok=3.00, output_per_mtok=15.00, cache_read_per_mtok=0.30, cache_write_per_mtok=3.75, + context_window=200_000, ), "claude-haiku-4": ModelPricing( input_per_mtok=0.25, output_per_mtok=1.25, cache_read_per_mtok=0.03, cache_write_per_mtok=0.30, + context_window=200_000, ), - # Claude 3.7 Series (aliases to 4 series for backward compatibility) + # Claude 3.x Series "claude-3-7-sonnet": ModelPricing( input_per_mtok=3.00, output_per_mtok=15.00, cache_read_per_mtok=0.30, cache_write_per_mtok=3.75, + context_window=200_000, + ), + "claude-3.7-sonnet": ModelPricing( + input_per_mtok=3.00, + output_per_mtok=15.00, + cache_read_per_mtok=0.30, + cache_write_per_mtok=3.75, + context_window=200_000, ), - # Claude 3.5 Series "claude-3-5-sonnet": ModelPricing( input_per_mtok=3.00, output_per_mtok=15.00, cache_read_per_mtok=0.30, cache_write_per_mtok=3.75, + context_window=200_000, + ), + "claude-3.5-sonnet": ModelPricing( + input_per_mtok=3.00, + output_per_mtok=15.00, + cache_read_per_mtok=0.30, + cache_write_per_mtok=3.75, + context_window=200_000, ), "claude-3-5-haiku": ModelPricing( input_per_mtok=0.80, output_per_mtok=4.00, cache_read_per_mtok=0.08, cache_write_per_mtok=1.00, + context_window=200_000, + ), + "claude-3.5-haiku": ModelPricing( + input_per_mtok=0.80, + output_per_mtok=4.00, + cache_read_per_mtok=0.08, + cache_write_per_mtok=1.00, + context_window=200_000, ), - # Claude 3 Series (legacy) "claude-3-opus": ModelPricing( input_per_mtok=15.00, output_per_mtok=75.00, cache_read_per_mtok=1.50, cache_write_per_mtok=18.75, + context_window=200_000, ), "claude-3-sonnet": ModelPricing( input_per_mtok=3.00, output_per_mtok=15.00, cache_read_per_mtok=0.30, cache_write_per_mtok=3.75, + context_window=200_000, ), "claude-3-haiku": ModelPricing( input_per_mtok=0.25, output_per_mtok=1.25, cache_read_per_mtok=0.03, cache_write_per_mtok=0.30, + context_window=200_000, + ), + # Gemini + "gemini-3.1-pro-preview": ModelPricing( + input_per_mtok=1.25, + output_per_mtok=5.00, + context_window=1_000_000, ), } @@ -164,9 +240,11 @@ def get_pricing( # Try fuzzy matching for versioned model names # e.g., "claude-sonnet-4-20250514" -> "claude-sonnet-4" # e.g., "gpt-4o-2024-08-06" -> "gpt-4o" - for known_model, pricing in DEFAULT_PRICING.items(): + # Sort keys longest-first so "o1-mini" matches before "o1" + sorted_keys = sorted(DEFAULT_PRICING.keys(), key=lambda k: len(k), reverse=True) + for known_model in sorted_keys: if model.startswith(known_model): - return pricing + return DEFAULT_PRICING[known_model] # Try removing date suffix patterns for common formats # e.g., "claude-3-5-sonnet-20241022" -> "claude-3-5-sonnet" @@ -179,9 +257,9 @@ def get_pricing( return DEFAULT_PRICING[simplified] # Try matching simplified version against known models - for known_model, pricing in DEFAULT_PRICING.items(): + for known_model in sorted_keys: if simplified.startswith(known_model): - return pricing + return DEFAULT_PRICING[known_model] return None diff --git a/src/conductor/engine/workflow.py b/src/conductor/engine/workflow.py index 53ab0bd..509e986 100644 --- a/src/conductor/engine/workflow.py +++ b/src/conductor/engine/workflow.py @@ -449,6 +449,17 @@ async def _execute_script(self, agent: AgentDef, context: dict[str, Any]) -> Scr operation_name=f"script '{agent.name}'", ) + def _get_context_window_for_agent(self, agent: AgentDef) -> int | None: + """Return the context window size for an agent's model.""" + from conductor.engine.pricing import get_pricing + + model = agent.model + if not model: + return None + + pricing = get_pricing(model) + return pricing.context_window if pricing else None + async def run(self, inputs: dict[str, Any]) -> dict[str, Any]: """Execute the workflow from entry_point to $end. @@ -1179,6 +1190,7 @@ async def _execute_loop(self, current_agent_name: str) -> dict[str, Any]: "agent_name": agent.name, "iteration": agent_execution_count, "agent_type": agent.type or "agent", + "context_window_max": self._get_context_window_for_agent(agent), }, ) @@ -1417,6 +1429,8 @@ async def _execute_loop(self, current_agent_name: str) -> dict[str, Any]: "cost_usd": usage.cost_usd, "output": output.content, "output_keys": output_keys, + "context_window_used": output.input_tokens, + "context_window_max": self._get_context_window_for_agent(agent), }, ) @@ -2043,6 +2057,8 @@ async def execute_single_agent(agent: AgentDef) -> tuple[str, Any]: "model": output.model, "tokens": output.tokens_used, "cost_usd": usage.cost_usd, + "context_window_used": output.input_tokens, + "context_window_max": self._get_context_window_for_agent(agent), }, ) diff --git a/src/conductor/web/frontend/src/components/detail/MetadataGrid.tsx b/src/conductor/web/frontend/src/components/detail/MetadataGrid.tsx index e11e0f2..a1c08a3 100644 --- a/src/conductor/web/frontend/src/components/detail/MetadataGrid.tsx +++ b/src/conductor/web/frontend/src/components/detail/MetadataGrid.tsx @@ -1,4 +1,4 @@ -import { formatElapsed, formatCost, formatTokens } from '@/lib/utils'; +import { formatElapsed, formatCost, formatTokens, formatContextFull } from '@/lib/utils'; interface MetadataGridProps { items: Array<{ label: string; value: string | number | null | undefined }>; @@ -31,6 +31,8 @@ export function buildAgentMetadata(nd: { input_tokens?: number; output_tokens?: number; cost_usd?: number; + context_window_used?: number; + context_window_max?: number; iteration?: number; error_type?: string; error_message?: string; @@ -44,6 +46,9 @@ export function buildAgentMetadata(nd: { items.push({ label: 'In / Out', value: `${formatTokens(nd.input_tokens)} / ${formatTokens(nd.output_tokens)}` }); } if (nd.cost_usd != null) items.push({ label: 'Cost', value: formatCost(nd.cost_usd) }); + if (nd.context_window_used != null && nd.context_window_max != null) { + items.push({ label: 'Context', value: formatContextFull(nd.context_window_used, nd.context_window_max) }); + } if (nd.iteration != null) items.push({ label: 'Iteration', value: nd.iteration }); if (nd.error_type) items.push({ label: 'Error', value: nd.error_type }); if (nd.error_message) items.push({ label: 'Message', value: nd.error_message }); diff --git a/src/conductor/web/frontend/src/components/graph/AgentNode.tsx b/src/conductor/web/frontend/src/components/graph/AgentNode.tsx index ed7b69b..e56a52d 100644 --- a/src/conductor/web/frontend/src/components/graph/AgentNode.tsx +++ b/src/conductor/web/frontend/src/components/graph/AgentNode.tsx @@ -2,7 +2,7 @@ import { memo, useEffect, useRef, useState } from 'react'; import { Handle, Position, type NodeProps } from '@xyflow/react'; import { Bot } from 'lucide-react'; import { cn, formatElapsed, formatTokens, formatCost } from '@/lib/utils'; -import { NODE_STATUS_HEX } from '@/lib/constants'; +import { NODE_STATUS_HEX, CONTEXT_WARN_PCT, CONTEXT_DANGER_PCT } from '@/lib/constants'; import { useWorkflowStore } from '@/stores/workflow-store'; import { NodeTooltip } from './NodeTooltip'; import type { GraphNodeData } from './graph-layout'; @@ -23,6 +23,7 @@ export const AgentNode = memo(function AgentNode({ data, id, selected }: NodePro const iteration = useWorkflowStore((s) => s.nodes[id]?.iteration); const errorType = useWorkflowStore((s) => s.nodes[id]?.error_type); const errorMessage = useWorkflowStore((s) => s.nodes[id]?.error_message); + const contextPct = useWorkflowStore((s) => s.nodes[id]?.context_pct); // Live elapsed timer for running nodes const liveElapsed = useLiveElapsed(id, status); @@ -105,6 +106,23 @@ export const AgentNode = memo(function AgentNode({ data, id, selected }: NodePro )} + {/* Context window progress bar */} + {contextPct != null && ( +