diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8b3f0cb..ec218e3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -101,12 +101,21 @@ jobs: - name: Install dependencies run: uv sync --group dev + - name: Remove bundled Copilot CLI binary + run: | + # The github-copilot-sdk >=0.1.23 bundles a CLI binary that tries to + # authenticate with GitHub on startup. Remove it so tests that invoke + # the real CLI path fail fast instead of hanging on auth. + find .venv -path '*/copilot/bin/copilot*' -delete 2>/dev/null || true + - name: Run tests with coverage - run: uv run pytest --cov=src/conductor --cov-report=xml --cov-report=term-missing -m "not real_api" + timeout-minutes: 10 + run: uv run pytest --cov=src/conductor --cov-report=xml --cov-report=term-missing -m "not real_api and not performance" env: # Fake API key for mock tests to prevent accidental real API calls. # Real API tests (marked with @pytest.mark.real_api) are excluded from CI - # via the '-m "not real_api"' filter and must be run manually with valid key. + # via the marker filter. Performance tests are also excluded as they + # contain timing-sensitive assertions that are flaky on shared CI runners. # This ensures CI tests are fast, free, and don't leak credentials. ANTHROPIC_API_KEY: "sk-ant-test-fake-key-for-mocking" diff --git a/pyproject.toml b/pyproject.toml index 46e8f4d..c80a588 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "ruamel.yaml>=0.18.0", "jinja2>=3.1.0", "simpleeval>=1.0.0", - "github-copilot-sdk>=0.1.0,<0.1.31", # 0.1.31+ has permission-denied regression, see #27 + "github-copilot-sdk>=0.1.28,<0.1.31", # >=0.1.28 for on_permission_request; <0.1.31 regression, see #27 "anthropic>=0.77.0,<1.0.0", "mcp>=1.0.0", "fastapi>=0.115.0", diff --git a/src/conductor/config/schema.py b/src/conductor/config/schema.py index ae2a88a..1f1ad13 100644 --- a/src/conductor/config/schema.py +++ b/src/conductor/config/schema.py @@ -426,13 +426,23 @@ class AgentDef(BaseModel): """Per-script timeout in seconds.""" max_session_seconds: float | None = Field(None, ge=1.0) - """Maximum wall-clock duration for this agent's Copilot SDK session in seconds. + """Maximum wall-clock duration for this agent's session in seconds. Overrides the workflow-level runtime.max_session_seconds for this agent. - Only applies to Copilot provider agents (not script or human_gate). + Only applies to provider-backed agents (not script or human_gate). Example: A source-gathering agent that should finish in ~60s can set - max_session_seconds: 60 instead of using the default 30-minute timeout. + max_session_seconds: 60 instead of using the default timeout. + """ + + max_agent_iterations: int | None = Field(None, ge=1, le=500) + """Maximum tool-use iterations for this agent execution. + + Overrides the workflow-level runtime.max_agent_iterations for this agent. + Only applies to provider-backed agents (not script or human_gate). + + Example: A complex coding agent that needs many tool calls can set + max_agent_iterations: 200 instead of using the default limit. """ @field_validator("timeout") @@ -473,6 +483,8 @@ def validate_agent_type(self) -> AgentDef: raise ValueError("script agents cannot have 'options'") if self.max_session_seconds: raise ValueError("script agents cannot have 'max_session_seconds'") + if self.max_agent_iterations is not None: + raise ValueError("script agents cannot have 'max_agent_iterations'") return self @@ -579,15 +591,26 @@ class RuntimeConfig(BaseModel): """ max_session_seconds: float | None = Field(None, ge=1.0) - """Maximum wall-clock duration for Copilot SDK sessions in seconds. + """Maximum wall-clock duration for agent sessions in seconds. - Sets the default max_session_seconds for all agents using the Copilot provider. + Sets the default max_session_seconds for all agents. Individual agents can override this with their own max_session_seconds field. - Default is None, which uses the Copilot provider's built-in default (1800s / 30 min). + Default is None, which uses the provider's built-in default + (Copilot: 1800s / 30 min, Claude: unlimited). Set a lower value for workflows where agents should finish quickly. """ + max_agent_iterations: int | None = Field(None, ge=1, le=500) + """Maximum tool-use iterations per agent execution. + + Caps the number of tool-use roundtrips an agent can perform in a single + execution. This prevents runaway tool loops. + + Default is None, which uses the provider's built-in default + (Claude: 50, Copilot: unlimited). + """ + class WorkflowDef(BaseModel): """Top-level workflow configuration.""" diff --git a/src/conductor/providers/claude.py b/src/conductor/providers/claude.py index e4a1e8c..c518937 100644 --- a/src/conductor/providers/claude.py +++ b/src/conductor/providers/claude.py @@ -22,6 +22,7 @@ import json import logging import random +import time from typing import TYPE_CHECKING, Any, Protocol from pydantic import BaseModel @@ -112,6 +113,8 @@ def __init__( timeout: float = 600.0, retry_config: RetryConfig | None = None, mcp_servers: dict[str, Any] | None = None, + max_agent_iterations: int | None = None, + max_session_seconds: float | None = None, ) -> None: """Initialize the Claude provider. @@ -127,6 +130,10 @@ def __init__( retry_config: Optional retry configuration. Uses default if not provided. mcp_servers: Optional MCP server configurations for tool support. Each server config should have: command, args, env (optional). + max_agent_iterations: Maximum tool-use iterations per agent execution. + Defaults to 50 if not specified. + max_session_seconds: Maximum wall-clock duration for agent sessions. + Defaults to None (unlimited). Raises: ProviderError: If SDK is not installed. @@ -157,6 +164,10 @@ def __init__( self._retry_history: list[dict[str, Any]] = [] # For testing/debugging retries self._max_parse_recovery_attempts = 2 # Max retry attempts for malformed JSON self._max_schema_depth = 10 # Max nesting depth for recursive schema building + self._default_max_agent_iterations = ( + max_agent_iterations if max_agent_iterations is not None else 50 + ) + self._default_max_session_seconds = max_session_seconds # MCP server configuration for tool support self._mcp_servers_config = mcp_servers @@ -590,6 +601,18 @@ async def _execute_with_retry( temperature = self._default_temperature max_tokens = self._default_max_tokens + # Resolve per-agent iteration and session limits + max_agent_iterations = ( + agent.max_agent_iterations + if agent.max_agent_iterations is not None + else self._default_max_agent_iterations + ) + max_session_seconds = ( + agent.max_session_seconds + if agent.max_session_seconds is not None + else self._default_max_session_seconds + ) + # Validate max_tokens against model-specific limits if "haiku" in model.lower(): if max_tokens > 4096: @@ -639,6 +662,8 @@ async def _execute_with_retry( tools=request_tools, output_schema=agent.output, has_output_schema=has_output_schema, + max_iterations=max_agent_iterations, + max_session_seconds=max_session_seconds, interrupt_signal=interrupt_signal, event_callback=event_callback, ) @@ -882,7 +907,8 @@ async def _execute_agentic_loop( tools: list[dict[str, Any]] | None, output_schema: dict[str, OutputField] | None, has_output_schema: bool, - max_iterations: int = 10, + max_iterations: int = 50, + max_session_seconds: float | None = None, interrupt_signal: asyncio.Event | None = None, event_callback: EventCallback | None = None, ) -> tuple[ClaudeResponse, int | None, bool]: @@ -907,6 +933,8 @@ async def _execute_agentic_loop( output_schema: Expected output schema. has_output_schema: Whether agent has output schema defined. max_iterations: Maximum number of tool-use iterations to prevent infinite loops. + max_session_seconds: Maximum wall-clock duration for this agentic loop. + None means no time limit. interrupt_signal: Optional event that signals a mid-agent interrupt. event_callback: Optional callback for streaming SDK events upstream. @@ -920,11 +948,22 @@ async def _execute_agentic_loop( working_messages = list(messages) total_tokens = 0 iteration = 0 + session_start = time.monotonic() while iteration < max_iterations: iteration += 1 logger.debug(f"Agentic loop iteration {iteration}/{max_iterations}") + # Check wall-clock session timeout + if max_session_seconds is not None: + elapsed = time.monotonic() - session_start + if elapsed > max_session_seconds: + raise ProviderError( + f"Agent exceeded maximum session duration of {max_session_seconds:.0f}s " + f"after {iteration} tool-use iterations", + is_retryable=False, + ) + # Emit turn start event if event_callback: try: diff --git a/src/conductor/providers/copilot.py b/src/conductor/providers/copilot.py index f9d6c88..7583783 100644 --- a/src/conductor/providers/copilot.py +++ b/src/conductor/providers/copilot.py @@ -149,6 +149,7 @@ def __init__( mcp_servers: dict[str, Any] | None = None, idle_recovery_config: IdleRecoveryConfig | None = None, temperature: float | None = None, + max_agent_iterations: int | None = None, ) -> None: """Initialize the Copilot provider. @@ -164,6 +165,8 @@ def __init__( idle_recovery_config: Optional idle detection and recovery configuration. Uses default if not provided. temperature: Default temperature for generation (0.0-1.0). Optional. + max_agent_iterations: Maximum tool-use iterations per agent execution. + None means no iteration limit (only wall-clock timeout applies). """ self._client: Any = None # Will hold Copilot SDK client self._mock_handler = mock_handler @@ -176,6 +179,7 @@ def __init__( self._start_lock = asyncio.Lock() self._idle_recovery_config = idle_recovery_config or IdleRecoveryConfig() self._temperature = temperature + self._default_max_agent_iterations = max_agent_iterations self._session_ids: dict[str, str] = {} self._resume_session_ids: dict[str, str] = {} self._interrupted_session: Any = None @@ -490,6 +494,13 @@ async def _execute_sdk_call( agent.max_session_seconds or self._idle_recovery_config.max_session_seconds ) + # Resolve per-agent max_agent_iterations override + effective_max_iterations = ( + agent.max_agent_iterations + if agent.max_agent_iterations is not None + else self._default_max_agent_iterations + ) + session_destroyed = False try: # Send initial prompt and get response @@ -501,6 +512,7 @@ async def _execute_sdk_call( interrupt_signal=interrupt_signal, event_callback=event_callback, max_session_seconds=effective_max_session, + max_agent_iterations=effective_max_iterations, ) response_content = sdk_response.content @@ -628,6 +640,7 @@ async def _send_and_wait( interrupt_signal: asyncio.Event | None = None, event_callback: EventCallback | None = None, max_session_seconds: float | None = None, + max_agent_iterations: int | None = None, ) -> SDKResponse: """Send a prompt to the session and wait for response. @@ -642,6 +655,8 @@ async def _send_and_wait( event_callback: Optional callback for streaming SDK events upstream. max_session_seconds: Per-agent wall-clock session limit override. If None, uses the provider-level IdleRecoveryConfig default. + max_agent_iterations: Maximum tool-use iterations for this session. + None means no iteration limit. Returns: SDKResponse with content and usage data. If interrupted, @@ -661,6 +676,9 @@ async def _send_and_wait( # Mutable container for usage data: [input_tokens, output_tokens, cache_read, cache_write] usage_ref: list[int | None] = [None, None, None, None] + # Mutable container for tool iteration counting + tool_iteration_ref: list[int] = [0] + def on_event(event: Any) -> None: nonlocal response_content, error_message event_type = event.type.value if hasattr(event.type, "value") else str(event.type) @@ -712,6 +730,8 @@ def on_event(event: Any) -> None: event.data, "name", "unknown" ) last_activity_ref[1] = tool_name + # Count tool-use iterations + tool_iteration_ref[0] += 1 # Forward structured events upstream via event_callback if event_callback is not None: @@ -753,6 +773,8 @@ def on_event(event: Any) -> None: full_enabled, last_activity_ref, max_session_seconds=max_session_seconds, + tool_iteration_ref=tool_iteration_ref, + max_agent_iterations=max_agent_iterations, ) if error_message: @@ -1309,6 +1331,8 @@ async def _wait_with_idle_detection( full_enabled: bool, last_activity_ref: list[Any], max_session_seconds: float | None = None, + tool_iteration_ref: list[int] | None = None, + max_agent_iterations: int | None = None, ) -> None: """Wait for session completion with idle detection and recovery. @@ -1326,10 +1350,14 @@ async def _wait_with_idle_detection( for tracking last activity. max_session_seconds: Per-agent wall-clock session limit override. If None, uses the provider-level IdleRecoveryConfig default. + tool_iteration_ref: Mutable [count] tracking tool execution starts. + max_agent_iterations: Maximum tool-use iterations allowed. + None means no iteration limit. Raises: - ProviderError: If all recovery attempts are exhausted, or if the - session exceeds max_session_seconds wall-clock duration. + ProviderError: If all recovery attempts are exhausted, if the + session exceeds max_session_seconds wall-clock duration, or + if max_agent_iterations is exceeded. """ recovery_attempts = 0 idle_timeout = self._idle_recovery_config.idle_timeout_seconds @@ -1364,6 +1392,22 @@ async def _wait_with_idle_detection( is_retryable=False, # Don't retry — same root cause will recur ) + # Check tool-use iteration limit + if ( + max_agent_iterations is not None + and tool_iteration_ref is not None + and tool_iteration_ref[0] > max_agent_iterations + ): + raise ProviderError( + f"Agent exceeded maximum tool-use iterations ({max_agent_iterations})", + suggestion=( + "The agent performed too many tool calls. " + "Increase max_agent_iterations in runtime config or per-agent " + "settings if the agent legitimately needs more iterations." + ), + is_retryable=False, + ) + try: # Wait for done with idle timeout await asyncio.wait_for( diff --git a/src/conductor/providers/factory.py b/src/conductor/providers/factory.py index 26c29be..324c1c7 100644 --- a/src/conductor/providers/factory.py +++ b/src/conductor/providers/factory.py @@ -23,6 +23,7 @@ async def create_provider( max_tokens: int | None = None, timeout: float | None = None, max_session_seconds: float | None = None, + max_agent_iterations: int | None = None, ) -> AgentProvider: """Factory function to create the appropriate provider. @@ -41,8 +42,8 @@ async def create_provider( temperature: Default temperature for generation (0.0-1.0). max_tokens: Maximum output tokens. timeout: Request timeout in seconds. - max_session_seconds: Maximum wall-clock duration for Copilot SDK sessions. - Only applies to the Copilot provider. + max_session_seconds: Maximum wall-clock duration for agent sessions. + max_agent_iterations: Maximum tool-use iterations per agent execution. Returns: Configured AgentProvider instance. @@ -67,6 +68,7 @@ async def create_provider( model=default_model, temperature=temperature, idle_recovery_config=idle_recovery_config, + max_agent_iterations=max_agent_iterations, ) case "openai-agents": raise ProviderError( @@ -85,6 +87,8 @@ async def create_provider( max_tokens=max_tokens, timeout=timeout if timeout is not None else 600.0, mcp_servers=mcp_servers, + max_agent_iterations=max_agent_iterations, + max_session_seconds=max_session_seconds, ) case _: raise ProviderError( @@ -135,6 +139,7 @@ async def create_provider( max_tokens = getattr(runtime_config, "max_tokens", None) timeout = getattr(runtime_config, "timeout", None) max_session_seconds = getattr(runtime_config, "max_session_seconds", None) + max_agent_iterations = getattr(runtime_config, "max_agent_iterations", None) return await create_provider( provider_type=provider_type, @@ -144,4 +149,5 @@ async def create_provider( max_tokens=max_tokens, timeout=timeout, max_session_seconds=max_session_seconds, + max_agent_iterations=max_agent_iterations, ) diff --git a/src/conductor/providers/registry.py b/src/conductor/providers/registry.py index b33894c..0236200 100644 --- a/src/conductor/providers/registry.py +++ b/src/conductor/providers/registry.py @@ -117,6 +117,7 @@ async def _get_or_create_provider(self, provider_type: ProviderType) -> AgentPro max_tokens=runtime.max_tokens, timeout=runtime.timeout, max_session_seconds=runtime.max_session_seconds, + max_agent_iterations=runtime.max_agent_iterations, ) # Pass stored resume session IDs to newly created providers diff --git a/tests/test_agent_iteration_limits.py b/tests/test_agent_iteration_limits.py new file mode 100644 index 0000000..bf4b233 --- /dev/null +++ b/tests/test_agent_iteration_limits.py @@ -0,0 +1,482 @@ +"""Tests for configurable agent iteration limits and session timeouts. + +Covers: +- Schema validation for max_agent_iterations on RuntimeConfig and AgentDef +- Factory threading of max_agent_iterations to both providers +- Claude provider defaults, per-agent overrides, and session timeout +- Copilot provider iteration counting and enforcement +""" + +from __future__ import annotations + +import asyncio +import time +from typing import Any +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from pydantic import ValidationError + +from conductor.config.schema import AgentDef, RuntimeConfig +from conductor.exceptions import ProviderError +from conductor.providers.copilot import CopilotProvider +from conductor.providers.factory import create_provider + +# --------------------------------------------------------------------------- +# Schema tests +# --------------------------------------------------------------------------- + + +class TestRuntimeConfigMaxAgentIterations: + """RuntimeConfig.max_agent_iterations validation.""" + + def test_default_is_none(self) -> None: + rc = RuntimeConfig() + assert rc.max_agent_iterations is None + + def test_valid_value(self) -> None: + rc = RuntimeConfig(max_agent_iterations=100) + assert rc.max_agent_iterations == 100 + + def test_min_value(self) -> None: + rc = RuntimeConfig(max_agent_iterations=1) + assert rc.max_agent_iterations == 1 + + def test_max_value(self) -> None: + rc = RuntimeConfig(max_agent_iterations=500) + assert rc.max_agent_iterations == 500 + + def test_rejects_zero(self) -> None: + with pytest.raises(ValidationError): + RuntimeConfig(max_agent_iterations=0) + + def test_rejects_negative(self) -> None: + with pytest.raises(ValidationError): + RuntimeConfig(max_agent_iterations=-1) + + def test_rejects_over_500(self) -> None: + with pytest.raises(ValidationError): + RuntimeConfig(max_agent_iterations=501) + + +class TestAgentDefMaxAgentIterations: + """AgentDef.max_agent_iterations validation.""" + + def test_default_is_none(self) -> None: + agent = AgentDef(name="test", prompt="do stuff") + assert agent.max_agent_iterations is None + + def test_valid_value(self) -> None: + agent = AgentDef(name="test", prompt="do stuff", max_agent_iterations=200) + assert agent.max_agent_iterations == 200 + + def test_rejects_zero(self) -> None: + with pytest.raises(ValidationError): + AgentDef(name="test", prompt="do stuff", max_agent_iterations=0) + + def test_rejects_negative(self) -> None: + with pytest.raises(ValidationError): + AgentDef(name="test", prompt="do stuff", max_agent_iterations=-5) + + def test_rejects_over_500(self) -> None: + with pytest.raises(ValidationError): + AgentDef(name="test", prompt="do stuff", max_agent_iterations=501) + + def test_script_agent_rejects_max_agent_iterations(self) -> None: + with pytest.raises(ValidationError, match="max_agent_iterations"): + AgentDef( + name="test", + type="script", + command="echo hi", + max_agent_iterations=10, + ) + + def test_script_agent_without_max_agent_iterations_ok(self) -> None: + agent = AgentDef(name="test", type="script", command="echo hi") + assert agent.max_agent_iterations is None + + +# --------------------------------------------------------------------------- +# Factory tests +# --------------------------------------------------------------------------- + + +class TestFactoryMaxAgentIterations: + """max_agent_iterations flows through create_provider.""" + + @patch("conductor.providers.factory.ANTHROPIC_SDK_AVAILABLE", True) + @patch("conductor.providers.claude.AsyncAnthropic") + @patch("conductor.providers.claude.anthropic") + @pytest.mark.asyncio + async def test_flows_to_claude_provider( + self, mock_anthropic_module: Any, mock_anthropic_class: Any + ) -> None: + mock_anthropic_module.__version__ = "0.77.0" + mock_client = MagicMock() + mock_client.models.list = AsyncMock(return_value=MagicMock(data=[])) + mock_anthropic_class.return_value = mock_client + + provider = await create_provider("claude", validate=False, max_agent_iterations=100) + assert provider._default_max_agent_iterations == 100 + + @pytest.mark.asyncio + async def test_flows_to_copilot_provider(self) -> None: + provider = await create_provider("copilot", validate=False, max_agent_iterations=75) + assert isinstance(provider, CopilotProvider) + assert provider._default_max_agent_iterations == 75 + await provider.close() + + @pytest.mark.asyncio + async def test_copilot_default_is_none(self) -> None: + provider = await create_provider("copilot", validate=False) + assert isinstance(provider, CopilotProvider) + assert provider._default_max_agent_iterations is None + await provider.close() + + @patch("conductor.providers.factory.ANTHROPIC_SDK_AVAILABLE", True) + @patch("conductor.providers.claude.AsyncAnthropic") + @patch("conductor.providers.claude.anthropic") + @pytest.mark.asyncio + async def test_claude_default_is_50( + self, mock_anthropic_module: Any, mock_anthropic_class: Any + ) -> None: + mock_anthropic_module.__version__ = "0.77.0" + mock_client = MagicMock() + mock_client.models.list = AsyncMock(return_value=MagicMock(data=[])) + mock_anthropic_class.return_value = mock_client + + provider = await create_provider("claude", validate=False) + assert provider._default_max_agent_iterations == 50 + + +class TestFactoryMaxSessionSeconds: + """max_session_seconds flows to Claude provider.""" + + @patch("conductor.providers.factory.ANTHROPIC_SDK_AVAILABLE", True) + @patch("conductor.providers.claude.AsyncAnthropic") + @patch("conductor.providers.claude.anthropic") + @pytest.mark.asyncio + async def test_flows_to_claude_provider( + self, mock_anthropic_module: Any, mock_anthropic_class: Any + ) -> None: + mock_anthropic_module.__version__ = "0.77.0" + mock_client = MagicMock() + mock_client.models.list = AsyncMock(return_value=MagicMock(data=[])) + mock_anthropic_class.return_value = mock_client + + provider = await create_provider("claude", validate=False, max_session_seconds=900.0) + assert provider._default_max_session_seconds == 900.0 + + @patch("conductor.providers.factory.ANTHROPIC_SDK_AVAILABLE", True) + @patch("conductor.providers.claude.AsyncAnthropic") + @patch("conductor.providers.claude.anthropic") + @pytest.mark.asyncio + async def test_claude_default_is_none( + self, mock_anthropic_module: Any, mock_anthropic_class: Any + ) -> None: + mock_anthropic_module.__version__ = "0.77.0" + mock_client = MagicMock() + mock_client.models.list = AsyncMock(return_value=MagicMock(data=[])) + mock_anthropic_class.return_value = mock_client + + provider = await create_provider("claude", validate=False) + assert provider._default_max_session_seconds is None + + +# --------------------------------------------------------------------------- +# Claude provider tests +# --------------------------------------------------------------------------- + + +class TestClaudeProviderIterationLimit: + """Claude provider _execute_agentic_loop respects iteration and session limits.""" + + @patch("conductor.providers.claude.ANTHROPIC_SDK_AVAILABLE", True) + @patch("conductor.providers.claude.AsyncAnthropic") + @patch("conductor.providers.claude.anthropic") + def test_default_max_agent_iterations_is_50( + self, mock_anthropic_module: Any, mock_anthropic_class: Any + ) -> None: + from conductor.providers.claude import ClaudeProvider + + mock_anthropic_module.__version__ = "0.77.0" + mock_anthropic_class.return_value = MagicMock() + + provider = ClaudeProvider() + assert provider._default_max_agent_iterations == 50 + + @patch("conductor.providers.claude.ANTHROPIC_SDK_AVAILABLE", True) + @patch("conductor.providers.claude.AsyncAnthropic") + @patch("conductor.providers.claude.anthropic") + def test_custom_max_agent_iterations( + self, mock_anthropic_module: Any, mock_anthropic_class: Any + ) -> None: + from conductor.providers.claude import ClaudeProvider + + mock_anthropic_module.__version__ = "0.77.0" + mock_anthropic_class.return_value = MagicMock() + + provider = ClaudeProvider(max_agent_iterations=200) + assert provider._default_max_agent_iterations == 200 + + @patch("conductor.providers.claude.ANTHROPIC_SDK_AVAILABLE", True) + @patch("conductor.providers.claude.AsyncAnthropic") + @patch("conductor.providers.claude.anthropic") + def test_max_session_seconds_stored( + self, mock_anthropic_module: Any, mock_anthropic_class: Any + ) -> None: + from conductor.providers.claude import ClaudeProvider + + mock_anthropic_module.__version__ = "0.77.0" + mock_anthropic_class.return_value = MagicMock() + + provider = ClaudeProvider(max_session_seconds=600.0) + assert provider._default_max_session_seconds == 600.0 + + @patch("conductor.providers.claude.ANTHROPIC_SDK_AVAILABLE", True) + @patch("conductor.providers.claude.AsyncAnthropic") + @patch("conductor.providers.claude.anthropic") + def test_max_session_seconds_default_none( + self, mock_anthropic_module: Any, mock_anthropic_class: Any + ) -> None: + from conductor.providers.claude import ClaudeProvider + + mock_anthropic_module.__version__ = "0.77.0" + mock_anthropic_class.return_value = MagicMock() + + provider = ClaudeProvider() + assert provider._default_max_session_seconds is None + + @patch("conductor.providers.claude.ANTHROPIC_SDK_AVAILABLE", True) + @patch("conductor.providers.claude.AsyncAnthropic") + @patch("conductor.providers.claude.anthropic") + @pytest.mark.asyncio + async def test_session_timeout_fires( + self, mock_anthropic_module: Any, mock_anthropic_class: Any + ) -> None: + """Test that _execute_agentic_loop raises ProviderError on session timeout.""" + from conductor.providers.claude import ClaudeProvider + + mock_anthropic_module.__version__ = "0.77.0" + mock_client = MagicMock() + mock_anthropic_class.return_value = mock_client + + provider = ClaudeProvider() + + # Create a mock response that always returns tool_use (never terminates) + mock_tool_use_block = MagicMock() + mock_tool_use_block.type = "tool_use" + mock_tool_use_block.name = "some_tool" + mock_tool_use_block.id = "tool_1" + mock_tool_use_block.input = {"arg": "value"} + + mock_response = MagicMock() + mock_response.content = [mock_tool_use_block] + mock_response.usage = MagicMock(input_tokens=10, output_tokens=20) + + # Mock the API call and MCP manager + provider._execute_api_call = AsyncMock(return_value=mock_response) + mock_mcp = MagicMock() + mock_mcp.has_servers.return_value = True + mock_mcp.call_tool = AsyncMock(return_value="tool result") + provider._mcp_manager = mock_mcp + + # Use a very short session timeout and mock time to trigger it + call_count = [0] + + def mock_monotonic() -> float: + call_count[0] += 1 + # First call returns start time, subsequent calls return past the timeout + if call_count[0] <= 1: + return 1000.0 + return 1002.0 # 2 seconds past start + + with patch("conductor.providers.claude.time") as mock_time: + mock_time.monotonic = mock_monotonic + + with pytest.raises(ProviderError, match="maximum session duration"): + await provider._execute_agentic_loop( + messages=[{"role": "user", "content": "test"}], + model="test-model", + temperature=None, + max_tokens=1024, + tools=None, + output_schema=None, + has_output_schema=False, + max_iterations=100, + max_session_seconds=1.0, # 1 second timeout + ) + + @patch("conductor.providers.claude.ANTHROPIC_SDK_AVAILABLE", True) + @patch("conductor.providers.claude.AsyncAnthropic") + @patch("conductor.providers.claude.anthropic") + @pytest.mark.asyncio + async def test_per_agent_override_wins( + self, mock_anthropic_module: Any, mock_anthropic_class: Any + ) -> None: + """Test that agent-level max_agent_iterations overrides provider default.""" + from conductor.providers.claude import ClaudeProvider + + mock_anthropic_module.__version__ = "0.77.0" + mock_client = MagicMock() + mock_anthropic_class.return_value = mock_client + + provider = ClaudeProvider(max_agent_iterations=50) + + # Create an agent with per-agent override + agent = AgentDef(name="test", prompt="do stuff", max_agent_iterations=200) + + # The resolution happens in _execute_with_retry, check it directly + resolved = ( + agent.max_agent_iterations + if agent.max_agent_iterations is not None + else provider._default_max_agent_iterations + ) + assert resolved == 200 + + @patch("conductor.providers.claude.ANTHROPIC_SDK_AVAILABLE", True) + @patch("conductor.providers.claude.AsyncAnthropic") + @patch("conductor.providers.claude.anthropic") + @pytest.mark.asyncio + async def test_provider_default_used_when_agent_has_none( + self, mock_anthropic_module: Any, mock_anthropic_class: Any + ) -> None: + """Test that provider default is used when agent doesn't override.""" + from conductor.providers.claude import ClaudeProvider + + mock_anthropic_module.__version__ = "0.77.0" + mock_client = MagicMock() + mock_anthropic_class.return_value = mock_client + + provider = ClaudeProvider(max_agent_iterations=75) + + agent = AgentDef(name="test", prompt="do stuff") + + resolved = ( + agent.max_agent_iterations + if agent.max_agent_iterations is not None + else provider._default_max_agent_iterations + ) + assert resolved == 75 + + +# --------------------------------------------------------------------------- +# Copilot provider tests +# --------------------------------------------------------------------------- + + +class TestCopilotProviderIterationLimit: + """Copilot provider iteration counting in _wait_with_idle_detection.""" + + def test_default_max_agent_iterations_is_none(self) -> None: + provider = CopilotProvider(mock_handler=lambda a, p, c: {"result": "ok"}) + assert provider._default_max_agent_iterations is None + + def test_custom_max_agent_iterations(self) -> None: + provider = CopilotProvider( + mock_handler=lambda a, p, c: {"result": "ok"}, + max_agent_iterations=42, + ) + assert provider._default_max_agent_iterations == 42 + + @pytest.mark.asyncio + async def test_iteration_limit_raises_provider_error(self) -> None: + """Test that exceeding max_agent_iterations raises ProviderError.""" + provider = CopilotProvider(mock_handler=lambda a, p, c: {"result": "ok"}) + + done = asyncio.Event() + last_activity_ref: list[Any] = [None, None, time.monotonic()] + + # Simulate 11 tool iterations already counted + tool_iteration_ref = [11] + + with pytest.raises(ProviderError, match="maximum tool-use iterations"): + await provider._wait_with_idle_detection( + done=done, + session=MagicMock(), + verbose_enabled=False, + full_enabled=False, + last_activity_ref=last_activity_ref, + max_session_seconds=9999.0, # large enough to not trigger + tool_iteration_ref=tool_iteration_ref, + max_agent_iterations=10, + ) + + @pytest.mark.asyncio + async def test_no_limit_when_none(self) -> None: + """Test that None max_agent_iterations means no iteration limit.""" + provider = CopilotProvider(mock_handler=lambda a, p, c: {"result": "ok"}) + + done = asyncio.Event() + done.set() # Signal completion immediately + last_activity_ref: list[Any] = [None, None, time.monotonic()] + + # Even with high iteration count, should not raise when limit is None + tool_iteration_ref = [9999] + + # Should complete without error (done is already set) + await provider._wait_with_idle_detection( + done=done, + session=MagicMock(), + verbose_enabled=False, + full_enabled=False, + last_activity_ref=last_activity_ref, + max_session_seconds=9999.0, + tool_iteration_ref=tool_iteration_ref, + max_agent_iterations=None, + ) + + @pytest.mark.asyncio + async def test_within_limit_completes_normally(self) -> None: + """Test that within-limit iterations complete normally.""" + provider = CopilotProvider(mock_handler=lambda a, p, c: {"result": "ok"}) + + done = asyncio.Event() + done.set() # Signal completion immediately + last_activity_ref: list[Any] = [None, None, time.monotonic()] + + tool_iteration_ref = [5] + + # Should complete without error + await provider._wait_with_idle_detection( + done=done, + session=MagicMock(), + verbose_enabled=False, + full_enabled=False, + last_activity_ref=last_activity_ref, + max_session_seconds=9999.0, + tool_iteration_ref=tool_iteration_ref, + max_agent_iterations=10, + ) + + def test_per_agent_override_resolution(self) -> None: + """Test that agent-level max_agent_iterations overrides provider default.""" + provider = CopilotProvider( + mock_handler=lambda a, p, c: {"result": "ok"}, + max_agent_iterations=50, + ) + + agent = AgentDef(name="test", prompt="do stuff", max_agent_iterations=200) + + resolved = ( + agent.max_agent_iterations + if agent.max_agent_iterations is not None + else provider._default_max_agent_iterations + ) + assert resolved == 200 + + def test_provider_default_when_agent_none(self) -> None: + """Test provider default used when agent has no override.""" + provider = CopilotProvider( + mock_handler=lambda a, p, c: {"result": "ok"}, + max_agent_iterations=50, + ) + + agent = AgentDef(name="test", prompt="do stuff") + + resolved = ( + agent.max_agent_iterations + if agent.max_agent_iterations is not None + else provider._default_max_agent_iterations + ) + assert resolved == 50 diff --git a/tests/test_integration/test_claude_mcp_tool_filter.py b/tests/test_integration/test_claude_mcp_tool_filter.py index 6c93e68..814ef1b 100644 --- a/tests/test_integration/test_claude_mcp_tool_filter.py +++ b/tests/test_integration/test_claude_mcp_tool_filter.py @@ -82,6 +82,8 @@ def _make_provider_with_mcp() -> ClaudeProvider: provider._retry_history = [] provider._max_parse_recovery_attempts = 2 provider._max_schema_depth = 10 + provider._default_max_agent_iterations = 50 + provider._default_max_session_seconds = None # Pre-wire a mock MCP manager so _ensure_mcp_connected is a no-op mock_mcp = MagicMock() diff --git a/tests/test_providers/test_claude_event_callback.py b/tests/test_providers/test_claude_event_callback.py index 4635611..4ff17e5 100644 --- a/tests/test_providers/test_claude_event_callback.py +++ b/tests/test_providers/test_claude_event_callback.py @@ -61,6 +61,8 @@ def _make_provider_with_mcp() -> ClaudeProvider: provider._retry_history = [] provider._max_parse_recovery_attempts = 2 provider._max_schema_depth = 10 + provider._default_max_agent_iterations = 50 + provider._default_max_session_seconds = None mock_mcp_manager = MagicMock() mock_mcp_manager.has_servers.return_value = True @@ -85,6 +87,8 @@ def _make_bare_provider() -> ClaudeProvider: provider._retry_history = [] provider._max_parse_recovery_attempts = 2 provider._max_schema_depth = 10 + provider._default_max_agent_iterations = 50 + provider._default_max_session_seconds = None return provider @@ -538,6 +542,8 @@ async def test_callback_reaches_agentic_loop(self) -> None: agent = MagicMock() agent.output = None agent.model = None + agent.max_agent_iterations = None + agent.max_session_seconds = None await provider._execute_with_retry( agent=agent, diff --git a/tests/test_providers/test_claude_interrupt.py b/tests/test_providers/test_claude_interrupt.py index 9c042db..1063ce0 100644 --- a/tests/test_providers/test_claude_interrupt.py +++ b/tests/test_providers/test_claude_interrupt.py @@ -39,6 +39,8 @@ def _make_provider() -> ClaudeProvider: provider._retry_history = [] provider._max_parse_recovery_attempts = 2 provider._max_schema_depth = 10 + provider._default_max_agent_iterations = 50 + provider._default_max_session_seconds = None return provider diff --git a/tests/test_providers/test_claude_parameter_passing.py b/tests/test_providers/test_claude_parameter_passing.py index f1c983c..8147800 100644 --- a/tests/test_providers/test_claude_parameter_passing.py +++ b/tests/test_providers/test_claude_parameter_passing.py @@ -45,6 +45,8 @@ async def test_common_parameters_passed_from_factory(self, mock_claude_class: Mo max_tokens=4096, timeout=120.0, mcp_servers=None, + max_agent_iterations=None, + max_session_seconds=None, ) @patch("conductor.providers.claude.ANTHROPIC_SDK_AVAILABLE", True) diff --git a/tests/test_providers/test_registry.py b/tests/test_providers/test_registry.py index 8a03fcc..290ac3f 100644 --- a/tests/test_providers/test_registry.py +++ b/tests/test_providers/test_registry.py @@ -330,6 +330,7 @@ async def test_runtime_config_passed_to_provider(self, mock_create: MagicMock) - max_tokens=4096, timeout=60.0, max_session_seconds=None, + max_agent_iterations=None, ) @patch("conductor.providers.registry.create_provider") diff --git a/uv.lock b/uv.lock index b993124..8f149e0 100644 --- a/uv.lock +++ b/uv.lock @@ -180,7 +180,7 @@ dev = [ requires-dist = [ { name = "anthropic", specifier = ">=0.77.0,<1.0.0" }, { name = "fastapi", specifier = ">=0.115.0" }, - { name = "github-copilot-sdk", specifier = ">=0.1.0,<0.1.31" }, + { name = "github-copilot-sdk", specifier = ">=0.1.28,<0.1.31" }, { name = "jinja2", specifier = ">=3.1.0" }, { name = "mcp", specifier = ">=1.0.0" }, { name = "pydantic", specifier = ">=2.0.0" }, @@ -364,16 +364,19 @@ wheels = [ [[package]] name = "github-copilot-sdk" -version = "0.1.18" +version = "0.1.30" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "python-dateutil" }, - { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bf/00/be64b9b33015d5e79fb5e5e95d871484e79a907b3792935b855ab40308ce/github_copilot_sdk-0.1.18.tar.gz", hash = "sha256:b2d56d40c0f48e81f2899d32fb4a8d2b8df22620913547da93fddf9b2f368e9e", size = 81318, upload-time = "2026-01-24T18:09:57.617Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ae/0f/f832b32bca9d89a26a2b810c69fdc37ac925e34855ee93a11bb3d90ca2b7/github_copilot_sdk-0.1.18-py3-none-any.whl", hash = "sha256:99cfdf4d4d0da6d92d5bf36a952546157785df83d6b0783b3f7a8e93a2762171", size = 33740, upload-time = "2026-01-24T18:09:55.696Z" }, + { url = "https://files.pythonhosted.org/packages/18/37/92b8037c0673999ac1c49e9d079cf6d36283e6ee3453d66b54878da81bc8/github_copilot_sdk-0.1.30-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:47e95246a63beeebf192db6013662c5f39778ccfa6b1b718b79cbec6b6a88bf8", size = 58182964, upload-time = "2026-03-03T17:21:53.564Z" }, + { url = "https://files.pythonhosted.org/packages/08/79/9d0628fa819df73e92ebbd4af949cdd82850cc4bde79b3e78040fcd8ed80/github_copilot_sdk-0.1.30-py3-none-macosx_11_0_arm64.whl", hash = "sha256:601cbe1c5a576906b73cbf8591429451c91148bff5a564e56e1e83ff99b2dc10", size = 54935274, upload-time = "2026-03-03T17:21:57.494Z" }, + { url = "https://files.pythonhosted.org/packages/10/5d/f407e9c9155f912780b4587ab74abf3b94fae91af0463bad317cc8aacdfe/github_copilot_sdk-0.1.30-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:735fb90683bea27a418a0d45df430492db2a395e5ae88d575ac138be49d6cf07", size = 61071530, upload-time = "2026-03-03T17:22:01.601Z" }, + { url = "https://files.pythonhosted.org/packages/b8/9f/5c2ab2baf5f185150058c774da2b5e4c613b4532c48b499ce127419da461/github_copilot_sdk-0.1.30-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:21ade06dfe5ca111663c42fff000ab3ec6595e51b1cf4ab56ff550cdd7a2992f", size = 59252204, upload-time = "2026-03-03T17:22:05.706Z" }, + { url = "https://files.pythonhosted.org/packages/ef/80/4e72ccdc8868250ba8c5d48a1fef5a8244361c2a586820de9b77df0c79ed/github_copilot_sdk-0.1.30-py3-none-win_amd64.whl", hash = "sha256:f1be9e49da2af370a914d4425bfecbc2daecf8e5de0074beaa1e22735bdd5da6", size = 53691358, upload-time = "2026-03-03T17:22:09.474Z" }, + { url = "https://files.pythonhosted.org/packages/53/4f/25ff085d0d5d50d1197fd6ae9a53adc4cc8298940212f5a69f7ced68c33e/github_copilot_sdk-0.1.30-py3-none-win_arm64.whl", hash = "sha256:3e0691eb3030c385f629d63d74ded938e0577fcd98f452259efd5d7fb2283576", size = 51699653, upload-time = "2026-03-03T17:22:13.215Z" }, ] [[package]]