diff --git a/src/conductor/providers/copilot.py b/src/conductor/providers/copilot.py index d1368c7..12cbef7 100644 --- a/src/conductor/providers/copilot.py +++ b/src/conductor/providers/copilot.py @@ -163,6 +163,20 @@ def __init__( self._interrupted_session: Any = None self._abort_supported: bool | None = None + @staticmethod + def _default_permission_handler( + request: dict[str, Any], + invocation: dict[str, str], + ) -> dict[str, Any]: + """Default permission handler that approves all requests. + + SDK v0.1.28+ requires a permission handler on session creation. + In orchestration mode, we approve all tool permissions since the + workflow author controls which tools are available to each agent. + """ + logger.debug("auto-approved permission request: %s", request) + return {"kind": "approved"} + async def execute( self, agent: AgentDef, @@ -411,6 +425,7 @@ async def _execute_sdk_call( # Build session config with MCP servers from workflow configuration session_config: dict[str, Any] = { "model": model, + "on_permission_request": self._default_permission_handler, } # Add temperature if configured @@ -426,7 +441,10 @@ async def _execute_sdk_call( resume_sid = self._resume_session_ids.get(agent.name) if resume_sid is not None: try: - session = await self._client.resume_session(resume_sid) + session = await self._client.resume_session( + resume_sid, + {"on_permission_request": self._default_permission_handler}, + ) logger.info(f"Resumed Copilot session {resume_sid} for agent '{agent.name}'") except Exception as exc: logger.warning( @@ -621,6 +639,18 @@ def on_event(event: Any) -> None: nonlocal response_content, error_message event_type = event.type.value if hasattr(event.type, "value") else str(event.type) + # Log every SDK event for debugging stalls (visible via --log-file) + if logger.isEnabledFor(logging.DEBUG): + tool_info = "" + if ( + event_type == "tool.execution_start" + and hasattr(event, "data") + and event.data is not None + ): + tn = getattr(event.data, "tool_name", None) or getattr(event.data, "name", "?") + tool_info = f" tool={tn}" + logger.debug("sdk_event: %s%s", event_type, tool_info) + # Update last activity on EVERY event (this is key for idle detection!) last_activity_ref[0] = event_type last_activity_ref[2] = time.monotonic() @@ -1267,6 +1297,11 @@ async def _wait_with_idle_detection( idle_timeout = self._idle_recovery_config.idle_timeout_seconds while True: + # Check if done was already set (avoids race where session.idle + # arrived between a previous done.clear() and the next wait). + if done.is_set(): + return + try: # Wait for done with idle timeout await asyncio.wait_for( @@ -1288,7 +1323,11 @@ async def _wait_with_idle_detection( # just hasn't finished yet. Reset recovery counter (new task) # and keep waiting. recovery_attempts = 0 - done.clear() + # Only clear if done hasn't been set in the meantime + # (prevents race where session.idle arrives right as we + # check time_since_last_event). + if not done.is_set(): + done.clear() continue # Genuinely idle — no events for the full timeout period @@ -1321,9 +1360,10 @@ async def _wait_with_idle_detection( recovery_prompt = self._build_recovery_prompt(last_event_type, last_tool_call) await session.send({"prompt": recovery_prompt}) - # Reset the done event to wait again - # (it may have been set by a previous partial response) - done.clear() + # Reset the done event to wait again — but only if it hasn't + # been set since the recovery prompt was sent. + if not done.is_set(): + done.clear() async def _ensure_client_started(self) -> None: """Ensure the Copilot client is started.""" diff --git a/tests/test_providers/test_copilot_resume.py b/tests/test_providers/test_copilot_resume.py index 94cbc5d..a7132b0 100644 --- a/tests/test_providers/test_copilot_resume.py +++ b/tests/test_providers/test_copilot_resume.py @@ -161,7 +161,10 @@ async def _fake_send(msg: Any) -> None: ): await provider.execute(agent, {}, "Continue research") - mock_client.resume_session.assert_called_once_with(resumed_sid) + mock_client.resume_session.assert_called_once_with( + resumed_sid, + {"on_permission_request": CopilotProvider._default_permission_handler}, + ) mock_client.create_session.assert_not_called() @pytest.mark.asyncio @@ -201,7 +204,10 @@ async def _fake_send(msg: Any) -> None: ): await provider.execute(agent, {}, "Continue research") - mock_client.resume_session.assert_called_once_with("stale-sid") + mock_client.resume_session.assert_called_once_with( + "stale-sid", + {"on_permission_request": CopilotProvider._default_permission_handler}, + ) mock_client.create_session.assert_called_once() # Session ID should now reflect the new session assert provider.get_session_ids()["researcher"] == "sess-new"