-
Notifications
You must be signed in to change notification settings - Fork 11
fix(copilot): add permission handler, event logging, and idle detection race fix #15
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -163,6 +163,20 @@ def __init__( | |
| self._interrupted_session: Any = None | ||
| self._abort_supported: bool | None = None | ||
|
|
||
| @staticmethod | ||
| def _default_permission_handler( | ||
| request: dict[str, Any], | ||
| invocation: dict[str, str], | ||
| ) -> dict[str, Any]: | ||
| """Default permission handler that approves all requests. | ||
|
|
||
| SDK v0.1.28+ requires a permission handler on session creation. | ||
| In orchestration mode, we approve all tool permissions since the | ||
| workflow author controls which tools are available to each agent. | ||
| """ | ||
| logger.debug("auto-approved permission request: %s", request) | ||
| return {"kind": "approved"} | ||
|
|
||
| async def execute( | ||
| self, | ||
| agent: AgentDef, | ||
|
|
@@ -411,6 +425,7 @@ async def _execute_sdk_call( | |
| # Build session config with MCP servers from workflow configuration | ||
| session_config: dict[str, Any] = { | ||
| "model": model, | ||
| "on_permission_request": self._default_permission_handler, | ||
| } | ||
|
|
||
| # Add temperature if configured | ||
|
|
@@ -426,7 +441,10 @@ async def _execute_sdk_call( | |
| resume_sid = self._resume_session_ids.get(agent.name) | ||
| if resume_sid is not None: | ||
| try: | ||
| session = await self._client.resume_session(resume_sid) | ||
| session = await self._client.resume_session( | ||
| resume_sid, | ||
| {"on_permission_request": self._default_permission_handler}, | ||
| ) | ||
| logger.info(f"Resumed Copilot session {resume_sid} for agent '{agent.name}'") | ||
| except Exception as exc: | ||
| logger.warning( | ||
|
|
@@ -621,6 +639,18 @@ def on_event(event: Any) -> None: | |
| nonlocal response_content, error_message | ||
| event_type = event.type.value if hasattr(event.type, "value") else str(event.type) | ||
|
|
||
| # Log every SDK event for debugging stalls (visible via --log-file) | ||
| if logger.isEnabledFor(logging.DEBUG): | ||
| tool_info = "" | ||
| if ( | ||
| event_type == "tool.execution_start" | ||
| and hasattr(event, "data") | ||
| and event.data is not None | ||
| ): | ||
| tn = getattr(event.data, "tool_name", None) or getattr(event.data, "name", "?") | ||
| tool_info = f" tool={tn}" | ||
| logger.debug("sdk_event: %s%s", event_type, tool_info) | ||
|
|
||
| # Update last activity on EVERY event (this is key for idle detection!) | ||
| last_activity_ref[0] = event_type | ||
| last_activity_ref[2] = time.monotonic() | ||
|
|
@@ -1267,6 +1297,11 @@ async def _wait_with_idle_detection( | |
| idle_timeout = self._idle_recovery_config.idle_timeout_seconds | ||
|
|
||
| while True: | ||
| # Check if done was already set (avoids race where session.idle | ||
| # arrived between a previous done.clear() and the next wait). | ||
| if done.is_set(): | ||
| return | ||
|
|
||
| try: | ||
| # Wait for done with idle timeout | ||
| await asyncio.wait_for( | ||
|
|
@@ -1288,7 +1323,11 @@ async def _wait_with_idle_detection( | |
| # just hasn't finished yet. Reset recovery counter (new task) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ✅ Good fix. Without this guard, if |
||
| # and keep waiting. | ||
| recovery_attempts = 0 | ||
| done.clear() | ||
| # Only clear if done hasn't been set in the meantime | ||
| # (prevents race where session.idle arrives right as we | ||
| # check time_since_last_event). | ||
| if not done.is_set(): | ||
| done.clear() | ||
| continue | ||
|
|
||
| # Genuinely idle — no events for the full timeout period | ||
|
|
@@ -1321,9 +1360,10 @@ async def _wait_with_idle_detection( | |
| recovery_prompt = self._build_recovery_prompt(last_event_type, last_tool_call) | ||
| await session.send({"prompt": recovery_prompt}) | ||
|
|
||
| # Reset the done event to wait again | ||
| # (it may have been set by a previous partial response) | ||
| done.clear() | ||
| # Reset the done event to wait again — but only if it hasn't | ||
| # been set since the recovery prompt was sent. | ||
| if not done.is_set(): | ||
| done.clear() | ||
|
|
||
| async def _ensure_client_started(self) -> None: | ||
| """Ensure the Copilot client is started.""" | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
✅ Good fix. This early-return guard closes a real race window where
session.idlecould arrive between a previousdone.clear()and theawait wait_for(), causing the completion signal to be lost and an unnecessary full idle-timeout wait.