From 9ece7a62df9fc170a12194a0926da041d67f7389 Mon Sep 17 00:00:00 2001 From: Dan Green Date: Fri, 17 Apr 2026 13:06:59 -0700 Subject: [PATCH 1/3] fix(web): stop busy-looping on stale gate_response messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `wait_for_gate_response` previously re-queued any message whose `agent_name` did not match the expected agent, then slept 10ms and retried. Because `asyncio.Queue` has no deduplication, the same stale message would immediately be dequeued again on the next iteration — producing a tight loop that re-enqueues and re-inspects the same dict forever (cpu-bound, 100 iter/sec per stale message). In practice this never triggered during normal flow because conductor presents one gate at a time and the `has_connections()` short-circuit used to hide the issue. With that short-circuit now gone (PR to always race both tasks), and with any client ever producing a duplicate click, the spin becomes reachable. Fix: since conductor only ever awaits one gate at a time, any non-matching `gate_response` is definitionally stale (a duplicate click from a dashboard that missed the first resolution, or a message for a gate that already completed). Re-queueing can never deliver it — the matching `wait_for_gate_response` call is already gone. Discard stale messages with a warning log so the queue drains cleanly and the next `await .get()` blocks properly on an empty queue. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/conductor/web/server.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/conductor/web/server.py b/src/conductor/web/server.py index a7d39bf..892166a 100644 --- a/src/conductor/web/server.py +++ b/src/conductor/web/server.py @@ -286,8 +286,14 @@ async def wait_for_gate_response(self, agent_name: str) -> dict[str, Any]: """Wait for a gate response from a web client. Blocks until a ``gate_response`` message is received via WebSocket - that matches the given agent name. Non-matching messages are - re-queued so they are not lost. + that matches the given agent name. + + Non-matching messages are discarded with a warning. Because + conductor only presents one gate at a time, any ``gate_response`` + addressed to a different agent is stale (e.g. a duplicate click + from a dashboard that missed the first resolution) and cannot be + delivered — re-queueing would only cause it to be re-examined on + every subsequent gate with no chance of ever matching. Args: agent_name: The name of the human_gate agent to wait for. @@ -300,10 +306,11 @@ async def wait_for_gate_response(self, agent_name: str) -> dict[str, Any]: msg = await self._gate_response_queue.get() if msg.get("agent_name") == agent_name: return msg - # Not for this agent — put it back - self._gate_response_queue.put_nowait(msg) - # Yield to avoid busy-loop - await asyncio.sleep(0.01) + logger.warning( + "Discarding stale gate_response for agent %r while waiting on %r", + msg.get("agent_name"), + agent_name, + ) # ------------------------------------------------------------------ # Auto-shutdown (--web-bg) From d68f2ea5159928f2b96727bab155d7ece3d02abe Mon Sep 17 00:00:00 2001 From: Daniel Green Date: Fri, 17 Apr 2026 13:23:22 -0700 Subject: [PATCH 2/3] test(web): cover stale gate_response discard path Adds TestWaitForGateResponse with two cases: - Returns the matching message. - Discards stale (non-matching) messages without re-queueing, regression-covering the busy-loop bug fixed in this PR. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/test_web/test_server.py | 44 +++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/test_web/test_server.py b/tests/test_web/test_server.py index e8484c7..98d84ac 100644 --- a/tests/test_web/test_server.py +++ b/tests/test_web/test_server.py @@ -499,6 +499,50 @@ async def _cancel_serve(self: object) -> None: await dashboard.start() +class TestWaitForGateResponse: + """Tests for WebDashboard.wait_for_gate_response stale-message handling.""" + + @pytest.mark.asyncio + async def test_returns_matching_response(self) -> None: + """Returns the message whose agent_name matches the awaited agent.""" + _, dashboard = _make_dashboard() + await dashboard._gate_response_queue.put( + {"agent_name": "plan_approval", "selected_value": "approved"} + ) + + msg = await asyncio.wait_for( + dashboard.wait_for_gate_response("plan_approval"), timeout=1.0 + ) + + assert msg["selected_value"] == "approved" + + @pytest.mark.asyncio + async def test_discards_stale_non_matching_messages(self) -> None: + """Non-matching gate_response messages are discarded, not re-queued. + + Regression test for the busy-loop bug where stale messages (e.g. a + duplicate click for a previously-resolved gate) were re-queued with + a 10ms sleep, spinning at ~100Hz forever because ``asyncio.Queue`` + has no dedup. + """ + _, dashboard = _make_dashboard() + # Enqueue a stale message followed by the matching one. + await dashboard._gate_response_queue.put( + {"agent_name": "old_gate", "selected_value": "approved"} + ) + await dashboard._gate_response_queue.put( + {"agent_name": "current_gate", "selected_value": "rejected"} + ) + + msg = await asyncio.wait_for( + dashboard.wait_for_gate_response("current_gate"), timeout=1.0 + ) + + assert msg["agent_name"] == "current_gate" + assert msg["selected_value"] == "rejected" + assert dashboard._gate_response_queue.empty() + + async def _short_grace(event: asyncio.Event, delay: float) -> None: """Helper for testing: short grace period.""" await asyncio.sleep(delay) From 6f9d8d9c50c218a59ea9df89c9f19dd5981b842d Mon Sep 17 00:00:00 2001 From: Daniel Green Date: Fri, 17 Apr 2026 13:59:21 -0700 Subject: [PATCH 3/3] style: ruff format test_server.py Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/test_web/test_server.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/test_web/test_server.py b/tests/test_web/test_server.py index 98d84ac..31db239 100644 --- a/tests/test_web/test_server.py +++ b/tests/test_web/test_server.py @@ -510,9 +510,7 @@ async def test_returns_matching_response(self) -> None: {"agent_name": "plan_approval", "selected_value": "approved"} ) - msg = await asyncio.wait_for( - dashboard.wait_for_gate_response("plan_approval"), timeout=1.0 - ) + msg = await asyncio.wait_for(dashboard.wait_for_gate_response("plan_approval"), timeout=1.0) assert msg["selected_value"] == "approved" @@ -534,9 +532,7 @@ async def test_discards_stale_non_matching_messages(self) -> None: {"agent_name": "current_gate", "selected_value": "rejected"} ) - msg = await asyncio.wait_for( - dashboard.wait_for_gate_response("current_gate"), timeout=1.0 - ) + msg = await asyncio.wait_for(dashboard.wait_for_gate_response("current_gate"), timeout=1.0) assert msg["agent_name"] == "current_gate" assert msg["selected_value"] == "rejected"