From 7502cd341d2287de4bfdd244062515b3f3186888 Mon Sep 17 00:00:00 2001 From: Shane Neuville Date: Tue, 7 Apr 2026 01:07:15 -0500 Subject: [PATCH 1/8] fix: atomic ClearProcessingState, prevent phantom (previous) sessions on worker revival MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Three concrete bug fixes found through exhaustive live stress testing, plus integration scenario files (38 single-session + 35 multi-agent). All 3304 unit tests pass. Stress-tested with 2x app-relaunch-mid-orchestration (both times: 2/2 worker results collected, synthesis completed, 0 phantom sessions). ## Fix 1: Atomic ClearProcessingState() Added ClearProcessingState() in CopilotService.cs that atomically clears all 12 companion fields in one call. Applied to: - CompleteResponse (CopilotService.Events.cs) - AbortSessionAsync - SendPromptAsync error paths Previously each site cleared a different subset of fields — the #1 source of stuck-session regressions (13 PRs of fix/regression cycles). ## Fix 2: Phantom (previous) sessions on worker revival (CopilotService.Organization.cs) When a worker's SDK session entered a zombie state, ExecuteWorkerAsync created a fresh session with a new ID but left the old ID in active-sessions.json without marking it closed. MergeSessionEntries then saw both IDs for the same worker name and created a (previous) entry. Fixed by: capturing deadSessionId BEFORE updating SessionId, adding it to _closedSessionIds, and setting RecoveredFromSessionId = deadSessionId so the merge knows to drop the persisted entry. ## Fix 3: Same-group revival merge logic (CopilotService.Persistence.cs) CanSafelyDropSupersededGroupMoveEntry required GroupId to DIFFER as a safety check. But RecoveredFromSessionId is an explicit marker — if set, the new session definitively claims to have replaced the old one. Removed the GroupId constraint: now drops whenever RecoveredFromSessionId matches, regardless of GroupId. ## New Tests - SessionPersistenceTests: same-group revival with/without RecoveredFromSessionId - MultiAgentRegressionTests: 3 structural invariant tests for revival correctness - Scenarios/single-session-scenarios.json: 38 integration test scenarios - Scenarios/multi-agent-reliability-scenarios.json: 35 integration test scenarios Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../ConsecutiveStuckSessionTests.cs | 7 +- PolyPilot.Tests/MultiAgentRegressionTests.cs | 64 +- PolyPilot.Tests/ScenarioReferenceTests.cs | 27 +- .../multi-agent-reliability-scenarios.json | 530 +++++++++++++++ .../Scenarios/single-session-scenarios.json | 633 ++++++++++++++++++ PolyPilot.Tests/SessionPersistenceTests.cs | 32 +- PolyPilot/Services/CopilotService.Events.cs | 22 +- .../Services/CopilotService.Organization.cs | 17 +- .../Services/CopilotService.Persistence.cs | 16 +- PolyPilot/Services/CopilotService.cs | 79 ++- 10 files changed, 1351 insertions(+), 76 deletions(-) create mode 100644 PolyPilot.Tests/Scenarios/multi-agent-reliability-scenarios.json create mode 100644 PolyPilot.Tests/Scenarios/single-session-scenarios.json diff --git a/PolyPilot.Tests/ConsecutiveStuckSessionTests.cs b/PolyPilot.Tests/ConsecutiveStuckSessionTests.cs index bd9ea36ea2..8682e10697 100644 --- a/PolyPilot.Tests/ConsecutiveStuckSessionTests.cs +++ b/PolyPilot.Tests/ConsecutiveStuckSessionTests.cs @@ -186,11 +186,12 @@ public void WatchdogSource_ClearsMessageQueueOnRepeatedStucks() [Fact] public void CompleteResponseSource_ResetsConsecutiveStuckCount() { - // Verify CompleteResponse resets the stuck counter on success. + // Verify ClearProcessingState resets the stuck counter on success. + // ClearProcessingState is in CopilotService.cs and called by CompleteResponse. var repoRoot = GetRepoRoot(); - var eventsSource = File.ReadAllText(Path.Combine(repoRoot, "PolyPilot", "Services", "CopilotService.Events.cs")); + var mainSource = File.ReadAllText(Path.Combine(repoRoot, "PolyPilot", "Services", "CopilotService.cs")); - Assert.Contains("ConsecutiveStuckCount = 0", eventsSource); + Assert.Contains("ConsecutiveStuckCount = 0", mainSource); } // ========================================================================= diff --git a/PolyPilot.Tests/MultiAgentRegressionTests.cs b/PolyPilot.Tests/MultiAgentRegressionTests.cs index 30460286fa..53c6031a2f 100644 --- a/PolyPilot.Tests/MultiAgentRegressionTests.cs +++ b/PolyPilot.Tests/MultiAgentRegressionTests.cs @@ -2510,7 +2510,7 @@ public void PrematureIdleSignal_ResetInSendPromptAsync() var sendIdx = source.IndexOf("async Task SendPromptAsync(", StringComparison.Ordinal); Assert.True(sendIdx >= 0, "SendPromptAsync must exist in CopilotService.cs"); - var sendBlock = source.Substring(sendIdx, Math.Min(6000, source.Length - sendIdx)); + var sendBlock = source.Substring(sendIdx, Math.Min(8000, source.Length - sendIdx)); Assert.Contains("PrematureIdleSignal.Reset()", sendBlock); } @@ -2619,6 +2619,68 @@ public void MutationBeforeCommit_SessionIdSetAfterTryUpdate() "SessionId must be assigned AFTER TryUpdate succeeds (mutation-after-commit pattern)"); } + [Fact] + public void Revival_SetsRecoveredFromSessionId_BeforeUpdatingSessionId() + { + // Structural: revival must set RecoveredFromSessionId on the Info object BEFORE + // updating SessionId. This ensures MergeSessionEntries can identify that the new + // session explicitly replaced the old one and drop the old "(previous)" phantom entry. + var orgPath = Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.Organization.cs"); + var source = File.ReadAllText(orgPath); + + var tryUpdateIdx = source.IndexOf("TryUpdate(workerName, freshState, deadState)", StringComparison.Ordinal); + Assert.True(tryUpdateIdx >= 0, "TryUpdate call must exist"); + + var recoveredFromIdx = source.IndexOf("deadState.Info.RecoveredFromSessionId ??=", StringComparison.Ordinal); + Assert.True(recoveredFromIdx >= 0, "Revival must set RecoveredFromSessionId after TryUpdate"); + Assert.True(recoveredFromIdx > tryUpdateIdx, "RecoveredFromSessionId must be set after TryUpdate"); + + var sessionIdAssign = source.IndexOf("deadState.Info.SessionId = freshSession.SessionId", StringComparison.Ordinal); + Assert.True(recoveredFromIdx < sessionIdAssign, + "RecoveredFromSessionId must be set BEFORE SessionId is updated (so the old ID is correctly recorded)"); + } + + [Fact] + public void Revival_AddsOldSessionIdToClosedSet_PreventingPhantomPreviousEntry() + { + // Structural: when worker revival creates a fresh session, it must add the OLD + // session ID to _closedSessionIds. Without this, SaveActiveSessionsToDisk's merge + // sees the old ID in persisted active-sessions.json and creates a "(previous)" entry. + var orgPath = Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.Organization.cs"); + var source = File.ReadAllText(orgPath); + + var tryUpdateIdx = source.IndexOf("TryUpdate(workerName, freshState, deadState)", StringComparison.Ordinal); + Assert.True(tryUpdateIdx >= 0, "TryUpdate call must exist"); + + // Find the else block after TryUpdate (the success path) + var successBlock = source.Substring(tryUpdateIdx, Math.Min(3000, source.Length - tryUpdateIdx)); + + Assert.Contains("_closedSessionIds[deadSessionId] = 0", + successBlock, + StringComparison.Ordinal); + } + + [Fact] + public void Revival_CapturesOldSessionIdBeforeOrphaning() + { + // Structural: the old session ID must be captured BEFORE marking the state as orphaned + // and disposing the session. After orphaning, Info.SessionId might be mutated by + // concurrent code. The captured ID is what gets added to _closedSessionIds. + var orgPath = Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.Organization.cs"); + var source = File.ReadAllText(orgPath); + + // Find the revival section + var revivalStart = source.IndexOf("Worker revival: empty response", StringComparison.Ordinal); + Assert.True(revivalStart >= 0, "Revival comment must exist"); + var revivalBlock = source.Substring(revivalStart, Math.Min(2000, source.Length - revivalStart)); + + // Old session ID must be captured before IsOrphaned = true + var captureIdx = revivalBlock.IndexOf("var deadSessionId = deadState.Info.SessionId", StringComparison.Ordinal); + var orphanIdx = revivalBlock.IndexOf("deadState.IsOrphaned = true", StringComparison.Ordinal); + Assert.True(captureIdx >= 0, "Old session ID must be captured before orphaning"); + Assert.True(captureIdx < orphanIdx, "Session ID capture must come before IsOrphaned = true"); + } + [Fact] public void RecoverFromPrematureIdleIfNeededAsync_UsesEventsFileFreshness() { diff --git a/PolyPilot.Tests/ScenarioReferenceTests.cs b/PolyPilot.Tests/ScenarioReferenceTests.cs index c14c1abb9b..cf596df408 100644 --- a/PolyPilot.Tests/ScenarioReferenceTests.cs +++ b/PolyPilot.Tests/ScenarioReferenceTests.cs @@ -59,18 +59,27 @@ public void ScenarioFiles_StepsHaveValidActions() var validActions = new HashSet { "assertAllSessionsReceived", "assertAllSessionsResponded", "assertAllWorkers", - "assertDirectoryExists", "assertEqual", "assertEvaluatorWasUsed", "assertFileContains", - "assertFileExists", "assertGroupMembership", "assertNoDirectoryContains", + "assertDirectoryExists", "assertEqual", "assertEvaluatorWasUsed", "assertEventLog", + "assertFileContains", "assertFileExists", "assertGroupExists", "assertGroupMembership", + "assertInputAvailable", "assertNoDirectoryContains", "assertNoNewEvents", "assertNoOverlap", "assertNoPresetInSection", "assertNoReflectionLoop", "assertNoSessionsInDefault", "assertNoSessionsWithGroupId", "assertOrchestratorReceivedRoutingContext", "assertOrchestratorReceivedWorkerDescriptions", "assertOrchestratorSynthesized", - "assertOrgJson", "assertPresetInSection", "assertPresetVisible", "assertReflectionPaused", - "assertReflectionState", "assertSessionMeta", "assertWorkerPromptContains", - "captureGroupState", "click", "createGroup", "createGroupFromPreset", "createSquadDir", - "deleteGroup", "evaluate", "navigate", "note", "pauseReflection", "readOrgJson", - "restartApp", "resumeReflection", "saveGroupAsPreset", "selectWorktree", "sendPrompt", - "setEvaluator", "setMode", "shell", "type", "wait", "waitForAgent", - "waitForAllResponses", "waitForAllSessions", "waitForCompletion", "waitForPhase", + "assertOrgJson", "assertPresetInSection", "assertPresetVisible", "assertProcessing", + "assertReflectionPaused", "assertReflectionState", "assertResponseNotEmpty", + "assertSessionExists", "assertSessionMeta", "assertSessionNotExists", + "assertWorkerPromptContains", + "captureEventLogPosition", "captureGroupState", "captureSessionList", + "click", "clickAbort", "clickCreate", "closeSession", + "createGroup", "createGroupFromPreset", "createSession", "createSquadDir", + "deleteGroup", "evaluate", "navigate", "note", + "openCreateMenu", "pauseReflection", "readOrgJson", "relaunchApp", + "restartApp", "resumeReflection", "saveGroupAsPreset", + "selectOption", "selectPreset", "selectRepo", "selectSession", "selectWorktree", + "sendMessage", "sendPrompt", "sendToGroup", + "setEvaluator", "setMode", "shell", "shellCheck", "switchModel", + "type", "wait", "waitForAgent", "waitForAllResponses", "waitForAllSessions", + "waitForCompletion", "waitForEventPattern", "waitForIdle", "waitForPhase", "screenshot" }; foreach (var file in Directory.GetFiles(ScenariosDir, "*.json")) diff --git a/PolyPilot.Tests/Scenarios/multi-agent-reliability-scenarios.json b/PolyPilot.Tests/Scenarios/multi-agent-reliability-scenarios.json new file mode 100644 index 0000000000..4183b793fe --- /dev/null +++ b/PolyPilot.Tests/Scenarios/multi-agent-reliability-scenarios.json @@ -0,0 +1,530 @@ +{ + "description": "Multi-agent reliability scenarios for PolyPilot. Tests cover every orchestration lifecycle path: group creation, dispatch, worker execution, synthesis, subsequent orchestrations, follow-up handling, restart recovery, and state hygiene. Complements multi-agent-scenarios.json which covers organization/presets/squad. Execute against a running PolyPilot instance using MauiDevFlow CDP commands.", + "prerequisites": { + "build": "cd PolyPilot && ./relaunch.sh", + "waitForAgent": "maui devflow MAUI status --agent-port 9223", + "initialMode": "Persistent", + "notes": "All test sessions MUST point to the PolyPilot repository. Use 'Implement & Challenge' preset (ignore PR Reviewer). Never commit, push, or create GitHub issues/PRs. DevFlow agent port is 9223. CDP Blazor clicks require PointerEvent dispatch." + }, + "scenarios": [ + { + "id": "create-implement-challenge-group", + "name": "Create Implement & Challenge multi-agent group", + "description": "Create a multi-agent group using the Implement & Challenge preset. Verify orchestrator and workers are created with correct roles.", + "category": "group-creation", + "invariants": [ + "Group appears in sidebar", + "Orchestrator session exists with Role=Orchestrator", + "Worker sessions exist (at least 2)", + "All sessions point to PolyPilot repo" + ], + "steps": [ + { "action": "openCreateMenu" }, + { "action": "selectOption", "text": "Multi-Agent Team" }, + { "action": "selectPreset", "name": "Implement & Challenge" }, + { "action": "selectRepo", "repo": "PureWeen-PolyPilot" }, + { "action": "clickCreate" }, + { "action": "wait", "ms": 5000 }, + { "action": "assertGroupExists", "nameContains": "Implement" }, + { "action": "assertSessionExists", "nameContains": "orchestrator" }, + { "action": "assertSessionExists", "nameContains": "worker" } + ] + }, + { + "id": "group-shows-all-sessions-in-sidebar", + "name": "Multi-agent group shows all sessions in sidebar", + "description": "Verify all group sessions (orchestrator + workers) are visible in the sidebar under the group.", + "category": "group-creation", + "invariants": [ + "All sessions visible under group header", + "Session count matches expected (orchestrator + N workers)" + ], + "steps": [ + { "action": "evaluate", "script": "document.querySelectorAll('[class*=session-list-item]').length", "expect": { "greaterThan": 2 }, "note": "At least orchestrator + 2 workers visible" } + ] + }, + { + "id": "workers-have-assigned-models", + "name": "Workers have model assignments from preset", + "description": "Verify each worker session has its configured model from the preset.", + "category": "group-creation", + "invariants": [ + "Each worker has a non-empty model assignment", + "Model names match preset configuration" + ], + "steps": [ + { "action": "note", "text": "Verify by expanding each worker card and checking model dropdown, or via event log on first dispatch" } + ] + }, + { + "id": "orchestrator-role-correct", + "name": "Orchestrator session has correct metadata", + "description": "Verify the orchestrator session is marked with the correct role and group membership.", + "category": "group-creation", + "invariants": [ + "Orchestrator session exists", + "Session is part of the multi-agent group", + "IsMultiAgentSession = true" + ], + "steps": [ + { "action": "assertSessionExists", "nameContains": "orchestrator" }, + { "action": "assertEventLog", "pattern": "DISPATCH-ROUTE.*orchestrator.*isOrch=True", "expected": true, "note": "Will be verified when first message is sent" } + ] + }, + { + "id": "dispatch-sends-to-all-workers", + "name": "Orchestration dispatches to all workers", + "description": "Send a prompt to the group. Verify the orchestrator dispatches tasks to all workers via event log.", + "category": "dispatch", + "invariants": [ + "Event log shows [DISPATCH-ROUTE] with correct mode", + "Event log shows [SEND] for each worker", + "Event log shows [DISPATCH] with worker names" + ], + "steps": [ + { "action": "sendToGroup", "text": "Add a one-line code comment to the top of README.md explaining what PolyPilot is. Keep it simple." }, + { "action": "wait", "ms": 10000, "note": "Wait for orchestrator to plan and dispatch" }, + { "action": "assertEventLog", "pattern": "\\[DISPATCH-ROUTE\\].*Orchestrator", "expected": true }, + { "action": "assertEventLog", "pattern": "\\[DISPATCH\\].*Dispatching", "expected": true }, + { "action": "assertEventLog", "pattern": "\\[SEND\\].*worker", "expected": true } + ] + }, + { + "id": "dispatch-stagger-delay", + "name": "Workers dispatched with stagger delay", + "description": "Verify workers are dispatched approximately 1 second apart, not all at once.", + "category": "dispatch", + "invariants": [ + "Worker [SEND] timestamps are ~1s apart" + ], + "steps": [ + { "action": "shellCheck", "command": "grep '\\[SEND\\].*worker' ~/.polypilot/event-diagnostics.log | tail -3", "note": "Verify timestamps show staggered dispatch" } + ] + }, + { + "id": "pending-orchestration-saved-during-dispatch", + "name": "PendingOrchestration file created during dispatch", + "description": "During worker execution, verify pending-orchestration.json exists with correct data.", + "category": "dispatch", + "invariants": [ + "File exists at ~/.polypilot/pending-orchestration.json", + "Contains correct group ID and worker names" + ], + "steps": [ + { "action": "shellCheck", "command": "cat ~/.polypilot/pending-orchestration.json 2>/dev/null | head -5", "expect": { "contains": "worker" } } + ] + }, + { + "id": "orchestrator-plans-with-worker-blocks", + "name": "Orchestrator planning includes @worker blocks", + "description": "Verify the orchestrator's response contains @worker task assignments.", + "category": "dispatch", + "invariants": [ + "Event log shows 'Early dispatch: @worker blocks detected'", + "Assignments parsed from orchestrator response" + ], + "steps": [ + { "action": "assertEventLog", "pattern": "Early dispatch.*@worker", "expected": true } + ] + }, + { + "id": "workers-process-independently", + "name": "Workers process their tasks independently", + "description": "Monitor workers during execution. Each should cycle through TurnStart/TurnEnd events independently.", + "category": "worker-execution", + "invariants": [ + "Each worker has AssistantTurnStartEvent and AssistantTurnEndEvent in event log", + "Workers process concurrently" + ], + "steps": [ + { "action": "waitForEventPattern", "pattern": "\\[EVT\\].*worker.*TurnStart", "timeout": 30, "note": "Wait for workers to start processing" }, + { "action": "assertEventLog", "pattern": "\\[EVT\\].*worker.*TurnEndEvent", "expected": true, "note": "Workers cycling through events" } + ] + }, + { + "id": "worker-tool-use-completes", + "name": "Workers using tools complete without getting stuck", + "description": "Workers often use tools (grep, file reads). Verify tool execution completes.", + "category": "worker-execution", + "invariants": [ + "Workers complete (SessionIdleEvent or CompleteResponse)", + "No workers stuck with HasUsedToolsThisTurn=true indefinitely" + ], + "steps": [ + { "action": "waitForEventPattern", "pattern": "\\[COMPLETE\\].*worker", "timeout": 600, "note": "Wait for at least one worker to complete" } + ] + }, + { + "id": "worker-subagent-idle-defer", + "name": "Worker IDLE-DEFER fires and resolves for subagents", + "description": "If a worker dispatches sub-agents (task tool), IDLE-DEFER should fire and eventually resolve.", + "category": "worker-execution", + "invariants": [ + "If [IDLE-DEFER] appears, it's eventually followed by [COMPLETE]", + "Worker doesn't stay stuck in IDLE-DEFER forever" + ], + "steps": [ + { "action": "note", "text": "This scenario is best verified by checking event log after orchestration completes" }, + { "action": "shellCheck", "command": "defer_count=$(grep -c '\\[IDLE-DEFER\\]' ~/.polypilot/event-diagnostics.log); complete_count=$(grep -c '\\[COMPLETE\\]' ~/.polypilot/event-diagnostics.log); echo \"defers=$defer_count completes=$complete_count\"" } + ] + }, + { + "id": "worker-timeout-handled-gracefully", + "name": "Worker timeout produces error result, doesn't block orchestration", + "description": "If a worker times out (>10 min), the orchestration should still continue with an error result.", + "category": "worker-execution", + "invariants": [ + "Orchestration doesn't hang forever waiting for a dead worker", + "Synthesis includes failure message for timed-out worker" + ], + "steps": [ + { "action": "note", "text": "Difficult to force a timeout in live testing. Verify by checking event log for any [WATCHDOG] entries on workers and confirming orchestration still completed." } + ] + }, + { + "id": "worker-error-doesnt-block-orchestration", + "name": "One worker error doesn't block the whole orchestration", + "description": "If one worker fails, the other worker should still complete and synthesis should include the failure.", + "category": "worker-execution", + "invariants": [ + "Orchestration completes even if one worker errored", + "Synthesis prompt includes error message for failed worker" + ], + "steps": [ + { "action": "note", "text": "Verify from event log: if any [ERROR] entries exist for workers, check that [DISPATCH] Collected results still fires" } + ] + }, + { + "id": "synthesis-collects-all-worker-results", + "name": "Synthesis includes all worker responses", + "description": "After workers complete, verify the synthesis prompt is sent to the orchestrator with all results.", + "category": "synthesis", + "invariants": [ + "Event log shows 'Collected N/N worker results for synthesis'", + "All workers accounted for" + ], + "steps": [ + { "action": "waitForEventPattern", "pattern": "Collected.*worker results for synthesis", "timeout": 600 }, + { "action": "assertEventLog", "pattern": "Collected.*worker results", "expected": true } + ] + }, + { + "id": "synthesis-completes-orchestrator", + "name": "Orchestrator completes after synthesis", + "description": "After synthesis prompt is sent, the orchestrator should process it and complete.", + "category": "synthesis", + "invariants": [ + "Orchestrator shows [SEND] for synthesis prompt", + "Orchestrator shows [COMPLETE] after synthesis", + "IsProcessing = false on orchestrator" + ], + "steps": [ + { "action": "waitForEventPattern", "pattern": "\\[COMPLETE\\].*orchestrator", "timeout": 120, "note": "Wait for synthesis completion" }, + { "action": "assertProcessing", "sessionContains": "orchestrator", "expected": false } + ] + }, + { + "id": "pending-orchestration-cleared-after-synthesis", + "name": "PendingOrchestration file cleared after synthesis", + "description": "After orchestration completes, pending-orchestration.json should be empty or deleted.", + "category": "synthesis", + "invariants": [ + "File is empty, contains '{}', or doesn't exist" + ], + "steps": [ + { "action": "shellCheck", "command": "cat ~/.polypilot/pending-orchestration.json 2>/dev/null || echo 'file not found'", "expect": { "oneOf": ["file not found", "{}", "null", ""] } } + ] + }, + { + "id": "all-workers-idle-after-orchestration", + "name": "All workers have IsProcessing=false after orchestration", + "description": "After orchestration completes, verify every worker session is idle.", + "category": "synthesis", + "invariants": [ + "Every worker session has IsProcessing = false", + "No stop buttons visible for any worker" + ], + "steps": [ + { "action": "evaluate", "script": "document.querySelectorAll('.send-btn.stop-btn, .card-stop-btn').length", "expect": 0, "note": "No sessions processing" } + ] + }, + { + "id": "no-orphaned-workers-after-orchestration", + "name": "No orphaned worker state after orchestration", + "description": "After orchestration, no workers should be stuck in a partial processing state.", + "category": "synthesis", + "invariants": [ + "No worker [SEND] without corresponding [COMPLETE]", + "No stuck TCS (TaskCompletionSource)" + ], + "steps": [ + { "action": "shellCheck", "command": "for w in 1 2; do sends=$(grep -c \"\\[SEND\\].*worker-$w\" ~/.polypilot/event-diagnostics.log); completes=$(grep -c \"\\[COMPLETE\\].*worker-$w\" ~/.polypilot/event-diagnostics.log); echo \"worker-$w: sends=$sends completes=$completes\"; done" } + ] + }, + { + "id": "second-orchestration-works", + "name": "Second orchestration to same group completes", + "description": "After one orchestration completes, send a new prompt to the same group. Verify full lifecycle works again.", + "category": "subsequent-orchestrations", + "invariants": [ + "Second dispatch fires", + "Workers process again", + "Synthesis completes again", + "IsProcessing = false on all sessions" + ], + "steps": [ + { "action": "sendToGroup", "text": "Now add a comment to the bottom of README.md saying 'Built with GitHub Copilot'. Keep it simple." }, + { "action": "waitForEventPattern", "pattern": "\\[DISPATCH\\].*Dispatching", "timeout": 60, "note": "Wait for second dispatch" }, + { "action": "waitForEventPattern", "pattern": "\\[COMPLETE\\].*orchestrator", "timeout": 600, "note": "Wait for second orchestration to complete" }, + { "action": "evaluate", "script": "document.querySelectorAll('.send-btn.stop-btn, .card-stop-btn').length", "expect": 0 } + ] + }, + { + "id": "third-orchestration-no-degradation", + "name": "Third orchestration shows no accumulated state issues", + "description": "A third consecutive orchestration should work as cleanly as the first.", + "category": "subsequent-orchestrations", + "invariants": [ + "No accumulated stuck state from previous orchestrations", + "Event log clean for this orchestration" + ], + "steps": [ + { "action": "sendToGroup", "text": "Check if README.md has any typos and fix them. If none, just say so." }, + { "action": "waitForEventPattern", "pattern": "\\[COMPLETE\\].*orchestrator", "timeout": 600 }, + { "action": "evaluate", "script": "document.querySelectorAll('.send-btn.stop-btn, .card-stop-btn').length", "expect": 0 } + ] + }, + { + "id": "orchestration-after-worker-error-works", + "name": "Orchestration works after a previous worker error", + "description": "If a previous orchestration had a worker error, the next one should still work.", + "category": "subsequent-orchestrations", + "invariants": [ + "All workers dispatch correctly", + "No lingering error state from previous run" + ], + "steps": [ + { "action": "note", "text": "This is verified by the second/third orchestration tests above. If any had errors, this confirms recovery." } + ] + }, + { + "id": "follow-up-during-dispatch-queued", + "name": "Follow-up message during orchestration is queued, not steered", + "description": "Send a follow-up while workers are busy. It should be queued (not steer the orchestrator). PR #375 fix.", + "category": "follow-up-steering", + "invariants": [ + "Event log shows QUEUED, not STEER", + "Orchestration not disrupted by follow-up", + "Follow-up processed after current orchestration completes" + ], + "steps": [ + { "action": "sendToGroup", "text": "Review all the files in PolyPilot/Models/ directory." }, + { "action": "wait", "ms": 5000, "note": "Workers start processing" }, + { "action": "sendToGroup", "text": "Also check the Services directory." }, + { "action": "assertEventLog", "pattern": "QUEUED", "expected": true, "note": "Follow-up queued" }, + { "action": "waitForEventPattern", "pattern": "\\[COMPLETE\\].*orchestrator", "timeout": 600 } + ] + }, + { + "id": "follow-up-after-completion-works", + "name": "Follow-up after orchestration completes starts new orchestration", + "description": "Send a message after orchestration is fully complete. It should start a new orchestration.", + "category": "follow-up-steering", + "invariants": [ + "New [DISPATCH-ROUTE] appears", + "New orchestration lifecycle begins" + ], + "steps": [ + { "action": "assertProcessing", "sessionContains": "orchestrator", "expected": false, "note": "Verify idle first" }, + { "action": "sendToGroup", "text": "Summarize what changes were made across all previous tasks." }, + { "action": "waitForEventPattern", "pattern": "\\[DISPATCH-ROUTE\\]", "timeout": 60 }, + { "action": "waitForEventPattern", "pattern": "\\[COMPLETE\\].*orchestrator", "timeout": 600 } + ] + }, + { + "id": "abort-orchestration-cleans-up", + "name": "Aborting orchestration cleans up all sessions", + "description": "Abort the orchestrator while workers are running. Verify clean state for all sessions.", + "category": "follow-up-steering", + "invariants": [ + "All sessions reach IsProcessing = false", + "No stuck workers", + "PendingOrchestration cleared" + ], + "steps": [ + { "action": "sendToGroup", "text": "Do a comprehensive review of every single file in the repository." }, + { "action": "wait", "ms": 10000, "note": "Workers start processing" }, + { "action": "clickAbort", "sessionContains": "orchestrator" }, + { "action": "wait", "ms": 10000 }, + { "action": "evaluate", "script": "document.querySelectorAll('.send-btn.stop-btn, .card-stop-btn').length", "expect": 0, "note": "All sessions idle after abort" } + ] + }, + { + "id": "group-survives-relaunch", + "name": "Multi-agent group persists across app relaunch", + "description": "Relaunch the app and verify the multi-agent group is restored with all sessions.", + "category": "restart-recovery", + "invariants": [ + "Group appears in sidebar after relaunch", + "Orchestrator session restored", + "Worker sessions restored", + "All sessions idle (not stuck processing)" + ], + "steps": [ + { "action": "captureGroupState", "capture": "beforeRelaunch" }, + { "action": "relaunchApp" }, + { "action": "waitForAgent", "timeout": 120 }, + { "action": "assertSessionExists", "nameContains": "orchestrator" }, + { "action": "assertSessionExists", "nameContains": "worker" }, + { "action": "evaluate", "script": "document.querySelectorAll('.send-btn.stop-btn, .card-stop-btn').length", "expect": 0 } + ] + }, + { + "id": "no-phantom-worker-sessions", + "name": "No phantom or duplicate worker sessions after relaunch", + "description": "After relaunch, verify no duplicate worker sessions or phantom '(resumed)' entries.", + "category": "restart-recovery", + "invariants": [ + "No duplicate session names", + "No '(resumed)' in any session name" + ], + "steps": [ + { "action": "evaluate", "script": "Array.from(document.querySelectorAll('.session-name, .session-title')).filter(e => e.textContent.includes('resumed')).length", "expect": 0 } + ] + }, + { + "id": "pending-orchestration-recovery", + "name": "PendingOrchestration recovers after relaunch during dispatch", + "description": "If the app relaunches while workers are processing, PendingOrchestration should enable recovery.", + "category": "restart-recovery", + "invariants": [ + "If pending-orchestration.json existed, recovery was attempted", + "Workers either completed or were collected with partial results" + ], + "steps": [ + { "action": "note", "text": "Best tested by: 1) Start orchestration, 2) Immediately relaunch, 3) Check event log for 'Resuming pending orchestration'" }, + { "action": "shellCheck", "command": "grep 'Resuming pending orchestration\\|pending orchestration' ~/.polypilot/event-diagnostics.log | tail -3 || echo 'no pending recovery'" } + ] + }, + { + "id": "post-relaunch-new-orchestration-works", + "name": "New orchestration works after relaunch", + "description": "After relaunching and restoring the group, send a new prompt. Verify full lifecycle works.", + "category": "restart-recovery", + "invariants": [ + "New dispatch fires", + "Workers process and complete", + "Synthesis completes" + ], + "steps": [ + { "action": "sendToGroup", "text": "What is in the README.md file? Summarize it briefly." }, + { "action": "waitForEventPattern", "pattern": "\\[DISPATCH\\]", "timeout": 60 }, + { "action": "waitForEventPattern", "pattern": "\\[COMPLETE\\].*orchestrator", "timeout": 600 }, + { "action": "evaluate", "script": "document.querySelectorAll('.send-btn.stop-btn, .card-stop-btn').length", "expect": 0 } + ] + }, + { + "id": "workers-not-stuck-after-relaunch", + "name": "No workers stuck in IsProcessing after relaunch", + "description": "After relaunch, verify no worker sessions are stuck processing.", + "category": "restart-recovery", + "invariants": [ + "All workers have IsProcessing = false within 30s of relaunch", + "No [WATCHDOG] kill entries needed" + ], + "steps": [ + { "action": "wait", "ms": 30000, "note": "Wait for any watchdog cleanup" }, + { "action": "evaluate", "script": "document.querySelectorAll('.send-btn.stop-btn, .card-stop-btn').length", "expect": 0 } + ] + }, + { + "id": "event-log-dispatch-complete-pairs", + "name": "Every [DISPATCH] has completion in event log", + "description": "Audit the event log: every worker dispatch should have a completion.", + "category": "state-hygiene", + "invariants": [ + "Number of worker [SEND] ≤ worker [COMPLETE] + [ABORT]", + "No orphaned dispatches" + ], + "steps": [ + { "action": "shellCheck", "command": "sends=$(grep -c '\\[SEND\\].*worker' ~/.polypilot/event-diagnostics.log); completes=$(grep -c '\\[COMPLETE\\].*worker\\|\\[ABORT\\].*worker' ~/.polypilot/event-diagnostics.log); echo \"worker sends=$sends completions=$completes\"" } + ] + }, + { + "id": "no-stuck-tcs-after-orchestration", + "name": "No unresolved TaskCompletionSources after orchestration", + "description": "After orchestration, verify no TCS objects are waiting for completion.", + "category": "state-hygiene", + "invariants": [ + "All sessions idle", + "No pending awaits blocking orchestrator" + ], + "steps": [ + { "action": "assertProcessing", "sessionContains": "orchestrator", "expected": false }, + { "action": "note", "text": "TCS state is internal — best verified by confirming orchestrator can accept new messages" }, + { "action": "sendToGroup", "text": "Say 'TCS test OK'." }, + { "action": "waitForEventPattern", "pattern": "\\[COMPLETE\\].*orchestrator", "timeout": 120 } + ] + }, + { + "id": "watchdog-doesnt-kill-active-workers", + "name": "Watchdog doesn't kill workers that are actively processing", + "description": "During a long orchestration, verify the watchdog doesn't prematurely kill active workers.", + "category": "state-hygiene", + "invariants": [ + "No [WATCHDOG].*kill entries for workers with recent events", + "Watchdog Case B defers correctly when events.jsonl is fresh" + ], + "steps": [ + { "action": "shellCheck", "command": "grep '\\[WATCHDOG\\].*worker.*kill\\|\\[WATCHDOG\\].*worker.*stuck' ~/.polypilot/event-diagnostics.log | wc -l | tr -d ' '", "expect": "0" } + ] + }, + { + "id": "idle-defer-resolves-correctly", + "name": "IDLE-DEFER fires and resolves without leaving sessions stuck", + "description": "If IDLE-DEFER fires during worker execution, it should eventually resolve to completion.", + "category": "state-hygiene", + "invariants": [ + "Every [IDLE-DEFER] is eventually followed by [COMPLETE] for the same session", + "No sessions left in permanent IDLE-DEFER state" + ], + "steps": [ + { "action": "shellCheck", "command": "grep '\\[IDLE-DEFER\\]' ~/.polypilot/event-diagnostics.log | wc -l | tr -d ' '", "note": "Count IDLE-DEFER entries" }, + { "action": "evaluate", "script": "document.querySelectorAll('.send-btn.stop-btn, .card-stop-btn').length", "expect": 0, "note": "No sessions stuck" } + ] + }, + { + "id": "reconnect-during-orchestration-no-deadlock", + "name": "Reconnect during orchestration doesn't cause deadlock", + "description": "If a reconnect happens mid-orchestration, TCS should be cancelled and orchestration should not deadlock.", + "category": "state-hygiene", + "invariants": [ + "No permanent deadlock after reconnect", + "Orchestration either recovers or fails cleanly" + ], + "steps": [ + { "action": "note", "text": "Best tested by triggering a reconnect mid-orchestration (e.g., server restart). In live testing, verify no sessions are stuck after any reconnect events in the log." }, + { "action": "shellCheck", "command": "grep '\\[RECONNECT\\]' ~/.polypilot/event-diagnostics.log | tail -3 || echo 'no reconnects'" } + ] + }, + { + "id": "delete-group-cleans-everything", + "name": "Deleting multi-agent group removes all sessions cleanly", + "description": "Delete the test multi-agent group. Verify all sessions are removed with no orphans.", + "category": "state-hygiene", + "invariants": [ + "All group sessions removed from sidebar", + "No orphaned worker sessions", + "PendingOrchestration cleared", + "Organization state updated" + ], + "steps": [ + { "action": "deleteGroup", "nameContains": "Implement" }, + { "action": "wait", "ms": 3000 }, + { "action": "assertSessionNotExists", "nameContains": "orchestrator" }, + { "action": "assertSessionNotExists", "nameContains": "worker" }, + { "action": "shellCheck", "command": "cat ~/.polypilot/pending-orchestration.json 2>/dev/null || echo 'clean'", "expect": { "oneOf": ["clean", "{}", "null", ""] } } + ] + } + ] +} diff --git a/PolyPilot.Tests/Scenarios/single-session-scenarios.json b/PolyPilot.Tests/Scenarios/single-session-scenarios.json new file mode 100644 index 0000000000..da5567f728 --- /dev/null +++ b/PolyPilot.Tests/Scenarios/single-session-scenarios.json @@ -0,0 +1,633 @@ +{ + "description": "Single-session reliability scenarios for PolyPilot. Tests cover every way to interact with a single session: sending messages, tool use, abort, rapid input, session lifecycle, restart recovery, state hygiene, and edge cases. Execute against a running PolyPilot instance using MauiDevFlow CDP commands.", + "prerequisites": { + "build": "cd PolyPilot && ./relaunch.sh", + "waitForAgent": "maui devflow MAUI status --agent-port 9223", + "initialMode": "Persistent", + "notes": "All test sessions MUST point to the PolyPilot repository (PureWeen-PolyPilot). Never use other repos. Never commit, push, or create GitHub issues/PRs during testing. The DevFlow agent port is 9223 (bypass broker with --agent-port 9223). CDP click events require PointerEvent dispatch for Blazor compatibility." + }, + "cdpHelpers": { + "clickBlazorButton": "btn.dispatchEvent(new PointerEvent('pointerdown', {bubbles: true, clientX: x, clientY: y})); btn.dispatchEvent(new PointerEvent('pointerup', {bubbles: true, clientX: x, clientY: y})); btn.dispatchEvent(new MouseEvent('click', {bubbles: true, clientX: x, clientY: y}));", + "setInputValue": "const el = document.getElementById(inputId); el.value = text; el.dispatchEvent(new Event('input', {bubbles: true}));", + "sendMessage": "1) Set textarea value via getElementValue-compatible path, 2) PointerEvent click on send-btn[title='Send message']", + "checkProcessing": "document.querySelectorAll('.send-btn.stop-btn').length > 0 means session is processing", + "checkEventLog": "grep 'SESSION_NAME' ~/.polypilot/event-diagnostics.log | tail -N" + }, + "scenarios": [ + { + "id": "send-simple-message", + "name": "Simple message sends and receives a response", + "description": "Send 'What is 2+2? Reply with just the number.' and verify the response arrives, IsProcessing clears, and event log shows clean [SEND]→[COMPLETE].", + "category": "basic-message-flow", + "invariants": [ + "Response text appears in chat history", + "IsProcessing = false after completion", + "Event log shows [SEND] followed by [COMPLETE]", + "No [ERROR] or [WATCHDOG] entries for this session", + "Response length > 0" + ], + "steps": [ + { "action": "createSession", "name": "TestSimpleSend", "repo": "PureWeen-PolyPilot" }, + { "action": "selectSession", "name": "TestSimpleSend" }, + { "action": "sendMessage", "text": "What is 2+2? Reply with just the number." }, + { "action": "waitForIdle", "timeout": 60 }, + { "action": "assertEventLog", "session": "TestSimpleSend", "pattern": "\\[SEND\\].*\\[COMPLETE\\]", "expected": true }, + { "action": "assertEventLog", "session": "TestSimpleSend", "pattern": "\\[ERROR\\]|\\[WATCHDOG\\]", "expected": false }, + { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false }, + { "action": "assertResponseNotEmpty", "session": "TestSimpleSend" } + ] + }, + { + "id": "send-long-message", + "name": "Long multi-paragraph prompt processes correctly", + "description": "Send a multi-paragraph prompt to verify large inputs don't break processing.", + "category": "basic-message-flow", + "invariants": [ + "Response arrives for large input", + "IsProcessing = false after completion", + "No truncation or corruption of input" + ], + "steps": [ + { "action": "selectSession", "name": "TestSimpleSend" }, + { "action": "sendMessage", "text": "Please analyze the following requirements and provide your thoughts:\n\n1. The system must handle concurrent requests\n2. It must maintain session state across restarts\n3. Error recovery should be automatic\n4. All state transitions must be logged\n\nWhat design patterns would you recommend?" }, + { "action": "waitForIdle", "timeout": 120 }, + { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false }, + { "action": "assertEventLog", "session": "TestSimpleSend", "pattern": "\\[COMPLETE\\]", "expected": true } + ] + }, + { + "id": "response-renders-markdown", + "name": "Response renders markdown as formatted HTML", + "description": "Send a prompt requesting markdown output and verify it renders as HTML.", + "category": "basic-message-flow", + "invariants": [ + "Response contains HTML elements (not raw markdown)", + "Code blocks, headers, or lists render correctly" + ], + "steps": [ + { "action": "selectSession", "name": "TestSimpleSend" }, + { "action": "sendMessage", "text": "Write a short bullet list of 3 items about testing." }, + { "action": "waitForIdle", "timeout": 60 }, + { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false }, + { "action": "evaluate", "script": "document.querySelectorAll('.message-content li, .message-content ul').length > 0", "expect": true, "note": "Markdown list rendered as HTML" } + ] + }, + { + "id": "response-contains-code-block", + "name": "Code block renders with syntax highlighting", + "description": "Request code output and verify it renders in a code block.", + "category": "basic-message-flow", + "invariants": [ + "Response contains a
 or  element",
+        "Code content is present"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "sendMessage", "text": "Write a one-line Python hello world program. Just the code, no explanation." },
+        { "action": "waitForIdle", "timeout": 60 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false },
+        { "action": "evaluate", "script": "document.querySelectorAll('.message-content pre, .message-content code').length > 0", "expect": true, "note": "Code block rendered" }
+      ]
+    },
+    {
+      "id": "send-empty-validation",
+      "name": "Empty send is prevented",
+      "description": "Attempt to send an empty message and verify it's blocked.",
+      "category": "basic-message-flow",
+      "invariants": [
+        "No [SEND] entry in event log for empty message",
+        "Session remains idle"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "captureEventLogPosition", "capture": "beforeEmpty" },
+        { "action": "sendMessage", "text": "" },
+        { "action": "wait", "ms": 2000 },
+        { "action": "assertNoNewEvents", "session": "TestSimpleSend", "since": "beforeEmpty", "pattern": "\\[SEND\\]" }
+      ]
+    },
+    {
+      "id": "tool-file-listing",
+      "name": "Tool execution: file listing completes",
+      "description": "Send a file listing request that requires tool use. Verify tools execute and response contains expected files.",
+      "category": "tool-execution",
+      "invariants": [
+        "Tool execution events appear in event log",
+        "Response mentions expected files (README.md, PolyPilot.slnx)",
+        "IsProcessing = false after completion",
+        "No [WATCHDOG] entries"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "sendMessage", "text": "List just the file names in the root directory of this repository. Be brief." },
+        { "action": "waitForIdle", "timeout": 120 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false },
+        { "action": "assertEventLog", "session": "TestSimpleSend", "pattern": "\\[COMPLETE\\]", "expected": true },
+        { "action": "assertEventLog", "session": "TestSimpleSend", "pattern": "\\[WATCHDOG\\]", "expected": false }
+      ]
+    },
+    {
+      "id": "tool-file-read",
+      "name": "Tool execution: file read completes",
+      "description": "Request reading a specific file to verify file read tools work.",
+      "category": "tool-execution",
+      "invariants": [
+        "Response contains content from the file",
+        "IsProcessing = false after completion"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "sendMessage", "text": "Read the first line of README.md in this repo and tell me what it says. Be brief." },
+        { "action": "waitForIdle", "timeout": 120 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false },
+        { "action": "assertEventLog", "session": "TestSimpleSend", "pattern": "\\[COMPLETE\\]", "expected": true }
+      ]
+    },
+    {
+      "id": "tool-multiple-rounds",
+      "name": "Multi-round tool use completes without getting stuck",
+      "description": "Send a request requiring multiple tool calls in sequence. Verify all rounds complete.",
+      "category": "tool-execution",
+      "invariants": [
+        "Multiple TurnStart/TurnEnd cycles in event log",
+        "Session completes with [COMPLETE]",
+        "IsProcessing = false after all rounds"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "sendMessage", "text": "Find the PolyPilot.slnx file and tell me how many project references it contains. Be brief." },
+        { "action": "waitForIdle", "timeout": 180 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false },
+        { "action": "assertEventLog", "session": "TestSimpleSend", "pattern": "\\[COMPLETE\\]", "expected": true },
+        { "action": "assertEventLog", "session": "TestSimpleSend", "pattern": "\\[WATCHDOG\\]", "expected": false }
+      ]
+    },
+    {
+      "id": "tool-execution-shows-progress",
+      "name": "Processing indicator shows during tool use",
+      "description": "During tool execution, verify the UI shows working/tool call indicators.",
+      "category": "tool-execution",
+      "invariants": [
+        "Stop button visible during processing",
+        "Processing indicator updates"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "sendMessage", "text": "Search the codebase for all files containing 'IsProcessing' and count them. Be brief." },
+        { "action": "wait", "ms": 5000, "note": "Allow time for tool execution to start" },
+        { "action": "evaluate", "script": "document.querySelectorAll('.send-btn.stop-btn').length", "expect": { "greaterThan": 0 }, "note": "Stop button visible means processing" },
+        { "action": "waitForIdle", "timeout": 180 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false }
+      ]
+    },
+    {
+      "id": "tool-long-running",
+      "name": "Long tool execution doesn't trigger false watchdog",
+      "description": "A prompt requiring extended tool use completes without watchdog interference.",
+      "category": "tool-execution",
+      "invariants": [
+        "No [WATCHDOG] kill entries in event log",
+        "Session completes naturally with [COMPLETE]"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "sendMessage", "text": "Count the total number of lines across all .cs files in the PolyPilot/Services/ directory. Be brief, just give me the count." },
+        { "action": "waitForIdle", "timeout": 300 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false },
+        { "action": "assertEventLog", "session": "TestSimpleSend", "pattern": "\\[WATCHDOG\\].*kill|\\[WATCHDOG\\].*stuck", "expected": false },
+        { "action": "assertEventLog", "session": "TestSimpleSend", "pattern": "\\[COMPLETE\\]", "expected": true }
+      ]
+    },
+    {
+      "id": "abort-during-thinking",
+      "name": "Abort during 'Thinking...' phase cleans up state",
+      "description": "Send a prompt, abort during the thinking phase. Verify IsProcessing clears and all companion fields reset.",
+      "category": "abort-cancel",
+      "invariants": [
+        "IsProcessing = false within 5 seconds of abort",
+        "No lingering spinner or 'Thinking...' indicator",
+        "Event log shows [ABORT]",
+        "Session accepts new input after abort"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "sendMessage", "text": "Write a very detailed 5000-word essay about software testing methodologies." },
+        { "action": "wait", "ms": 3000, "note": "Wait for thinking phase" },
+        { "action": "clickAbort", "session": "TestSimpleSend" },
+        { "action": "wait", "ms": 5000 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false },
+        { "action": "assertEventLog", "session": "TestSimpleSend", "pattern": "\\[ABORT\\]", "expected": true }
+      ]
+    },
+    {
+      "id": "abort-during-tool-use",
+      "name": "Abort during tool execution cleans up state",
+      "description": "Send a tool-using prompt, abort during tool execution. Verify clean state.",
+      "category": "abort-cancel",
+      "invariants": [
+        "IsProcessing = false after abort",
+        "ActiveToolCallCount = 0",
+        "Event log shows [ABORT]"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "sendMessage", "text": "Read every single .cs file in the repository and summarize each one in detail." },
+        { "action": "wait", "ms": 10000, "note": "Wait for tool execution to start" },
+        { "action": "clickAbort", "session": "TestSimpleSend" },
+        { "action": "wait", "ms": 5000 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false },
+        { "action": "assertEventLog", "session": "TestSimpleSend", "pattern": "\\[ABORT\\]", "expected": true }
+      ]
+    },
+    {
+      "id": "abort-then-send-new",
+      "name": "Session accepts new message after abort",
+      "description": "Abort a session, then immediately send a new prompt. Verify no deadlock.",
+      "category": "abort-cancel",
+      "invariants": [
+        "New message processes successfully after abort",
+        "IsProcessing = false after new message completes",
+        "No deadlock or stuck state"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "sendMessage", "text": "Write a very long detailed analysis of every design pattern ever created." },
+        { "action": "wait", "ms": 5000 },
+        { "action": "clickAbort", "session": "TestSimpleSend" },
+        { "action": "wait", "ms": 3000 },
+        { "action": "sendMessage", "text": "Say 'hello'. Just that one word." },
+        { "action": "waitForIdle", "timeout": 60 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false },
+        { "action": "assertEventLog", "session": "TestSimpleSend", "pattern": "\\[COMPLETE\\]", "expected": true }
+      ]
+    },
+    {
+      "id": "abort-when-idle-no-crash",
+      "name": "Abort on idle session doesn't crash",
+      "description": "Try to abort a session that's already idle. Verify no crash or error.",
+      "category": "abort-cancel",
+      "invariants": [
+        "No crash or error",
+        "Session remains idle and functional"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false },
+        { "action": "clickAbort", "session": "TestSimpleSend", "note": "Abort when already idle" },
+        { "action": "wait", "ms": 2000 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false },
+        { "action": "sendMessage", "text": "Say 'still working'. Just those two words." },
+        { "action": "waitForIdle", "timeout": 60 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false }
+      ]
+    },
+    {
+      "id": "back-to-back-simple",
+      "name": "Back-to-back messages both complete",
+      "description": "Send message 1, wait for completion, immediately send message 2. Both should complete.",
+      "category": "rapid-input",
+      "invariants": [
+        "Both messages get [COMPLETE] in event log",
+        "IsProcessing = false after both complete",
+        "No deadlock"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "sendMessage", "text": "Say 'first'. Just that word." },
+        { "action": "waitForIdle", "timeout": 60 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false },
+        { "action": "sendMessage", "text": "Say 'second'. Just that word." },
+        { "action": "waitForIdle", "timeout": 60 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false },
+        { "action": "assertEventLog", "session": "TestSimpleSend", "pattern": "\\[COMPLETE\\]", "count": { "atLeast": 2 } }
+      ]
+    },
+    {
+      "id": "send-while-processing",
+      "name": "Send during processing is handled correctly",
+      "description": "Send a second message while the first is still processing. Verify no crash, second message eventually processes.",
+      "category": "rapid-input",
+      "invariants": [
+        "No crash or exception",
+        "Both messages eventually complete or second is queued",
+        "Session reaches idle state"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "sendMessage", "text": "List every .cs file in the PolyPilot/Services directory." },
+        { "action": "wait", "ms": 2000, "note": "First message starts processing" },
+        { "action": "sendMessage", "text": "Say 'queued message received'." },
+        { "action": "waitForIdle", "timeout": 180 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false }
+      ]
+    },
+    {
+      "id": "rapid-three-messages",
+      "name": "Three rapid messages don't cause permanent stuck state",
+      "description": "Send 3 messages in quick succession. Verify session eventually reaches idle.",
+      "category": "rapid-input",
+      "invariants": [
+        "Session eventually reaches idle (IsProcessing = false)",
+        "No permanent stuck state",
+        "Event log has at least one [COMPLETE]"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "sendMessage", "text": "Say 'one'." },
+        { "action": "wait", "ms": 500 },
+        { "action": "sendMessage", "text": "Say 'two'." },
+        { "action": "wait", "ms": 500 },
+        { "action": "sendMessage", "text": "Say 'three'." },
+        { "action": "waitForIdle", "timeout": 180 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false }
+      ]
+    },
+    {
+      "id": "create-new-session",
+      "name": "Create new session appears in sidebar",
+      "description": "Create a new session and verify it appears in the session list.",
+      "category": "session-lifecycle",
+      "invariants": [
+        "New session appears in sidebar",
+        "Session is empty (no messages)",
+        "Session accepts input"
+      ],
+      "steps": [
+        { "action": "createSession", "name": "TestLifecycle", "repo": "PureWeen-PolyPilot" },
+        { "action": "assertSessionExists", "name": "TestLifecycle" },
+        { "action": "selectSession", "name": "TestLifecycle" },
+        { "action": "assertInputAvailable", "session": "TestLifecycle" }
+      ]
+    },
+    {
+      "id": "create-session-with-repo-context",
+      "name": "Session with repo context has tool access",
+      "description": "Create a session pointed at PolyPilot repo, verify it can use tools.",
+      "category": "session-lifecycle",
+      "invariants": [
+        "Session has repo context",
+        "Tool calls work (file listing succeeds)"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestLifecycle" },
+        { "action": "sendMessage", "text": "What repository am I in? Be brief." },
+        { "action": "waitForIdle", "timeout": 60 },
+        { "action": "assertProcessing", "session": "TestLifecycle", "expected": false }
+      ]
+    },
+    {
+      "id": "switch-between-sessions",
+      "name": "Switching sessions preserves each one's history",
+      "description": "Switch between two sessions and verify each retains its own chat history.",
+      "category": "session-lifecycle",
+      "invariants": [
+        "Each session shows its own history after switching",
+        "No cross-contamination between sessions"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "assertResponseNotEmpty", "session": "TestSimpleSend", "note": "First session has history" },
+        { "action": "selectSession", "name": "TestLifecycle" },
+        { "action": "wait", "ms": 1000 },
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "assertResponseNotEmpty", "session": "TestSimpleSend", "note": "History preserved after switch" }
+      ]
+    },
+    {
+      "id": "close-session-removed",
+      "name": "Closed session doesn't appear after removal",
+      "description": "Close a session and verify it's removed from the sidebar.",
+      "category": "session-lifecycle",
+      "invariants": [
+        "Session removed from sidebar",
+        "Not restored on next check"
+      ],
+      "steps": [
+        { "action": "createSession", "name": "TestClose", "repo": "PureWeen-PolyPilot" },
+        { "action": "assertSessionExists", "name": "TestClose" },
+        { "action": "closeSession", "name": "TestClose" },
+        { "action": "wait", "ms": 2000 },
+        { "action": "assertSessionNotExists", "name": "TestClose" }
+      ]
+    },
+    {
+      "id": "session-history-persists-to-disk",
+      "name": "Chat history persists across interactions",
+      "description": "Send a message, then verify the chat database has the message stored.",
+      "category": "session-lifecycle",
+      "invariants": [
+        "Message appears in chat history",
+        "History survives session switch and return"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "sendMessage", "text": "Say 'persistence test 12345'. Just those exact words." },
+        { "action": "waitForIdle", "timeout": 60 },
+        { "action": "selectSession", "name": "TestLifecycle" },
+        { "action": "wait", "ms": 1000 },
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "evaluate", "script": "document.querySelector('.expanded-view-container')?.textContent?.includes('persistence test 12345') || document.querySelector('.chat-messages')?.textContent?.includes('persistence test 12345')", "expect": true }
+      ]
+    },
+    {
+      "id": "session-survives-relaunch",
+      "name": "Session restores correctly after app relaunch",
+      "description": "Send a message, relaunch the app, verify session is restored with history.",
+      "category": "connection-recovery",
+      "invariants": [
+        "Session appears in sidebar after relaunch",
+        "Chat history is preserved",
+        "Session is idle (not stuck in processing)"
+      ],
+      "steps": [
+        { "action": "captureSessionList", "capture": "beforeRelaunch" },
+        { "action": "relaunchApp" },
+        { "action": "waitForAgent", "timeout": 120 },
+        { "action": "assertSessionExists", "name": "TestSimpleSend" },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false }
+      ]
+    },
+    {
+      "id": "idle-session-no-resumed-tag",
+      "name": "Idle session doesn't get '(resumed)' tag after relaunch",
+      "description": "After relaunch, idle sessions should not have phantom '(resumed)' markers.",
+      "category": "connection-recovery",
+      "invariants": [
+        "No session names contain '(resumed)' text",
+        "All sessions have their original names"
+      ],
+      "steps": [
+        { "action": "evaluate", "script": "Array.from(document.querySelectorAll('.session-name, .session-title')).filter(e => e.textContent.includes('resumed')).length", "expect": 0, "note": "No phantom (resumed) sessions" }
+      ]
+    },
+    {
+      "id": "no-phantom-sessions-after-relaunch",
+      "name": "No duplicate or phantom sessions after relaunch",
+      "description": "After relaunch, verify no duplicate sessions appeared.",
+      "category": "connection-recovery",
+      "invariants": [
+        "Session count matches pre-relaunch count (±1 for any cleaned up)",
+        "No duplicate session names"
+      ],
+      "steps": [
+        { "action": "evaluate", "script": "new Set(Array.from(document.querySelectorAll('.session-name')).map(e => e.textContent.trim())).size === document.querySelectorAll('.session-name').length", "expect": true, "note": "No duplicate session names" }
+      ]
+    },
+    {
+      "id": "post-relaunch-send-works",
+      "name": "Session is functional after relaunch",
+      "description": "After relaunch, send a message to a restored session and verify it completes.",
+      "category": "connection-recovery",
+      "invariants": [
+        "Message sends successfully",
+        "Response arrives",
+        "IsProcessing = false after completion"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "sendMessage", "text": "Say 'post relaunch OK'. Just those exact words." },
+        { "action": "waitForIdle", "timeout": 120 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false },
+        { "action": "assertEventLog", "session": "TestSimpleSend", "pattern": "\\[COMPLETE\\]", "expected": true }
+      ]
+    },
+    {
+      "id": "no-stuck-processing-after-relaunch",
+      "name": "No sessions stuck in IsProcessing after relaunch",
+      "description": "After relaunch, verify no sessions are stuck in processing state.",
+      "category": "connection-recovery",
+      "invariants": [
+        "Zero visible stop buttons (no sessions processing)",
+        "No 'Thinking...' indicators visible"
+      ],
+      "steps": [
+        { "action": "wait", "ms": 30000, "note": "Wait for watchdog to clear any stuck sessions" },
+        { "action": "evaluate", "script": "document.querySelectorAll('.send-btn.stop-btn, .card-stop-btn').length", "expect": 0, "note": "No sessions stuck processing" }
+      ]
+    },
+    {
+      "id": "isprocessing-false-when-idle",
+      "name": "IsProcessing is false on all idle sessions",
+      "description": "Check that every session in the list has IsProcessing = false when no messages are being sent.",
+      "category": "state-hygiene",
+      "invariants": [
+        "No stop buttons visible",
+        "No thinking/working indicators"
+      ],
+      "steps": [
+        { "action": "evaluate", "script": "document.querySelectorAll('.send-btn.stop-btn, .card-stop-btn').length", "expect": 0 },
+        { "action": "evaluate", "script": "document.querySelectorAll('.thinking-indicator, .processing-indicator').length", "expect": 0 }
+      ]
+    },
+    {
+      "id": "event-log-send-complete-pairs",
+      "name": "Every [SEND] has a matching [COMPLETE] or [ABORT]",
+      "description": "Audit the event diagnostics log. Every [SEND] entry should have a corresponding completion.",
+      "category": "state-hygiene",
+      "invariants": [
+        "Number of [SEND] entries ≤ [COMPLETE] + [ABORT] entries (for test sessions)",
+        "No orphaned [SEND] without resolution"
+      ],
+      "steps": [
+        { "action": "shellCheck", "command": "sends=$(grep -c '\\[SEND\\].*TestSimpleSend' ~/.polypilot/event-diagnostics.log); completes=$(grep -c '\\[COMPLETE\\].*TestSimpleSend\\|\\[ABORT\\].*TestSimpleSend' ~/.polypilot/event-diagnostics.log); echo \"sends=$sends completes=$completes\"; [ $sends -le $completes ]" }
+      ]
+    },
+    {
+      "id": "no-watchdog-on-normal-sessions",
+      "name": "Watchdog doesn't fire for normally-completing sessions",
+      "description": "Verify that sessions completing normally don't trigger watchdog entries.",
+      "category": "state-hygiene",
+      "invariants": [
+        "No [WATCHDOG] entries for test sessions",
+        "All completions are via [COMPLETE] or [ABORT]"
+      ],
+      "steps": [
+        { "action": "shellCheck", "command": "grep '\\[WATCHDOG\\].*TestSimpleSend' ~/.polypilot/event-diagnostics.log | grep -v 'deferred' | wc -l | tr -d ' '", "expect": "0" }
+      ]
+    },
+    {
+      "id": "processing-indicator-shows-and-clears",
+      "name": "Processing indicator shows during work and clears after",
+      "description": "Send a message, verify processing indicator appears, then clears after completion.",
+      "category": "state-hygiene",
+      "invariants": [
+        "Processing indicator visible during processing",
+        "Processing indicator gone after completion"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "sendMessage", "text": "List the top-level directories in this repository." },
+        { "action": "wait", "ms": 3000 },
+        { "action": "evaluate", "script": "document.querySelectorAll('.send-btn.stop-btn, .card-stop-btn').length > 0 || true", "expect": true, "note": "May or may not catch the processing window" },
+        { "action": "waitForIdle", "timeout": 120 },
+        { "action": "evaluate", "script": "document.querySelectorAll('.send-btn.stop-btn, .card-stop-btn').length", "expect": 0, "note": "Processing indicator cleared" }
+      ]
+    },
+    {
+      "id": "session-not-stuck-after-idle",
+      "name": "No phantom 'Thinking...' after session completes",
+      "description": "After a message completes, verify the session is truly idle with no lingering indicators.",
+      "category": "state-hygiene",
+      "invariants": [
+        "No thinking/working text visible for session",
+        "Input is enabled and accepts typing",
+        "Send button is not a stop button"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false },
+        { "action": "assertInputAvailable", "session": "TestSimpleSend" }
+      ]
+    },
+    {
+      "id": "send-special-characters",
+      "name": "Special characters in prompt don't cause issues",
+      "description": "Send a prompt with quotes, backticks, unicode, and special characters.",
+      "category": "edge-cases",
+      "invariants": [
+        "Message sends successfully",
+        "Response arrives",
+        "No crash or error"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "sendMessage", "text": "Echo this back: Hello 'world' \"test\" `code`  & © ñ 🎉" },
+        { "action": "waitForIdle", "timeout": 60 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false }
+      ]
+    },
+    {
+      "id": "model-switch-then-send",
+      "name": "Session works after model switch",
+      "description": "Switch the model for a session, then send a message. Verify it completes.",
+      "category": "edge-cases",
+      "invariants": [
+        "Model switch succeeds",
+        "Message sends and completes with new model",
+        "IsProcessing = false after completion"
+      ],
+      "steps": [
+        { "action": "selectSession", "name": "TestSimpleSend" },
+        { "action": "switchModel", "note": "Switch to a different available model via UI dropdown" },
+        { "action": "sendMessage", "text": "Say 'model switch OK'. Just those words." },
+        { "action": "waitForIdle", "timeout": 120 },
+        { "action": "assertProcessing", "session": "TestSimpleSend", "expected": false }
+      ]
+    },
+    {
+      "id": "stale-session-loads-ok",
+      "name": "Old sessions load without errors",
+      "description": "Verify that existing sessions from before testing load and don't cause errors.",
+      "category": "edge-cases",
+      "invariants": [
+        "Pre-existing sessions visible in sidebar",
+        "No error messages or crash indicators",
+        "App remains responsive"
+      ],
+      "steps": [
+        { "action": "evaluate", "script": "document.querySelectorAll('.session-list-item, .session-card').length > 0", "expect": true, "note": "Sessions loaded" },
+        { "action": "evaluate", "script": "document.querySelectorAll('.error-banner, .crash-indicator, .fatal-error').length", "expect": 0, "note": "No error banners" }
+      ]
+    }
+  ]
+}
diff --git a/PolyPilot.Tests/SessionPersistenceTests.cs b/PolyPilot.Tests/SessionPersistenceTests.cs
index d57c3e022e..6dcbb92836 100644
--- a/PolyPilot.Tests/SessionPersistenceTests.cs
+++ b/PolyPilot.Tests/SessionPersistenceTests.cs
@@ -1721,10 +1721,11 @@ public void Merge_NameCollision_DifferentGroupId_WithoutRecoveryMarker_KeepsPrev
     }
 
     [Fact]
-    public void Merge_NameCollision_SameGroupId_StillCreatesPrevious()
+    public void Merge_NameCollision_SameGroupId_WithoutRecoveryMarker_StillCreatesPrevious()
     {
-        // When the collision happens within the same group (e.g., reconnect replaced
-        // the session), the old entry should still be preserved as "(previous)".
+        // When the collision happens within the same group but there is NO explicit
+        // RecoveredFromSessionId marker, we cannot be sure the new session intentionally
+        // replaced the old one — preserve the old entry as "(previous)" to avoid data loss.
         var active = new List
         {
             new() { SessionId = "new-id", DisplayName = "MyWorker", Model = "m",
@@ -1743,6 +1744,31 @@ public void Merge_NameCollision_SameGroupId_StillCreatesPrevious()
         Assert.Equal("MyWorker (previous)", result[1].DisplayName);
     }
 
+    [Fact]
+    public void Merge_NameCollision_SameGroupId_WithExplicitRecovery_DropsPersistedEntry()
+    {
+        // Worker revival (empty response → fresh session) sets RecoveredFromSessionId on the
+        // new session to record that it explicitly replaced the old one.  The merge must
+        // drop the old persisted entry so it never appears as a "(previous)" phantom.
+        var active = new List
+        {
+            new() { SessionId = "new-id", DisplayName = "MyWorker", Model = "m",
+                     WorkingDirectory = "/w", GroupId = "same-group", RecoveredFromSessionId = "old-id" }
+        };
+        var persisted = new List
+        {
+            new() { SessionId = "old-id", DisplayName = "MyWorker", Model = "m",
+                     WorkingDirectory = "/w", GroupId = "same-group" }
+        };
+
+        var result = CopilotService.MergeSessionEntries(active, persisted, new HashSet(), new HashSet(), _ => true);
+
+        // Only the active (revival) entry should remain — no "(previous)" phantom
+        Assert.Single(result);
+        Assert.Equal("new-id", result[0].SessionId);
+        Assert.Equal("MyWorker", result[0].DisplayName);
+    }
+
     [Fact]
     public void Merge_NameCollision_NullGroupIds_StillCreatesPrevious()
     {
diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs
index 72281e1219..ca173ded5b 100644
--- a/PolyPilot/Services/CopilotService.Events.cs
+++ b/PolyPilot/Services/CopilotService.Events.cs
@@ -1640,29 +1640,9 @@ private void CompleteResponse(SessionState state, long? expectedGeneration = nul
         // Clear IsProcessing BEFORE completing the TCS — if the continuation runs
         // synchronously (e.g., in orchestrator reflection loops), the next SendPromptAsync
         // call must see IsProcessing=false or it throws "already processing".
-        state.CurrentResponse.Clear();
-        state.FlushedResponse.Clear();
-        ClearFlushedReplayDedup(state);
-        state.PendingReasoningMessages.Clear();
-        // Accumulate API time before clearing ProcessingStartedAt
-        if (state.Info.ProcessingStartedAt is { } started)
-        {
-            state.Info.TotalApiTimeSeconds += (DateTime.UtcNow - started).TotalSeconds;
-            state.Info.PremiumRequestsUsed++;
-        }
-        state.AllowTurnStartRearm = true; // session.idle/turn-end completion can be premature; allow one late TurnStart recovery
-        state.Info.IsProcessing = false;
-        state.Info.IsResumed = false;
-        Interlocked.Exchange(ref state.SendingFlag, 0); // Release atomic send lock
-        state.Info.ConsecutiveStuckCount = 0;
+        ClearProcessingState(state);
         // A successful completion proves the server is healthy — reset the
         // service-level watchdog timeout counter to prevent false recovery triggers.
-        Interlocked.Exchange(ref _consecutiveWatchdogTimeouts, 0);
-        state.Info.ProcessingStartedAt = null;
-        state.Info.ToolCallCount = 0;
-        state.Info.ProcessingPhase = 0;
-        state.Info.ClearPermissionDenials();
-        state.Info.LastUpdatedAt = DateTime.Now;
         state.ResponseCompletion?.TrySetResult(fullResponse);
         
         // Fire completion notification BEFORE OnStateChanged — this ensures
diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs
index 698ac072b2..94d2d77ef1 100644
--- a/PolyPilot/Services/CopilotService.Organization.cs
+++ b/PolyPilot/Services/CopilotService.Organization.cs
@@ -2411,6 +2411,10 @@ private async Task ExecuteWorkerAsync(string workerName, string ta
                     Debug($"[DISPATCH] Worker '{workerName}' returned empty — attempting fresh session revival");
                     if (_sessions.TryGetValue(workerName, out var deadState))
                     {
+                        // Capture the old session ID BEFORE orphaning — used to prevent
+                        // MergeSessionEntries from creating a "(previous)" phantom entry.
+                        var deadSessionId = deadState.Info.SessionId;
+
                         // Mark old state as orphaned so any lingering callbacks are no-ops
                         deadState.IsOrphaned = true;
                         try { await deadState.Session.DisposeAsync(); } catch { }
@@ -2438,11 +2442,18 @@ private async Task ExecuteWorkerAsync(string workerName, string ta
                             }
                             else
                             {
-                                // Commit SessionId only after TryUpdate succeeds — avoids
-                                // mutating shared Info on a path that might discard the state.
+                                // Commit SessionId + recovery marker only after TryUpdate succeeds.
+                                // RecoveredFromSessionId records that the new session explicitly replaced
+                                // the old one — MergeSessionEntries uses this to drop the old persisted
+                                // entry instead of renaming it "(previous)" (same-group revival case).
+                                deadState.Info.RecoveredFromSessionId ??= deadSessionId;
                                 deadState.Info.SessionId = freshSession.SessionId;
+                                // Add the old session ID to the closed set so SaveActiveSessionsToDisk's
+                                // merge logic drops it instead of creating a "(previous)" phantom entry.
+                                if (!string.IsNullOrEmpty(deadSessionId))
+                                    _closedSessionIds[deadSessionId] = 0;
                                 DisposePrematureIdleSignal(deadState);
-                                Debug($"[DISPATCH] Worker '{workerName}' revived with fresh session '{freshSession.SessionId}'");
+                                Debug($"[DISPATCH] Worker '{workerName}' revived with fresh session '{freshSession.SessionId}' (replaced '{deadSessionId}')");
                                 response = await SendPromptAndWaitAsync(workerName, workerPrompt, cancellationToken, originalPrompt: originalPrompt);
                             }
                         }
diff --git a/PolyPilot/Services/CopilotService.Persistence.cs b/PolyPilot/Services/CopilotService.Persistence.cs
index 5b8de35890..ab5e6d315e 100644
--- a/PolyPilot/Services/CopilotService.Persistence.cs
+++ b/PolyPilot/Services/CopilotService.Persistence.cs
@@ -242,15 +242,21 @@ private static bool CanSafelyDropSupersededGroupMoveEntry(
         ActiveSessionEntry existing,
         ActiveSessionEntry activeCounterpart)
     {
-        if (string.IsNullOrEmpty(existing.GroupId) ||
-            string.IsNullOrEmpty(activeCounterpart.GroupId) ||
-            string.Equals(existing.GroupId, activeCounterpart.GroupId, StringComparison.OrdinalIgnoreCase))
+        // Require an explicit recovery marker — without it we don't know if the new session
+        // is really a replacement for the old one (it might just be a same-named unrelated session).
+        if (string.IsNullOrEmpty(activeCounterpart.RecoveredFromSessionId) ||
+            !string.Equals(activeCounterpart.RecoveredFromSessionId, existing.SessionId, StringComparison.OrdinalIgnoreCase))
         {
             return false;
         }
 
-        return !string.IsNullOrEmpty(activeCounterpart.RecoveredFromSessionId) &&
-               string.Equals(activeCounterpart.RecoveredFromSessionId, existing.SessionId, StringComparison.OrdinalIgnoreCase);
+        // If RecoveredFromSessionId explicitly matches, allow drop regardless of group.
+        // Covers two cases:
+        //   1. Cross-group: scattered team reconstruction moved the session to a new group
+        //   2. Same-group: worker revival (empty response → fresh session within the same group)
+        // Both are safe to drop because the active entry explicitly records that it recovered
+        // history from the old one — there is no user-visible history loss.
+        return true;
     }
 
     /// 
diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs
index d89b23bcfd..e78068c2bd 100644
--- a/PolyPilot/Services/CopilotService.cs
+++ b/PolyPilot/Services/CopilotService.cs
@@ -757,6 +757,46 @@ private static void ClearDeferredIdleTracking(SessionState state, bool preserveC
         }
     }
 
+    /// 
+    /// Atomically clears ALL processing state on a session. This is the single source of truth
+    /// for what must be reset when a session stops processing. Every code path that sets
+    /// IsProcessing=false MUST call this method instead of manually clearing individual fields.
+    /// 
+    /// Historical context: 13+ PRs of fix/regression cycles were caused by 22 sites that set
+    /// IsProcessing=false but each forgot different companion fields. This method eliminates
+    /// that bug category entirely.
+    /// 
+    /// The session state to clear.
+    /// True to accumulate API time from ProcessingStartedAt (normal completion).
+    /// False for error/abort paths where timing is not meaningful.
+    private void ClearProcessingState(SessionState state, bool accumulateApiTime = true)
+    {
+        if (accumulateApiTime && state.Info.ProcessingStartedAt is { } started)
+        {
+            state.Info.TotalApiTimeSeconds += (DateTime.UtcNow - started).TotalSeconds;
+            state.Info.PremiumRequestsUsed++;
+        }
+
+        state.CurrentResponse.Clear();
+        state.FlushedResponse.Clear();
+        ClearFlushedReplayDedup(state);
+        state.PendingReasoningMessages.Clear();
+
+        state.Info.IsProcessing = false;
+        state.Info.IsResumed = false;
+        state.Info.ProcessingStartedAt = null;
+        state.Info.ToolCallCount = 0;
+        state.Info.ProcessingPhase = 0;
+        state.Info.ConsecutiveStuckCount = 0;
+        state.Info.ClearPermissionDenials();
+        state.Info.LastUpdatedAt = DateTime.Now;
+
+        Interlocked.Exchange(ref state.SendingFlag, 0);
+        Interlocked.Exchange(ref _consecutiveWatchdogTimeouts, 0);
+
+        state.AllowTurnStartRearm = true;
+    }
+
     /// Ping interval to prevent the headless server from killing idle sessions.
     /// The server has a ~35 minute idle timeout; pinging every 15 minutes keeps sessions alive.
     internal const int KeepalivePingIntervalSeconds = 15 * 60; // 15 minutes
@@ -3276,6 +3316,10 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis
                 if (session != null)
                 {
                     session.IsProcessing = false;
+                    session.IsResumed = false;
+                    session.ProcessingStartedAt = null;
+                    session.ToolCallCount = 0;
+                    session.ProcessingPhase = 0;
                     OnStateChanged?.Invoke();
                 }
                 throw;
@@ -3937,14 +3981,8 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis
                     state.HasUsedToolsThisTurn = false;
                     ClearDeferredIdleTracking(state);
                     Interlocked.Exchange(ref state.SuccessfulToolCountThisTurn, 0);
-                    state.Info.IsResumed = false;
+                    ClearProcessingState(state);
                     state.AllowTurnStartRearm = false; // Explicit reconnect failure is terminal for this turn
-                    state.Info.IsProcessing = false;
-                    if (state.Info.ProcessingStartedAt is { } rcStarted)
-                        state.Info.TotalApiTimeSeconds += (DateTime.UtcNow - rcStarted).TotalSeconds;
-                    state.Info.ProcessingStartedAt = null;
-                    state.Info.ToolCallCount = 0;
-                    state.Info.ProcessingPhase = 0;
                     OnStateChanged?.Invoke();
                     throw;
                 }
@@ -3961,14 +3999,8 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis
                 state.HasUsedToolsThisTurn = false;
                 ClearDeferredIdleTracking(state);
                 Interlocked.Exchange(ref state.SuccessfulToolCountThisTurn, 0);
-                state.Info.IsResumed = false;
+                ClearProcessingState(state);
                 state.AllowTurnStartRearm = false; // Explicit send failure is terminal for this turn
-                state.Info.IsProcessing = false;
-                if (state.Info.ProcessingStartedAt is { } saStarted)
-                    state.Info.TotalApiTimeSeconds += (DateTime.UtcNow - saStarted).TotalSeconds;
-                state.Info.ProcessingStartedAt = null;
-                state.Info.ToolCallCount = 0;
-                state.Info.ProcessingPhase = 0;
                 OnStateChanged?.Invoke();
                 throw;
             }
@@ -4057,11 +4089,7 @@ public async Task AbortSessionAsync(string sessionName, bool markAsInterrupted =
             // Optimistically clear processing state and queue
             if (_sessions.TryGetValue(sessionName, out var remoteState))
             {
-                remoteState.Info.IsProcessing = false;
-                remoteState.Info.IsResumed = false;
-                remoteState.Info.ProcessingStartedAt = null;
-                remoteState.Info.ToolCallCount = 0;
-                remoteState.Info.ProcessingPhase = 0;
+                ClearProcessingState(remoteState, accumulateApiTime: false);
                 remoteState.Info.MessageQueue.Clear();
                 OnStateChanged?.Invoke();
             }
@@ -4103,22 +4131,14 @@ public async Task AbortSessionAsync(string sessionName, bool markAsInterrupted =
         state.CurrentResponse.Clear();
 
         Debug($"[ABORT] '{sessionName}' user abort, clearing IsProcessing");
-        state.Info.IsProcessing = false;
+        ClearProcessingState(state);
         state.WasUserAborted = true; // Suppress EVT-REARM for in-flight TurnStart events after abort
         state.AllowTurnStartRearm = false; // Abort is explicit terminal intent — do not revive on late TurnStart replays
-        state.Info.IsResumed = false;
-        if (state.Info.ProcessingStartedAt is { } abortStarted)
-            state.Info.TotalApiTimeSeconds += (DateTime.UtcNow - abortStarted).TotalSeconds;
-        state.Info.ProcessingStartedAt = null;
-        state.Info.ToolCallCount = 0;
-        state.Info.ProcessingPhase = 0;
         Interlocked.Exchange(ref state.ActiveToolCallCount, 0);
         state.HasUsedToolsThisTurn = false;
         ClearDeferredIdleTracking(state);
         state.IsReconnectedSend = false; // INV-1: clear all per-turn flags on abort
         Interlocked.Exchange(ref state.SuccessfulToolCountThisTurn, 0);
-        // Release send lock — allows a subsequent SteerSessionAsync to acquire it immediately
-        Interlocked.Exchange(ref state.SendingFlag, 0);
         // Clear queued messages so they don't auto-send after abort
         state.Info.MessageQueue.Clear();
         _queuedImagePaths.TryRemove(sessionName, out _);
@@ -4128,9 +4148,6 @@ public async Task AbortSessionAsync(string sessionName, bool markAsInterrupted =
         CancelTurnEndFallback(state);
         CancelProcessingWatchdog(state);
         CancelToolHealthCheck(state);
-        state.FlushedResponse.Clear();
-        state.PendingReasoningMessages.Clear();
-        state.Info.ClearPermissionDenials(); // INV-1: clear on all termination paths
         // Complete TCS AFTER state cleanup (INV-O3)
         state.ResponseCompletion?.TrySetCanceled();
         // Fire completion notification so orchestrator loops are unblocked (INV-O4)

From 1a81df16d356fd70a9a73827c94363d64abc6fe1 Mon Sep 17 00:00:00 2001
From: Shane Neuville 
Date: Tue, 7 Apr 2026 01:18:31 -0500
Subject: [PATCH 2/8] fix: prevent DISPATCH-RECOVER false-positive on app
 reconnect
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the app restarts mid-orchestration, EnsureSessionConnectedAsync writes
session.resume to events.jsonl. This advances the file mtime during the
2000ms grace period, triggering a false 'premature idle detected' even
though the worker actually completed normally.

The fix: after detecting mtime changed during the grace period (and in
the polling loop), read the last event type from events.jsonl. If it's
session.resume or session.shutdown, the mtime change was caused by a
reconnect/terminal write — not by ongoing processing. Skip recovery.

Also adds a baseline for the polling loop: when a session.resume write
is detected, update stableMtime to the current value and keep polling
(don't trigger recovery on the reconnect write itself).

This eliminates the ~14s DISPATCH-RECOVER latency that appeared on every
normal worker completion after a relaunch, reducing worker overhead from
~28s to ~2s.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PolyPilot.Tests/MultiAgentRegressionTests.cs  | 27 +++++++++++++++--
 .../Services/CopilotService.Organization.cs   | 30 +++++++++++++++----
 2 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/PolyPilot.Tests/MultiAgentRegressionTests.cs b/PolyPilot.Tests/MultiAgentRegressionTests.cs
index 53c6031a2f..deb8cda740 100644
--- a/PolyPilot.Tests/MultiAgentRegressionTests.cs
+++ b/PolyPilot.Tests/MultiAgentRegressionTests.cs
@@ -2560,7 +2560,7 @@ public void RecoverFromPrematureIdleIfNeededAsync_SubscribesToOnSessionComplete(
         // Find the method definition (not a call site)
         var methodIdx = source.IndexOf("private async Task RecoverFromPrematureIdleIfNeededAsync", StringComparison.Ordinal);
         Assert.True(methodIdx >= 0, "RecoverFromPrematureIdleIfNeededAsync method definition must exist");
-        var methodBlock = source.Substring(methodIdx, Math.Min(8000, source.Length - methodIdx));
+        var methodBlock = source.Substring(methodIdx, Math.Min(12000, source.Length - methodIdx));
 
         Assert.Contains("OnSessionComplete +=", methodBlock);
         Assert.Contains("OnSessionComplete -=", methodBlock); // Must unsubscribe in finally
@@ -2692,7 +2692,7 @@ public void RecoverFromPrematureIdleIfNeededAsync_UsesEventsFileFreshness()
 
         var methodIdx = source.IndexOf("private async Task RecoverFromPrematureIdleIfNeededAsync", StringComparison.Ordinal);
         Assert.True(methodIdx >= 0, "RecoverFromPrematureIdleIfNeededAsync method definition must exist");
-        var methodBlock = source.Substring(methodIdx, Math.Min(8000, source.Length - methodIdx));
+        var methodBlock = source.Substring(methodIdx, Math.Min(12000, source.Length - methodIdx));
 
         Assert.Contains("IsEventsFileActive", methodBlock);
     }
@@ -2723,7 +2723,7 @@ public void RecoverFromPrematureIdleIfNeededAsync_LoopsOnRepeatedPrematureIdle()
 
         var methodIdx = source.IndexOf("private async Task RecoverFromPrematureIdleIfNeededAsync", StringComparison.Ordinal);
         Assert.True(methodIdx >= 0, "RecoverFromPrematureIdleIfNeededAsync method definition must exist");
-        var methodBlock = source.Substring(methodIdx, Math.Min(8000, source.Length - methodIdx));
+        var methodBlock = source.Substring(methodIdx, Math.Min(12000, source.Length - methodIdx));
 
         // Must have a loop for repeated premature idle rounds
         Assert.Contains("while (", methodBlock);
@@ -2803,6 +2803,27 @@ public void RecoverFromPrematureIdleIfNeededAsync_UsesMtimeComparisonForInitialD
         Assert.True(delayIdx < assignIdx, "Grace period delay must precede stable-mtime assignment");
     }
 
+    [Fact]
+    public void RecoverFromPrematureIdleIfNeededAsync_GracePeriodSkipsReconnectWrites()
+    {
+        // Structural: the grace period mtime check must guard against reconnect writes.
+        // When the app restarts mid-orchestration, EnsureSessionConnectedAsync writes
+        // session.resume to events.jsonl — this advances mtime but is NOT premature idle.
+        // The fix: after detecting mtime changed, check last event type and skip if it's
+        // session.resume or session.shutdown (terminal/reconnect events).
+        var orgPath = Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.Organization.cs");
+        var source = File.ReadAllText(orgPath);
+
+        var methodIdx = source.IndexOf("private async Task RecoverFromPrematureIdleIfNeededAsync", StringComparison.Ordinal);
+        Assert.True(methodIdx >= 0, "RecoverFromPrematureIdleIfNeededAsync method definition must exist");
+        var methodBlock = source.Substring(methodIdx, Math.Min(6000, source.Length - methodIdx));
+
+        // Must use GetLastEventType to check the content, not just raw mtime delta
+        Assert.Contains("GetLastEventType", methodBlock);
+        // Must skip session.resume writes (reconnect scenario)
+        Assert.Contains("session.resume", methodBlock);
+    }
+
     [Fact]
     public void RecoverFromPrematureIdleIfNeededAsync_PollingLoopUsesMtimeComparison()
     {
diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs
index 94d2d77ef1..59a5506247 100644
--- a/PolyPilot/Services/CopilotService.Organization.cs
+++ b/PolyPilot/Services/CopilotService.Organization.cs
@@ -2693,8 +2693,19 @@ bool WaitForPrematureIdleSignal()
             try { await Task.Delay(PrematureIdleEventsGracePeriodMs, cancellationToken); }
             catch (OperationCanceledException) { return initialResponse; }
             stableMtime = GetEventsFileMtime(state.Info.SessionId);
-            // Mtime changed → CLI wrote new events during grace period → genuine premature idle
-            detected = stableMtime.HasValue && stableMtime.Value > (mtimeBefore ?? DateTime.MinValue);
+            // Mtime changed → check it's from real processing, not a reconnect session.resume write.
+            // When the app restarts mid-orchestration, EnsureSessionConnectedAsync writes session.resume
+            // to events.jsonl which advances mtime — that is NOT premature idle.
+            if (stableMtime.HasValue && stableMtime.Value > (mtimeBefore ?? DateTime.MinValue))
+            {
+                var eventsPath = Path.Combine(SessionStatePath, state.Info.SessionId ?? "", "events.jsonl");
+                var lastEvent = GetLastEventType(eventsPath);
+                // session.resume = reconnect write (not new processing)
+                // session.shutdown = terminal
+                detected = lastEvent != null && lastEvent != "session.resume" && lastEvent != "session.shutdown";
+                if (!detected)
+                    Debug($"[DISPATCH-RECOVER] Skipping grace-period false positive: last event={lastEvent ?? "none"} is a reconnect/terminal write, not premature idle");
+            }
         }
 
         if (!detected)
@@ -2713,12 +2724,21 @@ bool WaitForPrematureIdleSignal()
                     break;
                 }
                 
-                // Check if events.jsonl mtime advanced past the stable baseline
+                // Check if events.jsonl mtime advanced past the stable baseline.
+                // Skip session.resume writes (from app reconnect) — they advance mtime but
+                // are not signs of premature idle.
                 var currentMtime = GetEventsFileMtime(state.Info.SessionId);
                 if (currentMtime.HasValue && currentMtime.Value > (stableMtime ?? DateTime.MinValue))
                 {
-                    detected = true;
-                    break;
+                    var eventsPath = Path.Combine(SessionStatePath, state.Info.SessionId ?? "", "events.jsonl");
+                    var lastEvent = GetLastEventType(eventsPath);
+                    if (lastEvent != null && lastEvent != "session.resume" && lastEvent != "session.shutdown")
+                    {
+                        detected = true;
+                        break;
+                    }
+                    // Reconnect/terminal write — update baseline and keep polling
+                    stableMtime = currentMtime;
                 }
             }
         }

From 6ba0cfc6ed60ec433927bf1db98f3065d365fcb8 Mon Sep 17 00:00:00 2001
From: Shane Neuville 
Date: Tue, 7 Apr 2026 01:38:30 -0500
Subject: [PATCH 3/8] perf: remove polling loop from
 RecoverFromPrematureIdleIfNeededAsync
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 10-second secondary polling loop was running after every worker
completion — even when no premature idle was detected — adding 12s of
unnecessary latency to every multi-agent task.

The two-phase mtime check (settle 500ms + observe 1500ms = 2000ms total)
already handles all realistic premature-idle cases:
- Premature idle + new events within 2s: detected via endMtime > stableMtime
- PrematureIdleSignal: checked fast-path before entering the grace period
- session.resume writes: explicitly skipped (reconnect, not premature idle)

After the 2000ms check returns not-detected, return immediately.
Worker completion now registers in ~2s instead of 12s.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PolyPilot.Tests/MultiAgentRegressionTests.cs  | 66 ++++++++-------
 .../Services/CopilotService.Organization.cs   | 81 +++++++------------
 2 files changed, 66 insertions(+), 81 deletions(-)

diff --git a/PolyPilot.Tests/MultiAgentRegressionTests.cs b/PolyPilot.Tests/MultiAgentRegressionTests.cs
index deb8cda740..b0514d22b6 100644
--- a/PolyPilot.Tests/MultiAgentRegressionTests.cs
+++ b/PolyPilot.Tests/MultiAgentRegressionTests.cs
@@ -2450,17 +2450,6 @@ public void LongRunningOrchestrator_UserFollowup_MustQueue()
 
     #region Premature Idle Recovery Tests (PR #375 — SDK bug #299)
 
-    [Fact]
-    public void PrematureIdleDetectionWindowMs_IsReasonable()
-    {
-        // The detection window must be long enough for EVT-REARM to fire on the UI thread
-        // after the premature idle, but short enough not to delay normal completions excessively.
-        Assert.True(CopilotService.PrematureIdleDetectionWindowMs >= 3000,
-            "Detection window must be >= 3s to allow UI thread EVT-REARM dispatch");
-        Assert.True(CopilotService.PrematureIdleDetectionWindowMs <= 10_000,
-            "Detection window must be <= 10s to avoid excessive delay on normal completions");
-    }
-
     [Fact]
     public void PrematureIdleRecoveryTimeoutMs_IsReasonable()
     {
@@ -2746,6 +2735,18 @@ public void PrematureIdleEventsFileFreshnessSeconds_ConstantExists()
         Assert.True(constIdx >= 0, "Must be an internal const int");
     }
 
+    [Fact]
+    public void PrematureIdleEventsSettleMs_ConstantExists()
+    {
+        // Settle period constant: time to wait before taking the stable baseline snapshot.
+        // Must be > 0 (need time for OS to flush mtime) and < GracePeriodMs (must leave
+        // observation window after settle).
+        Assert.True(CopilotService.PrematureIdleEventsSettleMs > 0,
+            "Settle must be > 0ms");
+        Assert.True(CopilotService.PrematureIdleEventsSettleMs < CopilotService.PrematureIdleEventsGracePeriodMs,
+            "Settle must be < GracePeriodMs to leave an observation window");
+    }
+
     [Fact]
     public void PrematureIdleEventsGracePeriodMs_ConstantExists()
     {
@@ -2781,26 +2782,31 @@ public void GetEventsFileMtime_HelperExists()
     [Fact]
     public void RecoverFromPrematureIdleIfNeededAsync_UsesMtimeComparisonForInitialDetection()
     {
-        // Structural: instead of raw IsEventsFileActive (which sees the idle event's own write
-        // as "fresh" and false-positives), the method must snapshot mtime, wait the grace period,
-        // then compare mtimes. Only a changed mtime proves the CLI is still writing.
+        // Structural: two-phase mtime check avoids OS mtime-flush false positives.
+        // Phase 1: settle (500ms) to let OS flush the completing write's mtime update
+        // Phase 2: observe (1500ms) to see if MORE writes happen (genuine premature idle)
+        // Only writes AFTER the settle baseline trigger detection.
         var orgPath = Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.Organization.cs");
         var source = File.ReadAllText(orgPath);
 
         var methodIdx = source.IndexOf("private async Task RecoverFromPrematureIdleIfNeededAsync", StringComparison.Ordinal);
         Assert.True(methodIdx >= 0, "RecoverFromPrematureIdleIfNeededAsync must exist");
-        var methodBlock = source.Substring(methodIdx, Math.Min(4000, source.Length - methodIdx));
+        var methodBlock = source.Substring(methodIdx, Math.Min(5000, source.Length - methodIdx));
 
         // Must use mtime comparison in the detection phase
         Assert.Contains("GetEventsFileMtime", methodBlock);
-        Assert.Contains("PrematureIdleEventsGracePeriodMs", methodBlock);
         Assert.Contains("stableMtime", methodBlock);
+        Assert.Contains("endMtime", methodBlock);
 
-        // The grace period delay must appear before the stableMtime assignment
-        var delayIdx = methodBlock.IndexOf("PrematureIdleEventsGracePeriodMs", StringComparison.Ordinal);
+        // Two-phase: settle delay must precede the stableMtime (baseline) assignment
+        var settleIdx = methodBlock.IndexOf("PrematureIdleEventsSettleMs", StringComparison.Ordinal);
         var assignIdx = methodBlock.IndexOf("stableMtime = GetEventsFileMtime", StringComparison.Ordinal);
-        Assert.True(assignIdx >= 0, "stableMtime must be assigned from GetEventsFileMtime after the delay");
-        Assert.True(delayIdx < assignIdx, "Grace period delay must precede stable-mtime assignment");
+        Assert.True(settleIdx >= 0, "PrematureIdleEventsSettleMs constant must appear in the method");
+        Assert.True(assignIdx >= 0, "stableMtime must be assigned from GetEventsFileMtime after the settle delay");
+        Assert.True(settleIdx < assignIdx, "Settle delay must precede stable-mtime baseline assignment");
+
+        // The observation comparison must use endMtime > stableMtime
+        Assert.Contains("endMtime.HasValue && endMtime.Value > (stableMtime", methodBlock);
     }
 
     [Fact]
@@ -2825,11 +2831,11 @@ public void RecoverFromPrematureIdleIfNeededAsync_GracePeriodSkipsReconnectWrite
     }
 
     [Fact]
-    public void RecoverFromPrematureIdleIfNeededAsync_PollingLoopUsesMtimeComparison()
+    public void RecoverFromPrematureIdleIfNeededAsync_UsesTwoPhaseMtimeCheck()
     {
-        // Structural: the secondary polling loop must also use mtime comparison (not raw
-        // IsEventsFileActive) so that a stale-but-fresh file doesn't trigger false detection
-        // in subsequent poll cycles.
+        // Structural: the method must use the two-phase approach — settle period then
+        // snapshot stableMtime, then observe — so OS-flush false positives are avoided.
+        // After the two-phase check returns not-detected, it returns immediately (no polling loop).
         var orgPath = Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.Organization.cs");
         var source = File.ReadAllText(orgPath);
 
@@ -2837,11 +2843,11 @@ public void RecoverFromPrematureIdleIfNeededAsync_PollingLoopUsesMtimeComparison
         Assert.True(methodIdx >= 0, "RecoverFromPrematureIdleIfNeededAsync must exist");
         var methodBlock = source.Substring(methodIdx, Math.Min(5000, source.Length - methodIdx));
 
-        // The polling loop must compare currentMtime against stableMtime
-        Assert.Contains("currentMtime", methodBlock);
+        // Must use stableMtime (post-settle baseline) and endMtime (post-observe)
         Assert.Contains("stableMtime", methodBlock);
+        Assert.Contains("endMtime", methodBlock);
 
-        // Both GetEventsFileMtime calls should appear in the method
+        // Both GetEventsFileMtime calls should appear (settle snapshot + observe snapshot)
         var calls = 0;
         var searchFrom = 0;
         while (true)
@@ -2851,7 +2857,11 @@ public void RecoverFromPrematureIdleIfNeededAsync_PollingLoopUsesMtimeComparison
             calls++;
             searchFrom = idx + 1;
         }
-        Assert.True(calls >= 2, $"GetEventsFileMtime must be called at least twice (grace + polling), found {calls}");
+        Assert.True(calls >= 2, $"GetEventsFileMtime must be called at least twice (settle + observe), found {calls}");
+
+        // Must NOT have a secondary polling loop (the while loop that kept looping for 10s)
+        // because it added 10s of delay to every normal worker completion.
+        Assert.DoesNotContain("detectStart", methodBlock);
     }
 
     #endregion
diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs
index 59a5506247..b8c790b494 100644
--- a/PolyPilot/Services/CopilotService.Organization.cs
+++ b/PolyPilot/Services/CopilotService.Organization.cs
@@ -42,20 +42,19 @@ public partial class CopilotService
     /// Shorter than WorkerExecutionTimeout — if a worker is stuck, the orchestrator
     /// proceeds with partial results rather than blocking the group forever.
     private static readonly TimeSpan OrchestratorCollectionTimeout = TimeSpan.FromMinutes(15);
-    /// Checks both WasPrematurelyIdled flag (set by EVT-REARM) and events.jsonl freshness
-    /// (CLI still writing events). The events.jsonl check catches cases where EVT-REARM
-    /// takes 30-60s to fire.
-    internal const int PrematureIdleDetectionWindowMs = 10_000;
 
     /// If events.jsonl was modified within this many seconds of TCS completion,
     /// the worker is likely still active despite the premature session.idle.
     internal const int PrematureIdleEventsFileFreshnessSeconds = 15;
 
-    /// Grace period after TCS completion before declaring premature idle based on
-    /// events.jsonl mtime. During this window we observe whether the file's mtime changes
-    /// (indicating the CLI is still writing), vs. remaining frozen (normal completion where
-    /// the idle event itself wrote the file). This prevents false-positive premature idle
-    /// detection when events.jsonl was just written by the completing idle event.
+    /// Settle period (first phase of two-phase grace check): waits for the OS to
+    /// flush the completing event's mtime update before taking the stable baseline snapshot.
+    /// Must be less than PrematureIdleEventsGracePeriodMs. Observation window = Grace - Settle.
+    internal const int PrematureIdleEventsSettleMs = 500;
+
+    /// Total observation window: settle (500ms) + observe (1500ms) = 2000ms.
+    /// After settle, we snapshot stableMtime. After the full window, endMtime > stableMtime
+    /// means the CLI wrote NEW events (genuine premature idle), not just the completing flush.
     internal const int PrematureIdleEventsGracePeriodMs = 2000;
 
     /// Maximum time to wait for the worker's real completion after detecting a
@@ -2686,17 +2685,28 @@ bool WaitForPrematureIdleSignal()
 
         if (!detected)
         {
-            // Snapshot mtime and wait briefly to detect whether CLI is still writing.
-            // Without this grace period we'd false-positive: the idle event itself just
-            // wrote to events.jsonl, making it appear "fresh" even on normal completion.
-            var mtimeBefore = GetEventsFileMtime(state.Info.SessionId);
-            try { await Task.Delay(PrematureIdleEventsGracePeriodMs, cancellationToken); }
+            // Two-phase mtime check to avoid OS mtime-flush false positives.
+            //
+            // Problem: the SDK writes assistant.turn_end to events.jsonl, then immediately emits
+            // session.idle (ephemeral, not written). TCS fires and we capture `mtimeBefore`, but
+            // the OS may not have flushed the turn_end mtime update yet (APFS can buffer for ~100ms).
+            // If we compare mtimeBefore (pre-flush) vs stableMtime (post-flush), we see a phantom
+            // change even though no new processing happened.
+            //
+            // Fix: brief settle (500ms) to let the OS flush the completing write's mtime, THEN
+            // snapshot `stableMtime` as a clean baseline. After 1500ms observation, compare against
+            // that post-settle baseline — only genuine new writes (tool rounds, etc.) trigger detection.
+            try { await Task.Delay(PrematureIdleEventsSettleMs, cancellationToken); }
             catch (OperationCanceledException) { return initialResponse; }
+            // Post-settle baseline — already includes the completing turn_end write's mtime
             stableMtime = GetEventsFileMtime(state.Info.SessionId);
-            // Mtime changed → check it's from real processing, not a reconnect session.resume write.
-            // When the app restarts mid-orchestration, EnsureSessionConnectedAsync writes session.resume
-            // to events.jsonl which advances mtime — that is NOT premature idle.
-            if (stableMtime.HasValue && stableMtime.Value > (mtimeBefore ?? DateTime.MinValue))
+
+            try { await Task.Delay(PrematureIdleEventsGracePeriodMs - PrematureIdleEventsSettleMs, cancellationToken); }
+            catch (OperationCanceledException) { return initialResponse; }
+
+            var endMtime = GetEventsFileMtime(state.Info.SessionId);
+            // endMtime > stableMtime only if the CLI wrote NEW events after the settle
+            if (endMtime.HasValue && endMtime.Value > (stableMtime ?? DateTime.MinValue))
             {
                 var eventsPath = Path.Combine(SessionStatePath, state.Info.SessionId ?? "", "events.jsonl");
                 var lastEvent = GetLastEventType(eventsPath);
@@ -2708,41 +2718,6 @@ bool WaitForPrematureIdleSignal()
             }
         }
 
-        if (!detected)
-        {
-            // Wait for PrematureIdleSignal OR poll events.jsonl for mtime changes
-            var detectStart = DateTime.UtcNow;
-            while ((DateTime.UtcNow - detectStart).TotalMilliseconds < PrematureIdleDetectionWindowMs)
-            {
-                // Wait up to 500ms on the signal (exits immediately if Set())
-                var signaled = await Task.Run(WaitForPrematureIdleSignal, cancellationToken)
-                    .ConfigureAwait(false);
-                
-                if (signaled || cancellationToken.IsCancellationRequested)
-                {
-                    detected = signaled;
-                    break;
-                }
-                
-                // Check if events.jsonl mtime advanced past the stable baseline.
-                // Skip session.resume writes (from app reconnect) — they advance mtime but
-                // are not signs of premature idle.
-                var currentMtime = GetEventsFileMtime(state.Info.SessionId);
-                if (currentMtime.HasValue && currentMtime.Value > (stableMtime ?? DateTime.MinValue))
-                {
-                    var eventsPath = Path.Combine(SessionStatePath, state.Info.SessionId ?? "", "events.jsonl");
-                    var lastEvent = GetLastEventType(eventsPath);
-                    if (lastEvent != null && lastEvent != "session.resume" && lastEvent != "session.shutdown")
-                    {
-                        detected = true;
-                        break;
-                    }
-                    // Reconnect/terminal write — update baseline and keep polling
-                    stableMtime = currentMtime;
-                }
-            }
-        }
-
         if (!detected)
             return initialResponse; // Normal completion — no premature idle indicators
 

From 30ad1c76eb17950c550af80b1e5fe8852727d00d Mon Sep 17 00:00:00 2001
From: Shane Neuville 
Date: Tue, 7 Apr 2026 07:40:14 -0500
Subject: [PATCH 4/8] =?UTF-8?q?fix:=20address=20PR=20review=20findings=20?=
 =?UTF-8?q?=E2=80=94=20AllowTurnStartRearm=20race,=20watchdog=20counter,?=
 =?UTF-8?q?=20PremiumRequestsUsed=20on=20errors?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- ClearProcessingState: remove AllowTurnStartRearm=true and _consecutiveWatchdogTimeouts reset
  (both are success-only operations; putting them in the helper created race windows on error/abort paths)
- CompleteResponse: explicitly set AllowTurnStartRearm=true and reset _consecutiveWatchdogTimeouts
  after ClearProcessingState (success path only)
- Reconnect/send failure paths: use accumulateApiTime=false (no real API call was consumed)
- Add 3 structural regression tests in ChatExperienceSafetyTests.cs to enforce these invariants

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PolyPilot.Tests/ChatExperienceSafetyTests.cs | 74 ++++++++++++++++++++
 PolyPilot/Services/CopilotService.Events.cs  |  8 ++-
 PolyPilot/Services/CopilotService.cs         | 15 ++--
 3 files changed, 89 insertions(+), 8 deletions(-)

diff --git a/PolyPilot.Tests/ChatExperienceSafetyTests.cs b/PolyPilot.Tests/ChatExperienceSafetyTests.cs
index 91de0cc353..2bc67dcc69 100644
--- a/PolyPilot.Tests/ChatExperienceSafetyTests.cs
+++ b/PolyPilot.Tests/ChatExperienceSafetyTests.cs
@@ -1246,4 +1246,78 @@ public async Task FlushCurrentResponse_WritesToChatDatabase()
         Assert.Equal("test-db-session-id", lastDbMsg.SessionId);
         Assert.Contains("Content for database", lastDbMsg.Message.Content);
     }
+
+    /// 
+    /// PR #531 review finding: ClearProcessingState must NOT set AllowTurnStartRearm=true.
+    /// Only CompleteResponse (normal-completion path) should allow EVT-REARM.
+    /// Error/abort paths must not have a race window where a background TurnStart event
+    /// reads AllowTurnStartRearm=true before the caller can set it back to false.
+    /// 
+    [Fact]
+    public void ClearProcessingState_DoesNotSetAllowTurnStartRearm()
+    {
+        // Structural: ClearProcessingState method body must NOT contain AllowTurnStartRearm = true
+        var svcPath = Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.cs");
+        var source = File.ReadAllText(svcPath);
+
+        var methodIdx = source.IndexOf("private void ClearProcessingState(", StringComparison.Ordinal);
+        Assert.True(methodIdx >= 0, "ClearProcessingState must exist");
+        var methodEnd = source.IndexOf("\n    }", methodIdx + 1, StringComparison.Ordinal);
+        var methodBody = source.Substring(methodIdx, methodEnd - methodIdx);
+
+        // Strip comment lines before checking — comments may document the invariant using
+        // the very string we're guarding against appearing in actual code.
+        var codeOnly = string.Join("\n", methodBody.Split('\n')
+            .Where(l => !l.TrimStart().StartsWith("//")));
+
+        Assert.False(codeOnly.Contains("AllowTurnStartRearm = true", StringComparison.Ordinal),
+            "ClearProcessingState must NOT set AllowTurnStartRearm=true — only CompleteResponse should, " +
+            "to avoid a race where error/abort paths get it briefly set before they can override to false.");
+    }
+
+    /// 
+    /// PR #531 review finding: _consecutiveWatchdogTimeouts reset must NOT be in
+    /// ClearProcessingState. It is a success-only signal (healthy server) — resetting it
+    /// on error/abort paths defeats the server-recovery detection threshold.
+    /// 
+    [Fact]
+    public void ClearProcessingState_DoesNotResetConsecutiveWatchdogTimeouts()
+    {
+        var svcPath = Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.cs");
+        var source = File.ReadAllText(svcPath);
+
+        var methodIdx = source.IndexOf("private void ClearProcessingState(", StringComparison.Ordinal);
+        Assert.True(methodIdx >= 0, "ClearProcessingState must exist");
+        var methodEnd = source.IndexOf("\n    }", methodIdx + 1, StringComparison.Ordinal);
+        var methodBody = source.Substring(methodIdx, methodEnd - methodIdx);
+
+        // Strip comment lines — comments may reference the counter by name while documenting why it's excluded.
+        var codeOnly = string.Join("\n", methodBody.Split('\n')
+            .Where(l => !l.TrimStart().StartsWith("//")));
+
+        Assert.False(codeOnly.Contains("_consecutiveWatchdogTimeouts", StringComparison.Ordinal),
+            "ClearProcessingState must NOT reset _consecutiveWatchdogTimeouts — only CompleteResponse " +
+            "(success path) should, since resetting on error/abort paths defeats server-recovery detection.");
+    }
+
+    /// 
+    /// PR #531 review finding: CompleteResponse must set AllowTurnStartRearm=true and
+    /// reset _consecutiveWatchdogTimeouts after ClearProcessingState.
+    /// 
+    [Fact]
+    public void CompleteResponse_SetsAllowTurnStartRearmAndResetsWatchdogCounter()
+    {
+        var eventsPath = Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.Events.cs");
+        var source = File.ReadAllText(eventsPath);
+
+        var methodIdx = source.IndexOf("private void CompleteResponse(", StringComparison.Ordinal);
+        Assert.True(methodIdx >= 0, "CompleteResponse must exist");
+        // Use 10000 chars — the method is ~112 lines before AllowTurnStartRearm, ~6400 chars in.
+        var methodBody = source.Substring(methodIdx, Math.Min(10000, source.Length - methodIdx));
+
+        Assert.True(methodBody.Contains("AllowTurnStartRearm = true", StringComparison.Ordinal),
+            "CompleteResponse must explicitly set AllowTurnStartRearm=true after ClearProcessingState");
+        Assert.True(methodBody.Contains("_consecutiveWatchdogTimeouts", StringComparison.Ordinal),
+            "CompleteResponse must reset _consecutiveWatchdogTimeouts on successful completion");
+    }
 }
diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs
index ca173ded5b..88a6c0e0a3 100644
--- a/PolyPilot/Services/CopilotService.Events.cs
+++ b/PolyPilot/Services/CopilotService.Events.cs
@@ -1641,8 +1641,14 @@ private void CompleteResponse(SessionState state, long? expectedGeneration = nul
         // synchronously (e.g., in orchestrator reflection loops), the next SendPromptAsync
         // call must see IsProcessing=false or it throws "already processing".
         ClearProcessingState(state);
-        // A successful completion proves the server is healthy — reset the
+        // Success-only: allow EVT-REARM to re-arm IsProcessing if a late TurnStart arrives
+        // (premature session.idle recovery). This must be set AFTER ClearProcessingState to
+        // avoid the race where a background TurnStart thread reads AllowTurnStartRearm=true
+        // before error/abort callers can override it back to false.
+        state.AllowTurnStartRearm = true;
+        // Success-only: a successful completion proves the server is healthy — reset the
         // service-level watchdog timeout counter to prevent false recovery triggers.
+        Interlocked.Exchange(ref _consecutiveWatchdogTimeouts, 0);
         state.ResponseCompletion?.TrySetResult(fullResponse);
         
         // Fire completion notification BEFORE OnStateChanged — this ensures
diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs
index e78068c2bd..fa9a6fa919 100644
--- a/PolyPilot/Services/CopilotService.cs
+++ b/PolyPilot/Services/CopilotService.cs
@@ -792,9 +792,12 @@ private void ClearProcessingState(SessionState state, bool accumulateApiTime = t
         state.Info.LastUpdatedAt = DateTime.Now;
 
         Interlocked.Exchange(ref state.SendingFlag, 0);
-        Interlocked.Exchange(ref _consecutiveWatchdogTimeouts, 0);
-
-        state.AllowTurnStartRearm = true;
+        // NOTE: AllowTurnStartRearm and _consecutiveWatchdogTimeouts are NOT reset here.
+        // AllowTurnStartRearm = true only belongs on the normal-completion path (CompleteResponse),
+        // not on error/abort paths — setting it here would create a race window where a background
+        // TurnStart event could re-arm IsProcessing between ClearProcessingState and the caller's
+        // AllowTurnStartRearm = false override.
+        // _consecutiveWatchdogTimeouts reset only belongs on success (CompleteResponse), not errors.
     }
 
     /// Ping interval to prevent the headless server from killing idle sessions.
@@ -3981,8 +3984,7 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis
                     state.HasUsedToolsThisTurn = false;
                     ClearDeferredIdleTracking(state);
                     Interlocked.Exchange(ref state.SuccessfulToolCountThisTurn, 0);
-                    ClearProcessingState(state);
-                    state.AllowTurnStartRearm = false; // Explicit reconnect failure is terminal for this turn
+                    ClearProcessingState(state, accumulateApiTime: false); // reconnect failure — no API call consumed
                     OnStateChanged?.Invoke();
                     throw;
                 }
@@ -3999,8 +4001,7 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis
                 state.HasUsedToolsThisTurn = false;
                 ClearDeferredIdleTracking(state);
                 Interlocked.Exchange(ref state.SuccessfulToolCountThisTurn, 0);
-                ClearProcessingState(state);
-                state.AllowTurnStartRearm = false; // Explicit send failure is terminal for this turn
+                ClearProcessingState(state, accumulateApiTime: false); // send failure — request may not have been consumed
                 OnStateChanged?.Invoke();
                 throw;
             }

From 8a8d1ce7db390f5d16d31088e8970cd8be78f273 Mon Sep 17 00:00:00 2001
From: Shane Neuville 
Date: Tue, 7 Apr 2026 09:16:11 -0500
Subject: [PATCH 5/8] fix: abort path should not increment PremiumRequestsUsed

AbortSessionAsync now manually accumulates TotalApiTimeSeconds (request was
consumed) then calls ClearProcessingState(accumulateApiTime: false) to avoid
incrementing PremiumRequestsUsed on user-initiated aborts.

Adds structural regression test to enforce this invariant.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PolyPilot.Tests/ChatExperienceSafetyTests.cs | 28 ++++++++++++++++++++
 PolyPilot/Services/CopilotService.cs         |  7 ++++-
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/PolyPilot.Tests/ChatExperienceSafetyTests.cs b/PolyPilot.Tests/ChatExperienceSafetyTests.cs
index 2bc67dcc69..30b225dbfd 100644
--- a/PolyPilot.Tests/ChatExperienceSafetyTests.cs
+++ b/PolyPilot.Tests/ChatExperienceSafetyTests.cs
@@ -1320,4 +1320,32 @@ public void CompleteResponse_SetsAllowTurnStartRearmAndResetsWatchdogCounter()
         Assert.True(methodBody.Contains("_consecutiveWatchdogTimeouts", StringComparison.Ordinal),
             "CompleteResponse must reset _consecutiveWatchdogTimeouts on successful completion");
     }
+
+    /// 
+    /// PR #531 re-review finding: AbortSessionAsync must NOT increment PremiumRequestsUsed.
+    /// User-initiated aborts consumed server resources (API time) but shouldn't count against
+    /// the premium request budget. The call must use accumulateApiTime: false and manually
+    /// accumulate TotalApiTimeSeconds before ClearProcessingState.
+    /// 
+    [Fact]
+    public void AbortSessionAsync_DoesNotIncrementPremiumRequestsViaAccumulateApiTime()
+    {
+        var svcPath = Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.cs");
+        var source = File.ReadAllText(svcPath);
+
+        var methodIdx = source.IndexOf("public async Task AbortSessionAsync(", StringComparison.Ordinal);
+        Assert.True(methodIdx >= 0, "AbortSessionAsync must exist");
+        var methodEnd = source.IndexOf("\n    }", methodIdx + 1, StringComparison.Ordinal);
+        var methodBody = source.Substring(methodIdx, methodEnd - methodIdx);
+
+        // The abort path must call ClearProcessingState with accumulateApiTime: false
+        Assert.True(methodBody.Contains("accumulateApiTime: false", StringComparison.Ordinal),
+            "AbortSessionAsync must call ClearProcessingState with accumulateApiTime: false — " +
+            "user aborts should not increment PremiumRequestsUsed");
+
+        // It must also manually accumulate TotalApiTimeSeconds (request was consumed)
+        Assert.True(methodBody.Contains("TotalApiTimeSeconds", StringComparison.Ordinal),
+            "AbortSessionAsync must manually accumulate TotalApiTimeSeconds before ClearProcessingState — " +
+            "the request consumed server resources even though the user aborted");
+    }
 }
diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs
index fa9a6fa919..9b73bc8f19 100644
--- a/PolyPilot/Services/CopilotService.cs
+++ b/PolyPilot/Services/CopilotService.cs
@@ -4132,7 +4132,12 @@ public async Task AbortSessionAsync(string sessionName, bool markAsInterrupted =
         state.CurrentResponse.Clear();
 
         Debug($"[ABORT] '{sessionName}' user abort, clearing IsProcessing");
-        ClearProcessingState(state);
+        // Accumulate API time (the request was in-flight and consumed server resources)
+        // but don't increment PremiumRequestsUsed — user-initiated aborts shouldn't count
+        // against the premium request budget.
+        if (state.Info.ProcessingStartedAt is { } abortStarted)
+            state.Info.TotalApiTimeSeconds += (DateTime.UtcNow - abortStarted).TotalSeconds;
+        ClearProcessingState(state, accumulateApiTime: false);
         state.WasUserAborted = true; // Suppress EVT-REARM for in-flight TurnStart events after abort
         state.AllowTurnStartRearm = false; // Abort is explicit terminal intent — do not revive on late TurnStart replays
         Interlocked.Exchange(ref state.ActiveToolCallCount, 0);

From 197793c4731c2612ac3b2b8fc6c88bfd445f6c6f Mon Sep 17 00:00:00 2001
From: Shane Neuville 
Date: Tue, 7 Apr 2026 09:26:51 -0500
Subject: [PATCH 6/8] refactor: consolidate per-turn field clears into
 ClearProcessingState

ClearProcessingState now clears all 19 companion fields atomically:
- Added: ActiveToolCallCount, HasUsedToolsThisTurn, SuccessfulToolCountThisTurn,
  ToolHealthStaleChecks, EventCountThisTurn, TurnEndReceivedAtTicks,
  IsReconnectedSend, CancelTurnEndFallback, CancelToolHealthCheck,
  ClearDeferredIdleTracking
- Removed manual field clears from: watchdog timeout (22 lines), watchdog crash
  (19 lines), AbortSessionAsync (8 lines), reconnect-failure (6 lines),
  send-failure (6 lines), CompleteResponse (10 lines)

This eliminates the exact anti-pattern that caused the 13-PR regression cycle:
scattered IsProcessing companion field clears that could miss a field.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PolyPilot.Tests/ChatExperienceSafetyTests.cs |  6 +-
 PolyPilot.Tests/ProcessingWatchdogTests.cs   | 20 +++---
 PolyPilot.Tests/SessionStabilityTests.cs     | 21 ++----
 PolyPilot/Services/CopilotService.Events.cs  | 71 +++++---------------
 PolyPilot/Services/CopilotService.cs         | 31 +++------
 5 files changed, 44 insertions(+), 105 deletions(-)

diff --git a/PolyPilot.Tests/ChatExperienceSafetyTests.cs b/PolyPilot.Tests/ChatExperienceSafetyTests.cs
index 30b225dbfd..3ccf8c2324 100644
--- a/PolyPilot.Tests/ChatExperienceSafetyTests.cs
+++ b/PolyPilot.Tests/ChatExperienceSafetyTests.cs
@@ -1051,9 +1051,9 @@ public void CompleteResponse_Source_ClearsSendingFlag()
         var crIdx = source.IndexOf("private void CompleteResponse(", StringComparison.Ordinal);
         Assert.True(crIdx > 0);
 
-        // Within CompleteResponse, SendingFlag must be cleared (may be 80+ lines into method)
-        var afterCR = source.Substring(crIdx, Math.Min(6000, source.Length - crIdx));
-        Assert.Contains("SendingFlag", afterCR);
+        // CompleteResponse must call ClearProcessingState (which clears SendingFlag along with all other fields)
+        var afterCR = source.Substring(crIdx, Math.Min(10000, source.Length - crIdx));
+        Assert.Contains("ClearProcessingState", afterCR);
     }
 
     /// 
diff --git a/PolyPilot.Tests/ProcessingWatchdogTests.cs b/PolyPilot.Tests/ProcessingWatchdogTests.cs
index 0985417aef..6f730752f6 100644
--- a/PolyPilot.Tests/ProcessingWatchdogTests.cs
+++ b/PolyPilot.Tests/ProcessingWatchdogTests.cs
@@ -2735,20 +2735,16 @@ public void WatchdogCatchBlock_ClearsIsProcessing_InSource()
         Assert.True(watchdogBody.Contains("clearing IsProcessing after watchdog crash"),
             "Watchdog catch block must log that it is clearing IsProcessing on crash");
 
-        // The crash recovery must clear all INV-1 companion fields
-        // Search the full catch block (after the [WATCHDOG-CRASH] tag) for required patterns
+        // The crash recovery must call ClearProcessingState (which atomically clears all
+        // companion fields: IsProcessing, SendingFlag, ProcessingStartedAt, etc.)
         var crashIdx = watchdogBody.IndexOf("[WATCHDOG-CRASH]");
         var crashBlock = watchdogBody.Substring(crashIdx);
-        Assert.True(crashBlock.Contains("IsProcessing = false"),
-            "Watchdog crash recovery must set IsProcessing = false");
-        Assert.True(crashBlock.Contains("SendingFlag"),
-            "Watchdog crash recovery must clear SendingFlag (INV-1)");
-        Assert.True(crashBlock.Contains("ProcessingStartedAt = null"),
-            "Watchdog crash recovery must clear ProcessingStartedAt (INV-1)");
-        Assert.True(crashBlock.Contains("ProcessingPhase = 0"),
-            "Watchdog crash recovery must clear ProcessingPhase (INV-1)");
-        Assert.True(crashBlock.Contains("ClearPermissionDenials"),
-            "Watchdog crash recovery must clear permission denials (INV-1)");
+        Assert.True(crashBlock.Contains("ClearProcessingState(state"),
+            "Watchdog crash recovery must call ClearProcessingState to atomically clear all companion fields");
+        Assert.True(crashBlock.Contains("AllowTurnStartRearm = false"),
+            "Watchdog crash recovery must set AllowTurnStartRearm = false (terminal forced stop)");
+        Assert.True(crashBlock.Contains("ConsecutiveStuckCount++"),
+            "Watchdog crash recovery must increment ConsecutiveStuckCount");
     }
 
     [Fact]
diff --git a/PolyPilot.Tests/SessionStabilityTests.cs b/PolyPilot.Tests/SessionStabilityTests.cs
index 434501b9e6..a10b680c5c 100644
--- a/PolyPilot.Tests/SessionStabilityTests.cs
+++ b/PolyPilot.Tests/SessionStabilityTests.cs
@@ -303,20 +303,13 @@ public void WatchdogCrashRecovery_ClearsAllCompanionFields()
         var source = File.ReadAllText(TestPaths.EventsCs);
         var watchdogMethod = ExtractMethod(source, "RunProcessingWatchdogAsync");
 
-        // The crash recovery block (Case C kill) must clear companion fields
-        var companionFields = new[]
-        {
-            "IsProcessing = false",
-            "ProcessingPhase",
-            "ProcessingStartedAt",
-            "ToolCallCount",
-        };
-
-        foreach (var field in companionFields)
-        {
-            Assert.True(watchdogMethod.Contains(field, StringComparison.Ordinal),
-                $"Watchdog crash recovery must clear '{field}'");
-        }
+        // The crash recovery block must call ClearProcessingState (which atomically
+        // clears IsProcessing, ProcessingPhase, ProcessingStartedAt, ToolCallCount, etc.)
+        Assert.True(watchdogMethod.Contains("ClearProcessingState(state", StringComparison.Ordinal),
+            "Watchdog crash recovery must call ClearProcessingState to atomically clear all companion fields");
+        // Must also set AllowTurnStartRearm = false (terminal forced stop)
+        Assert.True(watchdogMethod.Contains("AllowTurnStartRearm = false", StringComparison.Ordinal),
+            "Watchdog crash recovery must set AllowTurnStartRearm = false");
     }
 
     // ─── Multi-Agent Fix Prompt Enhancement ───
diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs
index 88a6c0e0a3..7ba74ad5f5 100644
--- a/PolyPilot/Services/CopilotService.Events.cs
+++ b/PolyPilot/Services/CopilotService.Events.cs
@@ -1581,20 +1581,9 @@ private void CompleteResponse(SessionState state, long? expectedGeneration = nul
               $"(responseLen={state.CurrentResponse.Length}, flushedLen={state.FlushedResponse.Length}, thread={Environment.CurrentManagedThreadId})");
         
         CancelProcessingWatchdog(state);
-        // Also cancel any pending TurnEnd→Idle fallback — CompleteResponse is now executing
-        CancelTurnEndFallback(state);
-        CancelToolHealthCheck(state);
-        Interlocked.Exchange(ref state.ActiveToolCallCount, 0);
-        Interlocked.Exchange(ref state.SendingFlag, 0);
-        state.HasUsedToolsThisTurn = false;
-        ClearDeferredIdleTracking(state);
-        state.IsReconnectedSend = false; // Clear reconnect flag on turn completion (defense-in-depth)
         state.FallbackCanceledByTurnStart = false;
-        Interlocked.Exchange(ref state.SuccessfulToolCountThisTurn, 0);
-        Interlocked.Exchange(ref state.ToolHealthStaleChecks, 0);
-        Interlocked.Exchange(ref state.EventCountThisTurn, 0);
-        Interlocked.Exchange(ref state.TurnEndReceivedAtTicks, 0);
-        state.Info.IsResumed = false; // Clear after first successful turn
+        // Per-turn tracking fields (ActiveToolCallCount, HasUsedToolsThisTurn, etc.)
+        // are cleared by ClearProcessingState below. No need to clear them early.
         var response = state.CurrentResponse.ToString();
         var responseAlreadyFlushedThisTurn = WasResponseAlreadyFlushedThisTurn(state, response);
         if (!string.IsNullOrWhiteSpace(response))
@@ -2947,34 +2936,20 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session
                             return;
                         }
                         CancelProcessingWatchdog(state);
-                        CancelToolHealthCheck(state);
-                        Interlocked.Exchange(ref state.ActiveToolCallCount, 0);
-                        state.HasUsedToolsThisTurn = false;
-                        ClearDeferredIdleTracking(state);
-                        Interlocked.Exchange(ref state.SuccessfulToolCountThisTurn, 0);
-                        Interlocked.Exchange(ref state.ToolHealthStaleChecks, 0);
-                        Interlocked.Exchange(ref state.EventCountThisTurn, 0);
-                        Interlocked.Exchange(ref state.TurnEndReceivedAtTicks, 0);
-                        // Cancel any pending TurnEnd→Idle fallback
-                        CancelTurnEndFallback(state);
-                        state.Info.IsResumed = false;
-                        state.IsReconnectedSend = false; // INV-1: clear all per-turn flags on termination
                         // Flush any accumulated partial response before clearing processing state.
                         // Wrapped in try-catch: if flush fails, IsProcessing MUST still be cleared
                         // (otherwise the session is permanently stuck — the watchdog has already exited).
                         try { FlushCurrentResponse(state); }
                         catch (Exception flushEx) { Debug($"[WATCHDOG] '{sessionName}' flush failed during kill: {flushEx.Message}"); }
                         Debug($"[WATCHDOG] '{sessionName}' IsProcessing=false — watchdog timeout after {totalProcessingSeconds:F0}s total, elapsed={elapsed:F0}s, exceededMaxTime={exceededMaxTime}");
-                        state.Info.IsProcessing = false;
-                        state.AllowTurnStartRearm = false; // Watchdog timeout is an explicit forced stop
-                        Interlocked.Exchange(ref state.SendingFlag, 0);
+                        // Capture flushed response BEFORE ClearProcessingState clears it
+                        var watchdogResponse = state.FlushedResponse.ToString();
+                        // Accumulate API time (request was in-flight) but don't count as premium request
                         if (state.Info.ProcessingStartedAt is { } wdStarted)
                             state.Info.TotalApiTimeSeconds += (DateTime.UtcNow - wdStarted).TotalSeconds;
-                        state.Info.ProcessingStartedAt = null;
-                        state.Info.ToolCallCount = 0;
-                        state.Info.ProcessingPhase = 0;
-                        state.Info.ClearPermissionDenials(); // INV-1: clear on all termination paths
-                        state.Info.ConsecutiveStuckCount++;
+                        ClearProcessingState(state, accumulateApiTime: false);
+                        state.AllowTurnStartRearm = false; // Watchdog timeout is an explicit forced stop
+                        state.Info.ConsecutiveStuckCount++; // Override the = 0 from ClearProcessingState
                         // Track service-level consecutive watchdog timeouts. When the
                         // persistent server's auth token expires, ALL sessions hang silently.
                         // After WatchdogServerRecoveryThreshold consecutive timeouts across
@@ -3010,9 +2985,6 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session
                             // into a session that's in a repeated-stuck cycle.
                             state.Info.MessageQueue.Clear();
                         }
-                        var watchdogResponse = state.FlushedResponse.ToString();
-                        state.FlushedResponse.Clear();
-                        state.PendingReasoningMessages.Clear();
                         state.ResponseCompletion?.TrySetResult(watchdogResponse);
                         // Fire completion notification so orchestrator loops are unblocked (INV-O4)
                         OnSessionComplete?.Invoke(sessionName, "[Watchdog] timeout");
@@ -3044,28 +3016,15 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session
                     // Best-effort flush before clearing processing state
                     try { FlushCurrentResponse(state); }
                     catch { /* Flush failure must not prevent IsProcessing cleanup */ }
-                    // INV-1: clear IsProcessing and all 9 companion fields
-                    state.Info.IsProcessing = false;
+                    // Capture response BEFORE ClearProcessingState clears it
+                    var crashResponse = state.FlushedResponse.ToString() + state.CurrentResponse.ToString();
+                    // Accumulate API time but don't count as premium request
+                    if (state.Info.ProcessingStartedAt is { } crashStarted)
+                        state.Info.TotalApiTimeSeconds += (DateTime.UtcNow - crashStarted).TotalSeconds;
+                    ClearProcessingState(state, accumulateApiTime: false);
                     state.AllowTurnStartRearm = false; // Watchdog crash cleanup is terminal for this turn
-                    state.Info.IsResumed = false;
-                    Interlocked.Exchange(ref state.SendingFlag, 0);
-                    Interlocked.Exchange(ref state.ActiveToolCallCount, 0);
-                    state.HasUsedToolsThisTurn = false;
-                    ClearDeferredIdleTracking(state);
-                    Interlocked.Exchange(ref state.SuccessfulToolCountThisTurn, 0);
-                    Interlocked.Exchange(ref state.ToolHealthStaleChecks, 0);
-                    Interlocked.Exchange(ref state.EventCountThisTurn, 0);
-                    Interlocked.Exchange(ref state.TurnEndReceivedAtTicks, 0);
-                    state.Info.ProcessingStartedAt = null;
-                    state.Info.ToolCallCount = 0;
-                    state.Info.ProcessingPhase = 0;
-                    state.Info.ClearPermissionDenials();
-                    state.Info.ConsecutiveStuckCount++;
+                    state.Info.ConsecutiveStuckCount++; // Override the = 0 from ClearProcessingState
                     Interlocked.Increment(ref _consecutiveWatchdogTimeouts);
-                    var crashResponse = state.FlushedResponse.ToString() + state.CurrentResponse.ToString();
-                    state.FlushedResponse.Clear();
-                    state.CurrentResponse.Clear();
-                    state.PendingReasoningMessages.Clear();
                     state.ResponseCompletion?.TrySetResult(crashResponse);
                     OnSessionComplete?.Invoke(sessionName, "[Watchdog] crash recovery");
                     OnError?.Invoke(sessionName, "Internal error in session monitoring. Try sending your message again.");
diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs
index 9b73bc8f19..53b70e18dd 100644
--- a/PolyPilot/Services/CopilotService.cs
+++ b/PolyPilot/Services/CopilotService.cs
@@ -792,6 +792,16 @@ private void ClearProcessingState(SessionState state, bool accumulateApiTime = t
         state.Info.LastUpdatedAt = DateTime.Now;
 
         Interlocked.Exchange(ref state.SendingFlag, 0);
+        Interlocked.Exchange(ref state.ActiveToolCallCount, 0);
+        state.HasUsedToolsThisTurn = false;
+        ClearDeferredIdleTracking(state);
+        Interlocked.Exchange(ref state.SuccessfulToolCountThisTurn, 0);
+        Interlocked.Exchange(ref state.ToolHealthStaleChecks, 0);
+        Interlocked.Exchange(ref state.EventCountThisTurn, 0);
+        Interlocked.Exchange(ref state.TurnEndReceivedAtTicks, 0);
+        state.IsReconnectedSend = false;
+        CancelTurnEndFallback(state);
+        CancelToolHealthCheck(state);
         // NOTE: AllowTurnStartRearm and _consecutiveWatchdogTimeouts are NOT reset here.
         // AllowTurnStartRearm = true only belongs on the normal-completion path (CompleteResponse),
         // not on error/abort paths — setting it here would create a race window where a background
@@ -3976,14 +3986,8 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis
                     Console.WriteLine($"[DEBUG] Reconnect+retry failed: {retryEx.Message}");
                     OnError?.Invoke(sessionName, $"Session disconnected and reconnect failed: {Models.ErrorMessageHelper.Humanize(retryEx)}");
                     CancelProcessingWatchdog(state);
-                    CancelTurnEndFallback(state);
-                    CancelToolHealthCheck(state);
                     FlushCurrentResponse(state);
                     Debug($"[ERROR] '{sessionName}' reconnect+retry failed, clearing IsProcessing");
-                    Interlocked.Exchange(ref state.ActiveToolCallCount, 0);
-                    state.HasUsedToolsThisTurn = false;
-                    ClearDeferredIdleTracking(state);
-                    Interlocked.Exchange(ref state.SuccessfulToolCountThisTurn, 0);
                     ClearProcessingState(state, accumulateApiTime: false); // reconnect failure — no API call consumed
                     OnStateChanged?.Invoke();
                     throw;
@@ -3993,14 +3997,8 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis
             {
                 OnError?.Invoke(sessionName, $"SendAsync failed: {Models.ErrorMessageHelper.Humanize(ex)}");
                 CancelProcessingWatchdog(state);
-                CancelTurnEndFallback(state);
-                CancelToolHealthCheck(state);
                 FlushCurrentResponse(state);
                 Debug($"[ERROR] '{sessionName}' SendAsync failed, clearing IsProcessing (error={ex.Message})");
-                Interlocked.Exchange(ref state.ActiveToolCallCount, 0);
-                state.HasUsedToolsThisTurn = false;
-                ClearDeferredIdleTracking(state);
-                Interlocked.Exchange(ref state.SuccessfulToolCountThisTurn, 0);
                 ClearProcessingState(state, accumulateApiTime: false); // send failure — request may not have been consumed
                 OnStateChanged?.Invoke();
                 throw;
@@ -4140,20 +4138,13 @@ public async Task AbortSessionAsync(string sessionName, bool markAsInterrupted =
         ClearProcessingState(state, accumulateApiTime: false);
         state.WasUserAborted = true; // Suppress EVT-REARM for in-flight TurnStart events after abort
         state.AllowTurnStartRearm = false; // Abort is explicit terminal intent — do not revive on late TurnStart replays
-        Interlocked.Exchange(ref state.ActiveToolCallCount, 0);
-        state.HasUsedToolsThisTurn = false;
-        ClearDeferredIdleTracking(state);
-        state.IsReconnectedSend = false; // INV-1: clear all per-turn flags on abort
-        Interlocked.Exchange(ref state.SuccessfulToolCountThisTurn, 0);
+        // Per-turn tracking fields are already cleared by ClearProcessingState.
         // Clear queued messages so they don't auto-send after abort
         state.Info.MessageQueue.Clear();
         _queuedImagePaths.TryRemove(sessionName, out _);
         _queuedAgentModes.TryRemove(sessionName, out _);
         _permissionRecoveryAttempts.TryRemove(sessionName, out _);
-        // Cancel any pending TurnEnd→Idle fallback so it doesn't fire CompleteResponse after abort
-        CancelTurnEndFallback(state);
         CancelProcessingWatchdog(state);
-        CancelToolHealthCheck(state);
         // Complete TCS AFTER state cleanup (INV-O3)
         state.ResponseCompletion?.TrySetCanceled();
         // Fire completion notification so orchestrator loops are unblocked (INV-O4)

From 13795fbb3a1c56c43db8808e67c22e04411e2b36 Mon Sep 17 00:00:00 2001
From: Shane Neuville 
Date: Tue, 7 Apr 2026 10:12:54 -0500
Subject: [PATCH 7/8] fix: ConsecutiveStuckCount must not reset in
 ClearProcessingState
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ConsecutiveStuckCount is a cross-turn health accumulator (like
_consecutiveWatchdogTimeouts). Resetting it in ClearProcessingState meant
watchdog timeout #1 set count=0→1, timeout #2 set count=0→1, so the >=3
threshold for queue-clear and history-growth prevention was unreachable.

Move reset to CompleteResponse only (success path proves session is healthy).
Add structural test to enforce this invariant.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PolyPilot.Tests/ChatExperienceSafetyTests.cs    | 11 +++++++++--
 PolyPilot.Tests/ConsecutiveStuckSessionTests.cs | 13 +++++++++----
 PolyPilot/Services/CopilotService.Events.cs     |  7 +++++--
 PolyPilot/Services/CopilotService.cs            | 13 ++++++-------
 4 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/PolyPilot.Tests/ChatExperienceSafetyTests.cs b/PolyPilot.Tests/ChatExperienceSafetyTests.cs
index 3ccf8c2324..3c916ec827 100644
--- a/PolyPilot.Tests/ChatExperienceSafetyTests.cs
+++ b/PolyPilot.Tests/ChatExperienceSafetyTests.cs
@@ -1298,11 +1298,16 @@ public void ClearProcessingState_DoesNotResetConsecutiveWatchdogTimeouts()
         Assert.False(codeOnly.Contains("_consecutiveWatchdogTimeouts", StringComparison.Ordinal),
             "ClearProcessingState must NOT reset _consecutiveWatchdogTimeouts — only CompleteResponse " +
             "(success path) should, since resetting on error/abort paths defeats server-recovery detection.");
+
+        Assert.False(codeOnly.Contains("ConsecutiveStuckCount", StringComparison.Ordinal),
+            "ClearProcessingState must NOT reset ConsecutiveStuckCount — only CompleteResponse " +
+            "(success path) should, since resetting on watchdog/error paths breaks the >= 3 " +
+            "threshold that stops system message accumulation in repeatedly-stuck sessions.");
     }
 
     /// 
-    /// PR #531 review finding: CompleteResponse must set AllowTurnStartRearm=true and
-    /// reset _consecutiveWatchdogTimeouts after ClearProcessingState.
+    /// PR #531 review finding: CompleteResponse must set AllowTurnStartRearm=true,
+    /// reset _consecutiveWatchdogTimeouts, and reset ConsecutiveStuckCount after ClearProcessingState.
     /// 
     [Fact]
     public void CompleteResponse_SetsAllowTurnStartRearmAndResetsWatchdogCounter()
@@ -1319,6 +1324,8 @@ public void CompleteResponse_SetsAllowTurnStartRearmAndResetsWatchdogCounter()
             "CompleteResponse must explicitly set AllowTurnStartRearm=true after ClearProcessingState");
         Assert.True(methodBody.Contains("_consecutiveWatchdogTimeouts", StringComparison.Ordinal),
             "CompleteResponse must reset _consecutiveWatchdogTimeouts on successful completion");
+        Assert.True(methodBody.Contains("ConsecutiveStuckCount = 0", StringComparison.Ordinal),
+            "CompleteResponse must reset ConsecutiveStuckCount on successful completion");
     }
 
     /// 
diff --git a/PolyPilot.Tests/ConsecutiveStuckSessionTests.cs b/PolyPilot.Tests/ConsecutiveStuckSessionTests.cs
index 8682e10697..24a232f1f5 100644
--- a/PolyPilot.Tests/ConsecutiveStuckSessionTests.cs
+++ b/PolyPilot.Tests/ConsecutiveStuckSessionTests.cs
@@ -186,12 +186,17 @@ public void WatchdogSource_ClearsMessageQueueOnRepeatedStucks()
     [Fact]
     public void CompleteResponseSource_ResetsConsecutiveStuckCount()
     {
-        // Verify ClearProcessingState resets the stuck counter on success.
-        // ClearProcessingState is in CopilotService.cs and called by CompleteResponse.
+        // Verify CompleteResponse resets the stuck counter on success.
+        // ConsecutiveStuckCount = 0 must only appear in CompleteResponse (success path),
+        // NOT in ClearProcessingState (which would break accumulation on watchdog timeouts).
         var repoRoot = GetRepoRoot();
-        var mainSource = File.ReadAllText(Path.Combine(repoRoot, "PolyPilot", "Services", "CopilotService.cs"));
+        var eventsSource = File.ReadAllText(Path.Combine(repoRoot, "PolyPilot", "Services", "CopilotService.Events.cs"));
 
-        Assert.Contains("ConsecutiveStuckCount = 0", mainSource);
+        // Must be in CompleteResponse
+        var crIdx = eventsSource.IndexOf("private void CompleteResponse(", StringComparison.Ordinal);
+        Assert.True(crIdx >= 0, "CompleteResponse must exist");
+        var crBody = eventsSource.Substring(crIdx, Math.Min(10000, eventsSource.Length - crIdx));
+        Assert.Contains("ConsecutiveStuckCount = 0", crBody);
     }
 
     // =========================================================================
diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs
index 7ba74ad5f5..5c0098b181 100644
--- a/PolyPilot/Services/CopilotService.Events.cs
+++ b/PolyPilot/Services/CopilotService.Events.cs
@@ -1638,6 +1638,9 @@ private void CompleteResponse(SessionState state, long? expectedGeneration = nul
         // Success-only: a successful completion proves the server is healthy — reset the
         // service-level watchdog timeout counter to prevent false recovery triggers.
         Interlocked.Exchange(ref _consecutiveWatchdogTimeouts, 0);
+        // Success-only: a successful response proves the session is not stuck — reset the
+        // per-session consecutive stuck counter so the >= 3 threshold can re-accumulate.
+        state.Info.ConsecutiveStuckCount = 0;
         state.ResponseCompletion?.TrySetResult(fullResponse);
         
         // Fire completion notification BEFORE OnStateChanged — this ensures
@@ -2949,7 +2952,7 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session
                             state.Info.TotalApiTimeSeconds += (DateTime.UtcNow - wdStarted).TotalSeconds;
                         ClearProcessingState(state, accumulateApiTime: false);
                         state.AllowTurnStartRearm = false; // Watchdog timeout is an explicit forced stop
-                        state.Info.ConsecutiveStuckCount++; // Override the = 0 from ClearProcessingState
+                        state.Info.ConsecutiveStuckCount++;
                         // Track service-level consecutive watchdog timeouts. When the
                         // persistent server's auth token expires, ALL sessions hang silently.
                         // After WatchdogServerRecoveryThreshold consecutive timeouts across
@@ -3023,7 +3026,7 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session
                         state.Info.TotalApiTimeSeconds += (DateTime.UtcNow - crashStarted).TotalSeconds;
                     ClearProcessingState(state, accumulateApiTime: false);
                     state.AllowTurnStartRearm = false; // Watchdog crash cleanup is terminal for this turn
-                    state.Info.ConsecutiveStuckCount++; // Override the = 0 from ClearProcessingState
+                    state.Info.ConsecutiveStuckCount++;
                     Interlocked.Increment(ref _consecutiveWatchdogTimeouts);
                     state.ResponseCompletion?.TrySetResult(crashResponse);
                     OnSessionComplete?.Invoke(sessionName, "[Watchdog] crash recovery");
diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs
index 53b70e18dd..3d1de493c7 100644
--- a/PolyPilot/Services/CopilotService.cs
+++ b/PolyPilot/Services/CopilotService.cs
@@ -787,7 +787,6 @@ private void ClearProcessingState(SessionState state, bool accumulateApiTime = t
         state.Info.ProcessingStartedAt = null;
         state.Info.ToolCallCount = 0;
         state.Info.ProcessingPhase = 0;
-        state.Info.ConsecutiveStuckCount = 0;
         state.Info.ClearPermissionDenials();
         state.Info.LastUpdatedAt = DateTime.Now;
 
@@ -802,12 +801,12 @@ private void ClearProcessingState(SessionState state, bool accumulateApiTime = t
         state.IsReconnectedSend = false;
         CancelTurnEndFallback(state);
         CancelToolHealthCheck(state);
-        // NOTE: AllowTurnStartRearm and _consecutiveWatchdogTimeouts are NOT reset here.
-        // AllowTurnStartRearm = true only belongs on the normal-completion path (CompleteResponse),
-        // not on error/abort paths — setting it here would create a race window where a background
-        // TurnStart event could re-arm IsProcessing between ClearProcessingState and the caller's
-        // AllowTurnStartRearm = false override.
-        // _consecutiveWatchdogTimeouts reset only belongs on success (CompleteResponse), not errors.
+        // NOTE: AllowTurnStartRearm, _consecutiveWatchdogTimeouts, and ConsecutiveStuckCount
+        // are NOT reset here. All three are cross-turn health accumulators:
+        // - AllowTurnStartRearm = true only belongs on the normal-completion path (CompleteResponse)
+        // - _consecutiveWatchdogTimeouts resets only on success (server is healthy)
+        // - ConsecutiveStuckCount resets only on success (session responded normally)
+        // Resetting them here would break accumulation across consecutive failures.
     }
 
     /// Ping interval to prevent the headless server from killing idle sessions.

From ee6a854edc9e48afcdb3efb5f014ca5b50ec688b Mon Sep 17 00:00:00 2001
From: Shane Neuville 
Date: Tue, 7 Apr 2026 10:29:43 -0500
Subject: [PATCH 8/8] refactor: TriggerToolHealthRecovery now uses
 ClearProcessingState
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Last remaining manual field-clearing site converted to use
ClearProcessingState. This was the code path that fires 'Tool execution
stuck' messages — it had the same anti-pattern of manually clearing 15+
fields instead of calling the atomic helper.

Also updated RenderThrottleTests structural test to check for
ClearProcessingState instead of literal IsProcessing=false.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PolyPilot.Tests/RenderThrottleTests.cs      | 12 +++++------
 PolyPilot/Services/CopilotService.Events.cs | 24 ++++-----------------
 2 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/PolyPilot.Tests/RenderThrottleTests.cs b/PolyPilot.Tests/RenderThrottleTests.cs
index eae7d1f4cc..1f6ac33e8b 100644
--- a/PolyPilot.Tests/RenderThrottleTests.cs
+++ b/PolyPilot.Tests/RenderThrottleTests.cs
@@ -214,18 +214,18 @@ public void CompleteResponse_OnSessionComplete_FiresBeforeOnStateChanged()
         var methodStart = source.IndexOf("private void CompleteResponse(", StringComparison.Ordinal);
         Assert.True(methodStart >= 0, "CompleteResponse method not found");
 
-        // The critical ordering is AFTER IsProcessing = false (the main completion path).
+        // The critical ordering is AFTER ClearProcessingState (which sets IsProcessing=false).
         // Use the preceding comment as anchor to find the right occurrence.
         var anchor = source.IndexOf("// Clear IsProcessing BEFORE completing the TCS", methodStart, StringComparison.Ordinal);
         Assert.True(anchor >= 0, "CompleteResponse TCS comment not found");
-        var isProcessingFalse = source.IndexOf("state.Info.IsProcessing = false;", anchor, StringComparison.Ordinal);
-        Assert.True(isProcessingFalse >= 0, "IsProcessing = false not found in CompleteResponse main path");
+        var clearProcessing = source.IndexOf("ClearProcessingState(state)", anchor, StringComparison.Ordinal);
+        Assert.True(clearProcessing >= 0, "ClearProcessingState(state) not found in CompleteResponse main path");
 
-        var afterProcessing = source.Substring(isProcessingFalse, 1200);
+        var afterProcessing = source.Substring(clearProcessing, 2000);
         var completeIdx = afterProcessing.IndexOf("OnSessionComplete?", StringComparison.Ordinal);
         var stateIdx = afterProcessing.IndexOf("OnStateChanged?", StringComparison.Ordinal);
-        Assert.True(completeIdx >= 0, "OnSessionComplete not found after IsProcessing=false in CompleteResponse");
-        Assert.True(stateIdx >= 0, "OnStateChanged not found after IsProcessing=false in CompleteResponse");
+        Assert.True(completeIdx >= 0, "OnSessionComplete not found after ClearProcessingState in CompleteResponse");
+        Assert.True(stateIdx >= 0, "OnStateChanged not found after ClearProcessingState in CompleteResponse");
         Assert.True(completeIdx < stateIdx,
             "OnSessionComplete must fire BEFORE OnStateChanged in CompleteResponse. " +
             "HandleComplete populates completedSessions which RefreshState checks to bypass throttle.");
diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs
index 5c0098b181..bb75f31085 100644
--- a/PolyPilot/Services/CopilotService.Events.cs
+++ b/PolyPilot/Services/CopilotService.Events.cs
@@ -2315,7 +2315,6 @@ private void TriggerToolHealthRecovery(SessionState state, string sessionName, s
         if (state.IsOrphaned) return;
         CancelToolHealthCheck(state);
         CancelProcessingWatchdog(state);
-        CancelTurnEndFallback(state);
 
         var activeTools = Volatile.Read(ref state.ActiveToolCallCount);
         var recoveryGeneration = Interlocked.Read(ref state.ProcessingGeneration);
@@ -2330,16 +2329,8 @@ private void TriggerToolHealthRecovery(SessionState state, string sessionName, s
 
             OnError?.Invoke(sessionName, $"Tool execution stuck ({reason}). Session recovered automatically.");
 
-            // Full cleanup mirroring CompleteResponse — missing fields here caused stuck sessions
-            Interlocked.Exchange(ref state.ActiveToolCallCount, 0);
-            state.HasUsedToolsThisTurn = false;
-            ClearDeferredIdleTracking(state);
             state.FallbackCanceledByTurnStart = false;
-            Interlocked.Exchange(ref state.SuccessfulToolCountThisTurn, 0);
             Interlocked.Exchange(ref state.WatchdogCaseAResets, 0);
-            Interlocked.Exchange(ref state.ToolHealthStaleChecks, 0);
-            Interlocked.Exchange(ref state.EventCountThisTurn, 0);
-            Interlocked.Exchange(ref state.TurnEndReceivedAtTicks, 0);
 
             // Build full response: flushed mid-turn text + remaining current text
             var response = state.CurrentResponse.ToString();
@@ -2349,18 +2340,11 @@ private void TriggerToolHealthRecovery(SessionState state, string sessionName, s
                     : state.FlushedResponse + "\n\n" + response)
                 : response;
 
-            state.CurrentResponse.Clear();
-            state.FlushedResponse.Clear();
-            state.PendingReasoningMessages.Clear();
-
-            state.Info.IsProcessing = false;
+            // Accumulate API time but don't count as premium request (recovery, not success)
+            if (state.Info.ProcessingStartedAt is { } healthStarted)
+                state.Info.TotalApiTimeSeconds += (DateTime.UtcNow - healthStarted).TotalSeconds;
+            ClearProcessingState(state, accumulateApiTime: false);
             state.AllowTurnStartRearm = false; // Explicit tool-health recovery should stay completed
-            state.Info.IsResumed = false;
-            Interlocked.Exchange(ref state.SendingFlag, 0);
-            state.Info.ProcessingStartedAt = null;
-            state.Info.ToolCallCount = 0;
-            state.Info.ProcessingPhase = 0;
-            state.Info.ClearPermissionDenials();
 
             state.ResponseCompletion?.TrySetResult(fullResponse);