diff --git a/PolyPilot.Tests/InputValidationTests.cs b/PolyPilot.Tests/InputValidationTests.cs index 281873f11a..0e7ddca103 100644 --- a/PolyPilot.Tests/InputValidationTests.cs +++ b/PolyPilot.Tests/InputValidationTests.cs @@ -119,6 +119,11 @@ public void ValidateImagePath_OutOverload_ErrorReturnsEmpty() [Fact] public void ValidateImagePath_SymlinkOutsideImagesDir_ReturnsNotAllowed() { + // Skip on Windows: Unix paths like /etc/passwd are relative on Windows, + // so symlink resolution yields a path inside the images dir, not outside. + if (OperatingSystem.IsWindows()) + return; + var imagesDir = ShowImageTool.GetImagesDir(); Directory.CreateDirectory(imagesDir); var linkPath = Path.Combine(imagesDir, "evil-link.png"); @@ -144,6 +149,11 @@ public void ValidateImagePath_SymlinkOutsideImagesDir_ReturnsNotAllowed() [Fact] public void ValidateImagePath_DirectorySymlinkBypass_ReturnsNotAllowed() { + // Skip on Windows: Unix paths like /etc are relative on Windows, + // so symlink resolution yields a path inside the images dir, not outside. + if (OperatingSystem.IsWindows()) + return; + var imagesDir = ShowImageTool.GetImagesDir(); Directory.CreateDirectory(imagesDir); var symlinkDir = Path.Combine(imagesDir, "evil-subdir"); diff --git a/PolyPilot.Tests/MultiAgentRegressionTests.cs b/PolyPilot.Tests/MultiAgentRegressionTests.cs index ab90802756..1570fc7a3c 100644 --- a/PolyPilot.Tests/MultiAgentRegressionTests.cs +++ b/PolyPilot.Tests/MultiAgentRegressionTests.cs @@ -1462,7 +1462,7 @@ public void ReconnectState_ShouldCarryIsMultiAgentSession() // Find the reconnect block where HasUsedToolsThisTurn is carried forward var reconnectBlock = source.Substring(source.IndexOf("newState.HasUsedToolsThisTurn = state.HasUsedToolsThisTurn")); // IsMultiAgentSession must be carried forward in the same block - Assert.Contains("newState.IsMultiAgentSession = state.IsMultiAgentSession", reconnectBlock.Substring(0, 200)); + Assert.Contains("newState.IsMultiAgentSession = state.IsMultiAgentSession", reconnectBlock.Substring(0, 400)); } [Fact] diff --git a/PolyPilot.Tests/ProcessingWatchdogTests.cs b/PolyPilot.Tests/ProcessingWatchdogTests.cs index 217b93b4d4..ce681363ac 100644 --- a/PolyPilot.Tests/ProcessingWatchdogTests.cs +++ b/PolyPilot.Tests/ProcessingWatchdogTests.cs @@ -56,6 +56,20 @@ public void WatchdogToolExecutionTimeout_IsReasonable() "Tool execution timeout must be greater than base inactivity timeout"); } + [Fact] + public void WatchdogMultiAgentNoToolTimeout_IsReasonable() + { + // Multi-agent no-tool timeout must be between base inactivity and tool execution timeouts. + // Long enough for model reasoning (1-3 min) but short enough to catch dead connections. + Assert.InRange(CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds, 120, 300); + Assert.True( + CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds > CopilotService.WatchdogInactivityTimeoutSeconds, + "Multi-agent no-tool timeout must be greater than base inactivity timeout"); + Assert.True( + CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds < CopilotService.WatchdogToolExecutionTimeoutSeconds, + "Multi-agent no-tool timeout must be less than tool execution timeout"); + } + [Fact] public void WatchdogTimeout_IsGreaterThanCheckInterval() { @@ -889,18 +903,21 @@ public void HasUsedToolsThisTurn_ResetByCompleteResponse() } /// - /// Mirrors the three-tier timeout selection logic from RunProcessingWatchdogAsync. + /// Mirrors the four-tier timeout selection logic from RunProcessingWatchdogAsync. /// Kept in sync so tests validate the actual production formula. /// private static int ComputeEffectiveTimeout(bool hasActiveTool, bool isResumed, bool hasReceivedEvents, bool hasUsedTools, bool isMultiAgent = false) { var useResumeQuiescence = isResumed && !hasReceivedEvents && !hasActiveTool && !hasUsedTools; - var useToolTimeout = hasActiveTool || (isResumed && !useResumeQuiescence) || hasUsedTools || isMultiAgent; + var useToolTimeout = hasActiveTool || (isResumed && !useResumeQuiescence) || hasUsedTools; + var useMultiAgentNoToolTimeout = isMultiAgent && !hasActiveTool && !hasUsedTools && !isResumed; return useResumeQuiescence ? CopilotService.WatchdogResumeQuiescenceTimeoutSeconds : useToolTimeout ? CopilotService.WatchdogToolExecutionTimeoutSeconds - : CopilotService.WatchdogInactivityTimeoutSeconds; + : useMultiAgentNoToolTimeout + ? CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds + : CopilotService.WatchdogInactivityTimeoutSeconds; } [Fact] @@ -973,12 +990,35 @@ public void WatchdogTimeoutSelection_ResumedWithActiveTool_UsesToolTimeout() } [Fact] - public void WatchdogTimeoutSelection_MultiAgent_UsesToolTimeout() + public void WatchdogTimeoutSelection_MultiAgent_NoTools_UsesMultiAgentNoToolTimeout() { - // Multi-agent sessions use longer tool timeout even without tool activity + // Multi-agent sessions without tool activity use a moderate timeout (180s) + // to catch dead connections faster than 600s while still allowing model reasoning var effectiveTimeout = ComputeEffectiveTimeout( hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true); + Assert.Equal(CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds, effectiveTimeout); + Assert.Equal(180, effectiveTimeout); + } + + [Fact] + public void WatchdogTimeoutSelection_MultiAgent_WithTools_UsesToolTimeout() + { + // Multi-agent sessions WITH tool activity still use the full 600s timeout + var effectiveTimeout = ComputeEffectiveTimeout( + hasActiveTool: true, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true); + + Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, effectiveTimeout); + Assert.Equal(600, effectiveTimeout); + } + + [Fact] + public void WatchdogTimeoutSelection_MultiAgent_HasUsedTools_UsesToolTimeout() + { + // Multi-agent sessions that have used tools this turn use 600s timeout + var effectiveTimeout = ComputeEffectiveTimeout( + hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: true, isMultiAgent: true); + Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, effectiveTimeout); Assert.Equal(600, effectiveTimeout); } @@ -995,6 +1035,51 @@ public void WatchdogTimeoutSelection_MultiAgentResumed_NoEvents_UsesQuiescenceTi Assert.Equal(30, effectiveTimeout); } + [Fact] + public void WatchdogTimeoutSelection_MultiAgentResumed_WithEvents_UsesToolTimeout() + { + // Multi-agent session that is resumed and has received events MUST use 600s timeout, + // NOT 180s. The resumed+events path takes precedence over multi-agent tier. + var effectiveTimeout = ComputeEffectiveTimeout( + hasActiveTool: false, isResumed: true, hasReceivedEvents: true, hasUsedTools: false, isMultiAgent: true); + + Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, effectiveTimeout); + Assert.Equal(600, effectiveTimeout); + } + + [Fact] + public void WatchdogTimeoutSelection_MultiAgent_EventsOnly_UsesModerateTimeout() + { + // Edge case: multi-agent session that has received events but no tools used + // and is NOT resumed. The hasReceivedEvents flag does NOT affect tier 3. + // Should still use 180s because hasActiveTool=false and hasUsedTools=false. + var effectiveTimeout = ComputeEffectiveTimeout( + hasActiveTool: false, isResumed: false, hasReceivedEvents: true, hasUsedTools: false, isMultiAgent: true); + + Assert.Equal(CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds, effectiveTimeout); + Assert.Equal(180, effectiveTimeout); + } + + [Fact] + public void WatchdogTimeoutSelection_Transition_180sTo600s_WhenToolStarts() + { + // Documents the transition: when a tool starts (HasUsedToolsThisTurn goes true), + // multi-agent sessions transition from 180s to 600s. + var before = ComputeEffectiveTimeout( + hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true); + Assert.Equal(180, before); + + // Tool starts → hasActiveTool goes true + var during = ComputeEffectiveTimeout( + hasActiveTool: true, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true); + Assert.Equal(600, during); + + // After tool completes → hasUsedTools stays true + var after = ComputeEffectiveTimeout( + hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: true, isMultiAgent: true); + Assert.Equal(600, after); + } + [Fact] public void HasUsedToolsThisTurn_ResetOnNewSend() { @@ -1194,14 +1279,22 @@ public void ResumeQuiescence_OnlyTriggersWhenResumedAndNoEvents() public void ResumeQuiescence_NotResumed_NeverTriggersQuiescence() { // Non-resumed sessions must NEVER get the 30s quiescence timeout, - // regardless of other flags. + // regardless of other flags. Each case uses its appropriate timeout tier: + // - Base inactivity (120s) for standard sessions + // - Tool execution (600s) for sessions with tool activity + // - Multi-agent no-tool (180s) for multi-agent sessions without tools Assert.Equal(120, ComputeEffectiveTimeout( hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false)); Assert.Equal(600, ComputeEffectiveTimeout( hasActiveTool: true, isResumed: false, hasReceivedEvents: false, hasUsedTools: false)); Assert.Equal(600, ComputeEffectiveTimeout( hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: true)); - Assert.Equal(600, ComputeEffectiveTimeout( + Assert.Equal(180, ComputeEffectiveTimeout( + hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true)); + // All must NOT be 30s (quiescence) + Assert.NotEqual(30, ComputeEffectiveTimeout( + hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false)); + Assert.NotEqual(30, ComputeEffectiveTimeout( hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true)); } @@ -1238,10 +1331,15 @@ public void ResumeQuiescence_TransitionsToInactivity_AfterIsResumedCleared() [InlineData(false, true, true, false, false, 600)] // Resumed, events: tool timeout [InlineData(true, true, false, false, false, 600)] // Resumed, active tool: tool timeout [InlineData(false, true, false, true, false, 600)] // Resumed, used tools: tool timeout - [InlineData(false, false, false, false, true, 600)] // Multi-agent: tool timeout + [InlineData(false, false, false, false, true, 180)] // Multi-agent, no tools: 180s (faster dead connection detection) + [InlineData(true, false, false, false, true, 600)] // Multi-agent, active tool: tool timeout + [InlineData(false, false, false, true, true, 600)] // Multi-agent, used tools: tool timeout [InlineData(false, true, false, false, true, 30)] // Resumed+multiAgent, no events: quiescence wins [InlineData(false, false, false, true, false, 600)] // HasUsedTools: tool timeout [InlineData(true, true, true, true, true, 600)] // All flags: tool timeout + [InlineData(false, true, true, false, true, 600)] // Resumed+multiAgent+events: tool timeout (not 180s) + [InlineData(false, false, true, false, true, 180)] // Multi-agent+events, no tools: still 180s (events don't affect tier 3) + [InlineData(true, false, false, true, true, 600)] // Multi-agent+activeTool+hasUsedTools: tool timeout public void WatchdogTimeoutSelection_ExhaustiveMatrix( bool hasActiveTool, bool isResumed, bool hasReceivedEvents, bool hasUsedTools, bool isMultiAgent, int expectedTimeout) @@ -1817,14 +1915,41 @@ public void Regression_PR163_IsResumed_NotClearedDuringToolActivity() // --- PR #195 regression: multi-agent workers killed at 120s --- [Fact] - public void Regression_PR195_MultiAgentWorkers_Use600s() + public void Regression_PR195_MultiAgentWorkers_NotKilledAt120s() { // Multi-agent workers doing text-heavy tasks (PR reviews, no tools) - // were killed at 120s inactivity. Fix: isMultiAgent flag → 600s. - var timeout = ComputeEffectiveTimeout( + // were killed at 120s inactivity. Fix: now uses 180s (multi-agent no-tool timeout). + // Sessions with tool activity use 600s. + var timeoutNoTools = ComputeEffectiveTimeout( hasActiveTool: false, isResumed: false, hasReceivedEvents: true, hasUsedTools: false, isMultiAgent: true); - Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, timeout); + Assert.Equal(CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds, timeoutNoTools); + Assert.True(timeoutNoTools > 120, "Multi-agent sessions must not be killed at 120s"); + + // If tools are being used, use the full 600s timeout + var timeoutWithTools = ComputeEffectiveTimeout( + hasActiveTool: false, isResumed: false, hasReceivedEvents: true, + hasUsedTools: true, isMultiAgent: true); + Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, timeoutWithTools); + } + + // --- Bug fix: orchestrator stuck after SocketException 10054 --- + + [Fact] + public void Regression_OrchestratorStuck_MultiAgentNoToolsUsesModerateTimeout() + { + // When SocketException 10054 kills the JSON-RPC connection, orchestrator sessions + // would previously wait 600s before the watchdog cleared IsProcessing. + // Fix: multi-agent sessions without tool activity use 180s timeout. + // This is long enough for legitimate model reasoning (1-3 min) + // but short enough to not leave users waiting 10 minutes for a dead connection. + var timeout = ComputeEffectiveTimeout( + hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true); + + Assert.Equal(CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds, timeout); + Assert.Equal(180, timeout); + Assert.True(timeout < 600, "Multi-agent no-tool timeout must be shorter than tool timeout"); + Assert.True(timeout > 120, "Multi-agent no-tool timeout must be longer than base inactivity timeout"); } // --- PR #211 regression: quiescence must not kill active sessions --- diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs index 1c828bbc1d..e33a04ce83 100644 --- a/PolyPilot/Services/CopilotService.Events.cs +++ b/PolyPilot/Services/CopilotService.Events.cs @@ -1370,6 +1370,12 @@ private void HandleReflectionAdvanceResult(SessionState state, string response, /// If no SDK events arrive for this many seconds while a tool is actively executing, the session is considered stuck. /// This is much longer because legitimate tool executions (e.g., running UI tests, long builds) can take many minutes. internal const int WatchdogToolExecutionTimeoutSeconds = 600; + /// If no SDK events arrive for this many seconds for a multi-agent session WITHOUT active tools, + /// the session is considered stuck. This is shorter than WatchdogToolExecutionTimeoutSeconds because: + /// - Multi-agent sessions without tool activity are likely waiting for model reasoning (1-3 min typical) + /// - If the connection died (SocketException 10054), we don't want users waiting 10 minutes + /// - The orchestration loop has its own CancelAfter timeout as a backup + internal const int WatchdogMultiAgentNoToolTimeoutSeconds = 180; /// If a resumed session receives zero SDK events for this many seconds, it was likely already /// finished when the app restarted. Short enough that users don't have to click Stop, long enough /// for the SDK to start streaming if the turn is genuinely still active. @@ -1481,12 +1487,25 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session // goes true and we fall through to the normal timeout tiers. var useResumeQuiescence = state.Info.IsResumed && !hasReceivedEvents && !hasActiveTool && !hasUsedTools; - var useToolTimeout = hasActiveTool || (state.Info.IsResumed && !useResumeQuiescence) || hasUsedTools || isMultiAgentSession; + // Timeout tier selection: + // 1. Resumed session with no events → 30s quiescence + // 2. Active tool running OR resumed session with events → 600s (tools can take 10+ min) + // 3. Multi-agent session WITHOUT active tools or prior tool use → 180s (catch dead connections faster) + // 4. Standard session → 120s inactivity + // + // The key insight for tier 3: multi-agent sessions that haven't touched tools are likely + // doing pure model reasoning (1-3 min typical). If we hit 3 min with no events, the + // connection is probably dead (SocketException 10054 in SDK event loop). Users shouldn't + // wait 10 min to discover a dead connection. + var useToolTimeout = hasActiveTool || (state.Info.IsResumed && !useResumeQuiescence) || hasUsedTools; + var useMultiAgentNoToolTimeout = isMultiAgentSession && !hasActiveTool && !hasUsedTools && !state.Info.IsResumed; var effectiveTimeout = useResumeQuiescence ? WatchdogResumeQuiescenceTimeoutSeconds : useToolTimeout ? WatchdogToolExecutionTimeoutSeconds - : WatchdogInactivityTimeoutSeconds; + : useMultiAgentNoToolTimeout + ? WatchdogMultiAgentNoToolTimeoutSeconds + : WatchdogInactivityTimeoutSeconds; // Safety net: check absolute max processing time, but only if events have also // gone stale. If events are still flowing (elapsed < effectiveTimeout), the session diff --git a/PolyPilot/Services/CopilotService.Persistence.cs b/PolyPilot/Services/CopilotService.Persistence.cs index b3a4c3bc76..59c7c745b4 100644 --- a/PolyPilot/Services/CopilotService.Persistence.cs +++ b/PolyPilot/Services/CopilotService.Persistence.cs @@ -423,13 +423,19 @@ public async Task RestorePreviousSessionsAsync(CancellationToken cancellationTok } } - IsRestoring = false; } } catch (Exception ex) { Debug($"Failed to load active sessions file: {ex.Message}"); } + finally + { + // CRITICAL: Always reset IsRestoring even on failure. + // If this stays true, the entire UI becomes unresponsive + // (Resume buttons disabled, session interactions blocked). + IsRestoring = false; + } } } diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs index 21c960ee5b..962f02c7ed 100644 --- a/PolyPilot/Services/CopilotService.cs +++ b/PolyPilot/Services/CopilotService.cs @@ -638,10 +638,18 @@ public async Task InitializeAsync(CancellationToken cancellationToken = default) LoadOrganization(); // Restore previous sessions (includes subscribing to untracked server sessions in Persistent mode) - IsRestoring = true; - OnStateChanged?.Invoke(); - await RestorePreviousSessionsAsync(cancellationToken); - IsRestoring = false; + // Use try-finally to GUARANTEE IsRestoring is cleared even if restore throws. + // This is defense-in-depth alongside the finally block inside RestorePreviousSessionsAsync. + try + { + IsRestoring = true; + OnStateChanged?.Invoke(); + await RestorePreviousSessionsAsync(cancellationToken); + } + finally + { + IsRestoring = false; + } // Start health check loop for any codespace groups (regardless of whether sessions were restored) if (CodespacesEnabled)