diff --git a/PolyPilot.Tests/InputValidationTests.cs b/PolyPilot.Tests/InputValidationTests.cs
index 281873f11a..0e7ddca103 100644
--- a/PolyPilot.Tests/InputValidationTests.cs
+++ b/PolyPilot.Tests/InputValidationTests.cs
@@ -119,6 +119,11 @@ public void ValidateImagePath_OutOverload_ErrorReturnsEmpty()
[Fact]
public void ValidateImagePath_SymlinkOutsideImagesDir_ReturnsNotAllowed()
{
+ // Skip on Windows: Unix paths like /etc/passwd are relative on Windows,
+ // so symlink resolution yields a path inside the images dir, not outside.
+ if (OperatingSystem.IsWindows())
+ return;
+
var imagesDir = ShowImageTool.GetImagesDir();
Directory.CreateDirectory(imagesDir);
var linkPath = Path.Combine(imagesDir, "evil-link.png");
@@ -144,6 +149,11 @@ public void ValidateImagePath_SymlinkOutsideImagesDir_ReturnsNotAllowed()
[Fact]
public void ValidateImagePath_DirectorySymlinkBypass_ReturnsNotAllowed()
{
+ // Skip on Windows: Unix paths like /etc are relative on Windows,
+ // so symlink resolution yields a path inside the images dir, not outside.
+ if (OperatingSystem.IsWindows())
+ return;
+
var imagesDir = ShowImageTool.GetImagesDir();
Directory.CreateDirectory(imagesDir);
var symlinkDir = Path.Combine(imagesDir, "evil-subdir");
diff --git a/PolyPilot.Tests/MultiAgentRegressionTests.cs b/PolyPilot.Tests/MultiAgentRegressionTests.cs
index ab90802756..1570fc7a3c 100644
--- a/PolyPilot.Tests/MultiAgentRegressionTests.cs
+++ b/PolyPilot.Tests/MultiAgentRegressionTests.cs
@@ -1462,7 +1462,7 @@ public void ReconnectState_ShouldCarryIsMultiAgentSession()
// Find the reconnect block where HasUsedToolsThisTurn is carried forward
var reconnectBlock = source.Substring(source.IndexOf("newState.HasUsedToolsThisTurn = state.HasUsedToolsThisTurn"));
// IsMultiAgentSession must be carried forward in the same block
- Assert.Contains("newState.IsMultiAgentSession = state.IsMultiAgentSession", reconnectBlock.Substring(0, 200));
+ Assert.Contains("newState.IsMultiAgentSession = state.IsMultiAgentSession", reconnectBlock.Substring(0, 400));
}
[Fact]
diff --git a/PolyPilot.Tests/ProcessingWatchdogTests.cs b/PolyPilot.Tests/ProcessingWatchdogTests.cs
index 217b93b4d4..ce681363ac 100644
--- a/PolyPilot.Tests/ProcessingWatchdogTests.cs
+++ b/PolyPilot.Tests/ProcessingWatchdogTests.cs
@@ -56,6 +56,20 @@ public void WatchdogToolExecutionTimeout_IsReasonable()
"Tool execution timeout must be greater than base inactivity timeout");
}
+ [Fact]
+ public void WatchdogMultiAgentNoToolTimeout_IsReasonable()
+ {
+ // Multi-agent no-tool timeout must be between base inactivity and tool execution timeouts.
+ // Long enough for model reasoning (1-3 min) but short enough to catch dead connections.
+ Assert.InRange(CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds, 120, 300);
+ Assert.True(
+ CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds > CopilotService.WatchdogInactivityTimeoutSeconds,
+ "Multi-agent no-tool timeout must be greater than base inactivity timeout");
+ Assert.True(
+ CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds < CopilotService.WatchdogToolExecutionTimeoutSeconds,
+ "Multi-agent no-tool timeout must be less than tool execution timeout");
+ }
+
[Fact]
public void WatchdogTimeout_IsGreaterThanCheckInterval()
{
@@ -889,18 +903,21 @@ public void HasUsedToolsThisTurn_ResetByCompleteResponse()
}
///
- /// Mirrors the three-tier timeout selection logic from RunProcessingWatchdogAsync.
+ /// Mirrors the four-tier timeout selection logic from RunProcessingWatchdogAsync.
/// Kept in sync so tests validate the actual production formula.
///
private static int ComputeEffectiveTimeout(bool hasActiveTool, bool isResumed, bool hasReceivedEvents, bool hasUsedTools, bool isMultiAgent = false)
{
var useResumeQuiescence = isResumed && !hasReceivedEvents && !hasActiveTool && !hasUsedTools;
- var useToolTimeout = hasActiveTool || (isResumed && !useResumeQuiescence) || hasUsedTools || isMultiAgent;
+ var useToolTimeout = hasActiveTool || (isResumed && !useResumeQuiescence) || hasUsedTools;
+ var useMultiAgentNoToolTimeout = isMultiAgent && !hasActiveTool && !hasUsedTools && !isResumed;
return useResumeQuiescence
? CopilotService.WatchdogResumeQuiescenceTimeoutSeconds
: useToolTimeout
? CopilotService.WatchdogToolExecutionTimeoutSeconds
- : CopilotService.WatchdogInactivityTimeoutSeconds;
+ : useMultiAgentNoToolTimeout
+ ? CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds
+ : CopilotService.WatchdogInactivityTimeoutSeconds;
}
[Fact]
@@ -973,12 +990,35 @@ public void WatchdogTimeoutSelection_ResumedWithActiveTool_UsesToolTimeout()
}
[Fact]
- public void WatchdogTimeoutSelection_MultiAgent_UsesToolTimeout()
+ public void WatchdogTimeoutSelection_MultiAgent_NoTools_UsesMultiAgentNoToolTimeout()
{
- // Multi-agent sessions use longer tool timeout even without tool activity
+ // Multi-agent sessions without tool activity use a moderate timeout (180s)
+ // to catch dead connections faster than 600s while still allowing model reasoning
var effectiveTimeout = ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true);
+ Assert.Equal(CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds, effectiveTimeout);
+ Assert.Equal(180, effectiveTimeout);
+ }
+
+ [Fact]
+ public void WatchdogTimeoutSelection_MultiAgent_WithTools_UsesToolTimeout()
+ {
+ // Multi-agent sessions WITH tool activity still use the full 600s timeout
+ var effectiveTimeout = ComputeEffectiveTimeout(
+ hasActiveTool: true, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true);
+
+ Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, effectiveTimeout);
+ Assert.Equal(600, effectiveTimeout);
+ }
+
+ [Fact]
+ public void WatchdogTimeoutSelection_MultiAgent_HasUsedTools_UsesToolTimeout()
+ {
+ // Multi-agent sessions that have used tools this turn use 600s timeout
+ var effectiveTimeout = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: true, isMultiAgent: true);
+
Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, effectiveTimeout);
Assert.Equal(600, effectiveTimeout);
}
@@ -995,6 +1035,51 @@ public void WatchdogTimeoutSelection_MultiAgentResumed_NoEvents_UsesQuiescenceTi
Assert.Equal(30, effectiveTimeout);
}
+ [Fact]
+ public void WatchdogTimeoutSelection_MultiAgentResumed_WithEvents_UsesToolTimeout()
+ {
+ // Multi-agent session that is resumed and has received events MUST use 600s timeout,
+ // NOT 180s. The resumed+events path takes precedence over multi-agent tier.
+ var effectiveTimeout = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: true, hasReceivedEvents: true, hasUsedTools: false, isMultiAgent: true);
+
+ Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, effectiveTimeout);
+ Assert.Equal(600, effectiveTimeout);
+ }
+
+ [Fact]
+ public void WatchdogTimeoutSelection_MultiAgent_EventsOnly_UsesModerateTimeout()
+ {
+ // Edge case: multi-agent session that has received events but no tools used
+ // and is NOT resumed. The hasReceivedEvents flag does NOT affect tier 3.
+ // Should still use 180s because hasActiveTool=false and hasUsedTools=false.
+ var effectiveTimeout = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: true, hasUsedTools: false, isMultiAgent: true);
+
+ Assert.Equal(CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds, effectiveTimeout);
+ Assert.Equal(180, effectiveTimeout);
+ }
+
+ [Fact]
+ public void WatchdogTimeoutSelection_Transition_180sTo600s_WhenToolStarts()
+ {
+ // Documents the transition: when a tool starts (HasUsedToolsThisTurn goes true),
+ // multi-agent sessions transition from 180s to 600s.
+ var before = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true);
+ Assert.Equal(180, before);
+
+ // Tool starts → hasActiveTool goes true
+ var during = ComputeEffectiveTimeout(
+ hasActiveTool: true, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true);
+ Assert.Equal(600, during);
+
+ // After tool completes → hasUsedTools stays true
+ var after = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: true, isMultiAgent: true);
+ Assert.Equal(600, after);
+ }
+
[Fact]
public void HasUsedToolsThisTurn_ResetOnNewSend()
{
@@ -1194,14 +1279,22 @@ public void ResumeQuiescence_OnlyTriggersWhenResumedAndNoEvents()
public void ResumeQuiescence_NotResumed_NeverTriggersQuiescence()
{
// Non-resumed sessions must NEVER get the 30s quiescence timeout,
- // regardless of other flags.
+ // regardless of other flags. Each case uses its appropriate timeout tier:
+ // - Base inactivity (120s) for standard sessions
+ // - Tool execution (600s) for sessions with tool activity
+ // - Multi-agent no-tool (180s) for multi-agent sessions without tools
Assert.Equal(120, ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false));
Assert.Equal(600, ComputeEffectiveTimeout(
hasActiveTool: true, isResumed: false, hasReceivedEvents: false, hasUsedTools: false));
Assert.Equal(600, ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: true));
- Assert.Equal(600, ComputeEffectiveTimeout(
+ Assert.Equal(180, ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true));
+ // All must NOT be 30s (quiescence)
+ Assert.NotEqual(30, ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false));
+ Assert.NotEqual(30, ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true));
}
@@ -1238,10 +1331,15 @@ public void ResumeQuiescence_TransitionsToInactivity_AfterIsResumedCleared()
[InlineData(false, true, true, false, false, 600)] // Resumed, events: tool timeout
[InlineData(true, true, false, false, false, 600)] // Resumed, active tool: tool timeout
[InlineData(false, true, false, true, false, 600)] // Resumed, used tools: tool timeout
- [InlineData(false, false, false, false, true, 600)] // Multi-agent: tool timeout
+ [InlineData(false, false, false, false, true, 180)] // Multi-agent, no tools: 180s (faster dead connection detection)
+ [InlineData(true, false, false, false, true, 600)] // Multi-agent, active tool: tool timeout
+ [InlineData(false, false, false, true, true, 600)] // Multi-agent, used tools: tool timeout
[InlineData(false, true, false, false, true, 30)] // Resumed+multiAgent, no events: quiescence wins
[InlineData(false, false, false, true, false, 600)] // HasUsedTools: tool timeout
[InlineData(true, true, true, true, true, 600)] // All flags: tool timeout
+ [InlineData(false, true, true, false, true, 600)] // Resumed+multiAgent+events: tool timeout (not 180s)
+ [InlineData(false, false, true, false, true, 180)] // Multi-agent+events, no tools: still 180s (events don't affect tier 3)
+ [InlineData(true, false, false, true, true, 600)] // Multi-agent+activeTool+hasUsedTools: tool timeout
public void WatchdogTimeoutSelection_ExhaustiveMatrix(
bool hasActiveTool, bool isResumed, bool hasReceivedEvents,
bool hasUsedTools, bool isMultiAgent, int expectedTimeout)
@@ -1817,14 +1915,41 @@ public void Regression_PR163_IsResumed_NotClearedDuringToolActivity()
// --- PR #195 regression: multi-agent workers killed at 120s ---
[Fact]
- public void Regression_PR195_MultiAgentWorkers_Use600s()
+ public void Regression_PR195_MultiAgentWorkers_NotKilledAt120s()
{
// Multi-agent workers doing text-heavy tasks (PR reviews, no tools)
- // were killed at 120s inactivity. Fix: isMultiAgent flag → 600s.
- var timeout = ComputeEffectiveTimeout(
+ // were killed at 120s inactivity. Fix: now uses 180s (multi-agent no-tool timeout).
+ // Sessions with tool activity use 600s.
+ var timeoutNoTools = ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: true,
hasUsedTools: false, isMultiAgent: true);
- Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, timeout);
+ Assert.Equal(CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds, timeoutNoTools);
+ Assert.True(timeoutNoTools > 120, "Multi-agent sessions must not be killed at 120s");
+
+ // If tools are being used, use the full 600s timeout
+ var timeoutWithTools = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: true,
+ hasUsedTools: true, isMultiAgent: true);
+ Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, timeoutWithTools);
+ }
+
+ // --- Bug fix: orchestrator stuck after SocketException 10054 ---
+
+ [Fact]
+ public void Regression_OrchestratorStuck_MultiAgentNoToolsUsesModerateTimeout()
+ {
+ // When SocketException 10054 kills the JSON-RPC connection, orchestrator sessions
+ // would previously wait 600s before the watchdog cleared IsProcessing.
+ // Fix: multi-agent sessions without tool activity use 180s timeout.
+ // This is long enough for legitimate model reasoning (1-3 min)
+ // but short enough to not leave users waiting 10 minutes for a dead connection.
+ var timeout = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true);
+
+ Assert.Equal(CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds, timeout);
+ Assert.Equal(180, timeout);
+ Assert.True(timeout < 600, "Multi-agent no-tool timeout must be shorter than tool timeout");
+ Assert.True(timeout > 120, "Multi-agent no-tool timeout must be longer than base inactivity timeout");
}
// --- PR #211 regression: quiescence must not kill active sessions ---
diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs
index 1c828bbc1d..e33a04ce83 100644
--- a/PolyPilot/Services/CopilotService.Events.cs
+++ b/PolyPilot/Services/CopilotService.Events.cs
@@ -1370,6 +1370,12 @@ private void HandleReflectionAdvanceResult(SessionState state, string response,
/// If no SDK events arrive for this many seconds while a tool is actively executing, the session is considered stuck.
/// This is much longer because legitimate tool executions (e.g., running UI tests, long builds) can take many minutes.
internal const int WatchdogToolExecutionTimeoutSeconds = 600;
+ /// If no SDK events arrive for this many seconds for a multi-agent session WITHOUT active tools,
+ /// the session is considered stuck. This is shorter than WatchdogToolExecutionTimeoutSeconds because:
+ /// - Multi-agent sessions without tool activity are likely waiting for model reasoning (1-3 min typical)
+ /// - If the connection died (SocketException 10054), we don't want users waiting 10 minutes
+ /// - The orchestration loop has its own CancelAfter timeout as a backup
+ internal const int WatchdogMultiAgentNoToolTimeoutSeconds = 180;
/// If a resumed session receives zero SDK events for this many seconds, it was likely already
/// finished when the app restarted. Short enough that users don't have to click Stop, long enough
/// for the SDK to start streaming if the turn is genuinely still active.
@@ -1481,12 +1487,25 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session
// goes true and we fall through to the normal timeout tiers.
var useResumeQuiescence = state.Info.IsResumed && !hasReceivedEvents && !hasActiveTool && !hasUsedTools;
- var useToolTimeout = hasActiveTool || (state.Info.IsResumed && !useResumeQuiescence) || hasUsedTools || isMultiAgentSession;
+ // Timeout tier selection:
+ // 1. Resumed session with no events → 30s quiescence
+ // 2. Active tool running OR resumed session with events → 600s (tools can take 10+ min)
+ // 3. Multi-agent session WITHOUT active tools or prior tool use → 180s (catch dead connections faster)
+ // 4. Standard session → 120s inactivity
+ //
+ // The key insight for tier 3: multi-agent sessions that haven't touched tools are likely
+ // doing pure model reasoning (1-3 min typical). If we hit 3 min with no events, the
+ // connection is probably dead (SocketException 10054 in SDK event loop). Users shouldn't
+ // wait 10 min to discover a dead connection.
+ var useToolTimeout = hasActiveTool || (state.Info.IsResumed && !useResumeQuiescence) || hasUsedTools;
+ var useMultiAgentNoToolTimeout = isMultiAgentSession && !hasActiveTool && !hasUsedTools && !state.Info.IsResumed;
var effectiveTimeout = useResumeQuiescence
? WatchdogResumeQuiescenceTimeoutSeconds
: useToolTimeout
? WatchdogToolExecutionTimeoutSeconds
- : WatchdogInactivityTimeoutSeconds;
+ : useMultiAgentNoToolTimeout
+ ? WatchdogMultiAgentNoToolTimeoutSeconds
+ : WatchdogInactivityTimeoutSeconds;
// Safety net: check absolute max processing time, but only if events have also
// gone stale. If events are still flowing (elapsed < effectiveTimeout), the session
diff --git a/PolyPilot/Services/CopilotService.Persistence.cs b/PolyPilot/Services/CopilotService.Persistence.cs
index b3a4c3bc76..59c7c745b4 100644
--- a/PolyPilot/Services/CopilotService.Persistence.cs
+++ b/PolyPilot/Services/CopilotService.Persistence.cs
@@ -423,13 +423,19 @@ public async Task RestorePreviousSessionsAsync(CancellationToken cancellationTok
}
}
- IsRestoring = false;
}
}
catch (Exception ex)
{
Debug($"Failed to load active sessions file: {ex.Message}");
}
+ finally
+ {
+ // CRITICAL: Always reset IsRestoring even on failure.
+ // If this stays true, the entire UI becomes unresponsive
+ // (Resume buttons disabled, session interactions blocked).
+ IsRestoring = false;
+ }
}
}
diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs
index 21c960ee5b..962f02c7ed 100644
--- a/PolyPilot/Services/CopilotService.cs
+++ b/PolyPilot/Services/CopilotService.cs
@@ -638,10 +638,18 @@ public async Task InitializeAsync(CancellationToken cancellationToken = default)
LoadOrganization();
// Restore previous sessions (includes subscribing to untracked server sessions in Persistent mode)
- IsRestoring = true;
- OnStateChanged?.Invoke();
- await RestorePreviousSessionsAsync(cancellationToken);
- IsRestoring = false;
+ // Use try-finally to GUARANTEE IsRestoring is cleared even if restore throws.
+ // This is defense-in-depth alongside the finally block inside RestorePreviousSessionsAsync.
+ try
+ {
+ IsRestoring = true;
+ OnStateChanged?.Invoke();
+ await RestorePreviousSessionsAsync(cancellationToken);
+ }
+ finally
+ {
+ IsRestoring = false;
+ }
// Start health check loop for any codespace groups (regardless of whether sessions were restored)
if (CodespacesEnabled)