Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions PolyPilot.Tests/InputValidationTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ public void ValidateImagePath_OutOverload_ErrorReturnsEmpty()
[Fact]
public void ValidateImagePath_SymlinkOutsideImagesDir_ReturnsNotAllowed()
{
// Skip on Windows: Unix paths like /etc/passwd are relative on Windows,
// so symlink resolution yields a path inside the images dir, not outside.
if (OperatingSystem.IsWindows())
return;

var imagesDir = ShowImageTool.GetImagesDir();
Directory.CreateDirectory(imagesDir);
var linkPath = Path.Combine(imagesDir, "evil-link.png");
Expand All @@ -144,6 +149,11 @@ public void ValidateImagePath_SymlinkOutsideImagesDir_ReturnsNotAllowed()
[Fact]
public void ValidateImagePath_DirectorySymlinkBypass_ReturnsNotAllowed()
{
// Skip on Windows: Unix paths like /etc are relative on Windows,
// so symlink resolution yields a path inside the images dir, not outside.
if (OperatingSystem.IsWindows())
return;

var imagesDir = ShowImageTool.GetImagesDir();
Directory.CreateDirectory(imagesDir);
var symlinkDir = Path.Combine(imagesDir, "evil-subdir");
Expand Down
2 changes: 1 addition & 1 deletion PolyPilot.Tests/MultiAgentRegressionTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1462,7 +1462,7 @@ public void ReconnectState_ShouldCarryIsMultiAgentSession()
// Find the reconnect block where HasUsedToolsThisTurn is carried forward
var reconnectBlock = source.Substring(source.IndexOf("newState.HasUsedToolsThisTurn = state.HasUsedToolsThisTurn"));
// IsMultiAgentSession must be carried forward in the same block
Assert.Contains("newState.IsMultiAgentSession = state.IsMultiAgentSession", reconnectBlock.Substring(0, 200));
Assert.Contains("newState.IsMultiAgentSession = state.IsMultiAgentSession", reconnectBlock.Substring(0, 400));
}

[Fact]
Expand Down
149 changes: 137 additions & 12 deletions PolyPilot.Tests/ProcessingWatchdogTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,20 @@ public void WatchdogToolExecutionTimeout_IsReasonable()
"Tool execution timeout must be greater than base inactivity timeout");
}

[Fact]
public void WatchdogMultiAgentNoToolTimeout_IsReasonable()
{
// Multi-agent no-tool timeout must be between base inactivity and tool execution timeouts.
// Long enough for model reasoning (1-3 min) but short enough to catch dead connections.
Assert.InRange(CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds, 120, 300);
Assert.True(
CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds > CopilotService.WatchdogInactivityTimeoutSeconds,
"Multi-agent no-tool timeout must be greater than base inactivity timeout");
Assert.True(
CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds < CopilotService.WatchdogToolExecutionTimeoutSeconds,
"Multi-agent no-tool timeout must be less than tool execution timeout");
}

[Fact]
public void WatchdogTimeout_IsGreaterThanCheckInterval()
{
Expand Down Expand Up @@ -889,18 +903,21 @@ public void HasUsedToolsThisTurn_ResetByCompleteResponse()
}

/// <summary>
/// Mirrors the three-tier timeout selection logic from RunProcessingWatchdogAsync.
/// Mirrors the four-tier timeout selection logic from RunProcessingWatchdogAsync.
/// Kept in sync so tests validate the actual production formula.
/// </summary>
private static int ComputeEffectiveTimeout(bool hasActiveTool, bool isResumed, bool hasReceivedEvents, bool hasUsedTools, bool isMultiAgent = false)
{
var useResumeQuiescence = isResumed && !hasReceivedEvents && !hasActiveTool && !hasUsedTools;
var useToolTimeout = hasActiveTool || (isResumed && !useResumeQuiescence) || hasUsedTools || isMultiAgent;
var useToolTimeout = hasActiveTool || (isResumed && !useResumeQuiescence) || hasUsedTools;
var useMultiAgentNoToolTimeout = isMultiAgent && !hasActiveTool && !hasUsedTools && !isResumed;
return useResumeQuiescence
? CopilotService.WatchdogResumeQuiescenceTimeoutSeconds
: useToolTimeout
? CopilotService.WatchdogToolExecutionTimeoutSeconds
: CopilotService.WatchdogInactivityTimeoutSeconds;
: useMultiAgentNoToolTimeout
? CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds
: CopilotService.WatchdogInactivityTimeoutSeconds;
}

[Fact]
Expand Down Expand Up @@ -973,12 +990,35 @@ public void WatchdogTimeoutSelection_ResumedWithActiveTool_UsesToolTimeout()
}

[Fact]
public void WatchdogTimeoutSelection_MultiAgent_UsesToolTimeout()
public void WatchdogTimeoutSelection_MultiAgent_NoTools_UsesMultiAgentNoToolTimeout()
{
// Multi-agent sessions use longer tool timeout even without tool activity
// Multi-agent sessions without tool activity use a moderate timeout (180s)
// to catch dead connections faster than 600s while still allowing model reasoning
var effectiveTimeout = ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true);

Assert.Equal(CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds, effectiveTimeout);
Assert.Equal(180, effectiveTimeout);
}

[Fact]
public void WatchdogTimeoutSelection_MultiAgent_WithTools_UsesToolTimeout()
{
// Multi-agent sessions WITH tool activity still use the full 600s timeout
var effectiveTimeout = ComputeEffectiveTimeout(
hasActiveTool: true, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true);

Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, effectiveTimeout);
Assert.Equal(600, effectiveTimeout);
}

[Fact]
public void WatchdogTimeoutSelection_MultiAgent_HasUsedTools_UsesToolTimeout()
{
// Multi-agent sessions that have used tools this turn use 600s timeout
var effectiveTimeout = ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: true, isMultiAgent: true);

Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, effectiveTimeout);
Assert.Equal(600, effectiveTimeout);
}
Expand All @@ -995,6 +1035,51 @@ public void WatchdogTimeoutSelection_MultiAgentResumed_NoEvents_UsesQuiescenceTi
Assert.Equal(30, effectiveTimeout);
}

[Fact]
public void WatchdogTimeoutSelection_MultiAgentResumed_WithEvents_UsesToolTimeout()
{
// Multi-agent session that is resumed and has received events MUST use 600s timeout,
// NOT 180s. The resumed+events path takes precedence over multi-agent tier.
var effectiveTimeout = ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: true, hasReceivedEvents: true, hasUsedTools: false, isMultiAgent: true);

Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, effectiveTimeout);
Assert.Equal(600, effectiveTimeout);
}

[Fact]
public void WatchdogTimeoutSelection_MultiAgent_EventsOnly_UsesModerateTimeout()
{
// Edge case: multi-agent session that has received events but no tools used
// and is NOT resumed. The hasReceivedEvents flag does NOT affect tier 3.
// Should still use 180s because hasActiveTool=false and hasUsedTools=false.
var effectiveTimeout = ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: true, hasUsedTools: false, isMultiAgent: true);

Assert.Equal(CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds, effectiveTimeout);
Assert.Equal(180, effectiveTimeout);
}

[Fact]
public void WatchdogTimeoutSelection_Transition_180sTo600s_WhenToolStarts()
{
// Documents the transition: when a tool starts (HasUsedToolsThisTurn goes true),
// multi-agent sessions transition from 180s to 600s.
var before = ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true);
Assert.Equal(180, before);

// Tool starts → hasActiveTool goes true
var during = ComputeEffectiveTimeout(
hasActiveTool: true, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true);
Assert.Equal(600, during);

// After tool completes → hasUsedTools stays true
var after = ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: true, isMultiAgent: true);
Assert.Equal(600, after);
}

[Fact]
public void HasUsedToolsThisTurn_ResetOnNewSend()
{
Expand Down Expand Up @@ -1194,14 +1279,22 @@ public void ResumeQuiescence_OnlyTriggersWhenResumedAndNoEvents()
public void ResumeQuiescence_NotResumed_NeverTriggersQuiescence()
{
// Non-resumed sessions must NEVER get the 30s quiescence timeout,
// regardless of other flags.
// regardless of other flags. Each case uses its appropriate timeout tier:
// - Base inactivity (120s) for standard sessions
// - Tool execution (600s) for sessions with tool activity
// - Multi-agent no-tool (180s) for multi-agent sessions without tools
Assert.Equal(120, ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false));
Assert.Equal(600, ComputeEffectiveTimeout(
hasActiveTool: true, isResumed: false, hasReceivedEvents: false, hasUsedTools: false));
Assert.Equal(600, ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: true));
Assert.Equal(600, ComputeEffectiveTimeout(
Assert.Equal(180, ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true));
// All must NOT be 30s (quiescence)
Assert.NotEqual(30, ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false));
Assert.NotEqual(30, ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true));
}

Expand Down Expand Up @@ -1238,10 +1331,15 @@ public void ResumeQuiescence_TransitionsToInactivity_AfterIsResumedCleared()
[InlineData(false, true, true, false, false, 600)] // Resumed, events: tool timeout
[InlineData(true, true, false, false, false, 600)] // Resumed, active tool: tool timeout
[InlineData(false, true, false, true, false, 600)] // Resumed, used tools: tool timeout
[InlineData(false, false, false, false, true, 600)] // Multi-agent: tool timeout
[InlineData(false, false, false, false, true, 180)] // Multi-agent, no tools: 180s (faster dead connection detection)
[InlineData(true, false, false, false, true, 600)] // Multi-agent, active tool: tool timeout
[InlineData(false, false, false, true, true, 600)] // Multi-agent, used tools: tool timeout
[InlineData(false, true, false, false, true, 30)] // Resumed+multiAgent, no events: quiescence wins
[InlineData(false, false, false, true, false, 600)] // HasUsedTools: tool timeout
[InlineData(true, true, true, true, true, 600)] // All flags: tool timeout
[InlineData(false, true, true, false, true, 600)] // Resumed+multiAgent+events: tool timeout (not 180s)
[InlineData(false, false, true, false, true, 180)] // Multi-agent+events, no tools: still 180s (events don't affect tier 3)
[InlineData(true, false, false, true, true, 600)] // Multi-agent+activeTool+hasUsedTools: tool timeout
public void WatchdogTimeoutSelection_ExhaustiveMatrix(
bool hasActiveTool, bool isResumed, bool hasReceivedEvents,
bool hasUsedTools, bool isMultiAgent, int expectedTimeout)
Expand Down Expand Up @@ -1817,14 +1915,41 @@ public void Regression_PR163_IsResumed_NotClearedDuringToolActivity()
// --- PR #195 regression: multi-agent workers killed at 120s ---

[Fact]
public void Regression_PR195_MultiAgentWorkers_Use600s()
public void Regression_PR195_MultiAgentWorkers_NotKilledAt120s()
{
// Multi-agent workers doing text-heavy tasks (PR reviews, no tools)
// were killed at 120s inactivity. Fix: isMultiAgent flag → 600s.
var timeout = ComputeEffectiveTimeout(
// were killed at 120s inactivity. Fix: now uses 180s (multi-agent no-tool timeout).
// Sessions with tool activity use 600s.
var timeoutNoTools = ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: true,
hasUsedTools: false, isMultiAgent: true);
Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, timeout);
Assert.Equal(CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds, timeoutNoTools);
Assert.True(timeoutNoTools > 120, "Multi-agent sessions must not be killed at 120s");

// If tools are being used, use the full 600s timeout
var timeoutWithTools = ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: true,
hasUsedTools: true, isMultiAgent: true);
Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, timeoutWithTools);
}

// --- Bug fix: orchestrator stuck after SocketException 10054 ---

[Fact]
public void Regression_OrchestratorStuck_MultiAgentNoToolsUsesModerateTimeout()
{
// When SocketException 10054 kills the JSON-RPC connection, orchestrator sessions
// would previously wait 600s before the watchdog cleared IsProcessing.
// Fix: multi-agent sessions without tool activity use 180s timeout.
// This is long enough for legitimate model reasoning (1-3 min)
// but short enough to not leave users waiting 10 minutes for a dead connection.
var timeout = ComputeEffectiveTimeout(
hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false, isMultiAgent: true);

Assert.Equal(CopilotService.WatchdogMultiAgentNoToolTimeoutSeconds, timeout);
Assert.Equal(180, timeout);
Assert.True(timeout < 600, "Multi-agent no-tool timeout must be shorter than tool timeout");
Assert.True(timeout > 120, "Multi-agent no-tool timeout must be longer than base inactivity timeout");
}

// --- PR #211 regression: quiescence must not kill active sessions ---
Expand Down
23 changes: 21 additions & 2 deletions PolyPilot/Services/CopilotService.Events.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1370,6 +1370,12 @@ private void HandleReflectionAdvanceResult(SessionState state, string response,
/// <summary>If no SDK events arrive for this many seconds while a tool is actively executing, the session is considered stuck.
/// This is much longer because legitimate tool executions (e.g., running UI tests, long builds) can take many minutes.</summary>
internal const int WatchdogToolExecutionTimeoutSeconds = 600;
/// <summary>If no SDK events arrive for this many seconds for a multi-agent session WITHOUT active tools,
/// the session is considered stuck. This is shorter than WatchdogToolExecutionTimeoutSeconds because:
/// - Multi-agent sessions without tool activity are likely waiting for model reasoning (1-3 min typical)
/// - If the connection died (SocketException 10054), we don't want users waiting 10 minutes
/// - The orchestration loop has its own CancelAfter timeout as a backup</summary>
internal const int WatchdogMultiAgentNoToolTimeoutSeconds = 180;
/// <summary>If a resumed session receives zero SDK events for this many seconds, it was likely already
/// finished when the app restarted. Short enough that users don't have to click Stop, long enough
/// for the SDK to start streaming if the turn is genuinely still active.</summary>
Expand Down Expand Up @@ -1481,12 +1487,25 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session
// goes true and we fall through to the normal timeout tiers.
var useResumeQuiescence = state.Info.IsResumed && !hasReceivedEvents && !hasActiveTool && !hasUsedTools;

var useToolTimeout = hasActiveTool || (state.Info.IsResumed && !useResumeQuiescence) || hasUsedTools || isMultiAgentSession;
// Timeout tier selection:
// 1. Resumed session with no events → 30s quiescence
// 2. Active tool running OR resumed session with events → 600s (tools can take 10+ min)
// 3. Multi-agent session WITHOUT active tools or prior tool use → 180s (catch dead connections faster)
// 4. Standard session → 120s inactivity
//
// The key insight for tier 3: multi-agent sessions that haven't touched tools are likely
// doing pure model reasoning (1-3 min typical). If we hit 3 min with no events, the
// connection is probably dead (SocketException 10054 in SDK event loop). Users shouldn't
// wait 10 min to discover a dead connection.
var useToolTimeout = hasActiveTool || (state.Info.IsResumed && !useResumeQuiescence) || hasUsedTools;
var useMultiAgentNoToolTimeout = isMultiAgentSession && !hasActiveTool && !hasUsedTools && !state.Info.IsResumed;
var effectiveTimeout = useResumeQuiescence
? WatchdogResumeQuiescenceTimeoutSeconds
: useToolTimeout
? WatchdogToolExecutionTimeoutSeconds
: WatchdogInactivityTimeoutSeconds;
: useMultiAgentNoToolTimeout
? WatchdogMultiAgentNoToolTimeoutSeconds
: WatchdogInactivityTimeoutSeconds;

// Safety net: check absolute max processing time, but only if events have also
// gone stale. If events are still flowing (elapsed < effectiveTimeout), the session
Expand Down
8 changes: 7 additions & 1 deletion PolyPilot/Services/CopilotService.Persistence.cs
Original file line number Diff line number Diff line change
Expand Up @@ -423,13 +423,19 @@ public async Task RestorePreviousSessionsAsync(CancellationToken cancellationTok
}
}

IsRestoring = false;
}
}
catch (Exception ex)
{
Debug($"Failed to load active sessions file: {ex.Message}");
}
finally
{
// CRITICAL: Always reset IsRestoring even on failure.
// If this stays true, the entire UI becomes unresponsive
// (Resume buttons disabled, session interactions blocked).
IsRestoring = false;
}
}

}
Expand Down
16 changes: 12 additions & 4 deletions PolyPilot/Services/CopilotService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -638,10 +638,18 @@ public async Task InitializeAsync(CancellationToken cancellationToken = default)
LoadOrganization();

// Restore previous sessions (includes subscribing to untracked server sessions in Persistent mode)
IsRestoring = true;
OnStateChanged?.Invoke();
await RestorePreviousSessionsAsync(cancellationToken);
IsRestoring = false;
// Use try-finally to GUARANTEE IsRestoring is cleared even if restore throws.
// This is defense-in-depth alongside the finally block inside RestorePreviousSessionsAsync.
try
{
IsRestoring = true;
OnStateChanged?.Invoke();
await RestorePreviousSessionsAsync(cancellationToken);
}
finally
{
IsRestoring = false;
}

// Start health check loop for any codespace groups (regardless of whether sessions were restored)
if (CodespacesEnabled)
Expand Down