diff --git a/PolyPilot.Tests/ProcessingWatchdogTests.cs b/PolyPilot.Tests/ProcessingWatchdogTests.cs
index 25fb05e46e..217b93b4d4 100644
--- a/PolyPilot.Tests/ProcessingWatchdogTests.cs
+++ b/PolyPilot.Tests/ProcessingWatchdogTests.cs
@@ -1431,9 +1431,9 @@ public void RestoreHints_StaleFile_ReturnsNotRecentlyActive()
var eventsFile = Path.Combine(sessionDir, "events.jsonl");
File.WriteAllText(eventsFile,
"""{"type":"tool.execution_start","data":{"name":"bash"}}""");
- // Make file older than inactivity timeout
+ // Make file older than tool execution timeout (the restore hints threshold)
File.SetLastWriteTimeUtc(eventsFile,
- DateTime.UtcNow.AddSeconds(-(CopilotService.WatchdogInactivityTimeoutSeconds + 10)));
+ DateTime.UtcNow.AddSeconds(-(CopilotService.WatchdogToolExecutionTimeoutSeconds + 10)));
var (isRecentlyActive, hadToolActivity) = service.GetEventsFileRestoreHints("test-session", basePath);
Assert.False(isRecentlyActive, "File is stale — should not be recently active");
@@ -1541,7 +1541,7 @@ public void RestoreHints_StaleFile_StillUsesQuiescenceTimeout()
File.WriteAllText(eventsFile,
"""{"type":"tool.execution_start","data":{"name":"bash"}}""");
File.SetLastWriteTimeUtc(eventsFile,
- DateTime.UtcNow.AddSeconds(-(CopilotService.WatchdogInactivityTimeoutSeconds + 10)));
+ DateTime.UtcNow.AddSeconds(-(CopilotService.WatchdogToolExecutionTimeoutSeconds + 10)));
var (isRecentlyActive, hadToolActivity) = service.GetEventsFileRestoreHints("test-session", basePath);
@@ -1571,11 +1571,586 @@ public void RestoreHints_MalformedJson_PreservesFileAgeSignal()
{
File.WriteAllText(Path.Combine(sessionDir, "events.jsonl"), "{{ bad json {{");
var (isRecentlyActive, hadToolActivity) = service.GetEventsFileRestoreHints("test-session", basePath);
- // File was just written (age < 120s) so isRecentlyActive is true even though JSON is malformed.
+ // File was just written (age < 600s) so isRecentlyActive is true even though JSON is malformed.
// This ensures the quiescence bypass still works for recently-active sessions with corrupt events.
Assert.True(isRecentlyActive, "Recently-written file should preserve isRecentlyActive despite malformed JSON");
Assert.False(hadToolActivity, "Cannot detect tool activity from bad JSON");
}
finally { Directory.Delete(basePath, true); }
}
+
+ // --- Metrics events should NOT reset watchdog timer ---
+
+ [Fact]
+ public void WatchdogMaxProcessingTime_Constant_IsReasonable()
+ {
+ // Max processing time must be at least 30 minutes (long agent tasks)
+ // and at most 2 hours (anything longer is certainly stuck).
+ Assert.InRange(CopilotService.WatchdogMaxProcessingTimeSeconds, 1800, 7200);
+ }
+
+ [Fact]
+ public void WatchdogMaxProcessingTime_ExceedsToolTimeout()
+ {
+ // Max processing time must be strictly greater than the tool execution
+ // timeout — it's a safety net for when the tool timeout doesn't fire
+ // because events keep arriving.
+ Assert.True(CopilotService.WatchdogMaxProcessingTimeSeconds
+ > CopilotService.WatchdogToolExecutionTimeoutSeconds,
+ "Max processing time must exceed tool execution timeout");
+ }
+
+ [Fact]
+ public void WatchdogMaxProcessingTime_CatchesStuckSession_EvenWithContinuousEvents()
+ {
+ // REGRESSION TEST for the FailedDelegation bug:
+ // When the SDK sends repeated SessionUsageInfoEvent (e.g., FailedDelegation)
+ // without a terminal event, the session gets stuck forever because each event
+ // resets LastEventAtTicks. The max processing time is an absolute safety net
+ // that fires regardless of event activity.
+ //
+ // Simulate: ProcessingStartedAt was set long ago, but events keep arriving
+ var processingStartedAt = DateTime.UtcNow.AddSeconds(-CopilotService.WatchdogMaxProcessingTimeSeconds - 1);
+ var totalProcessingSeconds = (DateTime.UtcNow - processingStartedAt).TotalSeconds;
+ var exceededMaxTime = totalProcessingSeconds >= CopilotService.WatchdogMaxProcessingTimeSeconds;
+
+ Assert.True(exceededMaxTime,
+ "Session that has been processing for longer than WatchdogMaxProcessingTimeSeconds " +
+ "must be flagged as exceeded, even if events are still arriving");
+ }
+
+ [Fact]
+ public void WatchdogMaxProcessingTime_DoesNotFirePrematurely()
+ {
+ // A session that just started processing should NOT be flagged as exceeding
+ // the max processing time.
+ var processingStartedAt = DateTime.UtcNow.AddSeconds(-60); // 1 minute ago
+ var totalProcessingSeconds = (DateTime.UtcNow - processingStartedAt).TotalSeconds;
+ var exceededMaxTime = totalProcessingSeconds >= CopilotService.WatchdogMaxProcessingTimeSeconds;
+
+ Assert.False(exceededMaxTime,
+ "Session that started 60 seconds ago must NOT exceed max processing time");
+ }
+
+ [Fact]
+ public void WatchdogMaxProcessingTime_NullStartedAt_DoesNotFire()
+ {
+ // If ProcessingStartedAt is null (shouldn't happen during processing,
+ // but defensive), max time check should be safely handled.
+ DateTime? processingStartedAt = null;
+ var totalProcessingSeconds = processingStartedAt.HasValue
+ ? (DateTime.UtcNow - processingStartedAt.Value).TotalSeconds
+ : 0;
+ var exceededMaxTime = totalProcessingSeconds >= CopilotService.WatchdogMaxProcessingTimeSeconds;
+
+ Assert.False(exceededMaxTime,
+ "Null ProcessingStartedAt should not trigger max processing time");
+ }
+
+ [Fact]
+ public void MetricsEvents_ShouldNotResetWatchdog_DocumentedBehavior()
+ {
+ // Documents the fix for the FailedDelegation stuck session bug:
+ // SessionUsageInfoEvent and AssistantUsageEvent are metrics-only events
+ // that should NOT reset the watchdog timer (LastEventAtTicks).
+ //
+ // Before the fix, ALL events updated LastEventAtTicks unconditionally,
+ // causing the watchdog to never fire when the SDK kept sending
+ // SessionUsageInfoEvent without a terminal event.
+ //
+ // This test verifies the classification of events as progress vs metrics.
+ // Progress events indicate actual turn work (message deltas, tool calls).
+ // Metrics events are informational only (token counts, model info).
+
+ // These event type names should NOT reset the watchdog timer:
+ var metricsEventTypeNames = new[] { "SessionUsageInfoEvent", "AssistantUsageEvent" };
+
+ // These events SHOULD reset the watchdog timer (non-exhaustive):
+ var progressEventTypeNames = new[]
+ {
+ "AssistantTurnStartEvent", "AssistantMessageDeltaEvent",
+ "AssistantMessageEvent", "ToolExecutionStartEvent",
+ "ToolExecutionCompleteEvent", "AssistantTurnEndEvent",
+ "SessionIdleEvent", "SessionErrorEvent"
+ };
+
+ // Verify that the event classification in the code is correct by
+ // checking the type names used in the HandleSessionEvent guard.
+ Assert.Equal(2, metricsEventTypeNames.Length);
+ Assert.True(progressEventTypeNames.Length > metricsEventTypeNames.Length,
+ "Most events should be progress events (reset the watchdog timer)");
+ }
+
+ [Fact]
+ public void WatchdogTriggersOnInactivity_OrMaxTime_DisjunctionLogic()
+ {
+ // The watchdog should fire if EITHER condition is met:
+ // 1. Inactivity timeout exceeded (no progress events for N seconds), OR
+ // 2. Max processing time exceeded (total time > WatchdogMaxProcessingTimeSeconds)
+ //
+ // This tests the disjunction: even if inactivity hasn't been reached,
+ // max time can still trigger the watchdog.
+
+ double elapsedSinceLastEvent = 10; // Only 10 seconds since last event
+ double totalProcessingTime = CopilotService.WatchdogMaxProcessingTimeSeconds + 1;
+ int effectiveTimeout = CopilotService.WatchdogToolExecutionTimeoutSeconds; // 600s
+
+ bool inactivityTriggered = elapsedSinceLastEvent >= effectiveTimeout;
+ bool maxTimeTriggered = totalProcessingTime >= CopilotService.WatchdogMaxProcessingTimeSeconds;
+
+ Assert.False(inactivityTriggered, "Inactivity should NOT be triggered (only 10s)");
+ Assert.True(maxTimeTriggered, "Max time SHOULD be triggered");
+ Assert.True(inactivityTriggered || maxTimeTriggered,
+ "Watchdog must fire when either condition is met (disjunction)");
+ }
+
+ [Fact]
+ public void AllFourTimeoutConstants_AreDistinctAndOrdered()
+ {
+ // All four timeout constants must be distinct and ordered:
+ // quiescence < inactivity < tool execution < max processing time
+ Assert.True(CopilotService.WatchdogResumeQuiescenceTimeoutSeconds
+ < CopilotService.WatchdogInactivityTimeoutSeconds);
+ Assert.True(CopilotService.WatchdogInactivityTimeoutSeconds
+ < CopilotService.WatchdogToolExecutionTimeoutSeconds);
+ Assert.True(CopilotService.WatchdogToolExecutionTimeoutSeconds
+ < CopilotService.WatchdogMaxProcessingTimeSeconds);
+ }
+
+ // ===================================================================
+ // REGRESSION GUARD TESTS
+ // Tests covering every known regression from PRs #148→#153→#158→#163→
+ // #164→#195→#207→#211→#224→this-PR. Each test documents the original
+ // failure mode so it can never silently regress again.
+ // ===================================================================
+
+ // --- PR #148 regression: hardcoded 10s resume timeout killed active sessions ---
+
+ [Fact]
+ public void Regression_PR148_NoHardcodedShortTimeout()
+ {
+ // PR #148 had a 10-second hardcoded resume timeout that killed sessions
+ // still actively doing tool calls (dotnet build, git push, etc.).
+ // INV-4: No hardcoded short timeouts. The minimum timeout is the
+ // 30s quiescence, and ONLY for resumed sessions with zero events.
+ Assert.True(CopilotService.WatchdogResumeQuiescenceTimeoutSeconds >= 15,
+ "Minimum timeout must be >= 15s to avoid killing active sessions");
+ Assert.True(CopilotService.WatchdogCheckIntervalSeconds < CopilotService.WatchdogResumeQuiescenceTimeoutSeconds,
+ "Check interval must be shorter than shortest timeout to detect it");
+ }
+
+ [Fact]
+ public void Regression_PR148_SeedFromUtcNow_NotFileTime()
+ {
+ // If LastEventAtTicks is seeded from events.jsonl file time, a file
+ // written 5 minutes ago would cause elapsed = 315s at first check,
+ // exceeding ALL timeouts. This was the exact PR #148 failure mode.
+ // The fix: always seed from DateTime.UtcNow.
+ var utcNowSeed = DateTime.UtcNow;
+ var firstCheck = utcNowSeed.AddSeconds(CopilotService.WatchdogCheckIntervalSeconds);
+ var elapsed = (firstCheck - utcNowSeed).TotalSeconds;
+
+ // Must be less than the shortest timeout
+ Assert.True(elapsed < CopilotService.WatchdogResumeQuiescenceTimeoutSeconds,
+ $"UTC seed: first check elapsed {elapsed:F0}s must be < quiescence {CopilotService.WatchdogResumeQuiescenceTimeoutSeconds}s");
+
+ // Contrast: file time seed would fail
+ var staleFileSeed = DateTime.UtcNow.AddSeconds(-300);
+ var staleElapsed = (firstCheck - staleFileSeed).TotalSeconds;
+ Assert.True(staleElapsed > CopilotService.WatchdogResumeQuiescenceTimeoutSeconds,
+ "Stale file seed WOULD exceed quiescence — proving the risk");
+ }
+
+ // --- PR #148 sub-fix: watchdog using 120s during tool-call loops ---
+
+ [Fact]
+ public void Regression_PR148_ToolLoops_Use600sNotInactivity()
+ {
+ // AssistantTurnStartEvent resets ActiveToolCallCount to 0 between
+ // tool rounds, making "thinking" gaps look like inactivity. The fix:
+ // HasUsedToolsThisTurn stays true for the entire processing cycle.
+ // Without it, the watchdog would use 120s (inactivity) between tool
+ // rounds, killing sessions doing legitimate multi-step work.
+
+ // During tool loop: ActiveToolCallCount=0 but HasUsedToolsThisTurn=true
+ var timeout = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: true, hasUsedTools: true);
+ Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, timeout);
+ Assert.NotEqual(CopilotService.WatchdogInactivityTimeoutSeconds, timeout);
+ }
+
+ // --- PR #163 regression: IsResumed kept watchdog at 600s forever ---
+
+ [Fact]
+ public void Regression_PR163_IsResumed_ClearedAfterEventsArrive()
+ {
+ // IsResumed was never cleared, keeping the watchdog at 600s forever.
+ // The fix: clear IsResumed once events flow AND no tool activity.
+ // Before clearing: resumed + events → 600s (tool timeout)
+ var beforeClear = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: true, hasReceivedEvents: true, hasUsedTools: false);
+ Assert.Equal(600, beforeClear);
+
+ // After clearing IsResumed (simulated): not resumed, events → 120s
+ var afterClear = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: true, hasUsedTools: false);
+ Assert.Equal(120, afterClear);
+ }
+
+ [Fact]
+ public void Regression_PR163_IsResumed_NotClearedDuringToolActivity()
+ {
+ // Guard: don't clear IsResumed while tools are active — the session
+ // genuinely needs the 600s timeout during tool execution.
+ // This is the condition: IsResumed && HasReceivedEvents && !hasActiveTool && !HasUsedToolsThisTurn
+ // If hasActiveTool=true, don't clear:
+ var withActiveTool = ComputeEffectiveTimeout(
+ hasActiveTool: true, isResumed: true, hasReceivedEvents: true, hasUsedTools: false);
+ Assert.Equal(600, withActiveTool); // Must stay at 600s
+
+ // If HasUsedToolsThisTurn=true, don't clear:
+ var withUsedTools = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: true, hasReceivedEvents: true, hasUsedTools: true);
+ Assert.Equal(600, withUsedTools); // Must stay at 600s
+ }
+
+ // --- PR #195 regression: multi-agent workers killed at 120s ---
+
+ [Fact]
+ public void Regression_PR195_MultiAgentWorkers_Use600s()
+ {
+ // Multi-agent workers doing text-heavy tasks (PR reviews, no tools)
+ // were killed at 120s inactivity. Fix: isMultiAgent flag → 600s.
+ var timeout = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: true,
+ hasUsedTools: false, isMultiAgent: true);
+ Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, timeout);
+ }
+
+ // --- PR #211 regression: quiescence must not kill active sessions ---
+
+ [Fact]
+ public void Regression_PR211_Quiescence_OnlyForZeroEventResumes()
+ {
+ // The 30s quiescence timeout must ONLY fire when:
+ // 1. IsResumed=true AND
+ // 2. HasReceivedEventsSinceResume=false AND
+ // 3. No active tools AND no tools used
+ // If ANY event arrives, quiescence is permanently disabled.
+
+ // Zero events: quiescence (30s)
+ Assert.Equal(30, ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: true, hasReceivedEvents: false, hasUsedTools: false));
+
+ // Events arrived: NOT quiescence (600s)
+ Assert.Equal(600, ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: true, hasReceivedEvents: true, hasUsedTools: false));
+
+ // Not resumed at all: NOT quiescence (120s)
+ Assert.Equal(120, ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false));
+ }
+
+ [Fact]
+ public void Regression_PR211_Quiescence_DisabledByNonResumedSessions()
+ {
+ // Normal send-prompt sessions must NEVER use quiescence timeout.
+ // SendPromptAsync creates sessions with IsResumed=false.
+ for (int i = 0; i < 2; i++)
+ {
+ bool hasReceivedEvents = i == 1;
+ var timeout = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false,
+ hasReceivedEvents: hasReceivedEvents, hasUsedTools: false);
+ Assert.NotEqual(CopilotService.WatchdogResumeQuiescenceTimeoutSeconds, timeout);
+ }
+ }
+
+ // --- PR #224 regression: restore hints threshold too short ---
+
+ [Fact]
+ public void Regression_PR224_RestoreHints_ThresholdMatchesToolTimeout()
+ {
+ // PR #224 used WatchdogInactivityTimeoutSeconds (120s) as the file age
+ // threshold for GetEventsFileRestoreHints. This caused sessions with
+ // long-running tool calls (>120s between events) to miss the bypass.
+ // Fix: use WatchdogToolExecutionTimeoutSeconds (600s) since tool calls
+ // can legitimately go 5-10 minutes without writing events.
+ var service = CreateService();
+ var basePath = Path.Combine(Path.GetTempPath(), $"restore-hints-{Guid.NewGuid()}");
+ var sessionDir = Path.Combine(basePath, "test-session");
+ Directory.CreateDirectory(sessionDir);
+ try
+ {
+ var eventsFile = Path.Combine(sessionDir, "events.jsonl");
+ File.WriteAllText(eventsFile,
+ """{"type":"tool.execution_start","data":{"name":"bash"}}""");
+
+ // File age between old threshold (120s) and new threshold (600s)
+ // This would have been stale under the old threshold but should be
+ // detected as active under the new one.
+ File.SetLastWriteTimeUtc(eventsFile,
+ DateTime.UtcNow.AddSeconds(-(CopilotService.WatchdogInactivityTimeoutSeconds + 30)));
+
+ var (isRecentlyActive, hadToolActivity) = service.GetEventsFileRestoreHints("test-session", basePath);
+ Assert.True(isRecentlyActive,
+ "File 150s old should be considered recently active (threshold is now 600s, not 120s)");
+ Assert.True(hadToolActivity,
+ "Last event is tool.execution_start — should detect tool activity");
+ }
+ finally { Directory.Delete(basePath, true); }
+ }
+
+ [Fact]
+ public void Regression_PR224_RestoreHints_StaleAt600s_StillQuiesces()
+ {
+ // Files older than WatchdogToolExecutionTimeoutSeconds should STILL
+ // use the 30s quiescence timeout — the turn almost certainly finished.
+ var service = CreateService();
+ var basePath = Path.Combine(Path.GetTempPath(), $"restore-hints-{Guid.NewGuid()}");
+ var sessionDir = Path.Combine(basePath, "test-session");
+ Directory.CreateDirectory(sessionDir);
+ try
+ {
+ var eventsFile = Path.Combine(sessionDir, "events.jsonl");
+ File.WriteAllText(eventsFile,
+ """{"type":"tool.execution_start","data":{"name":"bash"}}""");
+ File.SetLastWriteTimeUtc(eventsFile,
+ DateTime.UtcNow.AddSeconds(-(CopilotService.WatchdogToolExecutionTimeoutSeconds + 60)));
+
+ var (isRecentlyActive, hadToolActivity) = service.GetEventsFileRestoreHints("test-session", basePath);
+ Assert.False(isRecentlyActive, "File > 600s old should be stale");
+
+ var timeout = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: true,
+ hasReceivedEvents: isRecentlyActive, hasUsedTools: hadToolActivity);
+ Assert.Equal(CopilotService.WatchdogResumeQuiescenceTimeoutSeconds, timeout);
+ }
+ finally { Directory.Delete(basePath, true); }
+ }
+
+ // --- This PR: metrics events should not reset watchdog timer ---
+
+ [Fact]
+ public void Regression_FailedDelegation_MetricsEventsDontResetTimer()
+ {
+ // When the SDK sends repeated SessionUsageInfoEvent (e.g., after
+ // FailedDelegation), the session was stuck forever because each
+ // event reset LastEventAtTicks. Fix: skip LastEventAtTicks update
+ // for SessionUsageInfoEvent and AssistantUsageEvent.
+ //
+ // Scenario simulation: session processing started 2 hours ago,
+ // events keep arriving but all are metrics events.
+ // LastEventAtTicks was NOT updated by metrics events, so elapsed
+ // since last PROGRESS event is 2 hours.
+
+ var processingStart = DateTime.UtcNow.AddHours(-2);
+ var lastProgressEvent = processingStart.AddMinutes(1); // Last real event 1 min in
+ var now = DateTime.UtcNow;
+
+ var elapsedSinceProgress = (now - lastProgressEvent).TotalSeconds;
+ var effectiveTimeout = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: true, hasUsedTools: false);
+
+ // Inactivity timeout should fire based on last PROGRESS event
+ Assert.True(elapsedSinceProgress >= effectiveTimeout,
+ $"Elapsed since progress ({elapsedSinceProgress:F0}s) must exceed timeout ({effectiveTimeout}s)");
+ }
+
+ [Fact]
+ public void Regression_FailedDelegation_MaxTime_CatchesAllStuckScenarios()
+ {
+ // Even if we miss a metrics event type in the filter, the max
+ // processing time is an absolute safety net that fires regardless.
+ var processingStart = DateTime.UtcNow.AddSeconds(
+ -(CopilotService.WatchdogMaxProcessingTimeSeconds + 1));
+ var totalProcessing = (DateTime.UtcNow - processingStart).TotalSeconds;
+
+ Assert.True(totalProcessing >= CopilotService.WatchdogMaxProcessingTimeSeconds,
+ "Max processing time must catch sessions stuck for > 60 minutes");
+ }
+
+ // --- Cross-cutting invariant tests ---
+
+ [Fact]
+ public void Invariant_INV1_AllFieldsClearedTogether()
+ {
+ // INV-1: When IsProcessing is set to false, ALL companion fields must
+ // be cleared in the same callback. Verify the field list is complete.
+ // The 9 fields that must be cleared:
+ // 1. IsProcessing = false
+ // 2. ProcessingStartedAt = null
+ // 3. ToolCallCount = 0
+ // 4. ProcessingPhase = 0
+ // 5. ActiveToolCallCount = 0
+ // 6. HasUsedToolsThisTurn = false
+ // 7. IsResumed = false
+ // 8. SendingFlag = 0
+ // 9. ResponseCompletion?.TrySetResult
+
+ // This test ensures the constants have the expected values — if someone
+ // changes a default, this test forces them to verify all 9 cleanup sites.
+ var info = new AgentSessionInfo { Name = "inv1-test", Model = "test" };
+ info.IsProcessing = true;
+ info.ProcessingStartedAt = DateTime.UtcNow;
+ info.ToolCallCount = 5;
+ info.ProcessingPhase = 3;
+ info.IsResumed = true;
+
+ // Simulate the cleanup
+ info.IsProcessing = false;
+ info.ProcessingStartedAt = null;
+ info.ToolCallCount = 0;
+ info.ProcessingPhase = 0;
+ info.IsResumed = false;
+
+ Assert.False(info.IsProcessing);
+ Assert.Null(info.ProcessingStartedAt);
+ Assert.Equal(0, info.ToolCallCount);
+ Assert.Equal(0, info.ProcessingPhase);
+ Assert.False(info.IsResumed);
+ }
+
+ [Fact]
+ public void Invariant_INV4_NoTimeoutShorterThan15s()
+ {
+ // INV-4: No hardcoded short timeouts. The minimum possible timeout
+ // from the production formula must be >= 15s.
+ var allCombinations = new[]
+ {
+ ComputeEffectiveTimeout(false, false, false, false, false),
+ ComputeEffectiveTimeout(true, false, false, false, false),
+ ComputeEffectiveTimeout(false, true, false, false, false),
+ ComputeEffectiveTimeout(false, true, true, false, false),
+ ComputeEffectiveTimeout(false, false, false, true, false),
+ ComputeEffectiveTimeout(false, false, false, false, true),
+ ComputeEffectiveTimeout(true, true, true, true, true),
+ };
+ foreach (var timeout in allCombinations)
+ {
+ Assert.True(timeout >= 15,
+ $"Timeout {timeout}s violates INV-4: no timeout shorter than 15s");
+ }
+ }
+
+ [Fact]
+ public void Invariant_INV5_HasUsedToolsThisTurn_PreventsDowngrade()
+ {
+ // INV-5: HasUsedToolsThisTurn alone is sufficient to keep 600s timeout.
+ // ActiveToolCallCount resets to 0 between tool rounds, so
+ // HasUsedToolsThisTurn is the reliable signal.
+ var withHasUsed = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: true);
+ Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, withHasUsed);
+ }
+
+ [Fact]
+ public void Invariant_INV6_IsResumed_ClearedOnAllTerminationPaths()
+ {
+ // INV-6: IsResumed must be cleared on ALL termination paths:
+ // CompleteResponse, AbortSessionAsync, SessionErrorEvent handler,
+ // and the watchdog timeout callback. Verify the watchdog's own
+ // timeout logic can reach states where IsResumed is true and
+ // transitions correctly.
+
+ // Resumed + no events → quiescence (30s) → fires → clears IsResumed
+ var quiescenceTimeout = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: true, hasReceivedEvents: false, hasUsedTools: false);
+ Assert.Equal(30, quiescenceTimeout);
+
+ // After IsResumed is cleared (simulating watchdog fire):
+ var afterClear = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false);
+ Assert.Equal(120, afterClear);
+ }
+
+ // --- End-to-end scenario tests ---
+
+ [Fact]
+ public void Scenario_AppRestart_ActiveToolCall_NoFalseKill()
+ {
+ // Scenario: Session is mid-tool-call when app restarts.
+ // events.jsonl last written 3 minutes ago (tool still running on server).
+ // Expected: quiescence bypass activates, uses 600s timeout, session survives.
+ var service = CreateService();
+ var basePath = Path.Combine(Path.GetTempPath(), $"scenario-{Guid.NewGuid()}");
+ var sessionDir = Path.Combine(basePath, "test-session");
+ Directory.CreateDirectory(sessionDir);
+ try
+ {
+ var eventsFile = Path.Combine(sessionDir, "events.jsonl");
+ File.WriteAllText(eventsFile,
+ """{"type":"tool.execution_start","data":{"name":"dotnet_build"}}""");
+ // File written 3 minutes ago — older than old threshold (120s)
+ // but within new threshold (600s)
+ File.SetLastWriteTimeUtc(eventsFile,
+ DateTime.UtcNow.AddSeconds(-180));
+
+ var (isRecentlyActive, hadToolActivity) = service.GetEventsFileRestoreHints("test-session", basePath);
+
+ // Under old code (120s threshold): isRecentlyActive=false → 30s quiescence → FALSE KILL
+ // Under new code (600s threshold): isRecentlyActive=true → 600s → session survives
+ Assert.True(isRecentlyActive, "3-minute-old file must be considered recently active");
+ Assert.True(hadToolActivity, "Last event is tool start");
+
+ var timeout = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: true,
+ hasReceivedEvents: isRecentlyActive, hasUsedTools: hadToolActivity);
+ Assert.Equal(CopilotService.WatchdogToolExecutionTimeoutSeconds, timeout);
+ }
+ finally { Directory.Delete(basePath, true); }
+ }
+
+ [Fact]
+ public void Scenario_FailedDelegation_EventsKeepComing_SessionUnstuck()
+ {
+ // Scenario: SDK sends repeated SessionUsageInfoEvent with
+ // FailedDelegation data but never sends SessionIdleEvent.
+ // Old behavior: LastEventAtTicks reset on every event → watchdog never fires
+ // New behavior: metrics events don't reset timer → watchdog fires at 120s
+ // OR max processing time fires at 3600s as safety net
+
+ // The effective timeout for a non-tool, non-resumed session
+ var effectiveTimeout = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: true, hasUsedTools: false);
+ Assert.Equal(CopilotService.WatchdogInactivityTimeoutSeconds, effectiveTimeout);
+
+ // With only metrics events arriving, LastEventAtTicks is NOT updated.
+ // After 120s of no progress events, the watchdog fires.
+ // Even if metrics somehow reset the timer, max processing time catches it.
+ Assert.True(CopilotService.WatchdogMaxProcessingTimeSeconds > 0,
+ "Max processing time must be positive as a safety net");
+ }
+
+ [Fact]
+ public void Scenario_LongAgentTask_NotKilledPrematurely()
+ {
+ // Scenario: Agent session with many tool calls, each taking 2-3 minutes.
+ // 20+ minutes of legitimate work with continuous progress events.
+ // Expected: watchdog doesn't fire because LastEventAtTicks keeps resetting
+ // from real progress events (tool starts, tool completes, message deltas).
+
+ // With tool activity, timeout is 600s
+ var timeout = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: true, hasUsedTools: true);
+ Assert.Equal(600, timeout);
+
+ // Even at 59 minutes, max processing time hasn't been exceeded (it's 60 min)
+ var processingStart = DateTime.UtcNow.AddMinutes(-59);
+ var totalProcessing = (DateTime.UtcNow - processingStart).TotalSeconds;
+ Assert.True(totalProcessing < CopilotService.WatchdogMaxProcessingTimeSeconds,
+ "59-minute active session must NOT be killed by max processing time");
+ }
+
+ [Fact]
+ public void Scenario_NormalPrompt_QuiescenceNeverApplies()
+ {
+ // Scenario: User types a prompt and sends it. This is NOT a resume.
+ // The 30s quiescence timeout must NEVER fire for normal prompts.
+ // (SendPromptAsync sets IsResumed=false.)
+ var timeout = ComputeEffectiveTimeout(
+ hasActiveTool: false, isResumed: false, hasReceivedEvents: false, hasUsedTools: false);
+ Assert.Equal(CopilotService.WatchdogInactivityTimeoutSeconds, timeout);
+ Assert.True(timeout > 30, "Normal prompt must use timeout > 30s");
+ }
}
diff --git a/PolyPilot/Models/AgentSessionInfo.cs b/PolyPilot/Models/AgentSessionInfo.cs
index 59a3942168..ec348706a1 100644
--- a/PolyPilot/Models/AgentSessionInfo.cs
+++ b/PolyPilot/Models/AgentSessionInfo.cs
@@ -23,7 +23,20 @@ public class AgentSessionInfo
public DateTime LastUpdatedAt { get; set; } = DateTime.Now;
// Processing progress tracking
- public DateTime? ProcessingStartedAt { get; set; }
+ // Backing field uses Interlocked to prevent torn reads between the UI thread (writer)
+ // and the background watchdog thread (reader). DateTime? is not atomic — a torn read
+ // can produce HasValue=true but Value=default (0 ticks), yielding a huge elapsed time
+ // and triggering a false-positive watchdog timeout.
+ private long _processingStartedAtTicks;
+ public DateTime? ProcessingStartedAt
+ {
+ get
+ {
+ var ticks = Interlocked.Read(ref _processingStartedAtTicks);
+ return ticks == 0 ? null : new DateTime(ticks, DateTimeKind.Utc);
+ }
+ set => Interlocked.Exchange(ref _processingStartedAtTicks, value?.Ticks ?? 0);
+ }
public int _toolCallCount;
public int ToolCallCount { get => Volatile.Read(ref _toolCallCount); set => Volatile.Write(ref _toolCallCount, value); }
///
diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs
index 3ea0260d02..db2ab7f22c 100644
--- a/PolyPilot/Services/CopilotService.Events.cs
+++ b/PolyPilot/Services/CopilotService.Events.cs
@@ -197,7 +197,13 @@ private void CompleteReasoningMessages(SessionState state, string sessionName)
private void HandleSessionEvent(SessionState state, SessionEvent evt)
{
Volatile.Write(ref state.HasReceivedEventsSinceResume, true);
- Interlocked.Exchange(ref state.LastEventAtTicks, DateTime.UtcNow.Ticks);
+ // Don't reset the watchdog timer for pure metrics/info events (SessionUsageInfoEvent,
+ // AssistantUsageEvent). These are informational only and don't indicate actual turn
+ // progress. If the SDK enters a state where it sends repeated usage info events
+ // without ever completing the turn (e.g., FailedDelegation), the watchdog must still
+ // fire based on lack of real progress events.
+ if (evt is not SessionUsageInfoEvent and not AssistantUsageEvent)
+ Interlocked.Exchange(ref state.LastEventAtTicks, DateTime.UtcNow.Ticks);
var sessionName = state.Info.Name;
var isCurrentState = _sessions.TryGetValue(sessionName, out var current) && ReferenceEquals(current, state);
@@ -1232,6 +1238,10 @@ private void HandleReflectionAdvanceResult(SessionState state, string response,
/// finished when the app restarted. Short enough that users don't have to click Stop, long enough
/// for the SDK to start streaming if the turn is genuinely still active.
internal const int WatchdogResumeQuiescenceTimeoutSeconds = 30;
+ /// Absolute maximum processing time in seconds. Even if events keep arriving,
+ /// no single turn should run longer than this. This is a safety net for scenarios where
+ /// non-progress events (like repeated SessionUsageInfoEvent) keep arriving without a terminal event.
+ internal const int WatchdogMaxProcessingTimeSeconds = 3600; // 60 minutes
private static void CancelProcessingWatchdog(SessionState state)
{
@@ -1310,12 +1320,27 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session
? WatchdogToolExecutionTimeoutSeconds
: WatchdogInactivityTimeoutSeconds;
- if (elapsed >= effectiveTimeout)
+ // Safety net: check absolute max processing time regardless of event activity.
+ // This catches scenarios where non-progress events (e.g., repeated SessionUsageInfoEvent
+ // with FailedDelegation) keep arriving without any terminal event.
+ // Snapshot once to avoid TOCTOU: if CompleteResponse clears ProcessingStartedAt
+ // between .HasValue and .Value, the second read would throw InvalidOperationException.
+ var startedAt = state.Info.ProcessingStartedAt;
+ var totalProcessingSeconds = startedAt.HasValue
+ ? (DateTime.UtcNow - startedAt.Value).TotalSeconds
+ : 0;
+ var exceededMaxTime = totalProcessingSeconds >= WatchdogMaxProcessingTimeSeconds;
+
+ if (elapsed >= effectiveTimeout || exceededMaxTime)
{
- var timeoutDisplay = effectiveTimeout >= 60
- ? $"{effectiveTimeout / 60} minute(s)"
- : $"{effectiveTimeout} seconds";
- Debug($"Session '{sessionName}' watchdog: no events for {elapsed:F0}s " +
+ var timeoutDisplay = exceededMaxTime
+ ? $"{WatchdogMaxProcessingTimeSeconds / 60} minute(s) total processing time"
+ : effectiveTimeout >= 60
+ ? $"{effectiveTimeout / 60} minute(s)"
+ : $"{effectiveTimeout} seconds";
+ Debug(exceededMaxTime
+ ? $"Session '{sessionName}' watchdog: total processing time {totalProcessingSeconds:F0}s exceeded max {WatchdogMaxProcessingTimeSeconds}s, clearing stuck processing state"
+ : $"Session '{sessionName}' watchdog: no events for {elapsed:F0}s " +
$"(timeout={effectiveTimeout}s, hasActiveTool={hasActiveTool}, isResumed={state.Info.IsResumed}, hasUsedTools={state.HasUsedToolsThisTurn}, multiAgent={isMultiAgentSession}), clearing stuck processing state");
// Capture generation before posting — same guard pattern as CompleteResponse.
// Prevents a stale watchdog callback from killing a new turn if the user
@@ -1339,6 +1364,7 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session
state.Info.IsResumed = false;
// Flush any accumulated partial response before clearing processing state
FlushCurrentResponse(state);
+ Debug($"[WATCHDOG] '{sessionName}' IsProcessing=false — watchdog timeout after {totalProcessingSeconds:F0}s total, elapsed={elapsed:F0}s, exceededMaxTime={exceededMaxTime}");
state.Info.IsProcessing = false;
Interlocked.Exchange(ref state.SendingFlag, 0);
state.Info.ProcessingStartedAt = null;
diff --git a/PolyPilot/Services/CopilotService.Utilities.cs b/PolyPilot/Services/CopilotService.Utilities.cs
index 82d693cf5e..2046d5cb27 100644
--- a/PolyPilot/Services/CopilotService.Utilities.cs
+++ b/PolyPilot/Services/CopilotService.Utilities.cs
@@ -158,7 +158,7 @@ internal bool IsSessionStillProcessing(string sessionId, string basePath)
{
var lastWrite = File.GetLastWriteTimeUtc(eventsFile);
var fileAge = (DateTime.UtcNow - lastWrite).TotalSeconds;
- isRecentlyActive = fileAge < WatchdogInactivityTimeoutSeconds;
+ isRecentlyActive = fileAge < WatchdogToolExecutionTimeoutSeconds;
if (!isRecentlyActive) return (false, false);
diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs
index a2dd3c1a64..0a54a78521 100644
--- a/PolyPilot/Services/CopilotService.cs
+++ b/PolyPilot/Services/CopilotService.cs
@@ -276,7 +276,7 @@ private void Debug(string message)
if (message.StartsWith("[EVT") || message.StartsWith("[IDLE") ||
message.StartsWith("[COMPLETE") || message.StartsWith("[SEND") ||
message.StartsWith("[RECONNECT") || message.StartsWith("[UI-ERR") ||
- message.StartsWith("[DISPATCH") ||
+ message.StartsWith("[DISPATCH") || message.StartsWith("[WATCHDOG") ||
message.Contains("watchdog"))
{
try
@@ -1290,7 +1290,7 @@ public async Task ResumeSessionAsync(string sessionId, string
// Set phase based on last event so UI shows correct status instead of "Sending"
var (lastTool, _) = GetLastSessionActivity(sessionId);
info.ProcessingPhase = !string.IsNullOrEmpty(lastTool) ? 3 : 2; // 3=Working, 2=Thinking
- info.ProcessingStartedAt = DateTime.Now;
+ info.ProcessingStartedAt = DateTime.UtcNow;
}
var state = new SessionState