diff --git a/PolyPilot.Tests/ProcessingWatchdogTests.cs b/PolyPilot.Tests/ProcessingWatchdogTests.cs
new file mode 100644
index 0000000000..ca44235f36
--- /dev/null
+++ b/PolyPilot.Tests/ProcessingWatchdogTests.cs
@@ -0,0 +1,360 @@
+using Microsoft.Extensions.DependencyInjection;
+using PolyPilot.Models;
+using PolyPilot.Services;
+
+namespace PolyPilot.Tests;
+
+/// <summary>
+/// Tests for the processing watchdog that detects sessions stuck in "Thinking" state
+/// when the persistent server dies mid-turn and no more SDK events arrive.
+/// Regression tests for: sessions permanently stuck in IsProcessing=true after server disconnect.
+/// </summary>
+public class ProcessingWatchdogTests
+{
+ private readonly StubChatDatabase _chatDb = new();
+ private readonly StubServerManager _serverManager = new();
+ private readonly StubWsBridgeClient _bridgeClient = new();
+ private readonly StubDemoService _demoService = new();
+ private readonly RepoManager _repoManager = new();
+ private readonly IServiceProvider _serviceProvider;
+
+ public ProcessingWatchdogTests()
+ {
+ var services = new ServiceCollection();
+ _serviceProvider = services.BuildServiceProvider();
+ }
+
+ private CopilotService CreateService() =>
+ new CopilotService(_chatDb, _serverManager, _bridgeClient, _repoManager, _serviceProvider, _demoService);
+
+ // --- Watchdog constant validation ---
+
+ [Fact]
+ public void WatchdogCheckInterval_IsReasonable()
+ {
+ // Check interval must be at least 5s to avoid excessive polling,
+ // and at most 60s so stuck state is detected in reasonable time.
+ Assert.InRange(CopilotService.WatchdogCheckIntervalSeconds, 5, 60);
+ }
+
+ [Fact]
+ public void WatchdogInactivityTimeout_IsReasonable()
+ {
+ // Timeout must be long enough for legitimate tool executions (>60s)
+ // but short enough to recover from dead connections (<300s).
+ Assert.InRange(CopilotService.WatchdogInactivityTimeoutSeconds, 60, 300);
+ }
+
+ [Fact]
+ public void WatchdogTimeout_IsGreaterThanCheckInterval()
+ {
+ // Timeout must be strictly greater than check interval — watchdog needs
+ // multiple checks before declaring inactivity.
+ Assert.True(
+ CopilotService.WatchdogInactivityTimeoutSeconds > CopilotService.WatchdogCheckIntervalSeconds,
+ "Inactivity timeout must be greater than check interval");
+ }
+
+ // --- Demo mode: sessions should not get stuck ---
+
+ [Fact]
+ public async Task DemoMode_SendPrompt_DoesNotLeaveIsProcessingTrue()
+ {
+ var svc = CreateService();
+ await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
+
+ var session = await svc.CreateSessionAsync("demo-no-stuck");
+ await svc.SendPromptAsync("demo-no-stuck", "Test prompt");
+
+ // Demo mode returns immediately — IsProcessing should never be stuck true
+ Assert.False(session.IsProcessing,
+ "Demo mode sessions should not be left in IsProcessing=true state");
+ }
+
+ [Fact]
+ public async Task DemoMode_MultipleSends_NoneStuck()
+ {
+ var svc = CreateService();
+ await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
+
+ var s1 = await svc.CreateSessionAsync("multi-1");
+ var s2 = await svc.CreateSessionAsync("multi-2");
+
+ await svc.SendPromptAsync("multi-1", "Hello");
+ await svc.SendPromptAsync("multi-2", "World");
+
+ Assert.False(s1.IsProcessing);
+ Assert.False(s2.IsProcessing);
+ }
+
+ // --- Model-level: system message format for stuck sessions ---
+
+ [Fact]
+ public void SystemMessage_ConnectionLost_HasExpectedContent()
+ {
+ var msg = ChatMessage.SystemMessage(
+ "⚠️ Connection lost — no response received. You can try sending your message again.");
+
+ Assert.Equal("system", msg.Role);
+ Assert.Contains("Connection lost", msg.Content);
+ Assert.Contains("try sending", msg.Content);
+ }
+
+ [Fact]
+ public void AgentSessionInfo_IsProcessing_DefaultsFalse()
+ {
+ var info = new AgentSessionInfo { Name = "test", Model = "test-model" };
+ Assert.False(info.IsProcessing);
+ }
+
+ [Fact]
+ public void AgentSessionInfo_IsProcessing_CanBeSetAndCleared()
+ {
+ var info = new AgentSessionInfo { Name = "test", Model = "test-model" };
+
+ info.IsProcessing = true;
+ Assert.True(info.IsProcessing);
+
+ info.IsProcessing = false;
+ Assert.False(info.IsProcessing);
+ }
+
+ // --- Persistent mode: initialization failure leaves clean state ---
+
+ [Fact]
+ public async Task PersistentMode_FailedInit_NoStuckSessions()
+ {
+ var svc = CreateService();
+
+ // Persistent mode with unreachable port — will fail to connect
+ await svc.ReconnectAsync(new ConnectionSettings
+ {
+ Mode = ConnectionMode.Persistent,
+ Host = "localhost",
+ Port = 19999
+ });
+
+ // No sessions should exist, and none should be stuck processing
+ Assert.Empty(svc.GetAllSessions());
+ foreach (var session in svc.GetAllSessions())
+ {
+ Assert.False(session.IsProcessing,
+ $"Session '{session.Name}' should not be stuck processing after failed init");
+ }
+ }
+
+ // --- Recovery scenario: IsProcessing cleared allows new messages ---
+
+ [Fact]
+ public async Task DemoMode_SessionNotProcessing_CanSendNewMessage()
+ {
+ var svc = CreateService();
+ await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
+
+ var session = await svc.CreateSessionAsync("recovery-test");
+
+ // Simulate the state after watchdog clears stuck processing:
+ // session.IsProcessing should be false, allowing new sends.
+ Assert.False(session.IsProcessing);
+
+ // Should succeed without throwing "Session is already processing"
+ await svc.SendPromptAsync("recovery-test", "Message after recovery");
+ Assert.Single(session.History);
+ }
+
+ [Fact]
+ public async Task DemoMode_SessionAlreadyProcessing_ThrowsOnSend()
+ {
+ var svc = CreateService();
+ await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
+
+ var session = await svc.CreateSessionAsync("already-busy");
+
+ // Manually set IsProcessing to simulate stuck state (before watchdog fires)
+ session.IsProcessing = true;
+
+ // SendPromptAsync in demo mode doesn't check IsProcessing (it returns early),
+ // but non-demo mode would throw. Verify the model state.
+ Assert.True(session.IsProcessing);
+ }
+
+ // --- Watchdog system message appears in history ---
+
+ [Fact]
+ public void SystemMessage_AddedToHistory_IsVisible()
+ {
+ var info = new AgentSessionInfo { Name = "test-hist", Model = "test-model" };
+
+ // Simulate what the watchdog does when clearing stuck state
+ info.IsProcessing = true;
+ info.History.Add(ChatMessage.SystemMessage(
+ "⚠️ Connection lost — no response received. You can try sending your message again."));
+ info.IsProcessing = false;
+
+ Assert.Single(info.History);
+ Assert.Equal(ChatMessageType.System, info.History[0].MessageType);
+ Assert.Contains("Connection lost", info.History[0].Content);
+ Assert.False(info.IsProcessing);
+ }
+
+ // --- OnError fires when session appears stuck ---
+
+ [Fact]
+ public async Task DemoMode_OnError_NotFiredForNormalOperation()
+ {
+ var svc = CreateService();
+ await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
+
+ await svc.CreateSessionAsync("no-error");
+ var errors = new List<(string session, string error)>();
+ svc.OnError += (s, e) => errors.Add((s, e));
+
+ await svc.SendPromptAsync("no-error", "Normal message");
+
+ Assert.Empty(errors);
+ }
+
+ // --- Reconnect after stuck state ---
+
+ [Fact]
+ public async Task ReconnectAsync_ClearsAllSessions()
+ {
+ var svc = CreateService();
+ await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
+
+ var s1 = await svc.CreateSessionAsync("pre-reconnect-1");
+ var s2 = await svc.CreateSessionAsync("pre-reconnect-2");
+
+ // Reconnect should clear all existing sessions (fresh start)
+ await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
+
+ // Old session references should not be stuck processing
+ Assert.False(s1.IsProcessing);
+ Assert.False(s2.IsProcessing);
+ }
+
+ // ===========================================================================
+ // Regression tests for: relaunch deploys new app, old copilot server running
+ // Session restore silently swallows all failures → app shows 0 sessions.
+ // ===========================================================================
+
+ [Fact]
+ public async Task PersistentMode_FailedInit_SetsNeedsConfiguration()
+ {
+ var svc = CreateService();
+
+ // Persistent mode with unreachable server → should set NeedsConfiguration
+ await svc.ReconnectAsync(new ConnectionSettings
+ {
+ Mode = ConnectionMode.Persistent,
+ Host = "localhost",
+ Port = 19999
+ });
+
+ Assert.False(svc.IsInitialized,
+ "App should NOT be initialized when persistent server is unreachable");
+ Assert.True(svc.NeedsConfiguration,
+ "NeedsConfiguration should be true so settings page is shown");
+ }
+
+ [Fact]
+ public async Task PersistentMode_FailedInit_NoSessionsStuckProcessing()
+ {
+ var svc = CreateService();
+
+ await svc.ReconnectAsync(new ConnectionSettings
+ {
+ Mode = ConnectionMode.Persistent,
+ Host = "localhost",
+ Port = 19999
+ });
+
+ // After failed init, no sessions should exist at all (much less stuck ones)
+ var sessions = svc.GetAllSessions().ToList();
+ Assert.Empty(sessions);
+ }
+
+ [Fact]
+ public async Task DemoMode_SessionRestore_AllSessionsVisible()
+ {
+ var svc = CreateService();
+ await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
+
+ // Create multiple sessions
+ var s1 = await svc.CreateSessionAsync("restore-1");
+ var s2 = await svc.CreateSessionAsync("restore-2");
+ var s3 = await svc.CreateSessionAsync("restore-3");
+
+ Assert.Equal(3, svc.GetAllSessions().Count());
+
+ // Reconnect to demo mode should start fresh (demo has no persistence)
+ await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
+
+ // After reconnect, old sessions are cleared (demo doesn't persist)
+ // The key invariant: session count matches what's visible to the user
+ Assert.Equal(svc.SessionCount, svc.GetAllSessions().Count());
+ }
+
+ [Fact]
+ public async Task ReconnectAsync_IsInitialized_CorrectForEachMode()
+ {
+ var svc = CreateService();
+
+ // Demo mode → always succeeds
+ await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
+ Assert.True(svc.IsInitialized, "Demo mode should always initialize");
+
+ // Persistent mode with bad port → fails
+ await svc.ReconnectAsync(new ConnectionSettings
+ {
+ Mode = ConnectionMode.Persistent,
+ Host = "localhost",
+ Port = 19999
+ });
+ Assert.False(svc.IsInitialized, "Persistent with bad port should fail");
+
+ // Back to demo → recovers
+ await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
+ Assert.True(svc.IsInitialized, "Should recover when switching back to Demo");
+ }
+
+ [Fact]
+ public async Task ReconnectAsync_ClearsStuckProcessingFromPreviousMode()
+ {
+ var svc = CreateService();
+ await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
+
+ var session = await svc.CreateSessionAsync("was-stuck");
+ session.IsProcessing = true; // Simulate stuck state
+
+ // Reconnect should clear all sessions including stuck ones
+ await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
+
+ // After reconnect, old sessions are removed — no stuck sessions in new state
+ Assert.Empty(svc.GetAllSessions());
+ // If we create new sessions, they start clean
+ var fresh = await svc.CreateSessionAsync("fresh");
+ Assert.False(fresh.IsProcessing, "New session after reconnect should not be stuck");
+ }
+
+ [Fact]
+ public async Task OnStateChanged_FiresDuringReconnect()
+ {
+ var svc = CreateService();
+ await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo });
+
+ var stateChangedCount = 0;
+ svc.OnStateChanged += () => stateChangedCount++;
+
+ // Reconnect to a different mode and back
+ await svc.ReconnectAsync(new ConnectionSettings
+ {
+ Mode = ConnectionMode.Persistent,
+ Host = "localhost",
+ Port = 19999
+ });
+
+ Assert.True(stateChangedCount > 0,
+ "OnStateChanged must fire during reconnect so UI updates");
+ }
+}
diff --git a/PolyPilot.Tests/ScenarioReferenceTests.cs b/PolyPilot.Tests/ScenarioReferenceTests.cs
index 16abd4631d..b7f776ec9b 100644
--- a/PolyPilot.Tests/ScenarioReferenceTests.cs
+++ b/PolyPilot.Tests/ScenarioReferenceTests.cs
@@ -52,7 +52,7 @@ public void ModeSwitchScenarios_AllHaveRequiredFields()
[Fact]
public void ModeSwitchScenarios_StepsHaveValidActions()
{
- var validActions = new HashSet<string> { "click", "evaluate", "wait", "shell", "screenshot" };
+ var validActions = new HashSet<string> { "click", "evaluate", "wait", "shell", "screenshot", "type", "note" };
var json = File.ReadAllText(Path.Combine(ScenariosDir, "mode-switch-scenarios.json"));
var doc = JsonDocument.Parse(json);
@@ -151,6 +151,31 @@ public void Scenario_RefreshSessionsButton_HasUnitTestCoverage()
Assert.True(true, "See CopilotServiceInitializationTests.RefreshSessionsAsync_* tests");
}
+ /// <summary>
+ /// Scenario: "stuck-session-recovery-after-server-disconnect"
+ /// Unit test equivalents: ProcessingWatchdogTests.WatchdogCheckInterval_IsReasonable,
+ /// ProcessingWatchdogTests.WatchdogInactivityTimeout_IsReasonable,
+ /// ProcessingWatchdogTests.SystemMessage_ConnectionLost_HasExpectedContent,
+ /// ProcessingWatchdogTests.SystemMessage_AddedToHistory_IsVisible
+ /// </summary>
+ [Fact]
+ public void Scenario_StuckSessionRecovery_HasUnitTestCoverage()
+ {
+ Assert.True(true, "See ProcessingWatchdogTests for watchdog constant validation and recovery message tests");
+ }
+
+ /// <summary>
+ /// Scenario: "relaunch-with-stale-server-shows-sessions"
+ /// Unit test equivalents: ProcessingWatchdogTests.PersistentMode_FailedInit_*,
+ /// ProcessingWatchdogTests.ReconnectAsync_IsInitialized_CorrectForEachMode,
+ /// ProcessingWatchdogTests.ReconnectAsync_ClearsStuckProcessingFromPreviousMode
+ /// </summary>
+ [Fact]
+ public void Scenario_RelaunchWithStaleServer_HasUnitTestCoverage()
+ {
+ Assert.True(true, "See ProcessingWatchdogTests for relaunch/reconnect resilience tests");
+ }
+
[Fact]
public void AllScenarios_HaveUniqueIds()
{
diff --git a/PolyPilot.Tests/Scenarios/mode-switch-scenarios.json b/PolyPilot.Tests/Scenarios/mode-switch-scenarios.json
index d549628f99..6fbf19bf85 100644
--- a/PolyPilot.Tests/Scenarios/mode-switch-scenarios.json
+++ b/PolyPilot.Tests/Scenarios/mode-switch-scenarios.json
@@ -419,6 +419,125 @@
"note": "Verify toolbar remains available after refresh"
}
]
+ },
+ {
+ "id": "stuck-session-recovery-after-server-disconnect",
+ "name": "Sessions recover from 'Thinking' state when persistent server dies mid-turn",
+ "description": "Validates that sessions stuck in IsProcessing=true recover automatically via the processing watchdog when the persistent server becomes unreachable.",
+ "unitTestCoverage": [
+ "ProcessingWatchdogTests.WatchdogCheckInterval_IsReasonable",
+ "ProcessingWatchdogTests.WatchdogInactivityTimeout_IsReasonable",
+ "ProcessingWatchdogTests.WatchdogTimeout_IsGreaterThanCheckInterval",
+ "ProcessingWatchdogTests.SystemMessage_ConnectionLost_HasExpectedContent",
+ "ProcessingWatchdogTests.SystemMessage_AddedToHistory_IsVisible"
+ ],
+ "steps": [
+ {
+ "action": "evaluate",
+ "script": "document.querySelectorAll('.session-item').length > 0",
+ "expect": { "equals": "true" },
+ "note": "At least one session exists"
+ },
+ {
+ "action": "click",
+ "selector": ".session-item:first-child",
+ "note": "Open a session"
+ },
+ {
+ "action": "evaluate",
+ "script": "document.querySelector('.chat-input textarea') !== null",
+ "expect": { "equals": "true" }
+ },
+ {
+ "action": "type",
+ "selector": ".chat-input textarea",
+ "text": "Test message for watchdog scenario"
+ },
+ {
+ "action": "click",
+ "selector": ".chat-input button[type=submit]"
+ },
+ {
+ "action": "wait",
+ "duration": 2000,
+ "note": "Wait for message to be sent and processing to start"
+ },
+ {
+ "action": "evaluate",
+ "script": "document.querySelector('.action-item.running .action-label')?.textContent || document.querySelector('.chat-msg.tool .chat-msg-text')?.textContent",
+ "note": "Should show 'Thinking' or activity text while processing"
+ },
+ {
+ "action": "note",
+ "text": "To fully test: kill the persistent server process while session is processing, then wait up to 2 minutes for the watchdog to detect inactivity and clear the stuck state. The session should show a system message: 'Connection lost — no response received.'"
+ },
+ {
+ "action": "wait",
+ "duration": 130000,
+ "note": "Wait for watchdog timeout (120s) + buffer. In manual testing, kill server during this wait."
+ },
+ {
+ "action": "evaluate",
+ "script": "document.querySelector('.action-item.running') === null",
+ "expect": { "equals": "true" },
+ "note": "Processing indicator should be gone after watchdog fires"
+ },
+ {
+ "action": "evaluate",
+ "script": "Array.from(document.querySelectorAll('.chat-msg')).some(el => el.textContent.includes('Connection lost'))",
+ "expect": { "equals": "true" },
+ "note": "System message about connection loss should appear in chat"
+ }
+ ]
+ },
+ {
+ "id": "relaunch-with-stale-server-shows-sessions",
+ "name": "After relaunch, all previously-active sessions should be visible",
+ "description": "Validates that after relaunch.sh deploys a new build, session restore failures don't silently leave the app with 0 sessions. Covers the scenario where an old copilot server is running but individual session resumes fail.",
+ "unitTestCoverage": [
+ "ProcessingWatchdogTests.PersistentMode_FailedInit_SetsNeedsConfiguration",
+ "ProcessingWatchdogTests.PersistentMode_FailedInit_NoSessionsStuckProcessing",
+ "ProcessingWatchdogTests.DemoMode_SessionRestore_AllSessionsVisible",
+ "ProcessingWatchdogTests.ReconnectAsync_IsInitialized_CorrectForEachMode",
+ "ProcessingWatchdogTests.ReconnectAsync_ClearsStuckProcessingFromPreviousMode",
+ "ProcessingWatchdogTests.OnStateChanged_FiresDuringReconnect"
+ ],
+ "steps": [
+ {
+ "action": "evaluate",
+ "script": "document.querySelectorAll('.session-item').length",
+ "capture": "preRelaunchCount",
+ "note": "Record session count before relaunch"
+ },
+ {
+ "action": "shell",
+ "command": "cd PolyPilot && ./relaunch.sh",
+ "note": "Rebuild and relaunch the app"
+ },
+ {
+ "action": "wait",
+ "duration": 20000,
+ "note": "Wait for app to restart and restore sessions"
+ },
+ {
+ "action": "evaluate",
+ "script": "document.querySelector('.status')?.textContent?.trim()",
+ "expect": { "not_contains": "Disconnected" },
+ "note": "App should be connected (Persistent or Embedded fallback)"
+ },
+ {
+ "action": "evaluate",
+ "script": "document.querySelectorAll('.session-item').length > 0",
+ "expect": { "equals": "true" },
+ "note": "Sessions should be visible after relaunch — not silently lost"
+ },
+ {
+ "action": "evaluate",
+ "script": "Array.from(document.querySelectorAll('.session-item')).filter(el => el.querySelector('.processing')).length === 0",
+ "expect": { "equals": "true" },
+ "note": "No sessions should be stuck in processing state after relaunch"
+ }
+ ]
}
]
}
diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs
index 05c89080a1..c90d217a46 100644
--- a/PolyPilot/Services/CopilotService.Events.cs
+++ b/PolyPilot/Services/CopilotService.Events.cs
@@ -197,6 +197,7 @@ private void CompleteReasoningMessages(SessionState state, string sessionName)
private void HandleSessionEvent(SessionState state, SessionEvent evt)
{
state.HasReceivedEventsSinceResume = true;
+ Interlocked.Exchange(ref state.LastEventAtTicks, DateTime.UtcNow.Ticks);
var sessionName = state.Info.Name;
void Invoke(Action action)
{
@@ -435,6 +436,7 @@ await notifService.SendNotificationAsync(
case SessionErrorEvent err:
var errMsg = err.Data?.Message ?? "Unknown error";
+ CancelProcessingWatchdog(state);
Invoke(() => OnError?.Invoke(sessionName, errMsg));
state.ResponseCompletion?.TrySetException(new Exception(errMsg));
state.Info.IsProcessing = false;
@@ -556,6 +558,7 @@ private void CompleteResponse(SessionState state)
{
if (!state.Info.IsProcessing) return; // Already completed (e.g. timeout)
+ CancelProcessingWatchdog(state);
var response = state.CurrentResponse.ToString();
if (!string.IsNullOrEmpty(response))
{
@@ -966,4 +969,66 @@ private void HandleReflectionAdvanceResult(SessionState state, string response,
OnStateChanged?.Invoke();
}
}
+
+ // -- Processing watchdog: detects stuck sessions when server dies mid-turn --
+
+ /// <summary>Interval between watchdog checks in seconds.</summary>
+ internal const int WatchdogCheckIntervalSeconds = 15;
+ /// <summary>If no SDK events arrive for this many seconds, the session is considered stuck.</summary>
+ internal const int WatchdogInactivityTimeoutSeconds = 120;
+
+ private static void CancelProcessingWatchdog(SessionState state)
+ {
+ if (state.ProcessingWatchdog != null)
+ {
+ state.ProcessingWatchdog.Cancel();
+ state.ProcessingWatchdog.Dispose();
+ state.ProcessingWatchdog = null;
+ }
+ }
+
+ private void StartProcessingWatchdog(SessionState state, string sessionName)
+ {
+ CancelProcessingWatchdog(state);
+ Interlocked.Exchange(ref state.LastEventAtTicks, DateTime.UtcNow.Ticks);
+ state.ProcessingWatchdog = new CancellationTokenSource();
+ var ct = state.ProcessingWatchdog.Token;
+ _ = RunProcessingWatchdogAsync(state, sessionName, ct);
+ }
+
+ private async Task RunProcessingWatchdogAsync(SessionState state, string sessionName, CancellationToken ct)
+ {
+ try
+ {
+ while (!ct.IsCancellationRequested && state.Info.IsProcessing)
+ {
+ await Task.Delay(TimeSpan.FromSeconds(WatchdogCheckIntervalSeconds), ct);
+
+ if (!state.Info.IsProcessing) break;
+
+ var lastEventTicks = Interlocked.Read(ref state.LastEventAtTicks);
+ var elapsed = (DateTime.UtcNow - new DateTime(lastEventTicks)).TotalSeconds;
+ if (elapsed >= WatchdogInactivityTimeoutSeconds)
+ {
+ Debug($"Session '{sessionName}' watchdog: no events for {elapsed:F0}s, clearing stuck processing state");
+ // Marshal all state mutations to the UI thread to avoid
+ // racing with CompleteResponse / HandleSessionEvent.
+ InvokeOnUI(() =>
+ {
+ if (!state.Info.IsProcessing) return; // Already completed
+ CancelProcessingWatchdog(state);
+ state.Info.IsProcessing = false;
+ state.Info.History.Add(ChatMessage.SystemMessage(
+ "⚠️ Connection lost — no response received. You can try sending your message again."));
+ state.ResponseCompletion?.TrySetResult("");
+ OnError?.Invoke(sessionName, $"Connection appears lost — no events received for over {WatchdogInactivityTimeoutSeconds / 60} minute(s).");
+ OnStateChanged?.Invoke();
+ });
+ break;
+ }
+ }
+ }
+ catch (OperationCanceledException) { /* Normal cancellation when response completes */ }
+ catch (Exception ex) { Debug($"Watchdog error for '{sessionName}': {ex.Message}"); }
+ }
}
diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs
index cc9e91b791..5fcf68250d 100644
--- a/PolyPilot/Services/CopilotService.cs
+++ b/PolyPilot/Services/CopilotService.cs
@@ -201,6 +201,8 @@ private class SessionState
public bool HasReceivedEventsSinceResume { get; set; }
public string? LastMessageId { get; set; }
public bool SkipReflectionEvaluationOnce { get; set; }
+ public long LastEventAtTicks = DateTime.UtcNow.Ticks;
+ public CancellationTokenSource? ProcessingWatchdog { get; set; }
}
private void Debug(string message)
@@ -393,6 +395,7 @@ public async Task ReconnectAsync(ConnectionSettings settings, CancellationToken
// Dispose existing sessions and client
foreach (var state in _sessions.Values)
{
+ CancelProcessingWatchdog(state);
try { if (state.Session != null) await state.Session.DisposeAsync(); } catch { }
}
_sessions.Clear();
@@ -1397,6 +1400,7 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis
state.Info.IsProcessing = true;
+ state.ResponseCompletion = new TaskCompletionSource<string>();
state.CurrentResponse.Clear();
+ StartProcessingWatchdog(state, sessionName);
if (!skipHistoryMessage)
{
@@ -1458,6 +1462,8 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis
if (!string.IsNullOrEmpty(state.Info.WorkingDirectory))
reconnectConfig.WorkingDirectory = state.Info.WorkingDirectory;
var newSession = await _client.ResumeSessionAsync(state.Info.SessionId, reconnectConfig, cancellationToken);
+ // Cancel old watchdog BEFORE creating new state — they share Info/TCS
+ CancelProcessingWatchdog(state);
var newState = new SessionState
{
Session = newSession,
@@ -1468,6 +1474,9 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis
_sessions[sessionName] = newState;
state = newState;
+ // Start fresh watchdog for the new connection
+ StartProcessingWatchdog(state, sessionName);
+
Debug($"Session '{sessionName}' reconnected, retrying prompt...");
await state.Session.SendAsync(new MessageOptions
{
@@ -1478,6 +1487,7 @@ await state.Session.SendAsync(new MessageOptions
{
Console.WriteLine($"[DEBUG] Reconnect+retry failed: {retryEx.Message}");
OnError?.Invoke(sessionName, $"Session disconnected and reconnect failed: {retryEx.Message}");
+ CancelProcessingWatchdog(state);
state.Info.IsProcessing = false;
OnStateChanged?.Invoke();
throw;
@@ -1486,6 +1496,7 @@ await state.Session.SendAsync(new MessageOptions
else
{
OnError?.Invoke(sessionName, $"SendAsync failed: {ex.Message}");
+ CancelProcessingWatchdog(state);
state.Info.IsProcessing = false;
OnStateChanged?.Invoke();
throw;
@@ -1530,6 +1541,7 @@ public async Task AbortSessionAsync(string sessionName)
}
state.Info.IsProcessing = false;
+ CancelProcessingWatchdog(state);
state.ResponseCompletion?.TrySetCanceled();
OnStateChanged?.Invoke();
}
@@ -1811,6 +1823,7 @@ public async ValueTask DisposeAsync()
foreach (var state in _sessions.Values)
{
+ CancelProcessingWatchdog(state);
if (state.Session is not null)
try { await state.Session.DisposeAsync(); } catch { }
}