diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 9a6e65d472..6f65f3c092 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -96,6 +96,7 @@ When switching between Embedded and Persistent modes (via Settings → Save & Re - **NEVER use `git push --force`** — always use `git push --force-with-lease` instead when a force push is needed (e.g., after a rebase). This prevents overwriting remote changes made by others. - **NEVER commit screenshots, images, or binary files** — use `git diff --stat` or `git status` before committing to verify no `.png`, `.jpg`, `.bmp`, or other image files are staged. Screenshots from PolyPilot (e.g., `screenshot_*.png`) are generated locally and must NEVER be committed. The `.gitignore` blocks common patterns, but always double-check. - **NEVER use `git add -A` or `git add .` blindly** — always review what's being staged first with `git status`. Prefer `git add ` when possible to avoid accidentally committing generated files. +- **When creating a new branch for a PR**, always base it on `upstream/main` (or `origin/main`). Do NOT branch from whatever HEAD happens to be — the repo may be on a feature branch. Use `git checkout -b upstream/main`. After creating the branch, verify with `git log --oneline upstream/main..HEAD` that only your commits appear. - When contributing to an existing PR, prefer adding commits on top. Rebase only when explicitly asked. - Use `git add -f` when adding files matched by `.gitignore` patterns (e.g., `*.app/` catches `PolyPilot/`). diff --git a/PolyPilot.Tests/ResponseFlushTests.cs b/PolyPilot.Tests/ResponseFlushTests.cs new file mode 100644 index 0000000000..0ed54f8745 --- /dev/null +++ b/PolyPilot.Tests/ResponseFlushTests.cs @@ -0,0 +1,371 @@ +using Microsoft.Extensions.DependencyInjection; +using PolyPilot.Models; +using PolyPilot.Services; + +namespace PolyPilot.Tests; + +/// +/// Regression tests for: session response lost when IsProcessing is cleared +/// prematurely by watchdog, error handler, or reconnect race condition. +/// Bug: UI shows "Thinking" with no response, but SDK completed the work. +/// +public class ResponseFlushTests +{ + private readonly StubChatDatabase _chatDb = new(); + private readonly StubServerManager _serverManager = new(); + private readonly StubWsBridgeClient _bridgeClient = new(); + private readonly StubDemoService _demoService = new(); + private readonly RepoManager _repoManager = new(); + private readonly IServiceProvider _serviceProvider; + + public ResponseFlushTests() + { + var services = new ServiceCollection(); + _serviceProvider = services.BuildServiceProvider(); + } + + private CopilotService CreateService() => + new CopilotService(_chatDb, _serverManager, _bridgeClient, _repoManager, _serviceProvider, _demoService); + + // --- Model-level: simulated watchdog flush preserves content --- + + [Fact] + public void WatchdogFlush_SimulatedStateTransition_PreservesHistory() + { + // Simulates the watchdog scenario: session accumulates response content, + // then IsProcessing is cleared. The fix ensures accumulated content + // is flushed to history before clearing IsProcessing. + var info = new AgentSessionInfo { Name = "test-flush", Model = "test-model" }; + + // Simulate: user sent a message + info.History.Add(new ChatMessage("user", "What files are on the network?", DateTime.Now)); + info.IsProcessing = true; + info.MessageCount = info.History.Count; + + // Simulate: partial response accumulated (normally in CurrentResponse, + // but at model level we test the flush writes to history) + var partialResponse = new ChatMessage("assistant", "I found 3 files on the network drive.", DateTime.Now); + info.History.Add(partialResponse); + info.MessageCount = info.History.Count; + + // Simulate: watchdog fires, clears IsProcessing + info.IsProcessing = false; + info.History.Add(ChatMessage.SystemMessage( + "⚠️ Session appears stuck — no response received. You can try sending your message again.")); + + // Verify: the partial response is still in history (not lost) + Assert.False(info.IsProcessing); + Assert.Equal(3, info.History.Count); + var assistantMessages = info.History.Where(m => m.Role == "assistant").ToList(); + Assert.Single(assistantMessages); + Assert.Contains("3 files", assistantMessages[0].Content); + } + + [Fact] + public void ErrorEvent_SimulatedStateTransition_PreservesPartialResponse() + { + // Simulates the SessionErrorEvent scenario: deltas arrived, + // then an error occurs. The fix ensures accumulated content + // is flushed before clearing IsProcessing. + var info = new AgentSessionInfo { Name = "test-error-flush", Model = "test-model" }; + + // User message + info.History.Add(new ChatMessage("user", "List network shares", DateTime.Now)); + info.IsProcessing = true; + + // Partial assistant response (flushed by the fix before error clears state) + var partial = new ChatMessage("assistant", "Accessing network...", DateTime.Now); + info.History.Add(partial); + + // Error occurs, clears processing + info.History.Add(ChatMessage.ErrorMessage("Connection lost")); + info.IsProcessing = false; + + // Verify partial response is preserved + Assert.False(info.IsProcessing); + Assert.Equal(3, info.History.Count); + Assert.Equal("assistant", info.History[1].Role); + Assert.Equal("Accessing network...", info.History[1].Content); + } + + // --- Service-level: abort flushes accumulated content --- + + [Fact] + public async Task DemoMode_AbortWhileProcessing_HistoryIntact() + { + // Abort should preserve existing history (the fix ensures + // FlushCurrentResponse runs before clearing state). + var svc = CreateService(); + await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo }); + + var session = await svc.CreateSessionAsync("abort-flush"); + await svc.SendPromptAsync("abort-flush", "First message"); + + var historyCountAfterFirst = session.History.Count; + Assert.True(historyCountAfterFirst >= 1, "Should have at least the user message"); + + // Simulate stuck state then abort + session.IsProcessing = true; + await svc.AbortSessionAsync("abort-flush"); + + Assert.False(session.IsProcessing); + // History from before abort must be preserved + Assert.True(session.History.Count >= historyCountAfterFirst, + "Abort should not lose existing history messages"); + } + + [Fact] + public async Task DemoMode_SendAfterStuckRecovery_AddsToHistory() + { + // After recovering from a stuck state, new messages should work normally. + var svc = CreateService(); + await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo }); + + var session = await svc.CreateSessionAsync("recovery-flush"); + + // First exchange + await svc.SendPromptAsync("recovery-flush", "Hello"); + var countAfterFirst = session.History.Count; + + // Simulate watchdog clearing stuck state + session.IsProcessing = true; + session.IsProcessing = false; + session.History.Add(ChatMessage.SystemMessage("⚠️ Session appears stuck")); + + // Send another message — should work + await svc.SendPromptAsync("recovery-flush", "Try again"); + Assert.False(session.IsProcessing); + Assert.True(session.History.Count > countAfterFirst, + "Messages after recovery should be added to history"); + } + + // --- CompleteResponse with IsProcessing=false should flush --- + + [Fact] + public void CompleteResponse_WhenProcessingFalse_StillFlushesContent() + { + // This tests the invariant: if response content exists but IsProcessing + // was cleared prematurely, the content should still be flushed to history + // when CompleteResponse runs (e.g., from a late SessionIdleEvent). + var info = new AgentSessionInfo { Name = "late-flush", Model = "test-model" }; + + // Start a turn + info.History.Add(new ChatMessage("user", "Do work", DateTime.Now)); + info.IsProcessing = true; + + // Watchdog fires prematurely, clears IsProcessing + // (In the actual code, FlushCurrentResponse would run here due to the fix) + info.IsProcessing = false; + + // Late SessionIdleEvent arrives — CompleteResponse called + // (In actual code, the fix flushes CurrentResponse even when IsProcessing=false) + // Verify the model supports this: we can add assistant messages after IsProcessing=false + var lateContent = new ChatMessage("assistant", "Work completed successfully.", DateTime.Now); + info.History.Add(lateContent); + info.MessageCount = info.History.Count; + + // Verify the late content is in history alongside the user message + Assert.Equal(2, info.History.Count); + Assert.Equal("user", info.History[0].Role); + Assert.Equal("assistant", info.History[1].Role); + Assert.Equal("Work completed successfully.", info.History[1].Content); + } + + // --- Reconnect generation reset --- + + [Fact] + public async Task DemoMode_ReconnectAsync_ClearsProcessingState() + { + // Reconnect should leave no sessions in stuck state in the service. + var svc = CreateService(); + await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo }); + + await svc.CreateSessionAsync("recon-1"); + await svc.CreateSessionAsync("recon-2"); + + // Reconnect clears everything — old sessions are removed from service + await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo }); + + // After reconnect, service should have no sessions + Assert.Empty(svc.GetAllSessions()); + } + + [Fact] + public async Task DemoMode_SendAfterReconnect_WorksNormally() + { + // After reconnect, sessions in the new connection should work. + var svc = CreateService(); + await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo }); + + await svc.CreateSessionAsync("before-recon"); + await svc.SendPromptAsync("before-recon", "Message 1"); + + // Reconnect (simulates mode switch or server restart) + await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo }); + + // Create new session in new connection + var newSession = await svc.CreateSessionAsync("after-recon"); + await svc.SendPromptAsync("after-recon", "Fresh start"); + + Assert.False(newSession.IsProcessing); + Assert.True(newSession.History.Count >= 1); + } + + // --- History integrity after state transitions --- + + [Fact] + public void HistoryPreserved_AcrossMultipleStateTransitions() + { + // Verify history is preserved across multiple processing cycles, + // including simulated watchdog/error interruptions. + var info = new AgentSessionInfo { Name = "multi-cycle", Model = "test-model" }; + + // Cycle 1: Normal completion + info.History.Add(new ChatMessage("user", "Message 1", DateTime.Now)); + info.IsProcessing = true; + info.History.Add(new ChatMessage("assistant", "Response 1", DateTime.Now)); + info.IsProcessing = false; + + // Cycle 2: Watchdog interruption (partial response preserved by fix) + info.History.Add(new ChatMessage("user", "Message 2", DateTime.Now)); + info.IsProcessing = true; + info.History.Add(new ChatMessage("assistant", "Partial response 2", DateTime.Now)); + info.IsProcessing = false; // Watchdog + info.History.Add(ChatMessage.SystemMessage("⚠️ Session appears stuck")); + + // Cycle 3: Error interruption (partial response preserved by fix) + info.History.Add(new ChatMessage("user", "Message 3", DateTime.Now)); + info.IsProcessing = true; + info.History.Add(new ChatMessage("assistant", "Partial 3", DateTime.Now)); + info.History.Add(ChatMessage.ErrorMessage("Connection error")); + info.IsProcessing = false; + + // Cycle 4: Normal completion after errors + info.History.Add(new ChatMessage("user", "Message 4", DateTime.Now)); + info.IsProcessing = true; + info.History.Add(new ChatMessage("assistant", "Response 4", DateTime.Now)); + info.IsProcessing = false; + + // All messages should be preserved + Assert.False(info.IsProcessing); + // ErrorMessage has role="assistant", so count excludes it via MessageType + var assistantResponses = info.History + .Where(m => m.Role == "assistant" && m.MessageType == ChatMessageType.Assistant) + .ToList(); + Assert.Equal(4, assistantResponses.Count); + Assert.Equal("Response 1", assistantResponses[0].Content); + Assert.Equal("Partial response 2", assistantResponses[1].Content); + Assert.Equal("Partial 3", assistantResponses[2].Content); + Assert.Equal("Response 4", assistantResponses[3].Content); + + // System/error messages also preserved + Assert.Single(info.History.Where(m => m.MessageType == ChatMessageType.System)); + Assert.Single(info.History.Where(m => m.MessageType == ChatMessageType.Error)); + } + + [Fact] + public async Task DemoMode_HighMessageCount_NoResponseLoss() + { + // Regression: the original bug had 90 messages. Ensure high message + // count sessions don't lose responses. + var svc = CreateService(); + await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo }); + + var session = await svc.CreateSessionAsync("high-count"); + + // Send many messages + for (int i = 0; i < 20; i++) + { + await svc.SendPromptAsync("high-count", $"Message {i}"); + } + + Assert.False(session.IsProcessing); + // Each send in demo mode adds at least a user message + Assert.True(session.History.Count >= 20, + $"Expected at least 20 history entries, got {session.History.Count}"); + + // Simulate stuck + recovery on high-count session + session.IsProcessing = true; + await svc.AbortSessionAsync("high-count"); + Assert.False(session.IsProcessing); + + // Can still send + await svc.SendPromptAsync("high-count", "After recovery"); + Assert.False(session.IsProcessing); + } + + // --- Persistent mode: connection failure doesn't lose state --- + + [Fact] + public async Task PersistentMode_FailedInit_NoOrphanedProcessing() + { + // If persistent mode fails to connect, no sessions should be + // left in a processing state. + var svc = CreateService(); + + await svc.ReconnectAsync(new ConnectionSettings + { + Mode = ConnectionMode.Persistent, + Host = "localhost", + Port = 19999 + }); + + Assert.Empty(svc.GetAllSessions()); + foreach (var s in svc.GetAllSessions()) + { + Assert.False(s.IsProcessing, + $"Session '{s.Name}' stuck processing after failed init"); + } + } + + // --- OnStateChanged fires during recovery --- + + [Fact] + public async Task DemoMode_AbortFiresStateChanged() + { + var svc = CreateService(); + await svc.ReconnectAsync(new ConnectionSettings { Mode = ConnectionMode.Demo }); + + await svc.CreateSessionAsync("state-changed"); + var stateChangedCount = 0; + svc.OnStateChanged += () => stateChangedCount++; + + // Simulate stuck + abort + var session = svc.GetAllSessions().First(); + session.IsProcessing = true; + await svc.AbortSessionAsync("state-changed"); + + // OnStateChanged should have fired during abort + Assert.True(stateChangedCount >= 1, + "OnStateChanged must fire when abort clears processing state"); + } + + // --- ChatMessage factory methods used in fixes --- + + [Fact] + public void ChatMessage_SystemMessage_HasCorrectType() + { + var msg = ChatMessage.SystemMessage("⚠️ Session appears stuck"); + Assert.Equal("system", msg.Role); + Assert.Equal(ChatMessageType.System, msg.MessageType); + Assert.Contains("appears stuck", msg.Content); + } + + [Fact] + public void ChatMessage_ErrorMessage_HasCorrectType() + { + var msg = ChatMessage.ErrorMessage("Connection lost"); + Assert.Equal(ChatMessageType.Error, msg.MessageType); + Assert.Contains("Connection lost", msg.Content); + } + + [Fact] + public void ChatMessage_AssistantMessage_ModelPreserved() + { + // The fix creates assistant messages with model info during flush. + var msg = new ChatMessage("assistant", "Response text", DateTime.Now) { Model = "gpt-5.3-codex" }; + Assert.Equal("assistant", msg.Role); + Assert.Equal("gpt-5.3-codex", msg.Model); + Assert.Equal("Response text", msg.Content); + } +} diff --git a/PolyPilot/Components/Layout/SessionSidebar.razor b/PolyPilot/Components/Layout/SessionSidebar.razor index 304ecd6f64..97cb8d0796 100644 --- a/PolyPilot/Components/Layout/SessionSidebar.razor +++ b/PolyPilot/Components/Layout/SessionSidebar.razor @@ -1157,7 +1157,19 @@ else return; } worktreePath = repoRoot; - await RunProcessAsync("git", $"checkout -b {branchName}", repoRoot); + // Determine the upstream default branch to base the fix on. + // Try upstream/main, origin/main, then fall back to current HEAD. + var baseBranch = await GetDefaultRemoteBranch(repoRoot); + if (baseBranch != null) + { + // Fetch latest and create branch from the upstream default + await RunProcessAsync("git", "fetch --quiet upstream 2>/dev/null || git fetch --quiet origin", repoRoot); + await RunProcessAsync("git", $"checkout -b {branchName} {baseBranch}", repoRoot); + } + else + { + await RunProcessAsync("git", $"checkout -b {branchName}", repoRoot); + } } footerStatus = "⏳ Launching copilot…"; @@ -1204,8 +1216,9 @@ You are fixing a bug in the PolyPilot app (a .NET MAUI Blazor Hybrid app). 5. Add UI scenario definitions to `PolyPilot.Tests/Scenarios/mode-switch-scenarios.json` if the bug involves mode switching, settings, or UI flows. 6. Run `cd PolyPilot.Tests && dotnet test` to verify all tests pass (existing + new). 7. Run `cd PolyPilot && dotnet build -f {(OperatingSystem.IsWindows() ? "net10.0-windows10.0.19041.0" : "net10.0-maccatalyst")}` to verify the app builds. -8. Commit your changes to branch `{branchName}` with a descriptive commit message. -9. Push the branch and create a PR against `main` in the PureWeen/PolyPilot repository using `gh pr create`. +8. **Verify your branch is based on `main`** — run `git log --oneline upstream/main..HEAD` (or `origin/main..HEAD`) and confirm only YOUR commits appear. If unrelated commits are included, the branch was created from the wrong base. Fix it: create a new branch from `upstream/main` and cherry-pick your commits onto it. +9. Commit your changes to branch `{branchName}` with a descriptive commit message. +10. Push the branch and create a PR against `main` in the PureWeen/PolyPilot repository using `gh pr create`. Important conventions: - Never use `static readonly` fields that call platform APIs (use lazy `??=` properties instead). @@ -1295,6 +1308,37 @@ Important conventions: if (p != null) await p.WaitForExitAsync(); } + /// + /// Find the upstream default branch ref (e.g. "upstream/main", "origin/main"). + /// Returns null if no suitable remote branch is found. + /// + private static async Task GetDefaultRemoteBranch(string repoRoot) + { + // Prefer upstream/main (fork convention), then origin/main + foreach (var candidate in new[] { "upstream/main", "upstream/master", "origin/main", "origin/master" }) + { + try + { + var psi = new System.Diagnostics.ProcessStartInfo("git", $"rev-parse --verify {candidate}") + { + WorkingDirectory = repoRoot, + RedirectStandardOutput = true, + RedirectStandardError = true, + UseShellExecute = false, + CreateNoWindow = true + }; + var p = System.Diagnostics.Process.Start(psi); + if (p != null) + { + await p.WaitForExitAsync(); + if (p.ExitCode == 0) return candidate; + } + } + catch { } + } + return null; + } + private static string EscapeForAppleScript(string s) => s.Replace("\\", "\\\\").Replace("\"", "\\\""); diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs index 5a92315329..62eed238dd 100644 --- a/PolyPilot/Services/CopilotService.Events.cs +++ b/PolyPilot/Services/CopilotService.Events.cs @@ -499,6 +499,8 @@ await notifService.SendNotificationAsync( { OnError?.Invoke(sessionName, errMsg); state.ResponseCompletion?.TrySetException(new Exception(errMsg)); + // Flush any accumulated partial response before clearing processing state + FlushCurrentResponse(state); Debug($"[ERROR] '{sessionName}' SessionErrorEvent cleared IsProcessing (error={errMsg})"); state.Info.IsProcessing = false; state.Info.IsResumed = false; @@ -627,7 +629,19 @@ private void CompleteResponse(SessionState state, long? expectedGeneration = nul { if (!state.Info.IsProcessing) { - Debug($"[COMPLETE] '{state.Info.Name}' CompleteResponse skipped — IsProcessing already false"); + // Still flush any accumulated content — delta events may have arrived + // after IsProcessing was cleared prematurely by watchdog/error handler. + if (state.CurrentResponse.Length > 0) + { + Debug($"[COMPLETE] '{state.Info.Name}' IsProcessing already false but flushing " + + $"{state.CurrentResponse.Length} chars of accumulated content"); + FlushCurrentResponse(state); + OnStateChanged?.Invoke(); + } + else + { + Debug($"[COMPLETE] '{state.Info.Name}' CompleteResponse skipped — IsProcessing already false"); + } return; // Already completed (e.g. timeout) } @@ -1141,6 +1155,8 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session Interlocked.Exchange(ref state.ActiveToolCallCount, 0); state.HasUsedToolsThisTurn = false; state.Info.IsResumed = false; + // Flush any accumulated partial response before clearing processing state + FlushCurrentResponse(state); state.Info.IsProcessing = false; state.Info.History.Add(ChatMessage.SystemMessage( "⚠️ Session appears stuck — no response received. You can try sending your message again.")); diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs index 3fb1f352a9..805bc6e7a5 100644 --- a/PolyPilot/Services/CopilotService.cs +++ b/PolyPilot/Services/CopilotService.cs @@ -1528,10 +1528,19 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis Session = newSession, Info = state.Info }; - newState.ResponseCompletion = state.ResponseCompletion; + newState.ResponseCompletion = new TaskCompletionSource(); newSession.On(evt => HandleSessionEvent(newState, evt)); _sessions[sessionName] = newState; state = newState; + + // Increment generation AFTER registering the event handler so that any + // replayed SessionIdleEvent from the old turn captures the stale generation + // (0) while the retry turn uses the new generation. This prevents the + // replayed IDLE from clearing IsProcessing before the retry completes. + Interlocked.Increment(ref state.ProcessingGeneration); + state.Info.IsProcessing = true; + state.CurrentResponse.Clear(); + Debug($"[RECONNECT] '{sessionName}' reset processing state: gen={Interlocked.Read(ref state.ProcessingGeneration)}"); // Start fresh watchdog for the new connection StartProcessingWatchdog(state, sessionName);