diff --git a/PolyPilot.Tests/PolyPilot.Tests.csproj b/PolyPilot.Tests/PolyPilot.Tests.csproj index bf9e6c4984..ef28dde2e6 100644 --- a/PolyPilot.Tests/PolyPilot.Tests.csproj +++ b/PolyPilot.Tests/PolyPilot.Tests.csproj @@ -44,6 +44,7 @@ + diff --git a/PolyPilot.Tests/SessionAnalyzerTests.cs b/PolyPilot.Tests/SessionAnalyzerTests.cs new file mode 100644 index 0000000000..85a2486df2 --- /dev/null +++ b/PolyPilot.Tests/SessionAnalyzerTests.cs @@ -0,0 +1,295 @@ +using PolyPilot.Services; + +namespace PolyPilot.Tests; + +[Collection("BaseDir")] +public class SessionAnalyzerTests +{ + [Fact] + public void CollectDiagnostics_IncludesServerHealth() + { + var tempDir = Path.Combine(Path.GetTempPath(), $"analyzer-test-{Guid.NewGuid()}"); + Directory.CreateDirectory(tempDir); + try + { + SessionAnalyzerService.SetBaseDirForTesting(tempDir); + + File.WriteAllText( + Path.Combine(tempDir, "event-diagnostics.log"), + "[SEND] 'TestSession' IsProcessing=true\n[COMPLETE] 'TestSession' done\n"); + + var copilotService = CreateService(); + var serverManager = new TestServerManager { IsRunning = true, Pid = 12345, Port = 4321 }; + var analyzer = new SessionAnalyzerService(copilotService, serverManager); + + var diagnostics = analyzer.CollectDiagnostics(); + + Assert.Contains("Event Diagnostics", diagnostics); + Assert.Contains("[SEND]", diagnostics); + Assert.Contains("[COMPLETE]", diagnostics); + Assert.Contains("Server running: True", diagnostics); + Assert.Contains("12345", diagnostics); + } + finally + { + SessionAnalyzerService.SetBaseDirForTesting(TestSetup.TestBaseDir); + Directory.Delete(tempDir, recursive: true); + } + } + + [Fact] + public void CollectDiagnostics_IncludesCrashLog_WhenPresent() + { + var tempDir = Path.Combine(Path.GetTempPath(), $"analyzer-test-{Guid.NewGuid()}"); + Directory.CreateDirectory(tempDir); + try + { + SessionAnalyzerService.SetBaseDirForTesting(tempDir); + + File.WriteAllText( + Path.Combine(tempDir, "crash.log"), + "=== 2026-04-18 ===\nSystem.Exception: test crash\n"); + + var copilotService = CreateService(); + var serverManager = new TestServerManager(); + var analyzer = new SessionAnalyzerService(copilotService, serverManager); + + var diagnostics = analyzer.CollectDiagnostics(); + + Assert.Contains("Crash Log", diagnostics); + Assert.Contains("test crash", diagnostics); + } + finally + { + SessionAnalyzerService.SetBaseDirForTesting(TestSetup.TestBaseDir); + Directory.Delete(tempDir, recursive: true); + } + } + + [Fact] + public void CollectDiagnostics_HandlesEmptyLogs() + { + var tempDir = Path.Combine(Path.GetTempPath(), $"analyzer-test-{Guid.NewGuid()}"); + Directory.CreateDirectory(tempDir); + try + { + SessionAnalyzerService.SetBaseDirForTesting(tempDir); + + var copilotService = CreateService(); + var serverManager = new TestServerManager(); + var analyzer = new SessionAnalyzerService(copilotService, serverManager); + + var diagnostics = analyzer.CollectDiagnostics(); + + Assert.Contains("Active Session States", diagnostics); + Assert.Contains("Server Health", diagnostics); + } + finally + { + SessionAnalyzerService.SetBaseDirForTesting(TestSetup.TestBaseDir); + Directory.Delete(tempDir, recursive: true); + } + } + + [Fact] + public void BuildAnalysisPrompt_ContainsDiagnosticData() + { + var diagnostics = "## Test Data\nSome diagnostic info here"; + var prompt = SessionAnalyzerService.BuildAnalysisPrompt(diagnostics); + + Assert.Contains("PolyPilot Session Analyzer", prompt); + Assert.Contains("Stuck sessions", prompt); + Assert.Contains("Watchdog kills", prompt); + Assert.Contains("Test Data", prompt); + Assert.Contains("Some diagnostic info here", prompt); + } + + [Fact] + public void BuildAnalysisPrompt_InstructsSingleBranchPrStrategy() + { + var prompt = SessionAnalyzerService.BuildAnalysisPrompt("data"); + + // Must instruct reuse of a single branch and PR + Assert.Contains("fix/session-analyzer-findings", prompt); + Assert.Contains("Always reuse the SAME branch", prompt); + Assert.Contains("Never create a new branch per finding", prompt); + } + + [Fact] + public void Constants_HaveReasonableDefaults() + { + Assert.Equal(10, SessionAnalyzerService.DefaultAnalysisIntervalMinutes); + Assert.Equal(1, SessionAnalyzerService.MinAnalysisIntervalMinutes); + Assert.Equal(1440, SessionAnalyzerService.MaxAnalysisIntervalMinutes); + Assert.Equal(200, SessionAnalyzerService.DiagnosticLogTailLines); + Assert.Equal(50, SessionAnalyzerService.CrashLogTailLines); + Assert.Equal("PolyPilot Monitor", SessionAnalyzerService.AnalyzerSessionName); + Assert.Equal(10 * 1024 * 1024, SessionAnalyzerService.MaxLogFileSizeBytes); + } + + [Fact] + public void IsRunning_FalseBeforeStart() + { + var copilotService = CreateService(); + var serverManager = new TestServerManager(); + var analyzer = new SessionAnalyzerService(copilotService, serverManager); + + Assert.False(analyzer.IsRunning); + Assert.Null(analyzer.LastAnalysisAt); + Assert.Equal(0, analyzer.AnalysisCount); + } + + [Fact] + public void Dispose_StopsAnalyzer() + { + var copilotService = CreateService(); + var serverManager = new TestServerManager(); + var analyzer = new SessionAnalyzerService(copilotService, serverManager); + + analyzer.Dispose(); + analyzer.Dispose(); // double dispose is safe + Assert.False(analyzer.IsRunning); + } + + [Fact] + public async Task DisposeAsync_StopsAnalyzer() + { + var copilotService = CreateService(); + var serverManager = new TestServerManager(); + var analyzer = new SessionAnalyzerService(copilotService, serverManager); + + await analyzer.DisposeAsync(); + await analyzer.DisposeAsync(); // double dispose is safe + Assert.False(analyzer.IsRunning); + } + + [Fact] + public async Task RunSingleAnalysis_ReturnsNull_WhenNoSessionCreated() + { + var copilotService = CreateService(); + var serverManager = new TestServerManager(); + var analyzer = new SessionAnalyzerService(copilotService, serverManager); + + // _analyzerSessionName is null — no session was created + var result = await analyzer.RunSingleAnalysisAsync(); + Assert.Null(result); + Assert.Equal(0, analyzer.AnalysisCount); + } + + [Fact] + public void TailFile_CapsLargeFiles() + { + var tempFile = Path.GetTempFileName(); + try + { + // Write a small file — TailFile should return last N lines + var lines = Enumerable.Range(1, 500).Select(i => $"line {i}").ToArray(); + File.WriteAllLines(tempFile, lines); + + var result = SessionAnalyzerService.TailFile(tempFile, 10); + Assert.Equal(10, result.Length); + Assert.Equal("line 491", result[0]); + Assert.Equal("line 500", result[9]); + } + finally + { + File.Delete(tempFile); + } + } + + [Fact] + public void TailFile_HandlesSmallFile() + { + var tempFile = Path.GetTempFileName(); + try + { + File.WriteAllLines(tempFile, new[] { "a", "b", "c" }); + var result = SessionAnalyzerService.TailFile(tempFile, 10); + Assert.Equal(3, result.Length); + } + finally + { + File.Delete(tempFile); + } + } + + [Fact] + public void TailFile_HandlesNonexistentFile() + { + var result = SessionAnalyzerService.TailFile("/nonexistent/path", 10); + Assert.Empty(result); + } + + [Fact] + public void SessionAnalyzerIntervalMinutes_ClampsToMinimum() + { + var settings = new PolyPilot.Models.ConnectionSettings(); + + settings.SessionAnalyzerIntervalMinutes = 0; + Assert.Equal(1, settings.SessionAnalyzerIntervalMinutes); + + settings.SessionAnalyzerIntervalMinutes = -5; + Assert.Equal(1, settings.SessionAnalyzerIntervalMinutes); + + settings.SessionAnalyzerIntervalMinutes = 30; + Assert.Equal(30, settings.SessionAnalyzerIntervalMinutes); + } + + [Fact] + public void SessionAnalyzerIntervalMinutes_ClampsToMaximum() + { + var settings = new PolyPilot.Models.ConnectionSettings(); + + settings.SessionAnalyzerIntervalMinutes = 2000; + Assert.Equal(1440, settings.SessionAnalyzerIntervalMinutes); + + settings.SessionAnalyzerIntervalMinutes = int.MaxValue; + Assert.Equal(1440, settings.SessionAnalyzerIntervalMinutes); + } + + [Fact] + public void BuildAnalysisPrompt_UsesAutopilotMode() + { + // The analyzer runs in autopilot so it can create/update PRs + var prompt = SessionAnalyzerService.BuildAnalysisPrompt("data"); + + Assert.Contains("Write the fix, run tests, commit", prompt); + Assert.Contains("open one with a clear title", prompt); + } + + private static string GetTempDir() => Path.GetTempPath(); + + private static CopilotService CreateService() + { + var services = new Microsoft.Extensions.DependencyInjection.ServiceCollection(); + var serviceProvider = Microsoft.Extensions.DependencyInjection.ServiceCollectionContainerBuilderExtensions + .BuildServiceProvider(services); + return new CopilotService( + new StubChatDatabase(), + new StubServerManager(), + new StubWsBridgeClient(), + new RepoManager(), + serviceProvider, + new StubDemoService()); + } + + private class TestServerManager : IServerManager + { + public bool IsRunning { get; set; } + public int? Pid { get; set; } + public int Port { get; set; } = 4321; + public string? Error { get; set; } + + bool IServerManager.IsServerRunning => IsRunning; + int? IServerManager.ServerPid => Pid; + int IServerManager.ServerPort => Port; + string? IServerManager.LastError => Error; + + public event Action? OnStatusChanged; + + public bool CheckServerRunning(string host = "127.0.0.1", int? port = null) => IsRunning; + public Task StartServerAsync(int port, string? githubToken = null) => Task.FromResult(true); + public void StopServer() { } + public bool DetectExistingServer() => IsRunning; + } +} diff --git a/PolyPilot.Tests/TestSetup.cs b/PolyPilot.Tests/TestSetup.cs index ba4a065433..979a9f4026 100644 --- a/PolyPilot.Tests/TestSetup.cs +++ b/PolyPilot.Tests/TestSetup.cs @@ -36,5 +36,6 @@ internal static void Initialize() FiestaService.SetStateFilePathForTesting(Path.Combine(TestBaseDir, "fiesta.json")); ConnectionSettings.SetSettingsFilePathForTesting(Path.Combine(TestBaseDir, "settings.json")); ScheduledTaskService.SetTasksFilePathForTesting(Path.Combine(TestBaseDir, "scheduled-tasks.json")); + SessionAnalyzerService.SetBaseDirForTesting(TestBaseDir); } } diff --git a/PolyPilot/Components/Pages/Settings.razor b/PolyPilot/Components/Pages/Settings.razor index 41bc363e9d..f1cb433203 100644 --- a/PolyPilot/Components/Pages/Settings.razor +++ b/PolyPilot/Components/Pages/Settings.razor @@ -536,6 +536,30 @@ } + @if (PlatformHelper.IsDesktop) + { + + + + + 🔍 Session Analyzer + + Background monitor that analyzes sessions for reliability issues and can auto-fix bugs via PR + + @if (settings.EnableSessionAnalyzer) + { + + + Analysis interval (minutes): + + + + } + + } + @@ -1398,6 +1422,29 @@ SaveSettingsQuietly(); } + private async Task ToggleSessionAnalyzer(ChangeEventArgs e) + { + settings.EnableSessionAnalyzer = e.Value is true; + settings.Save(); + if (settings.EnableSessionAnalyzer) + { + CopilotService.StartSessionAnalyzerIfEnabled(); + } + else + { + await CopilotService.StopSessionAnalyzerAsync(); + } + } + + private void OnAnalyzerIntervalChanged(ChangeEventArgs e) + { + if (int.TryParse(e.Value?.ToString(), out var val)) + { + settings.SessionAnalyzerIntervalMinutes = val; + SaveSettingsQuietly(); + } + } + private void ToggleAutoUpdate() { if (GitAutoUpdate.IsEnabled) diff --git a/PolyPilot/MauiProgram.cs b/PolyPilot/MauiProgram.cs index 659972ea66..04cce55bfd 100644 --- a/PolyPilot/MauiProgram.cs +++ b/PolyPilot/MauiProgram.cs @@ -120,6 +120,7 @@ public static MauiApp CreateMauiApp() builder.Services.AddSingleton(); builder.Services.AddSingleton(); builder.Services.AddSingleton(); + builder.Services.AddSingleton(); #if DEBUG builder.Services.AddBlazorWebViewDeveloperTools(); diff --git a/PolyPilot/Models/ConnectionSettings.cs b/PolyPilot/Models/ConnectionSettings.cs index 075be14b4f..13d72ffd2d 100644 --- a/PolyPilot/Models/ConnectionSettings.cs +++ b/PolyPilot/Models/ConnectionSettings.cs @@ -131,6 +131,23 @@ public string? ServerPassword /// public bool EnableVerboseEventTracing { get; set; } = false; + /// + /// When true, a background copilot CLI session perpetually monitors all running + /// sessions for reliability issues (stuck processing, watchdog kills, error patterns) + /// and can auto-create PRs with fixes. + /// + public bool EnableSessionAnalyzer { get; set; } = false; + + /// + /// How often (in minutes) the session analyzer runs its diagnostic analysis. + /// + public int SessionAnalyzerIntervalMinutes + { + get => _sessionAnalyzerIntervalMinutes; + set => _sessionAnalyzerIntervalMinutes = Math.Clamp(value, 1, 1440); + } + private int _sessionAnalyzerIntervalMinutes = 10; + /// /// Normalizes a remote URL by ensuring it has an http(s):// scheme. /// Plain IPs/hostnames get http://, devtunnels/known TLS hosts get https://. diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs index 3ceb11a193..84b1a923fa 100644 --- a/PolyPilot/Services/CopilotService.cs +++ b/PolyPilot/Services/CopilotService.cs @@ -847,6 +847,42 @@ private void StopKeepalivePing() } } + /// + /// Start the session analyzer if enabled in settings. Desktop-only, non-demo, non-remote. + /// + internal void StartSessionAnalyzerIfEnabled() + { + if (IsDemoMode || IsRemoteMode) return; + + var settings = _currentSettings ?? ConnectionSettings.Load(); + if (!settings.EnableSessionAnalyzer) return; + + var analyzer = _serviceProvider?.GetService(typeof(SessionAnalyzerService)) as SessionAnalyzerService; + if (analyzer == null || analyzer.IsRunning) return; + + // Use the repo root of this worktree as the working directory + var cwd = Directory.GetCurrentDirectory(); + var interval = settings.SessionAnalyzerIntervalMinutes; + + _ = Task.Run(async () => + { + try { await analyzer.StartAsync(cwd, interval); } + catch (Exception ex) { Debug($"[ANALYZER] Failed to start: {ex.Message}"); } + }); + } + + /// + /// Stop the session analyzer gracefully. + /// + internal async Task StopSessionAnalyzerAsync() + { + var analyzer = _serviceProvider?.GetService(typeof(SessionAnalyzerService)) as SessionAnalyzerService; + if (analyzer == null || !analyzer.IsRunning) return; + + try { await analyzer.StopAsync(); } + catch (Exception ex) { Debug($"[ANALYZER] Failed to stop: {ex.Message}"); } + } + private async Task RunKeepalivePingAsync(CancellationToken ct) { try @@ -1293,6 +1329,9 @@ public async Task InitializeAsync(CancellationToken cancellationToken = default) // Start keepalive pinging to prevent server idle timeout if (!IsDemoMode && !IsRemoteMode && _client != null) StartKeepalivePing(); + + // Start session analyzer if enabled in settings + StartSessionAnalyzerIfEnabled(); } /// @@ -1360,6 +1399,7 @@ public async Task ReconnectAsync(ConnectionSettings settings, CancellationToken StopKeepalivePing(); await StopCodespaceHealthCheckAsync(); StopExternalSessionScanner(); + await StopSessionAnalyzerAsync(); // Dispose existing sessions and client foreach (var state in _sessions.Values) @@ -1470,6 +1510,9 @@ public async Task ReconnectAsync(ConnectionSettings settings, CancellationToken // Start keepalive pinging to prevent server idle timeout if (!IsDemoMode && !IsRemoteMode && _client != null) StartKeepalivePing(); + + // Start session analyzer if enabled in settings + StartSessionAnalyzerIfEnabled(); } /// diff --git a/PolyPilot/Services/SessionAnalyzerService.cs b/PolyPilot/Services/SessionAnalyzerService.cs new file mode 100644 index 0000000000..a334898130 --- /dev/null +++ b/PolyPilot/Services/SessionAnalyzerService.cs @@ -0,0 +1,396 @@ +using System.Text; +using System.Text.Json; + +namespace PolyPilot.Services; + +/// +/// A background service that maintains a dedicated copilot CLI session to perpetually +/// analyze running PolyPilot sessions for issues. The analyzer runs in autopilot mode +/// and can create/update a single long-lived PR with fixes — always pushing to the +/// same branch so fixes accumulate rather than spawning multiple PRs. +/// +public class SessionAnalyzerService : IAsyncDisposable, IDisposable +{ + private readonly CopilotService _copilotService; + private readonly IServerManager _serverManager; + private CancellationTokenSource? _cts; + private Task? _analysisLoop; + private string? _analyzerSessionName; + private bool _disposed; + private int _analysisCount; + private long _lastAnalysisAtTicks; + + internal const string AnalyzerGroupName = "🔍 Session Analyzer"; + internal const string AnalyzerSessionName = "PolyPilot Monitor"; + internal const int DefaultAnalysisIntervalMinutes = 10; + internal const int MinAnalysisIntervalMinutes = 1; + internal const int MaxAnalysisIntervalMinutes = 1440; // 24 hours + internal const int DiagnosticLogTailLines = 200; + internal const int CrashLogTailLines = 50; + internal const int MaxLogFileSizeBytes = 10 * 1024 * 1024; // 10 MB cap for TailFile + + private static string? _polypilotDir; + private static string PolyPilotDir => _polypilotDir ??= CopilotService.BaseDir; + + public bool IsRunning => _analysisLoop is { IsCompleted: false }; + + public DateTime? LastAnalysisAt + { + get + { + var ticks = Interlocked.Read(ref _lastAnalysisAtTicks); + return ticks == 0 ? null : new DateTime(ticks, DateTimeKind.Utc); + } + } + + public int AnalysisCount => Interlocked.CompareExchange(ref _analysisCount, 0, 0); + public string? LastFinding { get; private set; } + + public SessionAnalyzerService(CopilotService copilotService, IServerManager serverManager) + { + _copilotService = copilotService; + _serverManager = serverManager; + } + + /// + /// Start the perpetual analysis loop. Creates the analyzer session if needed. + /// + public async Task StartAsync(string repoWorkingDirectory, int intervalMinutes = DefaultAnalysisIntervalMinutes) + { + if (IsRunning) return; + + var clampedInterval = Math.Clamp(intervalMinutes, MinAnalysisIntervalMinutes, MaxAnalysisIntervalMinutes); + _cts = new CancellationTokenSource(); + var token = _cts.Token; + + try + { + var session = await _copilotService.CreateSessionAsync( + AnalyzerSessionName, + model: "claude-sonnet-4.5", + workingDirectory: repoWorkingDirectory, + cancellationToken: token); + + session.IsHidden = true; + // Only set name after successful creation + _analyzerSessionName = AnalyzerSessionName; + } + catch (Exception ex) + { + LogAnalyzer($"Failed to create analyzer session: {ex.Message}"); + _analyzerSessionName = null; + return; + } + + _analysisLoop = RunAnalysisLoopAsync(clampedInterval, token); + } + + /// + /// Stop the analysis loop, await completion, and clean up the analyzer session. + /// + public async Task StopAsync() + { + _cts?.Cancel(); + + if (_analysisLoop is not null) + { + try { await _analysisLoop.WaitAsync(TimeSpan.FromSeconds(5)); } + catch (TimeoutException) { LogAnalyzer("Analysis loop did not stop within 5s timeout"); } + catch (OperationCanceledException) { /* expected */ } + catch (Exception ex) { LogAnalyzer($"Error awaiting analysis loop: {ex.Message}"); } + _analysisLoop = null; + } + + // Close the analyzer session in CopilotService so the name can be reused on restart + if (_analyzerSessionName is not null) + { + try { await _copilotService.CloseSessionAsync(_analyzerSessionName); } + catch (Exception ex) { LogAnalyzer($"Error closing analyzer session: {ex.Message}"); } + } + + _cts?.Dispose(); + _cts = null; + _analyzerSessionName = null; + } + + /// + /// Synchronous stop for IDisposable — prefer StopAsync/DisposeAsync. + /// + public void Stop() + { + _cts?.Cancel(); + _cts?.Dispose(); + _cts = null; + _analysisLoop = null; + + // Best-effort session cleanup — fire-and-forget since we can't await in sync path + if (_analyzerSessionName is not null) + { + var name = _analyzerSessionName; + _ = Task.Run(async () => + { + try { await _copilotService.CloseSessionAsync(name); } + catch { /* best effort */ } + }); + } + _analyzerSessionName = null; + } + + /// + /// Run a single analysis pass immediately (for testing or on-demand use). + /// + public async Task RunSingleAnalysisAsync(CancellationToken cancellationToken = default) + { + if (string.IsNullOrEmpty(_analyzerSessionName)) return null; + + var diagnostics = CollectDiagnostics(); + var prompt = BuildAnalysisPrompt(diagnostics); + + try + { + // Use a linked token with a 10-minute timeout so autopilot can't block forever + using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); + timeoutCts.CancelAfter(TimeSpan.FromMinutes(10)); + + var response = await _copilotService.SendPromptAsync( + _analyzerSessionName, + prompt, + cancellationToken: timeoutCts.Token, + agentMode: "autopilot"); + + Interlocked.Exchange(ref _lastAnalysisAtTicks, DateTime.UtcNow.Ticks); + Interlocked.Increment(ref _analysisCount); + + if (!string.IsNullOrWhiteSpace(response)) + LastFinding = response.Length > 200 ? response[..200] + "..." : response; + + return response; + } + catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) + { + throw; // caller-initiated cancellation — propagate + } + catch (Exception ex) + { + LogAnalyzer($"Analysis failed: {ex.Message}"); + return null; + } + } + + private async Task RunAnalysisLoopAsync(int intervalMinutes, CancellationToken token) + { + // Initial delay — let the app settle after launch + await Task.Delay(TimeSpan.FromMinutes(2), token); + + while (!token.IsCancellationRequested) + { + try + { + await RunSingleAnalysisAsync(token); + } + catch (OperationCanceledException) { break; } + catch (Exception ex) + { + LogAnalyzer($"Analysis loop error: {ex.Message}"); + } + + try + { + await Task.Delay(TimeSpan.FromMinutes(intervalMinutes), token); + } + catch (OperationCanceledException) { break; } + } + } + + /// + /// Collect current diagnostic data from all available sources. + /// + internal string CollectDiagnostics() + { + var sb = new StringBuilder(); + + // 1. Recent event diagnostics + var diagLog = Path.Combine(PolyPilotDir, "event-diagnostics.log"); + if (File.Exists(diagLog)) + { + var lines = TailFile(diagLog, DiagnosticLogTailLines); + if (lines.Length > 0) + { + sb.AppendLine("## Recent Event Diagnostics (last 200 lines)"); + sb.AppendLine("```"); + sb.AppendLine(string.Join(Environment.NewLine, lines)); + sb.AppendLine("```"); + sb.AppendLine(); + } + } + + // 2. Crash log + var crashLog = Path.Combine(PolyPilotDir, "crash.log"); + if (File.Exists(crashLog)) + { + var lines = TailFile(crashLog, CrashLogTailLines); + if (lines.Length > 0) + { + sb.AppendLine("## Recent Crash Log (last 50 lines)"); + sb.AppendLine("```"); + sb.AppendLine(string.Join(Environment.NewLine, lines)); + sb.AppendLine("```"); + sb.AppendLine(); + } + } + + // 3. Active session states (snapshot to avoid torn reads) + sb.AppendLine("## Active Session States"); + sb.AppendLine("```json"); + sb.AppendLine(CollectSessionStates()); + sb.AppendLine("```"); + sb.AppendLine(); + + // 4. Server health + sb.AppendLine("## Server Health"); + sb.AppendLine($"- Server running: {_serverManager.IsServerRunning}"); + sb.AppendLine($"- Server PID: {_serverManager.ServerPid}"); + sb.AppendLine($"- Server port: {_serverManager.ServerPort}"); + if (!string.IsNullOrEmpty(_serverManager.LastError)) + sb.AppendLine($"- Last error: {_serverManager.LastError}"); + sb.AppendLine(); + + return sb.ToString(); + } + + /// + /// Collect summary state for all active sessions. + /// Snapshots the enumeration with ToList() to avoid torn reads. + /// + private string CollectSessionStates() + { + var sessions = _copilotService.GetAllSessions().ToList(); + var summaries = new List(); + + foreach (var session in sessions) + { + if (session.Name == AnalyzerSessionName) continue; + + summaries.Add(new + { + name = session.Name, + isProcessing = session.IsProcessing, + processingPhase = session.ProcessingPhase, + toolCallCount = session.ToolCallCount, + processingStartedAt = session.ProcessingStartedAt, + messageCount = session.MessageCount, + lastUpdated = session.LastUpdatedAt, + isResumed = session.IsResumed, + }); + } + + return JsonSerializer.Serialize(summaries, new JsonSerializerOptions { WriteIndented = true }); + } + + /// + /// Build the analysis prompt with collected diagnostics. + /// The analyzer runs in autopilot and accumulates fixes on a single long-lived branch/PR. + /// + internal static string BuildAnalysisPrompt(string diagnostics) + { + return $""" + You are the PolyPilot Session Analyzer — a reliability monitor that runs perpetually alongside PolyPilot. + + Your job is to analyze the diagnostic data below and identify any issues with running sessions. + + ## What to look for: + 1. **Stuck sessions** — sessions showing IsProcessing=true for too long without recent events + 2. **Watchdog kills** — [WATCHDOG] entries that indicate sessions were force-completed + 3. **Error patterns** — [ERROR], [RECONNECT], crash log entries + 4. **Premature completions** — [IDLE-FALLBACK] or [COMPLETE] entries that shouldn't have fired + 5. **Dead connections** — sessions with no event activity but still marked as processing + 6. **Phantom sessions** — (previous) or (resumed) sessions that shouldn't exist + 7. **Resource leaks** — growing file descriptor counts, memory issues + + ## What to do when you find issues: + + ### For code bugs you can fix: + 1. Check if branch `fix/session-analyzer-findings` already exists (`git branch -a | grep fix/session-analyzer-findings`) + 2. If it exists, check it out and pull latest — your previous fixes are already there + 3. If it does not exist, create it from `main` + 4. Write the fix, run tests, commit, and push to `fix/session-analyzer-findings` + 5. Check if a PR already exists for that branch (`gh pr list --head fix/session-analyzer-findings`) + 6. If a PR exists, it will automatically pick up your new commits — just add a comment summarizing the new fix + 7. If no PR exists, open one with a clear title and description + + **IMPORTANT**: Always reuse the SAME branch `fix/session-analyzer-findings`. Never create a new branch per finding. This keeps all fixes in one PR that accumulates over time. + + ### For stuck sessions or transient issues: + - Report them clearly but do not attempt code changes + + ## Current Diagnostic Data: + + {diagnostics} + + Analyze the data above. If everything looks healthy, say "All sessions healthy" and briefly explain why. + If you find issues, describe each one with severity (critical/warning/info) and recommended action. + """; + } + + /// + /// Read the last N lines of a file efficiently using reverse seek. + /// Caps file read to MaxLogFileSizeBytes to avoid unbounded memory usage. + /// + internal static string[] TailFile(string path, int lineCount) + { + try + { + using var fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite); + // Cap how much we read from the end to avoid loading huge files + var readLength = Math.Min(fs.Length, MaxLogFileSizeBytes); + if (readLength < fs.Length) + fs.Seek(fs.Length - readLength, SeekOrigin.Begin); + + using var reader = new StreamReader(fs); + var buffer = new Queue(); + string? line; + while ((line = reader.ReadLine()) != null) + { + buffer.Enqueue(line); + if (buffer.Count > lineCount) + buffer.Dequeue(); + } + + return buffer.ToArray(); + } + catch + { + return Array.Empty(); + } + } + + private static void LogAnalyzer(string message) + { + try + { + var logPath = Path.Combine(PolyPilotDir, "event-diagnostics.log"); + File.AppendAllText(logPath, $"{DateTime.UtcNow:yyyy-MM-dd HH:mm:ss.fff} [ANALYZER] {message}{Environment.NewLine}"); + } + catch { /* best effort */ } + } + + public async ValueTask DisposeAsync() + { + if (_disposed) return; + _disposed = true; + await StopAsync(); + } + + public void Dispose() + { + if (_disposed) return; + _disposed = true; + Stop(); + } + + // For test isolation + internal static void SetBaseDirForTesting(string dir) + { + _polypilotDir = dir; + } +}
Background monitor that analyzes sessions for reliability issues and can auto-fix bugs via PR