From b48b72a2d715e45851e0f99b7989376d153c53b3 Mon Sep 17 00:00:00 2001 From: Shane Date: Mon, 23 Mar 2026 09:03:31 -0500 Subject: [PATCH 1/4] fix: eliminate false-positive premature idle detection that delayed multi-agent synthesis by ~16s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem In RecoverFromPrematureIdleIfNeededAsync, after a worker completes legitimately, IsEventsFileActive() returned true because events.jsonl was just written by the idle event itself (mtime < PrematureIdleEventsFileFreshnessSeconds = 15s). This triggered unnecessary DISPATCH-RECOVER polling — 8 rounds over ~16 seconds — before the code finally confirmed the worker was truly done. Observed in diagnostics (FlyoutLeak session, 2026-03-22): [COMPLETE] 'FlyoutLeak-worker-1' CompleteResponse executing (responseLen=0, flushedLen=3406) [DISPATCH-RECOVER] premature idle detected via events.jsonl freshness — truncated response=3406 chars [DISPATCH-RECOVER] events.jsonl stale and not processing after round 8 — worker is truly done (3406 chars) [DISPATCH-RECOVER] recovery found no additional content after 8 rounds — using original response (3406 chars) (16 seconds wasted; synthesis was delayed by this amount per worker) ## Fix Replace the raw IsEventsFileActive() check with a mtime-comparison approach: 1. Snapshot events.jsonl mtime at TCS completion 2. Wait PrematureIdleEventsGracePeriodMs (2s) to observe file activity 3. Only declare premature idle if mtime CHANGED — proving CLI is still writing Normal completions: idle event writes events.jsonl → mtime frozen during 2s → not detected True premature idles: CLI continues writing events → mtime advances → detected correctly The polling loop also updated to compare against the stable mtime baseline rather than using raw freshness, making it consistent with the initial detection approach. ## New constant PrematureIdleEventsGracePeriodMs = 2000 — grace period for mtime observation ## New helper GetEventsFileMtime(sessionId) — internal DateTime? returning events.jsonl mtime ## Tests added (5 new, all in MultiAgentRegressionTests.cs) - PrematureIdleEventsGracePeriodMs_ConstantExists — bounds check (500ms–5s) - GetEventsFileMtime_HelperExists — structural check for internal DateTime? helper - RecoverFromPrematureIdleIfNeededAsync_UsesMtimeComparisonForInitialDetection - RecoverFromPrematureIdleIfNeededAsync_PollingLoopUsesMtimeComparison - HasDiskFallback char window increased 8000→12000 (method grew by ~10 lines) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/MultiAgentRegressionTests.cs | 89 ++++++++++++++++++- .../Services/CopilotService.Organization.cs | 48 ++++++++-- 2 files changed, 128 insertions(+), 9 deletions(-) diff --git a/PolyPilot.Tests/MultiAgentRegressionTests.cs b/PolyPilot.Tests/MultiAgentRegressionTests.cs index dcb7ecb89a..ac244cbfdc 100644 --- a/PolyPilot.Tests/MultiAgentRegressionTests.cs +++ b/PolyPilot.Tests/MultiAgentRegressionTests.cs @@ -2480,7 +2480,7 @@ public void RecoverFromPrematureIdleIfNeededAsync_HasDiskFallback() // Find the method definition (not a call site) var methodIdx = source.IndexOf("private async Task RecoverFromPrematureIdleIfNeededAsync", StringComparison.Ordinal); Assert.True(methodIdx >= 0, "RecoverFromPrematureIdleIfNeededAsync method definition must exist"); - var methodBlock = source.Substring(methodIdx, Math.Min(8000, source.Length - methodIdx)); + var methodBlock = source.Substring(methodIdx, Math.Min(12000, source.Length - methodIdx)); Assert.Contains("LoadHistoryFromDisk", methodBlock); } @@ -2588,5 +2588,92 @@ public void PrematureIdleEventsFileFreshnessSeconds_ConstantExists() Assert.True(constIdx >= 0, "Must be an internal const int"); } + [Fact] + public void PrematureIdleEventsGracePeriodMs_ConstantExists() + { + // Grace period constant must exist and be in a sensible range (500ms–5s). + // Too short → still false-positives; too long → adds unnecessary latency. + Assert.True(CopilotService.PrematureIdleEventsGracePeriodMs >= 500, + "Grace period must be >= 500ms to observe mtime change"); + Assert.True(CopilotService.PrematureIdleEventsGracePeriodMs <= 5000, + "Grace period must be <= 5s to not delay normal completions excessively"); + + var orgPath = Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.Organization.cs"); + var source = File.ReadAllText(orgPath); + var constIdx = source.IndexOf("internal const int PrematureIdleEventsGracePeriodMs", StringComparison.Ordinal); + Assert.True(constIdx >= 0, "PrematureIdleEventsGracePeriodMs must be an internal const int"); + } + + [Fact] + public void GetEventsFileMtime_HelperExists() + { + // GetEventsFileMtime must exist as an internal helper returning DateTime? + // Used by RecoverFromPrematureIdleIfNeededAsync for mtime-comparison detection. + var orgPath = Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.Organization.cs"); + var source = File.ReadAllText(orgPath); + + var helperIdx = source.IndexOf("internal DateTime? GetEventsFileMtime(", StringComparison.Ordinal); + Assert.True(helperIdx >= 0, "GetEventsFileMtime helper must exist as internal DateTime?"); + + var helperBlock = source.Substring(helperIdx, Math.Min(600, source.Length - helperIdx)); + Assert.Contains("GetLastWriteTimeUtc", helperBlock); + Assert.Contains("events.jsonl", helperBlock); + } + + [Fact] + public void RecoverFromPrematureIdleIfNeededAsync_UsesMtimeComparisonForInitialDetection() + { + // Structural: instead of raw IsEventsFileActive (which sees the idle event's own write + // as "fresh" and false-positives), the method must snapshot mtime, wait the grace period, + // then compare mtimes. Only a changed mtime proves the CLI is still writing. + var orgPath = Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.Organization.cs"); + var source = File.ReadAllText(orgPath); + + var methodIdx = source.IndexOf("private async Task RecoverFromPrematureIdleIfNeededAsync", StringComparison.Ordinal); + Assert.True(methodIdx >= 0, "RecoverFromPrematureIdleIfNeededAsync must exist"); + var methodBlock = source.Substring(methodIdx, Math.Min(4000, source.Length - methodIdx)); + + // Must use mtime comparison in the detection phase + Assert.Contains("GetEventsFileMtime", methodBlock); + Assert.Contains("PrematureIdleEventsGracePeriodMs", methodBlock); + Assert.Contains("stableMtime", methodBlock); + + // The grace period delay must appear before the stableMtime assignment + var delayIdx = methodBlock.IndexOf("PrematureIdleEventsGracePeriodMs", StringComparison.Ordinal); + var assignIdx = methodBlock.IndexOf("stableMtime = GetEventsFileMtime", StringComparison.Ordinal); + Assert.True(assignIdx >= 0, "stableMtime must be assigned from GetEventsFileMtime after the delay"); + Assert.True(delayIdx < assignIdx, "Grace period delay must precede stable-mtime assignment"); + } + + [Fact] + public void RecoverFromPrematureIdleIfNeededAsync_PollingLoopUsesMtimeComparison() + { + // Structural: the secondary polling loop must also use mtime comparison (not raw + // IsEventsFileActive) so that a stale-but-fresh file doesn't trigger false detection + // in subsequent poll cycles. + var orgPath = Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.Organization.cs"); + var source = File.ReadAllText(orgPath); + + var methodIdx = source.IndexOf("private async Task RecoverFromPrematureIdleIfNeededAsync", StringComparison.Ordinal); + Assert.True(methodIdx >= 0, "RecoverFromPrematureIdleIfNeededAsync must exist"); + var methodBlock = source.Substring(methodIdx, Math.Min(5000, source.Length - methodIdx)); + + // The polling loop must compare currentMtime against stableMtime + Assert.Contains("currentMtime", methodBlock); + Assert.Contains("stableMtime", methodBlock); + + // Both GetEventsFileMtime calls should appear in the method + var calls = 0; + var searchFrom = 0; + while (true) + { + var idx = methodBlock.IndexOf("GetEventsFileMtime", searchFrom, StringComparison.Ordinal); + if (idx < 0) break; + calls++; + searchFrom = idx + 1; + } + Assert.True(calls >= 2, $"GetEventsFileMtime must be called at least twice (grace + polling), found {calls}"); + } + #endregion } diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index 907f85b3ad..c666c4d970 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -47,6 +47,13 @@ public partial class CopilotService /// the worker is likely still active despite the premature session.idle. internal const int PrematureIdleEventsFileFreshnessSeconds = 15; + /// Grace period after TCS completion before declaring premature idle based on + /// events.jsonl mtime. During this window we observe whether the file's mtime changes + /// (indicating the CLI is still writing), vs. remaining frozen (normal completion where + /// the idle event itself wrote the file). This prevents false-positive premature idle + /// detection when events.jsonl was just written by the completing idle event. + internal const int PrematureIdleEventsGracePeriodMs = 2000; + /// Maximum time to wait for the worker's real completion after detecting a /// premature session.idle re-arm. Workers with long tool runs can take minutes. internal const int PrematureIdleRecoveryTimeoutMs = 300_000; @@ -2369,17 +2376,26 @@ bool WaitForPrematureIdleSignal() // Fast path: check if PrematureIdleSignal was already set var detected = IsPrematureIdleSignalSet(); - + + // Stable mtime after grace period — used as baseline for polling loop comparison + DateTime? stableMtime = null; + if (!detected) { - // Check events.jsonl freshness immediately — if the file was just written, - // the CLI is still actively working despite the premature session.idle - detected = IsEventsFileActive(state.Info.SessionId); + // Snapshot mtime and wait briefly to detect whether CLI is still writing. + // Without this grace period we'd false-positive: the idle event itself just + // wrote to events.jsonl, making it appear "fresh" even on normal completion. + var mtimeBefore = GetEventsFileMtime(state.Info.SessionId); + try { await Task.Delay(PrematureIdleEventsGracePeriodMs, cancellationToken); } + catch (OperationCanceledException) { return initialResponse; } + stableMtime = GetEventsFileMtime(state.Info.SessionId); + // Mtime changed → CLI wrote new events during grace period → genuine premature idle + detected = stableMtime.HasValue && stableMtime.Value > (mtimeBefore ?? DateTime.MinValue); } - + if (!detected) { - // Wait for PrematureIdleSignal OR poll events.jsonl freshness + // Wait for PrematureIdleSignal OR poll events.jsonl for mtime changes var detectStart = DateTime.UtcNow; while ((DateTime.UtcNow - detectStart).TotalMilliseconds < PrematureIdleDetectionWindowMs) { @@ -2393,8 +2409,9 @@ bool WaitForPrematureIdleSignal() break; } - // Re-check events.jsonl freshness each cycle - if (IsEventsFileActive(state.Info.SessionId)) + // Check if events.jsonl mtime advanced past the stable baseline + var currentMtime = GetEventsFileMtime(state.Info.SessionId); + if (currentMtime.HasValue && currentMtime.Value > (stableMtime ?? DateTime.MinValue)) { detected = true; break; @@ -2547,6 +2564,21 @@ private bool IsEventsFileActive(string? sessionId) catch { return false; } } + /// Get the last-write UTC time of a session's events.jsonl, or null if the file + /// does not exist. Used by premature idle recovery to compare mtime before and after a + /// grace period to distinguish genuine ongoing CLI activity from a stale idle-event write. + internal DateTime? GetEventsFileMtime(string? sessionId) + { + if (string.IsNullOrEmpty(sessionId)) return null; + try + { + var eventsPath = Path.Combine(SessionStatePath, sessionId, "events.jsonl"); + if (!File.Exists(eventsPath)) return null; + return File.GetLastWriteTimeUtc(eventsPath); + } + catch { return null; } + } + private static string BuildWorkerPrompt(string identity, string worktreeNote, string sharedPrefix, string originalPrompt, string task) { return $"{identity}{worktreeNote}\n\nYour response will be collected and synthesized with other workers' responses.\n\n" + From 18174324279bae3c5681e2a159bc3148758a6690 Mon Sep 17 00:00:00 2001 From: Shane Date: Mon, 23 Mar 2026 15:25:45 -0500 Subject: [PATCH 2/4] docs: add instruction to always fix test failures, never dismiss as pre-existing Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/copilot-instructions.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 9bbafad2ca..8207816878 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -19,6 +19,8 @@ The test project lives at `../PolyPilot.Tests/` (sibling directory). It includes **Always run tests after modifying models, bridge messages, or serialization logic.** When adding new features or changing existing behavior, update or add tests to match. The tests serve as a living specification of the app's data contracts and parsing logic. +> ⚠️ **Zero tolerance for test failures.** When you encounter test failures — whether caused by your changes or pre-existing — **always fix them**. Never dismiss failures as "pre-existing" or "unrelated". If tests fail, they must be fixed before the task is complete. A green test suite is a hard requirement for every PR. + ### Android ```bash dotnet build -f net10.0-android # Build only From 76ae3157253a233c9525ab9b89fc1a6b01bcb7d3 Mon Sep 17 00:00:00 2001 From: Shane Date: Mon, 23 Mar 2026 15:31:47 -0500 Subject: [PATCH 3/4] fix: normalize worktree paths before comparison in local folder group migration ReconcileOrganization's promotion/heal migration compared wt.Path (raw, potentially using forward slashes or relative forms) against normalizedExtPath/normalizedLocalPath (computed via Path.GetFullPath). On Windows, unix-style paths like /tmp/wt-1 normalize to C:\tmp\wt-1, causing the equality check to always fail and incorrectly treating every session as 'stranded', migrating them to newly-created URL groups. Two sites fixed: 1. Promotion migration (line ~673): wt.Path normalized before StartsWith/Equals check 2. Heal stranded sessions (line ~696): wt.Path normalized before StartsWith/Equals check This fixes Scenario_MixedSessions_ReconcileDoesNotScatter which was failing because sessions in a repo group were incorrectly being migrated to a different group ID. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Services/CopilotService.Organization.cs | 38 +++++++++++++------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs index c666c4d970..edc9607d50 100644 --- a/PolyPilot/Services/CopilotService.Organization.cs +++ b/PolyPilot/Services/CopilotService.Organization.cs @@ -670,11 +670,18 @@ internal void ReconcileOrganization(bool allowPruning = true) { if (meta.WorktreeId == null) continue; var wt = _repoManager.Worktrees.FirstOrDefault(w => w.Id == meta.WorktreeId); - if (wt != null && !wt.Path.StartsWith(normalizedExtPath + Path.DirectorySeparatorChar, StringComparison.OrdinalIgnoreCase) - && !string.Equals(wt.Path, normalizedExtPath, StringComparison.OrdinalIgnoreCase)) + if (wt != null) { - Debug($"ReconcileOrganization: migrating '{meta.SessionName}' from promoted local folder group to URL group '{urlGroup.Id}'"); - meta.GroupId = urlGroup.Id; + // Normalize wt.Path before comparing: on Windows, stored paths may use + // forward slashes or relative forms that differ from the GetFullPath result. + var normalizedWtPath = Path.GetFullPath(wt.Path) + .TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar); + if (!normalizedWtPath.StartsWith(normalizedExtPath + Path.DirectorySeparatorChar, StringComparison.OrdinalIgnoreCase) + && !string.Equals(normalizedWtPath, normalizedExtPath, StringComparison.OrdinalIgnoreCase)) + { + Debug($"ReconcileOrganization: migrating '{meta.SessionName}' from promoted local folder group to URL group '{urlGroup.Id}'"); + meta.GroupId = urlGroup.Id; + } } } } @@ -693,16 +700,23 @@ internal void ReconcileOrganization(bool allowPruning = true) if (meta.WorktreeId == null) continue; if (protectedGroupIds.Contains(meta.GroupId)) continue; var wt = _repoManager.Worktrees.FirstOrDefault(w => w.Id == meta.WorktreeId); - if (wt != null && !wt.Path.StartsWith(normalizedLocalPath + Path.DirectorySeparatorChar, StringComparison.OrdinalIgnoreCase) - && !string.Equals(wt.Path, normalizedLocalPath, StringComparison.OrdinalIgnoreCase)) + if (wt != null) { - var repoName = _repoManager.Repositories.FirstOrDefault(r => r.Id == localGroup.RepoId)?.Name ?? localGroup.RepoId; - var urlGroup = GetOrCreateRepoGroup(localGroup.RepoId!, repoName!); - if (urlGroup != null) + // Normalize wt.Path before comparing: on Windows, stored paths may use + // forward slashes or relative forms that differ from the GetFullPath result. + var normalizedWtPath = Path.GetFullPath(wt.Path) + .TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar); + if (!normalizedWtPath.StartsWith(normalizedLocalPath + Path.DirectorySeparatorChar, StringComparison.OrdinalIgnoreCase) + && !string.Equals(normalizedWtPath, normalizedLocalPath, StringComparison.OrdinalIgnoreCase)) { - Debug($"ReconcileOrganization: healing '{meta.SessionName}' from local folder group '{localGroup.Name}' to URL group '{urlGroup.Id}'"); - meta.GroupId = urlGroup.Id; - changed = true; + var repoName = _repoManager.Repositories.FirstOrDefault(r => r.Id == localGroup.RepoId)?.Name ?? localGroup.RepoId; + var urlGroup = GetOrCreateRepoGroup(localGroup.RepoId!, repoName!); + if (urlGroup != null) + { + Debug($"ReconcileOrganization: healing '{meta.SessionName}' from local folder group '{localGroup.Name}' to URL group '{urlGroup.Id}'"); + meta.GroupId = urlGroup.Id; + changed = true; + } } } } From bd7ba54d4f80130a637bba19bbd13e40adfc83fc Mon Sep 17 00:00:00 2001 From: Shane Date: Mon, 23 Mar 2026 15:32:18 -0500 Subject: [PATCH 4/4] docs: reinforce zero-tolerance test failure policy for worker agents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the existing 'zero tolerance' callout to explicitly prohibit worker/implementer agents from dismissing failures as 'pre-existing'. The previous wording was clear but insufficient — agents still reported pre-existing failures without fixing them. New language makes the requirement unambiguous: encountering a pre-existing failure and not fixing it is itself a task failure. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/copilot-instructions.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 8207816878..f0648c935d 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -20,6 +20,8 @@ The test project lives at `../PolyPilot.Tests/` (sibling directory). It includes **Always run tests after modifying models, bridge messages, or serialization logic.** When adding new features or changing existing behavior, update or add tests to match. The tests serve as a living specification of the app's data contracts and parsing logic. > ⚠️ **Zero tolerance for test failures.** When you encounter test failures — whether caused by your changes or pre-existing — **always fix them**. Never dismiss failures as "pre-existing" or "unrelated". If tests fail, they must be fixed before the task is complete. A green test suite is a hard requirement for every PR. +> +> **For worker/implementer agents specifically:** You are NEVER allowed to report a test failure as "pre-existing" and move on. If you discover a test failure that existed before your change, you must still fix it as part of your task. Saying "pre-existing failure, not caused by my changes" without fixing it is a task failure. Fix the test or fix the production code — whichever is correct — then verify the suite is green. ### Android ```bash