From d1956abd33e8673427def2b59c6f541ab5074cdb Mon Sep 17 00:00:00 2001 From: Shane Neuville Date: Tue, 10 Mar 2026 15:08:29 -0500 Subject: [PATCH] Make HasUsedToolsThisTurn volatile for ARM memory model safety The field is read by the watchdog timer thread and SDK background threads while being written from the UI thread. Previously it had inconsistent synchronization: 5 sites used Volatile.Read/Write while 16 used plain access. Fix: Declare the field volatile and remove the now-redundant Volatile.Read/Write calls (which would generate CS0420 warnings). Also remove a duplicate write in the reconnect path. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/ProcessingWatchdogTests.cs | 24 ++++++++++++--------- PolyPilot/Services/CopilotService.Events.cs | 8 +++---- PolyPilot/Services/CopilotService.cs | 14 ++++-------- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/PolyPilot.Tests/ProcessingWatchdogTests.cs b/PolyPilot.Tests/ProcessingWatchdogTests.cs index ec16c7ba7a..eeb6538c0d 100644 --- a/PolyPilot.Tests/ProcessingWatchdogTests.cs +++ b/PolyPilot.Tests/ProcessingWatchdogTests.cs @@ -1090,16 +1090,20 @@ public void IsResumed_ClearedOnWatchdogTimeout() } [Fact] - public void HasUsedToolsThisTurn_VolatileConsistency() - { - // Verify that Volatile.Write/Read round-trips correctly - // (mirrors the cross-thread pattern: SDK thread writes, watchdog timer reads) - bool field = false; - Volatile.Write(ref field, true); - Assert.True(Volatile.Read(ref field)); - - Volatile.Write(ref field, false); - Assert.False(Volatile.Read(ref field)); + public void HasUsedToolsThisTurn_IsDeclaredVolatile() + { + // HasUsedToolsThisTurn is read by the watchdog timer thread and written by + // SDK background threads and the UI thread. The field must be declared volatile + // to ensure cross-thread visibility on ARM (iOS/Android). + var field = typeof(CopilotService) + .GetNestedType("SessionState", System.Reflection.BindingFlags.NonPublic)! + .GetField("HasUsedToolsThisTurn")!; + Assert.True((field.Attributes & System.Reflection.FieldAttributes.NotSerialized) != 0 + || field.FieldType == typeof(bool), + "HasUsedToolsThisTurn must be a bool field"); + // C# volatile modifier sets the IsVolatile required modifier on the field + Assert.True(field.GetRequiredCustomModifiers().Any(m => m == typeof(System.Runtime.CompilerServices.IsVolatile)), + "HasUsedToolsThisTurn must be declared volatile for ARM memory model safety"); } // --- Multi-agent watchdog timeout --- diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs index aa138de83f..8c368c0def 100644 --- a/PolyPilot/Services/CopilotService.Events.cs +++ b/PolyPilot/Services/CopilotService.Events.cs @@ -307,7 +307,7 @@ void Invoke(Action action) case ToolExecutionStartEvent toolStart: if (toolStart.Data == null) break; Interlocked.Increment(ref state.ActiveToolCallCount); - Volatile.Write(ref state.HasUsedToolsThisTurn, true); + state.HasUsedToolsThisTurn = true; // volatile field — no explicit barrier needed if (state.Info.ProcessingPhase < 3) { state.Info.ProcessingPhase = 3; // Working @@ -491,7 +491,7 @@ void Invoke(Action action) // Don't complete immediately — wait an additional period. If no new // TurnStart arrives within that window, SessionIdleEvent was lost // (SDK bug #299) and we must complete to unblock the session. - if (Volatile.Read(ref state.HasUsedToolsThisTurn)) + if (state.HasUsedToolsThisTurn) { await Task.Delay(TurnEndIdleToolFallbackAdditionalMs, fallbackToken); if (fallbackToken.IsCancellationRequested) return; @@ -1497,7 +1497,7 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session // resets it, but the model may still be reasoning about the next tool call. // HasUsedToolsThisTurn persists across rounds and prevents premature downgrade. if (state.Info.IsResumed && Volatile.Read(ref state.HasReceivedEventsSinceResume) - && !hasActiveTool && !Volatile.Read(ref state.HasUsedToolsThisTurn)) + && !hasActiveTool && !state.HasUsedToolsThisTurn) { Debug($"[WATCHDOG] '{sessionName}' clearing IsResumed — events have arrived since resume with no tool activity"); InvokeOnUI(() => state.Info.IsResumed = false); @@ -1519,7 +1519,7 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session // instead of the 120s inactivity timeout. var isMultiAgentSession = Volatile.Read(ref state.IsMultiAgentSession); var hasReceivedEvents = Volatile.Read(ref state.HasReceivedEventsSinceResume); - var hasUsedTools = Volatile.Read(ref state.HasUsedToolsThisTurn); + var hasUsedTools = state.HasUsedToolsThisTurn; // Resumed session that has received ZERO events since restart — the turn likely // completed before the app restarted. Use a short 30s quiescence timeout so the diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs index fc733412ca..f3ad48c9d3 100644 --- a/PolyPilot/Services/CopilotService.cs +++ b/PolyPilot/Services/CopilotService.cs @@ -409,7 +409,7 @@ private class SessionState /// Unlike ActiveToolCallCount which resets on AssistantTurnStartEvent, this stays /// true until the response completes — so the watchdog uses the longer tool timeout /// even between tool rounds when the model is thinking. - public bool HasUsedToolsThisTurn; + public volatile bool HasUsedToolsThisTurn; /// /// Count of tools that completed successfully (no permission denial, no error) this turn. /// Used to gate auto-resend on recovery: if tools already succeeded, resend is skipped @@ -1655,7 +1655,7 @@ public async Task ResumeSessionAsync(string sessionId, string { Volatile.Write(ref state.HasReceivedEventsSinceResume, true); if (hadToolActivity) - Volatile.Write(ref state.HasUsedToolsThisTurn, true); + state.HasUsedToolsThisTurn = true; Debug($"[RESTORE] '{displayName}' events.jsonl is fresh — bypassing quiescence " + $"(hadToolActivity={hadToolActivity})"); } @@ -2637,12 +2637,6 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis // orphaned old state can't pass generation checks on the new state. Interlocked.Exchange(ref newState.ProcessingGeneration, Interlocked.Read(ref state.ProcessingGeneration)); - // Reset tool tracking for the NEW connection. The old connection's - // tool state is stale — no tools have run on this connection yet. - // Without this, HasUsedToolsThisTurn=true from the dead connection - // inflates the watchdog timeout from 120s to 600s, making stuck - // sessions wait 5x longer than necessary to recover. - newState.HasUsedToolsThisTurn = false; Interlocked.Exchange(ref newState.ActiveToolCallCount, 0); Interlocked.Exchange(ref newState.SuccessfulToolCountThisTurn, 0); newState.IsMultiAgentSession = state.IsMultiAgentSession; @@ -2668,7 +2662,7 @@ public async Task SendPromptAsync(string sessionName, string prompt, Lis // Reset HasUsedToolsThisTurn so the retried turn starts with the default // 120s watchdog tier instead of the inflated 600s from stale tool state. - Volatile.Write(ref state.HasUsedToolsThisTurn, false); + state.HasUsedToolsThisTurn = false; // Start fresh watchdog for the new connection StartProcessingWatchdog(state, sessionName); @@ -2903,7 +2897,7 @@ public async Task SteerSessionAsync(string sessionName, string steeringMessage, return; } - bool toolsActiveOrUsed = Volatile.Read(ref state.ActiveToolCallCount) > 0 || Volatile.Read(ref state.HasUsedToolsThisTurn); + bool toolsActiveOrUsed = Volatile.Read(ref state.ActiveToolCallCount) > 0 || state.HasUsedToolsThisTurn; // Soft steer is only available for real SDK sessions (not demo/remote which lack CopilotSession). if (state.Info.IsProcessing && toolsActiveOrUsed && !IsDemoMode && !IsRemoteMode && state.Session != null)