Use reference inequality to detect runtime progress; doc mutation contract

jahooma · jahooma · commit 08a82ff3541c · 2026-04-19T16:13:01.000-07:00
Addresses review feedback:
- Switch SDK's runtimeMadeProgress check from history-length comparison to
  array-reference inequality. This stays correct when context pruning shrinks
  history below its starting length mid-run; in that case the length check
  reported a false negative, which duplicated the user prompt.
- Document loopAgentSteps' mutation contract: it mutates params.agentState
  in place throughout the run so callers see in-progress work even when
  an error is thrown before a normal return.
diff --git a/packages/agent-runtime/src/run-agent-step.ts b/packages/agent-runtime/src/run-agent-step.ts
@@ -536,6 +536,17 @@ export const runAgentStep = async (
   }
 }
 
+/**
+ * Runs the agent loop.
+ *
+ * IMPORTANT: This function mutates `params.agentState` in place throughout the
+ * run (not just at return time). Fields like `messageHistory`, `systemPrompt`,
+ * `toolDefinitions`, `creditsUsed`, and `output` are updated as work progresses
+ * so that callers holding a reference to the same object (e.g. the SDK's
+ * `sessionState.mainAgentState`) see in-progress work immediately — which
+ * matters when an error is thrown mid-run and the normal return path is
+ * skipped.
+ */
 export async function loopAgentSteps(
   params: {
     addAgentStep: AddAgentStepFn
diff --git a/sdk/src/__tests__/run-error-preserves-history.test.ts b/sdk/src/__tests__/run-error-preserves-history.test.ts
@@ -43,62 +43,61 @@ describe('Error preserves in-progress message history', () => {
     // 2. Then throws due to a downstream timeout/service error
     spyOn(mainPromptModule, 'callMainPrompt').mockImplementation(
       async (params: Parameters<typeof mainPromptModule.callMainPrompt>[0]) => {
-        const history = params.action.sessionState.mainAgentState.messageHistory
-
-        // The runtime adds the user message as part of building its initial messages
-        history.push({
-          role: 'user',
-          content: [{ type: 'text', text: 'Fix the bug in auth.ts' }],
-          tags: ['USER_PROMPT'],
-        })
-
-        // Step 1: assistant responds with a tool call (reading a file)
-        history.push({
-          role: 'assistant',
-          content: [
-            { type: 'text', text: 'Let me read the auth file first.' },
-            {
-              type: 'tool-call',
-              toolCallId: 'read-1',
-              toolName: 'read_files',
-              input: { paths: ['auth.ts'] },
-            } as ToolCallContentBlock,
-          ],
-        })
-
-        // Tool result
-        history.push({
-          role: 'tool',
-          toolCallId: 'read-1',
-          toolName: 'read_files',
-          content: [
-            {
-              type: 'json',
-              value: [{ path: 'auth.ts', content: 'const auth = ...' }],
-            },
-          ],
-        })
-
-        // Step 2: assistant continues with another tool call (writing the fix)
-        history.push({
-          role: 'assistant',
-          content: [
-            { type: 'text', text: 'Found the issue, writing the fix now.' },
-            {
-              type: 'tool-call',
-              toolCallId: 'write-1',
-              toolName: 'write_file',
-              input: { path: 'auth.ts', content: 'const auth = fixed' },
-            } as ToolCallContentBlock,
-          ],
-        })
-
-        history.push({
-          role: 'tool',
-          toolCallId: 'write-1',
-          toolName: 'write_file',
-          content: [{ type: 'json', value: { file: 'auth.ts', message: 'File written' } }],
-        })
+        const mainAgentState = params.action.sessionState.mainAgentState
+
+        // Match the real runtime's behavior: replace messageHistory with a new
+        // array that includes the user prompt as its first new entry. The SDK
+        // detects runtime progress via reference inequality, so we must
+        // reassign the array rather than pushing into it.
+        mainAgentState.messageHistory = [
+          ...mainAgentState.messageHistory,
+          {
+            role: 'user',
+            content: [{ type: 'text', text: 'Fix the bug in auth.ts' }],
+            tags: ['USER_PROMPT'],
+          },
+          {
+            role: 'assistant',
+            content: [
+              { type: 'text', text: 'Let me read the auth file first.' },
+              {
+                type: 'tool-call',
+                toolCallId: 'read-1',
+                toolName: 'read_files',
+                input: { paths: ['auth.ts'] },
+              } as ToolCallContentBlock,
+            ],
+          },
+          {
+            role: 'tool',
+            toolCallId: 'read-1',
+            toolName: 'read_files',
+            content: [
+              {
+                type: 'json',
+                value: [{ path: 'auth.ts', content: 'const auth = ...' }],
+              },
+            ],
+          },
+          {
+            role: 'assistant',
+            content: [
+              { type: 'text', text: 'Found the issue, writing the fix now.' },
+              {
+                type: 'tool-call',
+                toolCallId: 'write-1',
+                toolName: 'write_file',
+                input: { path: 'auth.ts', content: 'const auth = fixed' },
+              } as ToolCallContentBlock,
+            ],
+          },
+          {
+            role: 'tool',
+            toolCallId: 'write-1',
+            toolName: 'write_file',
+            content: [{ type: 'json', value: { file: 'auth.ts', message: 'File written' } }],
+          },
+        ]
 
         // Now simulate a server timeout on the next LLM call
         const timeoutError = new Error('Service Unavailable') as Error & {
@@ -195,31 +194,34 @@ describe('Error preserves in-progress message history', () => {
     // Run 1: agent does some work then hits an error
     spyOn(mainPromptModule, 'callMainPrompt').mockImplementation(
       async (params: Parameters<typeof mainPromptModule.callMainPrompt>[0]) => {
-        const history = params.action.sessionState.mainAgentState.messageHistory
-
-        history.push({
-          role: 'user',
-          content: [{ type: 'text', text: 'Investigate the login bug' }],
-          tags: ['USER_PROMPT'],
-        })
-        history.push(assistantMessage('I found the problem in auth.ts on line 42.'))
-        history.push({
-          role: 'assistant',
-          content: [
-            {
-              type: 'tool-call',
-              toolCallId: 'read-login',
-              toolName: 'read_files',
-              input: { paths: ['login.ts'] },
-            } as ToolCallContentBlock,
-          ],
-        })
-        history.push({
-          role: 'tool',
-          toolCallId: 'read-login',
-          toolName: 'read_files',
-          content: [{ type: 'json', value: [{ path: 'login.ts', content: 'login code' }] }],
-        })
+        const mainAgentState = params.action.sessionState.mainAgentState
+
+        mainAgentState.messageHistory = [
+          ...mainAgentState.messageHistory,
+          {
+            role: 'user',
+            content: [{ type: 'text', text: 'Investigate the login bug' }],
+            tags: ['USER_PROMPT'],
+          },
+          assistantMessage('I found the problem in auth.ts on line 42.'),
+          {
+            role: 'assistant',
+            content: [
+              {
+                type: 'tool-call',
+                toolCallId: 'read-login',
+                toolName: 'read_files',
+                input: { paths: ['login.ts'] },
+              } as ToolCallContentBlock,
+            ],
+          },
+          {
+            role: 'tool',
+            toolCallId: 'read-login',
+            toolName: 'read_files',
+            content: [{ type: 'json', value: [{ path: 'login.ts', content: 'login code' }] }],
+          },
+        ]
 
         const error = new Error('Service Unavailable') as Error & {
           statusCode: number
diff --git a/sdk/src/run.ts b/sdk/src/run.ts
@@ -282,21 +282,24 @@ async function runOnce({
     }
   }
 
-  // The agent runtime mutates sessionState.mainAgentState as it progresses, so any
-  // messages added beyond this baseline reflect in-progress work that should be preserved.
-  const initialHistoryLength = sessionState.mainAgentState.messageHistory.length
+  // The agent runtime mutates sessionState.mainAgentState as it progresses,
+  // replacing messageHistory with a new array once it adds the user prompt.
+  // Comparing array identity detects progress more robustly than length:
+  // context pruning can shrink history below its starting length even
+  // though the runtime did run.
+  const initialMessageHistory = sessionState.mainAgentState.messageHistory
 
   /** Calculates the current session state if cancelled.
    *
    * This is used when callMainPrompt throws an error. If the agent runtime made
-   * any progress (added messages to the shared session state), those messages are
+   * any progress (replaced the shared messageHistory), those messages are
    * preserved. Otherwise the user's message is added so it isn't lost.
    */
   function getCancelledSessionState(message: string): SessionState {
-    const state = cloneDeep(sessionState)
-
     const runtimeMadeProgress =
-      state.mainAgentState.messageHistory.length > initialHistoryLength
+      sessionState.mainAgentState.messageHistory !== initialMessageHistory
+
+    const state = cloneDeep(sessionState)
 
     // Only add the user's message if the runtime didn't get a chance to add it.
     if (!runtimeMadeProgress && (prompt || preparedContent)) {