Skip to content

Commit 08a82ff

Browse files
committed
Use reference inequality to detect runtime progress; doc mutation contract
Addresses review feedback:
- Switch the SDK's runtimeMadeProgress check from a history-length comparison to array-reference inequality. This is robust to context pruning shrinking the history below its starting length mid-run, which would otherwise cause a false negative and duplicate the user prompt.
- Document loopAgentSteps' mutation contract: it mutates params.agentState in place throughout the run, so callers see in-progress work even when an error is thrown before a normal return.
1 parent 21769ae commit 08a82ff

3 files changed

Lines changed: 104 additions & 88 deletions

File tree

packages/agent-runtime/src/run-agent-step.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,17 @@ export const runAgentStep = async (
536536
}
537537
}
538538

539+
/**
540+
* Runs the agent loop.
541+
*
542+
* IMPORTANT: This function mutates `params.agentState` in place throughout the
543+
* run (not just at return time). Fields like `messageHistory`, `systemPrompt`,
544+
* `toolDefinitions`, `creditsUsed`, and `output` are updated as work progresses
545+
* so that callers holding a reference to the same object (e.g. the SDK's
546+
* `sessionState.mainAgentState`) see in-progress work immediately — which
547+
* matters when an error is thrown mid-run and the normal return path is
548+
* skipped.
549+
*/
539550
export async function loopAgentSteps(
540551
params: {
541552
addAgentStep: AddAgentStepFn

sdk/src/__tests__/run-error-preserves-history.test.ts

Lines changed: 83 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -43,62 +43,61 @@ describe('Error preserves in-progress message history', () => {
4343
// 2. Then throws due to a downstream timeout/service error
4444
spyOn(mainPromptModule, 'callMainPrompt').mockImplementation(
4545
async (params: Parameters<typeof mainPromptModule.callMainPrompt>[0]) => {
46-
const history = params.action.sessionState.mainAgentState.messageHistory
47-
48-
// The runtime adds the user message as part of building its initial messages
49-
history.push({
50-
role: 'user',
51-
content: [{ type: 'text', text: 'Fix the bug in auth.ts' }],
52-
tags: ['USER_PROMPT'],
53-
})
54-
55-
// Step 1: assistant responds with a tool call (reading a file)
56-
history.push({
57-
role: 'assistant',
58-
content: [
59-
{ type: 'text', text: 'Let me read the auth file first.' },
60-
{
61-
type: 'tool-call',
62-
toolCallId: 'read-1',
63-
toolName: 'read_files',
64-
input: { paths: ['auth.ts'] },
65-
} as ToolCallContentBlock,
66-
],
67-
})
68-
69-
// Tool result
70-
history.push({
71-
role: 'tool',
72-
toolCallId: 'read-1',
73-
toolName: 'read_files',
74-
content: [
75-
{
76-
type: 'json',
77-
value: [{ path: 'auth.ts', content: 'const auth = ...' }],
78-
},
79-
],
80-
})
81-
82-
// Step 2: assistant continues with another tool call (writing the fix)
83-
history.push({
84-
role: 'assistant',
85-
content: [
86-
{ type: 'text', text: 'Found the issue, writing the fix now.' },
87-
{
88-
type: 'tool-call',
89-
toolCallId: 'write-1',
90-
toolName: 'write_file',
91-
input: { path: 'auth.ts', content: 'const auth = fixed' },
92-
} as ToolCallContentBlock,
93-
],
94-
})
95-
96-
history.push({
97-
role: 'tool',
98-
toolCallId: 'write-1',
99-
toolName: 'write_file',
100-
content: [{ type: 'json', value: { file: 'auth.ts', message: 'File written' } }],
101-
})
46+
const mainAgentState = params.action.sessionState.mainAgentState
47+
48+
// Match the real runtime's behavior: replace messageHistory with a new
49+
// array that includes the user prompt as its first entry. The SDK
50+
// detects runtime progress via reference inequality, so we must
51+
// reassign the array rather than pushing into it.
52+
mainAgentState.messageHistory = [
53+
...mainAgentState.messageHistory,
54+
{
55+
role: 'user',
56+
content: [{ type: 'text', text: 'Fix the bug in auth.ts' }],
57+
tags: ['USER_PROMPT'],
58+
},
59+
{
60+
role: 'assistant',
61+
content: [
62+
{ type: 'text', text: 'Let me read the auth file first.' },
63+
{
64+
type: 'tool-call',
65+
toolCallId: 'read-1',
66+
toolName: 'read_files',
67+
input: { paths: ['auth.ts'] },
68+
} as ToolCallContentBlock,
69+
],
70+
},
71+
{
72+
role: 'tool',
73+
toolCallId: 'read-1',
74+
toolName: 'read_files',
75+
content: [
76+
{
77+
type: 'json',
78+
value: [{ path: 'auth.ts', content: 'const auth = ...' }],
79+
},
80+
],
81+
},
82+
{
83+
role: 'assistant',
84+
content: [
85+
{ type: 'text', text: 'Found the issue, writing the fix now.' },
86+
{
87+
type: 'tool-call',
88+
toolCallId: 'write-1',
89+
toolName: 'write_file',
90+
input: { path: 'auth.ts', content: 'const auth = fixed' },
91+
} as ToolCallContentBlock,
92+
],
93+
},
94+
{
95+
role: 'tool',
96+
toolCallId: 'write-1',
97+
toolName: 'write_file',
98+
content: [{ type: 'json', value: { file: 'auth.ts', message: 'File written' } }],
99+
},
100+
]
102101

103102
// Now simulate a server timeout on the next LLM call
104103
const timeoutError = new Error('Service Unavailable') as Error & {
@@ -195,31 +194,34 @@ describe('Error preserves in-progress message history', () => {
195194
// Run 1: agent does some work then hits an error
196195
spyOn(mainPromptModule, 'callMainPrompt').mockImplementation(
197196
async (params: Parameters<typeof mainPromptModule.callMainPrompt>[0]) => {
198-
const history = params.action.sessionState.mainAgentState.messageHistory
199-
200-
history.push({
201-
role: 'user',
202-
content: [{ type: 'text', text: 'Investigate the login bug' }],
203-
tags: ['USER_PROMPT'],
204-
})
205-
history.push(assistantMessage('I found the problem in auth.ts on line 42.'))
206-
history.push({
207-
role: 'assistant',
208-
content: [
209-
{
210-
type: 'tool-call',
211-
toolCallId: 'read-login',
212-
toolName: 'read_files',
213-
input: { paths: ['login.ts'] },
214-
} as ToolCallContentBlock,
215-
],
216-
})
217-
history.push({
218-
role: 'tool',
219-
toolCallId: 'read-login',
220-
toolName: 'read_files',
221-
content: [{ type: 'json', value: [{ path: 'login.ts', content: 'login code' }] }],
222-
})
197+
const mainAgentState = params.action.sessionState.mainAgentState
198+
199+
mainAgentState.messageHistory = [
200+
...mainAgentState.messageHistory,
201+
{
202+
role: 'user',
203+
content: [{ type: 'text', text: 'Investigate the login bug' }],
204+
tags: ['USER_PROMPT'],
205+
},
206+
assistantMessage('I found the problem in auth.ts on line 42.'),
207+
{
208+
role: 'assistant',
209+
content: [
210+
{
211+
type: 'tool-call',
212+
toolCallId: 'read-login',
213+
toolName: 'read_files',
214+
input: { paths: ['login.ts'] },
215+
} as ToolCallContentBlock,
216+
],
217+
},
218+
{
219+
role: 'tool',
220+
toolCallId: 'read-login',
221+
toolName: 'read_files',
222+
content: [{ type: 'json', value: [{ path: 'login.ts', content: 'login code' }] }],
223+
},
224+
]
223225

224226
const error = new Error('Service Unavailable') as Error & {
225227
statusCode: number

sdk/src/run.ts

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -282,21 +282,24 @@ async function runOnce({
282282
}
283283
}
284284

285-
// The agent runtime mutates sessionState.mainAgentState as it progresses, so any
286-
// messages added beyond this baseline reflect in-progress work that should be preserved.
287-
const initialHistoryLength = sessionState.mainAgentState.messageHistory.length
285+
// The agent runtime mutates sessionState.mainAgentState as it progresses,
286+
// replacing messageHistory with a new array once it adds the user prompt.
287+
// Comparing array identity detects progress more robustly than length:
288+
// context pruning could shrink history below its starting length without
289+
// meaning the runtime never ran.
290+
const initialMessageHistory = sessionState.mainAgentState.messageHistory
288291

289292
/** Calculates the current session state if cancelled.
290293
*
291294
* This is used when callMainPrompt throws an error. If the agent runtime made
292-
* any progress (added messages to the shared session state), those messages are
295+
* any progress (replaced the shared messageHistory), those messages are
293296
* preserved. Otherwise the user's message is added so it isn't lost.
294297
*/
295298
function getCancelledSessionState(message: string): SessionState {
296-
const state = cloneDeep(sessionState)
297-
298299
const runtimeMadeProgress =
299-
state.mainAgentState.messageHistory.length > initialHistoryLength
300+
sessionState.mainAgentState.messageHistory !== initialMessageHistory
301+
302+
const state = cloneDeep(sessionState)
300303

301304
// Only add the user's message if the runtime didn't get a chance to add it.
302305
if (!runtimeMadeProgress && (prompt || preparedContent)) {

0 commit comments

Comments
 (0)