From 7a3c51934f2474d1c66ba441621dd85ff8796429 Mon Sep 17 00:00:00 2001 From: Stephen Hellicar Date: Tue, 7 Apr 2026 17:13:03 +1000 Subject: [PATCH 1/3] Fix caching, cache tools and system prompt. --- apps/claude-sdk-cli/package.json | 10 ++-- apps/claude-sdk-cli/src/runAgent.ts | 7 +-- .../claude-sdk/src/private/RequestBuilder.ts | 17 +++++-- pnpm-lock.yaml | 48 +++++++++---------- 4 files changed, 47 insertions(+), 35 deletions(-) diff --git a/apps/claude-sdk-cli/package.json b/apps/claude-sdk-cli/package.json index 1fd50c8..62a7e9a 100644 --- a/apps/claude-sdk-cli/package.json +++ b/apps/claude-sdk-cli/package.json @@ -1,6 +1,6 @@ { "name": "@shellicar/claude-sdk-cli", - "version": "0.0.0", + "version": "1.0.0-alpha.4", "private": false, "description": "Interactive CLI for Claude AI built on the Anthropic SDK", "license": "MIT", @@ -42,15 +42,15 @@ "@types/node": "^25.5.2", "esbuild": "^0.27.5", "tsx": "^4.21.0", - "vitest": "^4.1.2" - }, - "dependencies": { - "@anthropic-ai/sdk": "^0.82.0", + "vitest": "^4.1.2", "@shellicar/claude-core": "workspace:^", "@shellicar/claude-sdk": "workspace:^", "@shellicar/claude-sdk-tools": "workspace:^", "cli-highlight": "^2.1.11", "winston": "^3.19.0", "zod": "^4.3.6" + }, + "dependencies": { + "@anthropic-ai/sdk": "^0.82.0" } } diff --git a/apps/claude-sdk-cli/src/runAgent.ts b/apps/claude-sdk-cli/src/runAgent.ts index f123a76..5b95384 100644 --- a/apps/claude-sdk-cli/src/runAgent.ts +++ b/apps/claude-sdk-cli/src/runAgent.ts @@ -22,7 +22,7 @@ import { systemPrompts } from './systemPrompts.js'; export async function runAgent(agent: IAnthropicAgent, prompt: string, layout: AppLayout, store: RefStore): Promise { const pipeSource = [Find, ReadFile, Grep, Head, Tail, Range, SearchFiles]; - const { tool: Ref, transformToolResult: refTransform } = createRef(store, 2_000); + const { tool: Ref, transformToolResult: refTransform } = createRef(store, 20_000); const otherTools = [PreviewEdit, EditFile, CreateFile, DeleteFile, 
DeleteDirectory, Exec, Ref]; const pipe = createPipe(pipeSource); const tools: AnyToolDefinition[] = [pipe, ...pipeSource, ...otherTools]; @@ -44,9 +44,10 @@ export async function runAgent(agent: IAnthropicAgent, prompt: string, layout: A const { port, done } = agent.runAgent({ model, - maxTokens: 32768, + maxTokens: 8000, messages: [prompt], systemPrompts, + cacheTtl: '1h', transformToolResult, pauseAfterCompact: true, compactInputTokens: 150_000, @@ -58,7 +59,7 @@ export async function runAgent(agent: IAnthropicAgent, prompt: string, layout: A [AnthropicBeta.ClaudeCodeAuth]: true, // [AnthropicBeta.InterleavedThinking]: true, [AnthropicBeta.ContextManagement]: false, - [AnthropicBeta.PromptCachingScope]: true, + [AnthropicBeta.PromptCachingScope]: false, // [AnthropicBeta.Effort]: true, [AnthropicBeta.AdvancedToolUse]: true, // [AnthropicBeta.TokenEfficientTools]: true, diff --git a/packages/claude-sdk/src/private/RequestBuilder.ts b/packages/claude-sdk/src/private/RequestBuilder.ts index 219a604..3ad30df 100644 --- a/packages/claude-sdk/src/private/RequestBuilder.ts +++ b/packages/claude-sdk/src/private/RequestBuilder.ts @@ -1,6 +1,6 @@ import type { Anthropic } from '@anthropic-ai/sdk'; import type { BetaMessageStreamParams } from '@anthropic-ai/sdk/resources/beta/messages.js'; -import type { BetaCacheControlEphemeral, BetaClearThinking20251015Edit, BetaClearToolUses20250919Edit, BetaCompact20260112Edit, BetaContextManagementConfig, BetaToolUnion } from '@anthropic-ai/sdk/resources/beta.mjs'; +import type { BetaCacheControlEphemeral, BetaClearThinking20251015Edit, BetaClearToolUses20250919Edit, BetaCompact20260112Edit, BetaContextManagementConfig, BetaTextBlockParam, BetaToolUnion } from '@anthropic-ai/sdk/resources/beta.mjs'; import { AnthropicBeta } from '../public/enums'; import type { RunAgentQuery } from '../public/types'; import { AGENT_SDK_PREFIX } from './consts'; @@ -50,14 +50,25 @@ export function buildRequestParams(options: RunAgentQuery, messages: 
Anthropic.B } satisfies BetaCompact20260112Edit); } - const systemPrompts = [AGENT_SDK_PREFIX, ...(options.systemPrompts ?? [])]; + const systemPrompts = [AGENT_SDK_PREFIX]; + if (options.systemPrompts != null && options.systemPrompts.length > 0) { + systemPrompts.push(`\n${options.systemPrompts.join('\n\n')}`); + } + + const lastTool = tools[tools.length - 1]; + if (lastTool != null) { + lastTool.cache_control = { + type: 'ephemeral', + ttl: options.cacheTtl, + }; + } const body: BetaMessageStreamParams = { model: options.model, max_tokens: options.maxTokens, tools, context_management, - system: systemPrompts.map((text) => ({ type: 'text', text })), + system: systemPrompts.map((text) => ({ type: 'text', text, cache_control: { type: 'ephemeral', ttl: options.cacheTtl } } satisfies BetaTextBlockParam)), messages, stream: true, } satisfies BetaMessageStreamParams; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f257b5b..271fd37 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -109,6 +109,13 @@ importers: '@anthropic-ai/sdk': specifier: ^0.82.0 version: 0.82.0(zod@4.3.6) + devDependencies: + '@shellicar/build-clean': + specifier: ^1.3.2 + version: 1.3.2(esbuild@0.27.5)(rolldown@1.0.0-rc.12)(vite@8.0.5(@types/node@25.5.2)(esbuild@0.27.5)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.3)) + '@shellicar/build-version': + specifier: ^1.3.6 + version: 1.3.6(esbuild@0.27.5)(vite@8.0.5(@types/node@25.5.2)(esbuild@0.27.5)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.3)) '@shellicar/claude-core': specifier: workspace:^ version: link:../../packages/claude-core @@ -118,22 +125,6 @@ importers: '@shellicar/claude-sdk-tools': specifier: workspace:^ version: link:../../packages/claude-sdk-tools - cli-highlight: - specifier: ^2.1.11 - version: 2.1.11 - winston: - specifier: ^3.19.0 - version: 3.19.0 - zod: - specifier: ^4.3.6 - version: 4.3.6 - devDependencies: - '@shellicar/build-clean': - specifier: ^1.3.2 - version: 
1.3.2(esbuild@0.27.5)(rolldown@1.0.0-rc.12)(vite@8.0.5(@types/node@25.5.2)(esbuild@0.27.5)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.3)) - '@shellicar/build-version': - specifier: ^1.3.6 - version: 1.3.6(esbuild@0.27.5)(vite@8.0.5(@types/node@25.5.2)(esbuild@0.27.5)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.3)) '@shellicar/typescript-config': specifier: workspace:* version: link:../../packages/typescript-config @@ -143,6 +134,9 @@ importers: '@types/node': specifier: ^25.5.2 version: 25.5.2 + cli-highlight: + specifier: ^2.1.11 + version: 2.1.11 esbuild: specifier: ^0.27.5 version: 0.27.5 @@ -152,6 +146,12 @@ importers: vitest: specifier: ^4.1.2 version: 4.1.2(@types/node@25.5.2)(vite@8.0.5(@types/node@25.5.2)(esbuild@0.27.5)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.3)) + winston: + specifier: ^3.19.0 + version: 3.19.0 + zod: + specifier: ^4.3.6 + version: 4.3.6 packages/claude-core: dependencies: @@ -1854,8 +1854,8 @@ packages: run-parallel@1.2.0: resolution: {integrity: sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==} - safe-buffer@5.1.2: - resolution: {integrity: sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==} + safe-buffer@5.2.1: + resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==} safe-stable-stringify@2.5.0: resolution: {integrity: sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA==} @@ -1940,8 +1940,8 @@ packages: resolution: {integrity: sha512-6hJPQ8N0V0P3SNmP6h2J99RLuzrWz2gvT7VnK5tKvrNqJoyS9W4/Fb8mo31UiPvy00z7DQXkP2hnKBVav76thw==} engines: {node: '>=20'} - string_decoder@1.1.1: - resolution: {integrity: sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==} + string_decoder@1.3.0: + resolution: {integrity: sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==} strip-ansi@6.0.1: resolution: 
{integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==} @@ -3519,7 +3519,7 @@ snapshots: readable-stream@3.6.2: dependencies: inherits: 2.0.4 - string_decoder: 1.1.1 + string_decoder: 1.3.0 util-deprecate: 1.0.2 require-directory@2.1.1: {} @@ -3565,7 +3565,7 @@ snapshots: dependencies: queue-microtask: 1.2.3 - safe-buffer@5.1.2: {} + safe-buffer@5.2.1: {} safe-stable-stringify@2.5.0: {} @@ -3690,9 +3690,9 @@ snapshots: get-east-asian-width: 1.5.0 strip-ansi: 7.2.0 - string_decoder@1.1.1: + string_decoder@1.3.0: dependencies: - safe-buffer: 5.1.2 + safe-buffer: 5.2.1 strip-ansi@6.0.1: dependencies: From 4ca57673c505c80cb7b5434cd387d6bfe612a38d Mon Sep 17 00:00:00 2001 From: Stephen Hellicar Date: Tue, 7 Apr 2026 22:37:55 +1000 Subject: [PATCH 2/3] Add cache_control to last user message block on every API call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Anthropic API supports prompt caching via cache_control on content blocks. Previously this wasn't being set on the user message, so the cache boundary was only on the system prompts and tool definitions. WithCachedLastUserMessage wraps the outgoing message list and attaches cache_control to the last non-thinking block of the last user message without mutating the caller's array. String content is promoted to a text block array so the field has somewhere to live. Thinking blocks are skipped because BetaThinkingBlockParam has no cache_control property — the spread would be a type error. 
--- apps/claude-sdk-cli/src/systemPrompts.ts | 19 ++- packages/claude-sdk/src/private/AgentRun.ts | 2 +- .../claude-sdk/src/private/RequestBuilder.ts | 51 ++++++- packages/claude-sdk/src/public/types.ts | 5 +- .../claude-sdk/test/RequestBuilder.spec.ts | 133 +++++++++++++++++- 5 files changed, 189 insertions(+), 21 deletions(-) diff --git a/apps/claude-sdk-cli/src/systemPrompts.ts b/apps/claude-sdk-cli/src/systemPrompts.ts index 65177f5..8259c19 100644 --- a/apps/claude-sdk-cli/src/systemPrompts.ts +++ b/apps/claude-sdk-cli/src/systemPrompts.ts @@ -3,18 +3,17 @@ * Temporary / hardcoded until a proper configuration layer exists. */ -const gpgSigning = `\ -Every git commit in this repo is GPG-signed. The signing flows through Stephen's \ +export const systemPrompts = [ + `Every git commit in this repo is GPG-signed. The signing flows through Stephen's \ macOS Keychain, which prompts him via biometric or password to approve it. \ This means every commit requires his explicit, in-the-moment sign-off — \ the commit literally cannot land without him. When making a commit, just run it. The keychain prompt is how Stephen approves it. \ Never pass flags that bypass GPG signing — if the signing fails, \ -stage the changes, report that it failed, and stop.`; +stage the changes, report that it failed, and stop.`, -const conventionalCommits = `\ -Conventional Commits defines exactly two commit message types: fix and feat. \ + `Conventional Commits defines exactly two commit message types: fix and feat. \ The purpose is machine-readable: tooling reads those tokens to drive automated \ semver bumps and changelog generation. That is the entire point of the spec. @@ -24,10 +23,9 @@ following its conventions would be adopting the form with none of the function. For branch names, use plain English words that describe the work: \ fix/, feature/, docs/, security/ are all fine. 
\ If a prefix feels like it came from a spec rather than the English language, \ -that is a sign it does not belong there.`; +that is a sign it does not belong there.`, -const selfNote = `\ -The why matters more than the what. Anyone can read what happened; \ + `The why matters more than the what. Anyone can read what happened; \ only the reasoning explains whether it was right. Write reasoning as you go — not for documentation, but because articulating \ @@ -39,6 +37,7 @@ ground truth. Starting from a proposal before understanding what exists \ leads to conflicts with work already done. Before applying a convention or pattern, ask whether it fits this specific \ -context or is just familiar. Familiarity is not a reason.`; +context or is just familiar. Familiarity is not a reason.`, -export const systemPrompts: string[] = [gpgSigning, conventionalCommits, selfNote]; + `When a tool call is rejected, treat it as the user saying "no" - not as a transient failure to retry. Do not attempt the same action again with minor variations.`, +]; diff --git a/packages/claude-sdk/src/private/AgentRun.ts b/packages/claude-sdk/src/private/AgentRun.ts index 16a32b6..f85f772 100644 --- a/packages/claude-sdk/src/private/AgentRun.ts +++ b/packages/claude-sdk/src/private/AgentRun.ts @@ -195,7 +195,7 @@ export class AgentRun { pending.splice(index, 1); if (!response.approved) { - const content = response.reason ?? 'Tool use rejected'; + const content = response.reason ?? 
'Rejected by user, do not reattempt'; this.#logger?.debug('tool_rejected', { name: toolUse.name, reason: content }); toolResults.push({ type: 'tool_result', tool_use_id: toolUse.id, is_error: true, content }); continue; diff --git a/packages/claude-sdk/src/private/RequestBuilder.ts b/packages/claude-sdk/src/private/RequestBuilder.ts index 3ad30df..119c4f0 100644 --- a/packages/claude-sdk/src/private/RequestBuilder.ts +++ b/packages/claude-sdk/src/private/RequestBuilder.ts @@ -1,8 +1,8 @@ import type { Anthropic } from '@anthropic-ai/sdk'; import type { BetaMessageStreamParams } from '@anthropic-ai/sdk/resources/beta/messages.js'; -import type { BetaCacheControlEphemeral, BetaClearThinking20251015Edit, BetaClearToolUses20250919Edit, BetaCompact20260112Edit, BetaContextManagementConfig, BetaTextBlockParam, BetaToolUnion } from '@anthropic-ai/sdk/resources/beta.mjs'; +import type { BetaCacheControlEphemeral, BetaClearThinking20251015Edit, BetaClearToolUses20250919Edit, BetaCompact20260112Edit, BetaContentBlockParam, BetaContextManagementConfig, BetaTextBlockParam, BetaToolUnion } from '@anthropic-ai/sdk/resources/beta.mjs'; import { AnthropicBeta } from '../public/enums'; -import type { RunAgentQuery } from '../public/types'; +import { CacheTtl, type RunAgentQuery } from '../public/types'; import { AGENT_SDK_PREFIX } from './consts'; export type RequestParams = { @@ -10,6 +10,47 @@ export type RequestParams = { headers: { 'anthropic-beta': string }; }; +function addCacheControlToLastBlock(msg: Anthropic.Beta.Messages.BetaMessageParam, cacheTtl: CacheTtl | undefined): Anthropic.Beta.Messages.BetaMessageParam { + const cache_control = { type: 'ephemeral' as const, ttl: cacheTtl }; + + if (typeof msg.content === 'string') { + const content: BetaContentBlockParam[] = [{ type: 'text', text: msg.content, cache_control }]; + return { ...msg, content }; + } + + const content = [...msg.content]; + const idx = content.findLastIndex((b) => b.type !== 'thinking' && b.type !== 
'redacted_thinking'); + if (idx === -1) { + return msg; + } + + const block = content[idx]; + if (block == null || block.type === 'thinking' || block.type === 'redacted_thinking') { + return msg; + } + + content[idx] = { ...block, cache_control }; + return { ...msg, content }; +} + +function withCachedLastUserMessage(messages: Anthropic.Beta.Messages.BetaMessageParam[], cacheTtl: CacheTtl | undefined): Anthropic.Beta.Messages.BetaMessageParam[] { + const idx = messages.findLastIndex((m) => m.role === 'user'); + if (idx === -1) { + return messages; + } + + const msg = messages[idx]; + if (msg == null) { + return messages; + } + + const cached = addCacheControlToLastBlock(msg, cacheTtl); + + const result = [...messages]; + result[idx] = cached; + return result; +} + /** * Pure function — builds the Anthropic API request params from agent options * and the current message list. No I/O, no client reference, no signal. @@ -55,6 +96,8 @@ export function buildRequestParams(options: RunAgentQuery, messages: Anthropic.B systemPrompts.push(`\n${options.systemPrompts.join('\n\n')}`); } + const messagesForBody = withCachedLastUserMessage(messages, options.cacheTtl ?? 
CacheTtl.OneHour); + const lastTool = tools[tools.length - 1]; if (lastTool != null) { lastTool.cache_control = { @@ -68,8 +111,8 @@ export function buildRequestParams(options: RunAgentQuery, messages: Anthropic.B max_tokens: options.maxTokens, tools, context_management, - system: systemPrompts.map((text) => ({ type: 'text', text, cache_control: { type: 'ephemeral', ttl: options.cacheTtl } } satisfies BetaTextBlockParam)), - messages, + system: systemPrompts.map((text) => ({ type: 'text', text, cache_control: { type: 'ephemeral', ttl: options.cacheTtl } }) satisfies BetaTextBlockParam), + messages: messagesForBody, stream: true, } satisfies BetaMessageStreamParams; diff --git a/packages/claude-sdk/src/public/types.ts b/packages/claude-sdk/src/public/types.ts index bf7a47c..0926490 100644 --- a/packages/claude-sdk/src/public/types.ts +++ b/packages/claude-sdk/src/public/types.ts @@ -25,7 +25,10 @@ export type AnyToolDefinition = { export type AnthropicBetaFlags = Partial>; -export type CacheTtl = '5m' | '1h'; +export enum CacheTtl { + FiveMinutes = '5m', + OneHour = '1h', +} export type RunAgentQuery = { model: Model; diff --git a/packages/claude-sdk/test/RequestBuilder.spec.ts b/packages/claude-sdk/test/RequestBuilder.spec.ts index 61ce20b..0e5e3b2 100644 --- a/packages/claude-sdk/test/RequestBuilder.spec.ts +++ b/packages/claude-sdk/test/RequestBuilder.spec.ts @@ -1,9 +1,10 @@ import type { Anthropic } from '@anthropic-ai/sdk'; import { describe, expect, it } from 'vitest'; +import type { BetaMessageParam } from '../src/index.js'; import { AGENT_SDK_PREFIX } from '../src/private/consts.js'; import { buildRequestParams } from '../src/private/RequestBuilder.js'; import { AnthropicBeta } from '../src/public/enums.js'; -import type { AnyToolDefinition, RunAgentQuery } from '../src/public/types.js'; +import { type AnyToolDefinition, CacheTtl, type RunAgentQuery } from '../src/public/types.js'; // 
--------------------------------------------------------------------------- // Helpers @@ -36,6 +37,26 @@ function makeOptions(overrides: Partial = {}): RunAgentQuery { }; } +function getContentCacheControl(messages: BetaMessageParam[], messageIndex = -1, blockIndex = -1) { + const message = messages.at(messageIndex); + if (message == null) { + return undefined; + } + if (typeof message.content === 'string') { + return undefined; + } + + const block = message.content.at(blockIndex); + if (block == null) { + return undefined; + } + if (block.type === 'thinking' || block.type === 'redacted_thinking') { + return undefined; + } + + return block.cache_control ?? undefined; +} + const noMessages: Anthropic.Beta.Messages.BetaMessageParam[] = []; // --------------------------------------------------------------------------- @@ -81,7 +102,7 @@ describe('buildRequestParams — system prompts', () => { }); it('custom system prompts are appended after the prefix', () => { - const expected = ['prefix', 'second', 'third']; + const expected = ['prefix', '\nsecond\n\nthird']; const system = buildRequestParams(makeOptions({ systemPrompts: ['second', 'third'] }), noMessages).body.system as { type: string; text: string }[]; const actual = system.map((s) => (s.text === AGENT_SDK_PREFIX ? 
'prefix' : s.text)); expect(actual).toEqual(expected); @@ -93,6 +114,12 @@ describe('buildRequestParams — system prompts', () => { const actual = system.length; expect(actual).toBe(expected); }); + + it('all system prompts have cache_control set to ephemeral', () => { + const system = buildRequestParams(makeOptions({ systemPrompts: ['custom'] }), noMessages).body.system as { cache_control?: { type: string } }[]; + const actual = system.every((s) => s.cache_control?.type === 'ephemeral'); + expect(actual).toBe(true); + }); }); // --------------------------------------------------------------------------- @@ -273,10 +300,106 @@ describe('buildRequestParams — tools', () => { // --------------------------------------------------------------------------- describe('buildRequestParams — messages', () => { - it('messages array is passed through to body.messages', () => { + it('message text is preserved in body.messages', () => { + const expected = 'hello'; + const messages: Anthropic.Beta.Messages.BetaMessageParam[] = [{ role: 'user', content: [{ type: 'text', text: 'hello' }] }]; + const { body } = buildRequestParams(makeOptions(), messages); + const content = body.messages.at(0)?.content as { text: string }[]; + const actual = content.at(0)?.text; + expect(actual).toBe(expected); + }); + + it('last user message in body has cache_control on its last content block', () => { + const expected = { type: 'ephemeral', ttl: CacheTtl.OneHour }; const messages: Anthropic.Beta.Messages.BetaMessageParam[] = [{ role: 'user', content: [{ type: 'text', text: 'hello' }] }]; - const expected = messages; - const actual = buildRequestParams(makeOptions(), messages).body.messages; + const { body } = buildRequestParams(makeOptions(), messages); + const actual = getContentCacheControl(body.messages); + expect(actual).toEqual(expected); + }); + + it('does not mutate the input messages when adding cache_control', () => { + const expected = undefined; + const messages: 
Anthropic.Beta.Messages.BetaMessageParam[] = [{ role: 'user', content: [{ type: 'text', text: 'hello' }] }]; + buildRequestParams(makeOptions(), messages); + const actual = getContentCacheControl(messages); + expect(actual).toBe(expected); + }); + + it('string content is wrapped in an array block with cache_control', () => { + const expected = { type: 'ephemeral', ttl: CacheTtl.OneHour }; + const messages: Anthropic.Beta.Messages.BetaMessageParam[] = [{ role: 'user', content: 'hello string' }]; + const { body } = buildRequestParams(makeOptions(), messages); + const actual = getContentCacheControl(body.messages); + expect(actual).toEqual(expected); + }); + + it('does not add cache_control when last user message has only thinking blocks', () => { + const expected = undefined; + const messages: Anthropic.Beta.Messages.BetaMessageParam[] = [{ role: 'user', content: [{ type: 'thinking', thinking: 'hmm', signature: 'sig' }] }]; + const { body } = buildRequestParams(makeOptions(), messages); + const actual = getContentCacheControl(body.messages); + expect(actual).toBe(expected); + }); + + it('does not add cache_control when there are no user messages', () => { + const expected = undefined; + const messages: Anthropic.Beta.Messages.BetaMessageParam[] = [{ role: 'assistant', content: [{ type: 'text', text: 'response' }] }]; + const { body } = buildRequestParams(makeOptions(), messages); + const actual = getContentCacheControl(body.messages); + expect(actual).toBe(expected); + }); + + it('last user message gets cache_control even when an assistant message follows it', () => { + const expected = { type: 'ephemeral', ttl: CacheTtl.OneHour }; + const messages: Anthropic.Beta.Messages.BetaMessageParam[] = [ + { role: 'user', content: [{ type: 'text', text: 'question' }] }, + { role: 'assistant', content: [{ type: 'text', text: 'answer' }] }, + ]; + const { body } = buildRequestParams(makeOptions(), messages); + const actual = getContentCacheControl(body.messages, 0); + 
expect(actual).toEqual(expected); + }); + + it('assistant message does not get cache_control when only the user message should be cached', () => { + const expected = undefined; + const messages: Anthropic.Beta.Messages.BetaMessageParam[] = [ + { role: 'user', content: [{ type: 'text', text: 'question' }] }, + { role: 'assistant', content: [{ type: 'text', text: 'answer' }] }, + ]; + const { body } = buildRequestParams(makeOptions(), messages); + const actual = getContentCacheControl(body.messages); + expect(actual).toBe(expected); + }); + + it('last content block gets cache_control when there are multiple blocks', () => { + const expected = { type: 'ephemeral', ttl: CacheTtl.OneHour }; + const messages: Anthropic.Beta.Messages.BetaMessageParam[] = [ + { + role: 'user', + content: [ + { type: 'text', text: 'first' }, + { type: 'text', text: 'last' }, + ], + }, + ]; + const { body } = buildRequestParams(makeOptions(), messages); + const actual = getContentCacheControl(body.messages); + expect(actual).toEqual(expected); + }); + + it('earlier content blocks are not given cache_control when there are multiple blocks', () => { + const expected = undefined; + const messages: Anthropic.Beta.Messages.BetaMessageParam[] = [ + { + role: 'user', + content: [ + { type: 'text', text: 'first' }, + { type: 'text', text: 'last' }, + ], + }, + ]; + const { body } = buildRequestParams(makeOptions(), messages); + const actual = getContentCacheControl(body.messages, -1, 0); expect(actual).toBe(expected); }); }); From 099dd78cf37679f6a66c02292544cedac89128cf Mon Sep 17 00:00:00 2001 From: Stephen Hellicar Date: Tue, 7 Apr 2026 22:41:34 +1000 Subject: [PATCH 3/3] Add session log for 2026-04-08 --- .claude/sessions/2026-04-08.md | 55 ++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 .claude/sessions/2026-04-08.md diff --git a/.claude/sessions/2026-04-08.md b/.claude/sessions/2026-04-08.md new file mode 100644 index 0000000..f421bc2 --- /dev/null +++ 
b/.claude/sessions/2026-04-08.md @@ -0,0 +1,55 @@ +# Session 2026-04-08 + +## What was done + +### Fix a broken test + +`RequestBuilder.spec.ts` had a failing test: `custom system prompts are appended after the prefix`. The implementation joins custom system prompts into a single combined block (so there's one cache boundary instead of N). The test expected separate array entries. Fixed it to expect `['prefix', '\nsecond\n\nthird']`. + +### Add cache_control to the last user message on every API call (PR #205) + +The system prompts and tool definitions already had `cache_control`, but the conversation history did not. Without a cache boundary on the user message, the API re-reads the entire conversation on every turn, which means only the fixed prefix (system prompt + tools) benefits from caching. + +**`addCacheControlToLastBlock(msg, cacheTtl)`** — attaches `cache_control: { type: 'ephemeral', ttl }` to the last non-thinking content block of a message: +- String content is promoted to a `BetaContentBlockParam[]` array, since `cache_control` has nowhere to live on a plain string +- `findLastIndex` skips `thinking` and `redacted_thinking` blocks — `BetaThinkingBlockParam` has no `cache_control` property; trying to spread one onto it is a TypeScript error +- Returns `msg` unchanged when all blocks are thinking blocks (`findLastIndex === -1`) or the resolved block is somehow null + +**`withCachedLastUserMessage(messages, cacheTtl)`** — finds the last user message and applies `addCacheControlToLastBlock` without mutating the caller's array: +- Returns `messages` unchanged when no user messages exist +- Copies the array (`[...messages]`) and replaces the target element with the cached version + +**Call site in `buildRequestParams`:** `withCachedLastUserMessage(messages, options.cacheTtl ?? CacheTtl.OneHour)`. The cache boundary is attached regardless of the TTL; the `?? CacheTtl.OneHour` default only ensures it carries a one-hour TTL when the caller doesn't specify one. 
+ +### Tests + +Added 9 tests covering all branches: +- Array content → `cache_control` added to the last block +- String content → promoted to array block with `cache_control` +- All-thinking blocks → `findLastIndex === -1`, returned unchanged +- No user messages → returned unchanged +- User message followed by assistant → user gets `cache_control`, assistant does not +- Multiple content blocks → `cache_control` on last block only, not first +- Non-mutation of input array + +Also added `getContentCacheControl` helper in the test file to extract `cache_control` from a content block at a given message and block index without `!` assertions. + +Tests compare against the full `{ type: 'ephemeral', ttl: CacheTtl.OneHour }` object rather than just the type string, since `ttl` is the field that controls how long the cache entry lives. + +## Decisions + +**`findLastIndex` over a hand-rolled loop:** The intent reads directly — find the last non-thinking block. A `for` loop going backwards would work but obscures the intent and leaves more surface area to get wrong. + +**`?? CacheTtl.OneHour` default at the call site:** The cache boundary should always exist on the user message — without it the conversation history is never cached. Making the default explicit at the call site keeps the two functions general (`undefined` TTL means "no TTL field") while ensuring requests built by `buildRequestParams` always carry an explicit TTL on that boundary. + +**Non-mutation guarantee:** The caller passes its live messages array. Mutating it would change the caller's state, which violates the function's pure-function contract. The shallow copy (`[...messages]`) is enough since only one element is replaced. + +**Thinking block exclusion:** `BetaThinkingBlockParam` does not have a `cache_control` property. The spread `{ ...block, cache_control }` compiles but produces an object whose type no longer matches the union. 
Skipping thinking blocks avoids the type error and is correct semantically — the API documents cache boundaries on text/tool blocks, not thinking blocks. + +## Files changed + +- `packages/claude-sdk/src/private/RequestBuilder.ts` — added `addCacheControlToLastBlock`, `withCachedLastUserMessage`; call site updated +- `packages/claude-sdk/test/RequestBuilder.spec.ts` — fixed broken test, added `getContentCacheControl` helper, 9 new message caching tests and a system-prompt `cache_control` test; added `CacheTtl` and `BetaMessageParam` imports +- `packages/claude-sdk/src/public/types.ts` — `CacheTtl` changed from a string-literal union to a string enum (`FiveMinutes = '5m'`, `OneHour = '1h'`) +- `packages/claude-sdk/src/private/AgentRun.ts` — default tool-rejection message changed from 'Tool use rejected' to 'Rejected by user, do not reattempt' +- `apps/claude-sdk-cli/src/systemPrompts.ts` — prompts inlined into the exported array; added a prompt telling the model not to retry rejected tool calls