diff --git a/src/api/routers/runs.ts b/src/api/routers/runs.ts index a8a0684f..184af0da 100644 --- a/src/api/routers/runs.ts +++ b/src/api/routers/runs.ts @@ -119,7 +119,16 @@ export const runsRouter = router({ } const raw = await listLlmCallsMeta(input.runId); const calls = raw.map((c) => { - const { toolNames, textPreview } = parseLlmResponse(c.response); + const { blocks, textPreview } = parseLlmResponse(c.response); + const toolCalls = blocks + .filter( + (b): b is { kind: 'tool_use'; name: string; inputSummary: string } => + b.kind === 'tool_use', + ) + .map((b) => ({ name: b.name, inputSummary: b.inputSummary })); + const thinkingChars = blocks + .filter((b): b is { kind: 'thinking'; text: string } => b.kind === 'thinking') + .reduce((sum, b) => sum + b.text.length, 0); return { id: c.id, runId: c.runId, @@ -131,8 +140,9 @@ export const runsRouter = router({ durationMs: c.durationMs, model: c.model, createdAt: c.createdAt, - toolNames, + toolCalls, textPreview, + thinkingChars: thinkingChars > 0 ? thinkingChars : null, }; }); return { engine: run.engine ?? 'unknown', calls }; diff --git a/src/backends/claude-code/messageProcessing.ts b/src/backends/claude-code/messageProcessing.ts index 1322fbc9..fc891752 100644 --- a/src/backends/claude-code/messageProcessing.ts +++ b/src/backends/claude-code/messageProcessing.ts @@ -8,6 +8,7 @@ import type { SDKUserMessage, } from '@anthropic-ai/claude-agent-sdk'; import type { query } from '@anthropic-ai/claude-agent-sdk'; +import { calculateCost } from '../../utils/llmMetrics.js'; import { extractPRUrl } from '../../utils/prUrl.js'; import { buildEngineResult } from '../shared/engineResult.js'; import { logLlmCall } from '../shared/llmCallLogger.js'; @@ -129,6 +130,21 @@ export function extractFinishComment(assistantMessages: SDKAssistantMessage[]): return undefined; } +/** Report progress and log a single content block from an assistant message. */ +function processContentBlock( + block: { type: string; name?: string; input?: unknown; text?: string }, + input: AgentExecutionPlan, +): void { + if (block.type === 'tool_use' && block.name) { + input.progressReporter.onToolCall(block.name, block.input as Record); + } + if (block.type === 'text' && block.text !== undefined) { + const truncated = block.text.length > 300 ? `${block.text.slice(0, 300)}...` : block.text; + input.logWriter('INFO', 'Agent text', { text: truncated }); + input.progressReporter.onText(block.text); + } +} + /** * Process an assistant message: report progress, log text/errors/usage. */ @@ -137,17 +153,8 @@ export function processAssistantMessage( turnCount: number, input: AgentExecutionPlan, ): void { - if (assistantMsg.message?.content) { - for (const block of assistantMsg.message.content) { - if (block.type === 'tool_use') { - input.progressReporter.onToolCall(block.name, block.input as Record); - } - if (block.type === 'text') { - const truncated = block.text.length > 300 ? `${block.text.slice(0, 300)}...` : block.text; - input.logWriter('INFO', 'Agent text', { text: truncated }); - input.progressReporter.onText(block.text); - } - } + for (const block of assistantMsg.message?.content ?? []) { + processContentBlock(block, input); } if (assistantMsg.error) { @@ -163,6 +170,8 @@ export function processAssistantMessage( turn: turnCount, inputTokens: usage.input_tokens, outputTokens: usage.output_tokens, + cacheReadTokens: usage.cache_read_input_tokens ?? 0, + cacheWriteTokens: usage.cache_creation_input_tokens ?? 0, }); } } @@ -216,6 +225,14 @@ export function countToolCalls(assistantMsg: SDKAssistantMessage): number { return (assistantMsg.message?.content ?? []).filter((b) => b.type === 'tool_use').length; } +/** + * Convert a raw Anthropic model ID (e.g. 'claude-sonnet-4-5-20250929') to the + * pricing key format used by calculateCost() (e.g. 'anthropic:claude-sonnet-4-5'). + */ +function toPricingKey(model: string): string { + return `anthropic:${model}`.replace(/-\d{8}$/, ''); +} + /** * Log an LLM call for a single assistant message turn. */ @@ -228,6 +245,16 @@ export function logClaudeCodeLlmCall( if (!assistantMsg.message?.usage) return; const usage = assistantMsg.message.usage; + const cacheRead = usage.cache_read_input_tokens ?? 0; + const cacheWrite = usage.cache_creation_input_tokens ?? 0; + const totalInput = usage.input_tokens + cacheRead + cacheWrite; + const cost = calculateCost(toPricingKey(model), { + inputTokens: totalInput, + outputTokens: usage.output_tokens, + totalTokens: totalInput + usage.output_tokens, + cachedInputTokens: cacheRead, + }); + let response: string | undefined; try { response = JSON.stringify(assistantMsg.message.content ?? []); @@ -239,10 +266,10 @@ export function logClaudeCodeLlmCall( runId: input.runId, callNumber: turnCount, model, - inputTokens: usage.input_tokens, + inputTokens: totalInput, outputTokens: usage.output_tokens, - cachedTokens: undefined, - costUsd: undefined, + cachedTokens: cacheRead, + costUsd: cost > 0 ? cost : undefined, response, engineLabel: 'Claude Code', }); diff --git a/tests/unit/api/routers/runs.test.ts b/tests/unit/api/routers/runs.test.ts index 7e252ab0..eb1e3573 100644 --- a/tests/unit/api/routers/runs.test.ts +++ b/tests/unit/api/routers/runs.test.ts @@ -418,7 +418,7 @@ describe('runsRouter', () => { }); }); - it('extracts toolNames and textPreview from a Claude Code response payload', async () => { + it('extracts toolCalls and textPreview from a Claude Code response payload', async () => { const claudeCodeResponse = JSON.stringify([ { type: 'text', text: 'Let me read the file.' }, { type: 'tool_use', name: 'Read', input: { file_path: '/src/index.ts' } }, @@ -432,7 +432,11 @@ describe('runsRouter', () => { const caller = createCaller({ user: mockUser, effectiveOrgId: mockUser.orgId }); const result = await caller.listLlmCalls({ runId: RUN_UUID }); - expect(result.calls[0].toolNames).toEqual(['Read', 'Read', 'Bash']); + expect(result.calls[0].toolCalls).toEqual([ + { name: 'Read', inputSummary: '/src/index.ts' }, + { name: 'Read', inputSummary: '/src/utils.ts' }, + { name: 'Bash', inputSummary: 'npm test' }, + ]); expect(result.calls[0].textPreview).toBe('Let me read the file.'); }); diff --git a/tests/unit/backends/claude-code-messageProcessing.test.ts b/tests/unit/backends/claude-code-messageProcessing.test.ts new file mode 100644 index 00000000..8be54d1f --- /dev/null +++ b/tests/unit/backends/claude-code-messageProcessing.test.ts @@ -0,0 +1,167 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('../../../src/utils/logging.js', () => ({ + logger: { warn: vi.fn(), info: vi.fn(), error: vi.fn(), debug: vi.fn() }, +})); + +const mockStoreLlmCall = vi.fn().mockResolvedValue(undefined); +vi.mock('../../../src/db/repositories/runsRepository.js', () => ({ + storeLlmCall: (...args: unknown[]) => mockStoreLlmCall(...args), +})); + +import { logClaudeCodeLlmCall } from '../../../src/backends/claude-code/messageProcessing.js'; +import type { AgentExecutionPlan } from '../../../src/backends/types.js'; + +function makeInput(overrides: Partial = {}): AgentExecutionPlan { + return { + agentType: 'implementation', + runId: 'run-1', + project: { id: 'p1', name: 'Test', repo: 'o/r' } as AgentExecutionPlan['project'], + config: { projects: [] } as AgentExecutionPlan['config'], + repoDir: '/tmp/repo', + systemPrompt: 'sys', + taskPrompt: 'task', + cliToolsDir: '/usr/bin', + availableTools: [], + contextInjections: [], + maxIterations: 20, + budgetUsd: 5, + model: 'claude-sonnet-4-5-20250929', + progressReporter: { + onIteration: vi.fn().mockResolvedValue(undefined), + onToolCall: vi.fn(), + onText: vi.fn(), + }, + logWriter: vi.fn(), + ...overrides, + } as AgentExecutionPlan; +} + +function makeAssistantMsg(usage: { + input_tokens: number; + output_tokens: number; + cache_read_input_tokens?: number | null; + cache_creation_input_tokens?: number | null; +}) { + return { + type: 'assistant' as const, + message: { + id: 'msg_1', + type: 'message', + role: 'assistant', + content: [], + model: 'claude-sonnet-4-5-20250929', + stop_reason: 'end_turn', + stop_sequence: null, + usage, + }, + parent_tool_use_id: null, + session_id: 'sess-1', + uuid: 'uuid-1', + }; +} + +describe('logClaudeCodeLlmCall', () => { + beforeEach(() => { + mockStoreLlmCall.mockClear(); + }); + + it('sums input + cache_read + cache_creation into inputTokens', async () => { + const input = makeInput(); + const msg = makeAssistantMsg({ + input_tokens: 8, + output_tokens: 150, + cache_read_input_tokens: 45000, + cache_creation_input_tokens: 5000, + }); + + logClaudeCodeLlmCall(input, msg as never, 1, 'claude-sonnet-4-5-20250929'); + await Promise.resolve(); + + expect(mockStoreLlmCall).toHaveBeenCalledOnce(); + const [stored] = mockStoreLlmCall.mock.calls[0]; + expect(stored.inputTokens).toBe(50008); // 8 + 45000 + 5000 + }); + + it('uses only input_tokens when cache fields are null', async () => { + const input = makeInput(); + const msg = makeAssistantMsg({ + input_tokens: 1200, + output_tokens: 300, + cache_read_input_tokens: null, + cache_creation_input_tokens: null, + }); + + logClaudeCodeLlmCall(input, msg as never, 1, 'claude-sonnet-4-5-20250929'); + await Promise.resolve(); + + const [stored] = mockStoreLlmCall.mock.calls[0]; + expect(stored.inputTokens).toBe(1200); + expect(stored.cachedTokens).toBe(0); + }); + + it('sets cachedTokens to cache_read_input_tokens', async () => { + const input = makeInput(); + const msg = makeAssistantMsg({ + input_tokens: 8, + output_tokens: 100, + cache_read_input_tokens: 30000, + cache_creation_input_tokens: 0, + }); + + logClaudeCodeLlmCall(input, msg as never, 1, 'claude-sonnet-4-5-20250929'); + await Promise.resolve(); + + const [stored] = mockStoreLlmCall.mock.calls[0]; + expect(stored.cachedTokens).toBe(30000); + }); + + it('calculates costUsd for a known model', async () => { + const input = makeInput(); + // claude-sonnet-4-5-20250929 → 'anthropic:claude-sonnet-4-5' → $3/1M input, $15/1M output + const msg = makeAssistantMsg({ + input_tokens: 1_000_000, + output_tokens: 1_000_000, + cache_read_input_tokens: null, + cache_creation_input_tokens: null, + }); + + logClaudeCodeLlmCall(input, msg as never, 1, 'claude-sonnet-4-5-20250929'); + await Promise.resolve(); + + const [stored] = mockStoreLlmCall.mock.calls[0]; + // $3 input + $15 output = $18 + expect(stored.costUsd).toBeCloseTo(18, 4); + }); + + it('leaves costUsd undefined for unknown model', async () => { + const input = makeInput(); + const msg = makeAssistantMsg({ + input_tokens: 1000, + output_tokens: 500, + cache_read_input_tokens: null, + cache_creation_input_tokens: null, + }); + + logClaudeCodeLlmCall(input, msg as never, 1, 'claude-unknown-model-99990101'); + await Promise.resolve(); + + const [stored] = mockStoreLlmCall.mock.calls[0]; + expect(stored.costUsd).toBeUndefined(); + }); + + it('is a no-op when usage is absent', () => { + const input = makeInput(); + const msg = { + type: 'assistant' as const, + message: { content: [], model: 'x', usage: undefined }, + parent_tool_use_id: null, + session_id: 's', + uuid: 'u', + }; + + logClaudeCodeLlmCall(input, msg as never, 1, 'claude-sonnet-4-5-20250929'); + + expect(mockStoreLlmCall).not.toHaveBeenCalled(); + }); +}); diff --git a/tests/unit/backends/claude-code.test.ts b/tests/unit/backends/claude-code.test.ts index 51ea8869..2d589fd0 100644 --- a/tests/unit/backends/claude-code.test.ts +++ b/tests/unit/backends/claude-code.test.ts @@ -709,6 +709,8 @@ describe('execute', () => { turn: 1, inputTokens: 1000, outputTokens: 500, + cacheReadTokens: 0, + cacheWriteTokens: 0, }); }); diff --git a/web/src/components/llm-calls/llm-call-list.tsx b/web/src/components/llm-calls/llm-call-list.tsx index 13737263..8f4c6fa1 100644 --- a/web/src/components/llm-calls/llm-call-list.tsx +++ b/web/src/components/llm-calls/llm-call-list.tsx @@ -10,6 +10,8 @@ interface LlmCallListProps { runId: string; } +type ToolCall = { name: string; inputSummary: string }; + type CallMeta = { callNumber: number; inputTokens?: number | null; @@ -19,32 +21,27 @@ type CallMeta = { durationMs?: number | null; model?: string | null; createdAt?: Date | string | null; - toolNames?: string[] | null; + toolCalls?: ToolCall[] | null; textPreview?: string | null; + thinkingChars?: number | null; }; -function ToolBadges({ toolNames }: { toolNames: string[] }) { - if (toolNames.length === 0) return null; - - // Count occurrences - const counts = new Map(); - for (const name of toolNames) { - counts.set(name, (counts.get(name) ?? 0) + 1); - } - const unique = [...new Set(toolNames)]; - +function ToolCallList({ toolCalls }: { toolCalls: ToolCall[] }) { + if (toolCalls.length === 0) return null; return ( - - {unique.map((name) => { - const { bg, text } = getToolStyle(name); - const count = counts.get(name) ?? 1; + + {toolCalls.map((tc, i) => { + const { bg, text } = getToolStyle(tc.name); return ( - - {name} - {count > 1 && ×{count}} + + + {tc.name} + + {tc.inputSummary && ( + + {tc.inputSummary} + + )} ); })} @@ -83,14 +80,19 @@ function CallRow({ runId, call, delta, isExpanded, onToggle }: CallRowProps) { {call.callNumber} -
- +
+ {call.textPreview && ( {call.textPreview} )} - {!call.toolNames?.length && !call.textPreview && ( + {call.thinkingChars != null && ( + + thinking ({call.thinkingChars.toLocaleString()} chars) + + )} + {!call.toolCalls?.length && !call.textPreview && !call.thinkingChars && ( )}