diff --git a/src/app/api/openrouter/[...path]/route.ts b/src/app/api/openrouter/[...path]/route.ts
index 75d693b1d9..94faf0b9e2 100644
--- a/src/app/api/openrouter/[...path]/route.ts
+++ b/src/app/api/openrouter/[...path]/route.ts
@@ -173,9 +173,6 @@ export async function POST(request: NextRequest): Promise<NextResponse> {
       const body: GatewayMessagesRequest = JSON.parse(requestBodyText);
-      if (Array.isArray(body.messages) && body.messages.length > 1) {
-        body.cache_control = { type: 'ephemeral' };
-      }
       requestBodyParsed = { kind: 'messages', body };
     } else {
       const body: GatewayResponsesRequest = JSON.parse(requestBodyText);
diff --git a/src/lib/providers/openrouter/request-helpers.ts b/src/lib/providers/openrouter/request-helpers.ts
index 7fcdd00bf1..7d10a9f331 100644
--- a/src/lib/providers/openrouter/request-helpers.ts
+++ b/src/lib/providers/openrouter/request-helpers.ts
@@ -80,11 +80,29 @@ function setCacheControlOnResponsesMessage(message: OpenAI.Responses.ResponseInp
   }
 }
 
+function isObjectRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null;
+}
+
+function containsCacheControl(value: unknown): boolean {
+  if (Array.isArray(value)) {
+    return value.some(containsCacheControl);
+  }
+  if (!isObjectRecord(value)) {
+    return false;
+  }
+  if (Object.hasOwn(value, 'cache_control')) {
+    return true;
+  }
+  return Object.values(value).some(containsCacheControl);
+}
+
 export function addCacheBreakpoints(request: GatewayRequest) {
   if (
     request.kind === 'chat_completions' &&
     Array.isArray(request.body.messages) &&
-    request.body.messages.length > 1
+    request.body.messages.length > 1 &&
+    !containsCacheControl(request.body.messages)
   ) {
     const lastMessage = request.body.messages.findLast(
       msg => msg.role === 'user' || msg.role === 'tool'
@@ -98,7 +116,8 @@ export function addCacheBreakpoints(request: GatewayRequest) {
   } else if (
     request.kind === 'responses' &&
     Array.isArray(request.body.input) &&
-    request.body.input.length > 1
+    request.body.input.length > 1 &&
+    !containsCacheControl(request.body.input)
   ) {
     const lastMessage = request.body.input.findLast(
       msg => (msg.type === 'message' && msg.role === 'user') || msg.type === 'function_call_output'
@@ -109,6 +128,14 @@ export function addCacheBreakpoints(request: GatewayRequest) {
       );
       setCacheControlOnResponsesMessage(lastMessage);
     }
+  } else if (
+    request.kind === 'messages' &&
+    request.body.messages.length > 1 &&
+    !request.body.cache_control &&
+    !containsCacheControl(request.body.messages)
+  ) {
+    console.debug('[addCacheBreakpoints] setting cache breakpoint on messages request');
+    request.body.cache_control = { type: 'ephemeral' };
   }
 }
diff --git a/src/tests/openrouter-request-helpers.test.ts b/src/tests/openrouter-request-helpers.test.ts
new file mode 100644
index 0000000000..87bace7fa5
--- /dev/null
+++ b/src/tests/openrouter-request-helpers.test.ts
@@ -0,0 +1,166 @@
+import { describe, expect, test } from '@jest/globals';
+import { addCacheBreakpoints } from '@/lib/providers/openrouter/request-helpers';
+import type { GatewayRequest } from '@/lib/providers/openrouter/types';
+import type OpenAI from 'openai';
+
+describe('addCacheBreakpoints', () => {
+  test('adds a cache breakpoint to the last eligible chat completions message when none exist', () => {
+    const request: GatewayRequest = {
+      kind: 'chat_completions',
+      body: {
+        model: 'test-model',
+        messages: [
+          { role: 'system', content: 'You are helpful.' },
+          { role: 'user', content: 'First prompt' },
+          { role: 'assistant', content: 'First response' },
+          {
+            role: 'user',
+            content: [
+              { type: 'text', text: 'Latest prompt' },
+              { type: 'text', text: 'Latest detail' },
+            ],
+          },
+        ],
+      },
+    };
+
+    addCacheBreakpoints(request);
+
+    const lastContent = request.body.messages.at(-1)?.content;
+    expect(Array.isArray(lastContent)).toBe(true);
+    if (!Array.isArray(lastContent)) return;
+    expect(lastContent.at(-1)).toMatchObject({
+      type: 'text',
+      text: 'Latest detail',
+      cache_control: { type: 'ephemeral' },
+    });
+  });
+
+  test('does nothing for chat completions requests when any cache_control is already present', () => {
+    const request: GatewayRequest = {
+      kind: 'chat_completions',
+      body: {
+        model: 'test-model',
+        messages: [
+          { role: 'system', content: 'You are helpful.' },
+          {
+            role: 'user',
+            content: [
+              {
+                type: 'text',
+                text: 'First prompt',
+                cache_control: { type: 'ephemeral' },
+              } as OpenAI.ChatCompletionContentPartText,
+            ],
+          },
+          { role: 'assistant', content: 'First response' },
+          {
+            role: 'user',
+            content: [
+              { type: 'text', text: 'Latest prompt' },
+              { type: 'text', text: 'Latest detail' },
+            ],
+          },
+        ],
+      },
+    };
+
+    addCacheBreakpoints(request);
+
+    const lastContent =
+      request.kind === 'chat_completions' && request.body.messages.at(-1)?.content;
+    expect(lastContent).toEqual([
+      { type: 'text', text: 'Latest prompt' },
+      { type: 'text', text: 'Latest detail' },
+    ]);
+  });
+
+  test('does nothing for responses requests when any cache_control is already present', () => {
+    const request: GatewayRequest = {
+      kind: 'responses',
+      body: {
+        model: 'test-model',
+        input: [
+          {
+            type: 'message',
+            role: 'user',
+            content: [
+              {
+                type: 'input_text',
+                text: 'First prompt',
+                // @ts-expect-error non-standard cache_control extension
+                cache_control: { type: 'ephemeral' },
+              },
+            ],
+          },
+          {
+            type: 'function_call_output',
+            call_id: 'call_123',
+            output: [
+              { type: 'input_text', text: 'Tool output' },
+              { type: 'input_text', text: 'Tool detail' },
+            ],
+          },
+        ],
+      },
+    };
+
+    addCacheBreakpoints(request);
+
+    const lastItem = request.kind === 'responses' && request.body.input?.at(-1);
+    expect(lastItem).toMatchObject({
+      type: 'function_call_output',
+      output: [
+        { type: 'input_text', text: 'Tool output' },
+        { type: 'input_text', text: 'Tool detail' },
+      ],
+    });
+  });
+
+  test('adds top-level cache_control on messages request when none is present', () => {
+    const request: GatewayRequest = {
+      kind: 'messages',
+      body: {
+        model: 'anthropic/claude-sonnet-4-5',
+        max_tokens: 1024,
+        messages: [
+          { role: 'user', content: 'First prompt' },
+          { role: 'assistant', content: 'First response' },
+          { role: 'user', content: 'Latest prompt' },
+        ],
+      },
+    };
+
+    addCacheBreakpoints(request);
+
+    expect(request.body.cache_control).toEqual({ type: 'ephemeral' });
+  });
+
+  test('does nothing for messages request when any cache_control is already present', () => {
+    const request: GatewayRequest = {
+      kind: 'messages',
+      body: {
+        model: 'anthropic/claude-sonnet-4-5',
+        max_tokens: 1024,
+        messages: [
+          {
+            role: 'user',
+            content: [
+              {
+                type: 'text',
+                text: 'First prompt',
+                cache_control: { type: 'ephemeral' },
+              },
+            ],
+          },
+          { role: 'assistant', content: 'First response' },
+          { role: 'user', content: 'Latest prompt' },
+        ],
+      },
+    };
+
+    addCacheBreakpoints(request);
+
+    expect(request.body.cache_control).toBeUndefined();
+  });
+});
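
Note on the guard: containsCacheControl recurses through arrays and nested objects because callers can attach cache_control at several depths (a content part, a message, or the top-level body), and any one of them should suppress automatic placement. A minimal standalone sketch of that behavior, using the two helpers from the diff above with an illustrative payload (the payload shape is an assumption for the demo, not taken from the gateway types):

    // Narrow unknown values to plain object records before walking their values.
    function isObjectRecord(value: unknown): value is Record<string, unknown> {
      return typeof value === 'object' && value !== null;
    }

    // True if a `cache_control` key appears anywhere in the payload tree.
    function containsCacheControl(value: unknown): boolean {
      if (Array.isArray(value)) return value.some(containsCacheControl);
      if (!isObjectRecord(value)) return false;
      if (Object.hasOwn(value, 'cache_control')) return true;
      return Object.values(value).some(containsCacheControl);
    }

    // A breakpoint nested two levels deep (inside a content part) is still found,
    // so addCacheBreakpoints would leave this request untouched.
    const messages = [
      {
        role: 'user',
        content: [{ type: 'text', text: 'hi', cache_control: { type: 'ephemeral' } }],
      },
    ];
    console.log(containsCacheControl(messages)); // => true
    console.log(containsCacheControl([{ role: 'user', content: 'plain' }])); // => false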
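
Design note: because the scan treats a cache_control key anywhere in the body as caller intent, a single explicit breakpoint disables all automatic placement for that request. The assumption is that a caller who has begun placing breakpoints wants full control over where the cache split points land, since providers cap the number of breakpoints per request and an auto-added marker could conflict with the caller's own.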