diff --git a/apps/web/src/lib/ai-gateway/finishReason.test.ts b/apps/web/src/lib/ai-gateway/finishReason.test.ts
new file mode 100644
index 000000000..8ec6f0cd7
--- /dev/null
+++ b/apps/web/src/lib/ai-gateway/finishReason.test.ts
@@ -0,0 +1,38 @@
+import {
+  ERROR_FINISH_REASONS,
+  NON_ERROR_FINISH_REASONS,
+  isErrorFinishReason,
+} from '@/lib/ai-gateway/finishReason';
+
+describe('finishReason', () => {
+  it('classifies known error finish_reasons as errors', () => {
+    for (const reason of ERROR_FINISH_REASONS) {
+      expect(isErrorFinishReason(reason)).toBe(true);
+    }
+  });
+
+  it('classifies known non-error finish_reasons as non-errors', () => {
+    for (const reason of NON_ERROR_FINISH_REASONS) {
+      expect(isErrorFinishReason(reason)).toBe(false);
+    }
+  });
+
+  it('treats null/undefined as non-error', () => {
+    expect(isErrorFinishReason(null)).toBe(false);
+    expect(isErrorFinishReason(undefined)).toBe(false);
+  });
+
+  it('treats unrecognised string values as non-error', () => {
+    // Unknown values should not flip hasError; other signals (statusCode,
+    // wasAborted) handle those cases. This also keeps us from creating
+    // spurious error rows when a new provider adds a new stop reason.
+    expect(isErrorFinishReason('something_new_from_provider')).toBe(false);
+  });
+
+  it('does not double-count any reason in both lists', () => {
+    const intersection = NON_ERROR_FINISH_REASONS.filter(r =>
+      (ERROR_FINISH_REASONS as readonly string[]).includes(r)
+    );
+    expect(intersection).toEqual([]);
+  });
+});
diff --git a/apps/web/src/lib/ai-gateway/finishReason.ts b/apps/web/src/lib/ai-gateway/finishReason.ts
new file mode 100644
index 000000000..27706f602
--- /dev/null
+++ b/apps/web/src/lib/ai-gateway/finishReason.ts
@@ -0,0 +1,40 @@
+// The two lists below enumerate every distinct value observed for the
+// `finish_reason` column in production `microdollar_usage` logs.
+// `unknown` / `other` are kept as non-error catch-alls so a novel upstream
+// value does not immediately inflate the error rate.
+
+export const NON_ERROR_FINISH_REASONS = [
+  'stop',
+  'tool_calls',
+  'tool-calls',
+  'end_turn',
+  'completed',
+  'tool_use',
+  'stop_sequence',
+  'unknown',
+  'other',
+] as const;
+
+export const ERROR_FINISH_REASONS = [
+  'length',
+  'max_tokens',
+  'content_filter',
+  'content-filter',
+  'error',
+  'network_error',
+  'failed',
+  'model_context_window_exceeded',
+  'engine_overloaded',
+  'refusal',
+  'incomplete',
+  'in_progress',
+] as const;
+
+const errorFinishReasonSet: ReadonlySet<string> = new Set(ERROR_FINISH_REASONS);
+
+// `null` / `undefined` return false: an absent finish_reason is handled by
+// the `wasAborted` / `reportedError` signals in the parsers, not here.
+export function isErrorFinishReason(finish_reason: string | null | undefined): boolean {
+  if (finish_reason == null) return false;
+  return errorFinishReasonSet.has(finish_reason);
+}
diff --git a/apps/web/src/lib/ai-gateway/processUsage.messages.ts b/apps/web/src/lib/ai-gateway/processUsage.messages.ts
index 67af35004..dc343e464 100644
--- a/apps/web/src/lib/ai-gateway/processUsage.messages.ts
+++ b/apps/web/src/lib/ai-gateway/processUsage.messages.ts
@@ -17,6 +17,7 @@ import {
   drainSseStream,
   extractVercelIsByok,
 } from '@/lib/ai-gateway/processUsage.shared';
+import { isErrorFinishReason } from '@/lib/ai-gateway/finishReason';
 import type Anthropic from '@anthropic-ai/sdk';
 
 type MaybeHasVercelProviderMetadata = {
@@ -170,7 +171,7 @@ export async function parseMessagesMicrodollarUsageFromStream(
 
   const coreProps = {
     messageId,
-    hasError: reportedError || wasAborted,
+    hasError: reportedError || wasAborted || isErrorFinishReason(finish_reason),
     model,
     responseContent,
     inference_provider,
@@ -205,14 +206,15 @@ export function parseMessagesMicrodollarUsageFromString(
       .map(c => c.text)
       .join('');
 
+  const finish_reason = responseJson?.stop_reason ?? null;
   const coreProps = {
     messageId: responseJson?.id ?? null,
-    hasError: !responseJson?.model || statusCode >= 400,
+    hasError: !responseJson?.model || statusCode >= 400 || isErrorFinishReason(finish_reason),
     model: responseJson?.model ?? null,
     responseContent,
     inference_provider,
     upstream_id: null,
-    finish_reason: responseJson?.stop_reason ?? null,
+    finish_reason,
     latency: null,
     moderation_latency: null,
     generation_time: null,
diff --git a/apps/web/src/lib/ai-gateway/processUsage.responses.ts b/apps/web/src/lib/ai-gateway/processUsage.responses.ts
index eb8e2eae3..b5fefef3e 100644
--- a/apps/web/src/lib/ai-gateway/processUsage.responses.ts
+++ b/apps/web/src/lib/ai-gateway/processUsage.responses.ts
@@ -18,6 +18,7 @@ import {
   drainSseStream,
   extractVercelIsByok,
 } from '@/lib/ai-gateway/processUsage.shared';
+import { isErrorFinishReason } from '@/lib/ai-gateway/finishReason';
 
 // OpenRouter adds cost fields to the standard Responses API usage object.
 // ref: https://openrouter.ai/docs/use-cases/usage-accounting#response-format
@@ -204,7 +205,7 @@ export async function parseResponsesMicrodollarUsageFromStream(
 
   const coreProps = {
     messageId,
-    hasError: reportedError || wasAborted,
+    hasError: reportedError || wasAborted || isErrorFinishReason(finish_reason),
     model,
     responseContent,
     inference_provider,
diff --git a/apps/web/src/lib/ai-gateway/processUsage.ts b/apps/web/src/lib/ai-gateway/processUsage.ts
index 1a79696f9..d001dd753 100644
--- a/apps/web/src/lib/ai-gateway/processUsage.ts
+++ b/apps/web/src/lib/ai-gateway/processUsage.ts
@@ -54,6 +54,7 @@ import {
   parseMessagesMicrodollarUsageFromString,
 } from '@/lib/ai-gateway/processUsage.messages';
 import { OPENROUTER_BYOK_COST_MULTIPLIER } from '@/lib/ai-gateway/processUsage.constants';
+import { isErrorFinishReason } from '@/lib/ai-gateway/finishReason';
 import {
   computeOpenRouterCostFields,
   drainSseStream,
@@ -785,7 +786,7 @@ export async function parseMicrodollarUsageFromStream(
   const coreProps = {
     kiloUserId,
     messageId,
-    hasError: reportedError || wasAborted,
+    hasError: reportedError || wasAborted || isErrorFinishReason(finish_reason),
     model,
     responseContent,
     inference_provider,
@@ -822,10 +823,11 @@ export function parseMicrodollarUsageFromString(
     });
   }
   const choice = responseJson?.choices?.[0];
+  const finish_reason = choice?.finish_reason ?? null;
   const coreProps = {
     kiloUserId,
     messageId: responseJson?.id ?? null,
-    hasError: !responseJson?.model || statusCode >= 400,
+    hasError: !responseJson?.model || statusCode >= 400 || isErrorFinishReason(finish_reason),
     model: responseJson?.model ?? null,
     responseContent: choice?.message.content ?? '',
     inference_provider:
@@ -833,7 +835,7 @@ export function parseMicrodollarUsageFromString(
       choice?.message?.provider ??
       choice?.message?.provider_metadata?.gateway?.routing?.finalProvider ?? null,
     upstream_id: null,
-    finish_reason: choice?.finish_reason ?? null,
+    finish_reason,
     latency: null,
     moderation_latency: null,
     generation_time: null,