From 4336fffdabb7e9668dbed1098e9bb67fae308797 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= Date: Sun, 25 Jan 2026 02:08:11 +0800 Subject: [PATCH 1/9] feat(api): add X-NexusGate-ReqId request deduplication Add support for request deduplication using X-NexusGate-ReqId header: - Cache completed responses and return them for duplicate requests - Return 409 Conflict with Retry-After for in-flight requests - Support both streaming and non-streaming requests - Scope ReqId by API key to prevent cross-user collisions - Use Redis SETNX for atomic in-flight marking (600s TTL) - Store cachedResponse in database for future cache hits - Add cache_hit status to completions table and frontend UI Database changes: - Add req_id, source_completion_id, api_format, cached_response columns - Add cache_hit status to completions_status enum - Create partial unique index on (api_key_id, req_id) Co-Authored-By: Claude Opus 4.5 --- .../drizzle/0011_outgoing_johnny_blaze.sql | 7 + backend/drizzle/meta/0011_snapshot.json | 1040 +++++++++++++++++ backend/drizzle/meta/_journal.json | 7 + backend/src/api/v1/completions.ts | 251 +++- backend/src/api/v1/messages.ts | 229 +++- backend/src/api/v1/responses.ts | 251 +++- backend/src/db/index.ts | 81 ++ backend/src/db/schema.ts | 19 +- backend/src/utils/redisClient.ts | 28 + backend/src/utils/reqIdCache.ts | 159 +++ backend/src/utils/reqIdHandler.ts | 325 ++++++ backend/src/utils/streaming-context.ts | 34 +- frontend/src/i18n/locales/en-US.json | 2 + frontend/src/i18n/locales/zh-CN.json | 2 + frontend/src/pages/requests/columns.tsx | 3 + .../pages/requests/detail-panel/header.tsx | 5 + 16 files changed, 2349 insertions(+), 94 deletions(-) create mode 100644 backend/drizzle/0011_outgoing_johnny_blaze.sql create mode 100644 backend/drizzle/meta/0011_snapshot.json create mode 100644 backend/src/utils/reqIdCache.ts create mode 100644 backend/src/utils/reqIdHandler.ts diff --git a/backend/drizzle/0011_outgoing_johnny_blaze.sql b/backend/drizzle/0011_outgoing_johnny_blaze.sql new file mode 100644 index 0000000..76795fb --- /dev/null +++ b/backend/drizzle/0011_outgoing_johnny_blaze.sql @@ -0,0 +1,7 @@ +ALTER TYPE "public"."completions_status" ADD VALUE 'cache_hit';--> statement-breakpoint +ALTER TABLE "completions" ADD COLUMN "req_id" varchar(127);--> statement-breakpoint +ALTER TABLE "completions" ADD COLUMN "source_completion_id" integer;--> statement-breakpoint +ALTER TABLE "completions" ADD COLUMN "api_format" varchar(31);--> statement-breakpoint +ALTER TABLE "completions" ADD COLUMN "cached_response" jsonb;--> statement-breakpoint +ALTER TABLE "completions" ADD CONSTRAINT "completions_source_completion_id_completions_id_fk" FOREIGN KEY ("source_completion_id") REFERENCES "public"."completions"("id") ON DELETE no action ON UPDATE no action;--> statement-breakpoint +CREATE UNIQUE INDEX "completions_api_key_req_id_unique" ON "completions" ("api_key_id", "req_id") WHERE "req_id" IS NOT NULL; \ No newline at end of file diff --git a/backend/drizzle/meta/0011_snapshot.json b/backend/drizzle/meta/0011_snapshot.json new file mode 100644 index 0000000..2df7511 --- /dev/null +++ b/backend/drizzle/meta/0011_snapshot.json @@ -0,0 +1,1040 @@ +{ + "id": "7d19c611-e3a5-4186-b863-f3a792c60de3", + "prevId": "6be63fd9-50d1-4d49-acea-e53401c54b2f", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.api_keys": { + "name": "api_keys", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "api_keys_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "key": { + "name": "key", + "type": "varchar(63)", + "primaryKey": false, + "notNull": true + }, + "comment": { + "name": "comment", + "type": "varchar", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "last_seen": { + "name": "last_seen", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "revoked": { + "name": "revoked", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "rpm_limit": { + "name": "rpm_limit", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 50 + }, + "tpm_limit": { + "name": "tpm_limit", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 50000 + }, + "external_id": { + "name": "external_id", + "type": "varchar(127)", + "primaryKey": false, + "notNull": false + }, + "source": { + "name": "source", + "type": "api_key_source", + "typeSchema": "public", + "primaryKey": false, + "notNull": false, + "default": "'manual'" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "api_keys_key_unique": { + "name": "api_keys_key_unique", + "nullsNotDistinct": false, + "columns": [ + "key" + ] + }, + "api_keys_external_id_unique": { + "name": "api_keys_external_id_unique", + "nullsNotDistinct": false, + "columns": [ + "external_id" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.completions": { + "name": "completions", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "completions_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "api_key_id": { + "name": "api_key_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "upstream_id": { + "name": "upstream_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "model_id": { + "name": "model_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "model": { + "name": "model", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "prompt": { + "name": "prompt", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "prompt_tokens": { + "name": "prompt_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "completion": { + "name": "completion", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "completion_tokens": { + "name": "completion_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "completions_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'pending'" + }, + "ttft": { + "name": "ttft", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "duration": { + "name": "duration", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "deleted": { + "name": "deleted", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "rating": { + "name": "rating", + "type": "real", + "primaryKey": false, + "notNull": false + }, + "req_id": { + "name": "req_id", + "type": "varchar(127)", + "primaryKey": false, + "notNull": false + }, + "source_completion_id": { + "name": "source_completion_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "api_format": { + "name": "api_format", + "type": "varchar(31)", + "primaryKey": false, + "notNull": false + }, + "cached_response": { + "name": "cached_response", + "type": "jsonb", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "completions_api_key_id_api_keys_id_fk": { + "name": "completions_api_key_id_api_keys_id_fk", + "tableFrom": "completions", + "tableTo": "api_keys", + "columnsFrom": [ + "api_key_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "completions_upstream_id_upstreams_id_fk": { + "name": "completions_upstream_id_upstreams_id_fk", + "tableFrom": "completions", + "tableTo": "upstreams", + "columnsFrom": [ + "upstream_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "completions_source_completion_id_completions_id_fk": { + "name": "completions_source_completion_id_completions_id_fk", + "tableFrom": "completions", + "tableTo": "completions", + "columnsFrom": [ + "source_completion_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "completions_id_unique": { + "name": "completions_id_unique", + "nullsNotDistinct": false, + "columns": [ + "id" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.embeddings": { + "name": "embeddings", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "embeddings_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "api_key_id": { + "name": "api_key_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "model_id": { + "name": "model_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "model": { + "name": "model", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "input": { + "name": "input", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "input_tokens": { + "name": "input_tokens", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "embedding": { + "name": "embedding", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "dimensions": { + "name": "dimensions", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "completions_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'pending'" + }, + "duration": { + "name": "duration", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "deleted": { + "name": "deleted", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + } + }, + "indexes": {}, + "foreignKeys": { + "embeddings_api_key_id_api_keys_id_fk": { + "name": "embeddings_api_key_id_api_keys_id_fk", + "tableFrom": "embeddings", + "tableTo": "api_keys", + "columnsFrom": [ + "api_key_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "embeddings_model_id_models_id_fk": { + "name": "embeddings_model_id_models_id_fk", + "tableFrom": "embeddings", + "tableTo": "models", + "columnsFrom": [ + "model_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "embeddings_id_unique": { + "name": "embeddings_id_unique", + "nullsNotDistinct": false, + "columns": [ + "id" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.models": { + "name": "models", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "models_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "provider_id": { + "name": "provider_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "system_name": { + "name": "system_name", + "type": "varchar(63)", + "primaryKey": false, + "notNull": true + }, + "remote_id": { + "name": "remote_id", + "type": "varchar(63)", + "primaryKey": false, + "notNull": false + }, + "model_type": { + "name": "model_type", + "type": "model_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'chat'" + }, + "context_length": { + "name": "context_length", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "input_price": { + "name": "input_price", + "type": "real", + "primaryKey": false, + "notNull": false + }, + "output_price": { + "name": "output_price", + "type": "real", + "primaryKey": false, + "notNull": false + }, + "weight": { + "name": "weight", + "type": "real", + "primaryKey": false, + "notNull": true, + "default": 1 + }, + "comment": { + "name": "comment", + "type": "varchar", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "deleted": { + "name": "deleted", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + } + }, + "indexes": {}, + "foreignKeys": { + "models_provider_id_providers_id_fk": { + "name": "models_provider_id_providers_id_fk", + "tableFrom": "models", + "tableTo": "providers", + "columnsFrom": [ + "provider_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "models_provider_system_name_unique": { + "name": "models_provider_system_name_unique", + "nullsNotDistinct": false, + "columns": [ + "provider_id", + "system_name" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.providers": { + "name": "providers", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "providers_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "name": { + "name": "name", + "type": "varchar(63)", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "provider_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'openai'" + }, + "base_url": { + "name": "base_url", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "api_key": { + "name": "api_key", + "type": "varchar(255)", + "primaryKey": false, + "notNull": false + }, + "api_version": { + "name": "api_version", + "type": "varchar(31)", + "primaryKey": false, + "notNull": false + }, + "comment": { + "name": "comment", + "type": "varchar", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "deleted": { + "name": "deleted", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "providers_name_unique": { + "name": "providers_name_unique", + "nullsNotDistinct": false, + "columns": [ + "name" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.settings": { + "name": "settings", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "settings_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "key": { + "name": "key", + "type": "varchar(63)", + "primaryKey": false, + "notNull": true + }, + "value": { + "name": "value", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "settings_key_unique": { + "name": "settings_key_unique", + "nullsNotDistinct": false, + "columns": [ + "key" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.srv_logs": { + "name": "srv_logs", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "srv_logs_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "related_api_key_id": { + "name": "related_api_key_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "related_upstream_id": { + "name": "related_upstream_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "related_completion_id": { + "name": "related_completion_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "message": { + "name": "message", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "level": { + "name": "level", + "type": "srv_logs_level", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "details": { + "name": "details", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "acknowledged": { + "name": "acknowledged", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "ack_at": { + "name": "ack_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "srv_logs_related_api_key_id_api_keys_id_fk": { + "name": "srv_logs_related_api_key_id_api_keys_id_fk", + "tableFrom": "srv_logs", + "tableTo": "api_keys", + "columnsFrom": [ + "related_api_key_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "srv_logs_related_upstream_id_upstreams_id_fk": { + "name": "srv_logs_related_upstream_id_upstreams_id_fk", + "tableFrom": "srv_logs", + "tableTo": "upstreams", + "columnsFrom": [ + "related_upstream_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "srv_logs_related_completion_id_completions_id_fk": { + "name": "srv_logs_related_completion_id_completions_id_fk", + "tableFrom": "srv_logs", + "tableTo": "completions", + "columnsFrom": [ + "related_completion_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "srv_logs_id_unique": { + "name": "srv_logs_id_unique", + "nullsNotDistinct": false, + "columns": [ + "id" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.upstreams": { + "name": "upstreams", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "upstreams_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "name": { + "name": "name", + "type": "varchar(63)", + "primaryKey": false, + "notNull": true + }, + "url": { + "name": "url", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "model": { + "name": "model", + "type": "varchar(63)", + "primaryKey": false, + "notNull": true + }, + "upstream_model": { + "name": "upstream_model", + "type": "varchar(63)", + "primaryKey": false, + "notNull": false + }, + "api_key": { + "name": "api_key", + "type": "varchar(255)", + "primaryKey": false, + "notNull": false + }, + "weight": { + "name": "weight", + "type": "real", + "primaryKey": false, + "notNull": true, + "default": 1 + }, + "comment": { + "name": "comment", + "type": "varchar", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "deleted": { + "name": "deleted", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + "enums": { + "public.api_key_source": { + "name": "api_key_source", + "schema": "public", + "values": [ + "manual", + "operator", + "init" + ] + }, + "public.completions_status": { + "name": "completions_status", + "schema": "public", + "values": [ + "pending", + "completed", + "failed", + "aborted", + "cache_hit" + ] + }, + "public.model_type": { + "name": "model_type", + "schema": "public", + "values": [ + "chat", + "embedding" + ] + }, + "public.provider_type": { + "name": "provider_type", + "schema": "public", + "values": [ + "openai", + "openai-responses", + "anthropic", + "azure", + "ollama" + ] + }, + "public.srv_logs_level": { + "name": "srv_logs_level", + "schema": "public", + "values": [ + "unspecific", + "info", + "warn", + "error" + ] + } + }, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} \ No newline at end of file diff --git a/backend/drizzle/meta/_journal.json b/backend/drizzle/meta/_journal.json index 9f2d7e8..8c4593c 100644 --- a/backend/drizzle/meta/_journal.json +++ b/backend/drizzle/meta/_journal.json @@ -78,6 +78,13 @@ "when": 1769256296714, "tag": "0010_noisy_deathbird", "breakpoints": true + }, + { + "idx": 11, + "version": "7", + "when": 1769275632379, + "tag": "0011_outgoing_johnny_blaze", + "breakpoints": true } ] } \ No newline at end of file diff --git a/backend/src/api/v1/completions.ts b/backend/src/api/v1/completions.ts index 97df736..41e32e1 100644 --- a/backend/src/api/v1/completions.ts +++ b/backend/src/api/v1/completions.ts @@ -19,6 +19,7 @@ import type { CompletionsMessageType, ToolDefinitionType, ToolChoiceType, + CachedResponseType, } from "@/db/schema"; import { extractUpstreamHeaders, @@ -36,6 +37,14 @@ import { selectMultipleCandidates, type FailoverConfig, } from "@/services/failover"; +import { + checkReqId, + finalizeReqId, + recordCacheHit, + buildInFlightErrorResponse, + extractReqId, + type ApiFormat, +} from "@/utils/reqIdHandler"; const logger = consola.withTag("completionsApi"); @@ -160,6 +169,16 @@ function buildCompletionRecord( }; } +/** + * ReqId context for request deduplication + */ +interface ReqIdContext { + reqId: string; + apiKeyId: number; + preCreatedCompletionId: number; + apiFormat: ApiFormat; +} + /** * Process a successful non-streaming response * Ensures completion is saved to database before returning @@ -172,6 +191,7 @@ async function processNonStreamingResponse( apiKeyRecord: ApiKey | null, begin: number, signal?: AbortSignal, + reqIdContext?: ReqIdContext, ): Promise { // Parse response using upstream adapter const upstreamAdapter = getUpstreamAdapter(providerType); @@ -197,21 +217,53 @@ async function processNonStreamingResponse( }, ]; + // Build cached response for ReqId deduplication + const cachedResponse: CachedResponseType = { + body: serialized, + format: "openai-chat", + }; + // Check if client disconnected during processing if (signal?.aborted) { completion.status = "aborted"; - await addCompletions(completion, bearer, { - level: "info", - message: "Client disconnected during non-streaming response", - details: { - type: "completionError", - data: { type: "aborted", msg: "Client disconnected" }, - }, - }); + if (reqIdContext) { + // Use finalizeReqId for ReqId requests + await finalizeReqId( + reqIdContext.apiKeyId, + reqIdContext.reqId, + reqIdContext.preCreatedCompletionId, + { + ...completion, + cachedResponse, + }, + ); + } else { + await addCompletions(completion, bearer, { + level: "info", + message: "Client disconnected during non-streaming response", + details: { + type: "completionError", + data: { type: "aborted", msg: "Client disconnected" }, + }, + }); + } } else { completion.status = "completed"; - // Use await to ensure database write completes before returning - await addCompletions(completion, bearer); + if (reqIdContext) { + // Use finalizeReqId for ReqId requests + await finalizeReqId( + reqIdContext.apiKeyId, + reqIdContext.reqId, + reqIdContext.preCreatedCompletionId, + { + ...completion, + cachedResponse, + }, + ); + } else { + // Use await to ensure database write completes before returning + await addCompletions(completion, bearer); + } } // Consume tokens for TPM rate limiting (post-flight) @@ -237,6 +289,7 @@ async function* processStreamingResponse( apiKeyRecord: ApiKey | null, begin: number, signal?: AbortSignal, + reqIdContext?: ReqIdContext, ): AsyncGenerator { // Get adapters const upstreamAdapter = getUpstreamAdapter(providerType); @@ -244,8 +297,44 @@ async function* processStreamingResponse( logger.debug("parse stream completions response"); + // Build streaming ReqId context if provided + const streamingReqIdContext = reqIdContext + ? { + reqId: reqIdContext.reqId, + apiKeyId: reqIdContext.apiKeyId, + preCreatedCompletionId: reqIdContext.preCreatedCompletionId, + apiFormat: reqIdContext.apiFormat, + buildCachedResponse: (comp: Completion): CachedResponseType => { + // For streaming, we build a complete non-streaming response for cache + return { + body: { + id: `chatcmpl-cache-${reqIdContext.preCreatedCompletionId}`, + object: "chat.completion", + created: Math.floor(Date.now() / 1000), + model: comp.model, + choices: comp.completion.map((c, i) => ({ + index: i, + message: { + role: c.role || "assistant", + content: c.content, + tool_calls: c.tool_calls, + }, + finish_reason: c.tool_calls?.length ? "tool_calls" : "stop", + })), + usage: { + prompt_tokens: comp.promptTokens, + completion_tokens: comp.completionTokens, + total_tokens: comp.promptTokens + comp.completionTokens, + }, + }, + format: "openai-chat", + }; + }, + } + : undefined; + // Create streaming context with abort handling - const ctx = new StreamingContext(completion, bearer, apiKeyRecord, begin, signal); + const ctx = new StreamingContext(completion, bearer, apiKeyRecord, begin, signal, streamingReqIdContext); // Track whether we've logged the client abort (to avoid duplicate logs) let loggedAbort = false; @@ -398,7 +487,7 @@ export const completionsApi = new Elysia({ .use(rateLimitPlugin) .post( "/completions", - async function* ({ body, set, bearer, request, apiKeyRecord }) { + async function ({ body, set, bearer, request, apiKeyRecord }) { if (bearer === undefined) { set.status = 500; return { error: "Internal server error" }; @@ -407,6 +496,10 @@ export const completionsApi = new Elysia({ const reqHeaders = request.headers; const begin = Date.now(); + // Extract ReqId for request deduplication + const reqId = extractReqId(reqHeaders); + const apiFormat: ApiFormat = "openai-chat"; + // Parse model@provider format and extract provider from header const { systemName, targetProvider } = parseModelProvider( body.model, @@ -457,6 +550,75 @@ export const completionsApi = new Elysia({ // Extract extra headers for passthrough const extraHeaders = extractUpstreamHeaders(reqHeaders); + // Check ReqId for deduplication (if provided) + const isStream = body.stream === true; + const reqIdResult = await checkReqId(reqId, { + apiKeyId: apiKeyRecord.id, + model: body.model, + modelId: candidates[0]?.model.id, + prompt: { + messages: body.messages as CompletionsMessageType[], + tools: body.tools as ToolDefinitionType[] | undefined, + tool_choice: body.tool_choice as ToolChoiceType | undefined, + extraHeaders, + }, + apiFormat, + endpoint: "/v1/chat/completions", + isStream, + }); + + // Handle cache hit - return cached response + if (reqIdResult.type === "cache_hit") { + const sourceCompletion = reqIdResult.completion; + // Record the cache hit + await recordCacheHit(sourceCompletion, apiKeyRecord.id); + + // Return cached response + if (sourceCompletion.cachedResponse) { + return sourceCompletion.cachedResponse.body as Record; + } + + // Fallback: reconstruct response from completion data + const reconstructed = { + id: `chatcmpl-cache-${sourceCompletion.id}`, + object: "chat.completion", + created: Math.floor(sourceCompletion.createdAt.getTime() / 1000), + model: sourceCompletion.model, + choices: sourceCompletion.completion.map((c, i) => ({ + index: i, + message: { + role: c.role || "assistant", + content: c.content, + tool_calls: c.tool_calls, + }, + finish_reason: c.tool_calls?.length ? "tool_calls" : "stop", + })), + usage: { + prompt_tokens: sourceCompletion.promptTokens, + completion_tokens: sourceCompletion.completionTokens, + total_tokens: sourceCompletion.promptTokens + sourceCompletion.completionTokens, + }, + }; + return reconstructed; + } + + // Handle in-flight - return 409 Conflict + if (reqIdResult.type === "in_flight") { + set.status = 409; + set.headers["Retry-After"] = String(reqIdResult.retryAfter); + return buildInFlightErrorResponse( + reqId!, + reqIdResult.inFlight, + reqIdResult.retryAfter, + apiFormat, + ); + } + + // For new_request, we have a pre-created completionId + const preCreatedCompletionId = reqIdResult.type === "new_request" + ? reqIdResult.completionId + : null; + // Parse request using adapter const requestAdapter = getRequestAdapter("openai-chat"); const internalRequest = requestAdapter.parse( @@ -484,7 +646,7 @@ export const completionsApi = new Elysia({ // Handle streaming vs non-streaming if (internalRequest.stream) { - // Streaming request - use yield for streaming responses + // Streaming request - return an async generator if (body.n && body.n > 1) { set.status = 400; return { error: "Stream completions with n > 1 is not supported" }; @@ -546,26 +708,42 @@ export const completionsApi = new Elysia({ extraHeaders, ); - try { - yield* processStreamingResponse( - result.response, - completion, - bearer, - providerType, - apiKeyRecord ?? null, - begin, - request.signal, - ); - } catch (error) { - // Don't log error if it's due to client abort - if (!request.signal.aborted) { - logger.error("Stream processing error", error); - set.status = 500; - yield JSON.stringify({ error: "Stream processing error" }); + // Build ReqId context if we have a pre-created completion + const streamReqIdContext = preCreatedCompletionId && reqId + ? { + reqId, + apiKeyId: apiKeyRecord.id, + preCreatedCompletionId, + apiFormat, + } + : undefined; + + // Return an async generator for streaming + const streamResponse = result.response; + const streamSignal = request.signal; + return (async function* () { + try { + yield* processStreamingResponse( + streamResponse, + completion, + bearer, + providerType, + apiKeyRecord ?? null, + begin, + streamSignal, + streamReqIdContext, + ); + } catch (error) { + // Don't log error if it's due to client abort + if (!streamSignal.aborted) { + logger.error("Stream processing error", error); + set.status = 500; + yield JSON.stringify({ error: "Stream processing error" }); + } } - } + })(); } else { - // Non-streaming request - use return for normal JSON response + // Non-streaming request - return JSON response directly const result = await executeWithFailover( candidates, buildRequestForProvider, @@ -616,6 +794,16 @@ export const completionsApi = new Elysia({ extraHeaders, ); + // Build ReqId context if we have a pre-created completion + const nonStreamReqIdContext = preCreatedCompletionId && reqId + ? { + reqId, + apiKeyId: apiKeyRecord.id, + preCreatedCompletionId, + apiFormat, + } + : undefined; + try { const response = await processNonStreamingResponse( result.response, @@ -625,6 +813,7 @@ export const completionsApi = new Elysia({ apiKeyRecord ?? null, begin, request.signal, + nonStreamReqIdContext, ); // Return parsed JSON object for proper content-type return JSON.parse(response) as Record; diff --git a/backend/src/api/v1/messages.ts b/backend/src/api/v1/messages.ts index 9eb6a36..64024f7 100644 --- a/backend/src/api/v1/messages.ts +++ b/backend/src/api/v1/messages.ts @@ -30,6 +30,15 @@ import { selectMultipleCandidates, type FailoverConfig, } from "@/services/failover"; +import { + checkReqId, + finalizeReqId, + recordCacheHit, + buildInFlightErrorResponse, + extractReqId, + type ApiFormat, +} from "@/utils/reqIdHandler"; +import type { CachedResponseType } from "@/db/schema"; const logger = consola.withTag("messagesApi"); @@ -152,6 +161,16 @@ function buildCompletionRecord( }; } +/** + * ReqId context for request deduplication + */ +interface ReqIdContext { + reqId: string; + apiKeyId: number; + preCreatedCompletionId: number; + apiFormat: ApiFormat; +} + /** * Process a successful non-streaming message response * Ensures completion is saved to database before returning @@ -164,6 +183,7 @@ async function processNonStreamingResponse( apiKeyRecord: ApiKey | null, begin: number, signal?: AbortSignal, + reqIdContext?: ReqIdContext, ): Promise { // Parse response using upstream adapter const upstreamAdapter = getUpstreamAdapter(providerType); @@ -185,20 +205,44 @@ async function processNonStreamingResponse( }, ]; + // Build cached response for ReqId deduplication + const cachedResponse: CachedResponseType = { + body: serialized, + format: "anthropic", + }; + // Check if client disconnected during processing if (signal?.aborted) { completion.status = "aborted"; - await addCompletions(completion, bearer, { - level: "info", - message: "Client disconnected during non-streaming response", - details: { - type: "completionError", - data: { type: "aborted", msg: "Client disconnected" }, - }, - }); + if (reqIdContext) { + await finalizeReqId( + reqIdContext.apiKeyId, + reqIdContext.reqId, + reqIdContext.preCreatedCompletionId, + { ...completion, cachedResponse }, + ); + } else { + await addCompletions(completion, bearer, { + level: "info", + message: "Client disconnected during non-streaming response", + details: { + type: "completionError", + data: { type: "aborted", msg: "Client disconnected" }, + }, + }); + } } else { completion.status = "completed"; - await addCompletions(completion, bearer); + if (reqIdContext) { + await finalizeReqId( + reqIdContext.apiKeyId, + reqIdContext.reqId, + reqIdContext.preCreatedCompletionId, + { ...completion, cachedResponse }, + ); + } else { + await addCompletions(completion, bearer); + } } // Consume tokens for TPM rate limiting (post-flight) @@ -224,6 +268,7 @@ async function* processStreamingResponse( apiKeyRecord: ApiKey | null, begin: number, signal?: AbortSignal, + reqIdContext?: ReqIdContext, ): AsyncGenerator { // Get adapters const upstreamAdapter = getUpstreamAdapter(providerType); @@ -231,8 +276,39 @@ async function* processStreamingResponse( logger.debug("parse stream messages response"); + // Build streaming ReqId context if provided + const streamingReqIdContext = reqIdContext + ? { + reqId: reqIdContext.reqId, + apiKeyId: reqIdContext.apiKeyId, + preCreatedCompletionId: reqIdContext.preCreatedCompletionId, + apiFormat: reqIdContext.apiFormat, + buildCachedResponse: (comp: Completion): CachedResponseType => { + // For streaming, build a complete non-streaming Anthropic response for cache + return { + body: { + id: `msg-cache-${reqIdContext.preCreatedCompletionId}`, + type: "message", + role: "assistant", + content: comp.completion.map((c) => ({ + type: "text", + text: c.content || "", + })), + model: comp.model, + stop_reason: "end_turn", + usage: { + input_tokens: comp.promptTokens, + output_tokens: comp.completionTokens, + }, + }, + format: "anthropic", + }; + }, + } + : undefined; + // Create streaming context with abort handling - const ctx = new StreamingContext(completion, bearer, apiKeyRecord, begin, signal); + const ctx = new StreamingContext(completion, bearer, apiKeyRecord, begin, signal, streamingReqIdContext); // Track whether we've logged the client abort (to avoid duplicate logs) let loggedAbort = false; @@ -379,7 +455,7 @@ export const messagesApi = new Elysia({ .use(rateLimitPlugin) .post( "/messages", - async function* ({ body, set, bearer, request, apiKeyRecord }) { + async function ({ body, set, bearer, request, apiKeyRecord }) { if (bearer === undefined) { set.status = 500; return { @@ -391,6 +467,10 @@ export const messagesApi = new Elysia({ const reqHeaders = request.headers; const begin = Date.now(); + // Extract ReqId for request deduplication + const reqId = extractReqId(reqHeaders); + const apiFormat: ApiFormat = "anthropic"; + // Parse model@provider format and extract provider from header const { systemName, targetProvider } = parseModelProvider( body.model, @@ -441,6 +521,68 @@ export const messagesApi = new Elysia({ // Extract extra headers for passthrough const extraHeaders = extractUpstreamHeaders(reqHeaders); + // Check ReqId for deduplication (if provided) + const isStream = body.stream === true; + const reqIdResult = await checkReqId(reqId, { + apiKeyId: apiKeyRecord.id, + model: body.model, + modelId: candidates[0]?.model.id, + prompt: { + messages: body.messages.map((m: { role: string; content: unknown }) => ({ + role: m.role, + content: typeof m.content === "string" ? m.content : JSON.stringify(m.content), + })), + extraHeaders, + }, + apiFormat, + endpoint: "/v1/messages", + isStream, + }); + + // Handle cache hit - return cached response + if (reqIdResult.type === "cache_hit") { + const sourceCompletion = reqIdResult.completion; + await recordCacheHit(sourceCompletion, apiKeyRecord.id); + + if (sourceCompletion.cachedResponse) { + return sourceCompletion.cachedResponse.body as Record; + } + + // Fallback: reconstruct Anthropic response + return { + id: `msg-cache-${sourceCompletion.id}`, + type: "message", + role: "assistant", + content: sourceCompletion.completion.map((c) => ({ + type: "text", + text: c.content || "", + })), + model: sourceCompletion.model, + stop_reason: "end_turn", + usage: { + input_tokens: sourceCompletion.promptTokens, + output_tokens: sourceCompletion.completionTokens, + }, + }; + } + + // Handle in-flight - return 409 Conflict + if (reqIdResult.type === "in_flight") { + set.status = 409; + set.headers["Retry-After"] = String(reqIdResult.retryAfter); + return buildInFlightErrorResponse( + reqId!, + reqIdResult.inFlight, + reqIdResult.retryAfter, + apiFormat, + ); + } + + // For new_request, we have a pre-created completionId + const preCreatedCompletionId = reqIdResult.type === "new_request" + ? reqIdResult.completionId + : null; + // Parse request using Anthropic adapter const requestAdapter = getRequestAdapter("anthropic"); const internalRequest = requestAdapter.parse( @@ -468,7 +610,7 @@ export const messagesApi = new Elysia({ // Handle streaming vs non-streaming if (internalRequest.stream) { - // Streaming request - use yield for streaming responses + // Streaming request - return an async generator const result = await executeWithFailover( candidates, buildRequestForProvider, @@ -526,26 +668,42 @@ export const messagesApi = new Elysia({ extraHeaders, ); - try { - yield* processStreamingResponse( - result.response, - completion, - bearer, - providerType, - apiKeyRecord ?? null, - begin, - request.signal, - ); - } catch (error) { - // Don't log error if it's due to client abort - if (!request.signal.aborted) { - logger.error("Stream processing error", error); - set.status = 500; - yield `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "server_error", message: "Stream processing error" } })}\n\n`; + // Build ReqId context if we have a pre-created completion + const streamReqIdContext = preCreatedCompletionId && reqId + ? { + reqId, + apiKeyId: apiKeyRecord.id, + preCreatedCompletionId, + apiFormat, + } + : undefined; + + // Return an async generator for streaming + const streamResponse = result.response; + const streamSignal = request.signal; + return (async function* () { + try { + yield* processStreamingResponse( + streamResponse, + completion, + bearer, + providerType, + apiKeyRecord ?? null, + begin, + streamSignal, + streamReqIdContext, + ); + } catch (error) { + // Don't log error if it's due to client abort + if (!streamSignal.aborted) { + logger.error("Stream processing error", error); + set.status = 500; + yield `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "server_error", message: "Stream processing error" } })}\n\n`; + } } - } + })(); } else { - // Non-streaming request - use return for normal JSON response + // Non-streaming request - return JSON response directly const result = await executeWithFailover( candidates, buildRequestForProvider, @@ -595,6 +753,16 @@ export const messagesApi = new Elysia({ extraHeaders, ); + // Build ReqId context if we have a pre-created completion + const nonStreamReqIdContext = preCreatedCompletionId && reqId + ? { + reqId, + apiKeyId: apiKeyRecord.id, + preCreatedCompletionId, + apiFormat, + } + : undefined; + try { const response = await processNonStreamingResponse( result.response, @@ -604,6 +772,7 @@ export const messagesApi = new Elysia({ apiKeyRecord ?? null, begin, request.signal, + nonStreamReqIdContext, ); // Return parsed JSON object for proper content-type return JSON.parse(response) as Record; diff --git a/backend/src/api/v1/responses.ts b/backend/src/api/v1/responses.ts index d86b44f..345c205 100644 --- a/backend/src/api/v1/responses.ts +++ b/backend/src/api/v1/responses.ts @@ -30,6 +30,15 @@ import { selectMultipleCandidates, type FailoverConfig, } from "@/services/failover"; +import { + checkReqId, + finalizeReqId, + recordCacheHit, + buildInFlightErrorResponse, + extractReqId, + type ApiFormat, +} from "@/utils/reqIdHandler"; +import type { CachedResponseType } from "@/db/schema"; const logger = consola.withTag("responsesApi"); @@ -163,6 +172,16 @@ function buildCompletionRecord( }; } +/** + * ReqId context for request deduplication + */ +interface ReqIdContext { + reqId: string; + apiKeyId: number; + preCreatedCompletionId: number; + apiFormat: ApiFormat; +} + /** * Process a successful non-streaming response * Ensures completion is saved to database before returning @@ -175,6 +194,7 @@ async function processNonStreamingResponse( apiKeyRecord: ApiKey | null, begin: number, signal?: AbortSignal, + reqIdContext?: ReqIdContext, ): Promise { // Parse response using upstream adapter const upstreamAdapter = getUpstreamAdapter(providerType); @@ -196,20 +216,44 @@ async function processNonStreamingResponse( }, ]; + // Build cached response for ReqId deduplication + const cachedResponse: CachedResponseType = { + body: serialized, + format: "openai-responses", + }; + // Check if client disconnected during processing if (signal?.aborted) { completion.status = "aborted"; - await addCompletions(completion, bearer, { - level: "info", - message: "Client disconnected during non-streaming response", - details: { - type: "completionError", - data: { type: "aborted", msg: "Client disconnected" }, - }, - }); + if (reqIdContext) { + await finalizeReqId( + reqIdContext.apiKeyId, + reqIdContext.reqId, + reqIdContext.preCreatedCompletionId, + { ...completion, cachedResponse }, + ); + } else { + await addCompletions(completion, bearer, { + level: "info", + message: "Client disconnected during non-streaming response", + details: { + type: "completionError", + data: { type: "aborted", msg: "Client disconnected" }, + }, + }); + } } else { completion.status = "completed"; - await addCompletions(completion, bearer); + if (reqIdContext) { + await finalizeReqId( + reqIdContext.apiKeyId, + reqIdContext.reqId, + reqIdContext.preCreatedCompletionId, + { ...completion, cachedResponse }, + ); + } else { + await addCompletions(completion, bearer); + } } // Consume tokens for TPM rate limiting (post-flight) @@ -235,6 +279,7 @@ async function* processStreamingResponse( apiKeyRecord: ApiKey | null, begin: number, signal?: AbortSignal, + reqIdContext?: ReqIdContext, ): AsyncGenerator { // Get adapters const upstreamAdapter = getUpstreamAdapter(providerType); @@ -242,8 +287,40 @@ async function* processStreamingResponse( logger.debug("parse stream responses"); + // Build streaming ReqId context if provided + const streamingReqIdContext = reqIdContext + ? { + reqId: reqIdContext.reqId, + apiKeyId: reqIdContext.apiKeyId, + preCreatedCompletionId: reqIdContext.preCreatedCompletionId, + apiFormat: reqIdContext.apiFormat, + buildCachedResponse: (comp: Completion): CachedResponseType => { + // For streaming, build a complete non-streaming Response API response for cache + return { + body: { + id: `resp-cache-${reqIdContext.preCreatedCompletionId}`, + object: "response", + created_at: Math.floor(Date.now() / 1000), + model: comp.model, + output: comp.completion.map((c) => ({ + type: "message", + role: c.role || "assistant", + content: [{ type: "output_text", text: c.content || "" }], + })), + usage: { + input_tokens: comp.promptTokens, + output_tokens: comp.completionTokens, + total_tokens: comp.promptTokens + comp.completionTokens, + }, + }, + format: "openai-responses", + }; + }, + } + : undefined; + // Create streaming context with abort handling - const ctx = new StreamingContext(completion, bearer, apiKeyRecord, begin, signal); + const ctx = new StreamingContext(completion, bearer, apiKeyRecord, begin, signal, streamingReqIdContext); // Track whether we've logged the client abort (to avoid duplicate logs) let loggedAbort = false; @@ -395,7 +472,7 @@ export const responsesApi = new Elysia({ .use(rateLimitPlugin) .post( "/responses", - async function* ({ body, set, bearer, request, apiKeyRecord }) { + async function ({ body, set, bearer, request, apiKeyRecord }) { if (bearer === undefined) { set.status = 500; return { @@ -407,6 +484,10 @@ export const responsesApi = new Elysia({ const reqHeaders = request.headers; const begin = Date.now(); + // Extract ReqId for request deduplication + const reqId = extractReqId(reqHeaders); + const apiFormat: ApiFormat = "openai-responses"; + // Parse model@provider format and extract provider from header const { systemName, targetProvider } = parseModelProvider( body.model, @@ -457,6 +538,89 @@ export const responsesApi = new Elysia({ // Extract extra headers for passthrough const extraHeaders = extractUpstreamHeaders(reqHeaders); + // Check ReqId for deduplication (if provided) + const isStream = body.stream === true; + + // Convert input to messages format for storage + const inputMessages: Array<{ role: string; content: string }> = []; + if (typeof body.input === "string") { + inputMessages.push({ role: "user", content: body.input }); + } else if (Array.isArray(body.input)) { + for (const item of body.input) { + if (typeof item === "object" && item !== null) { + if (item.type === "message") { + inputMessages.push({ + role: item.role || "user", + content: typeof item.content === "string" ? item.content : JSON.stringify(item.content), + }); + } else if (item.type === "function_call_output") { + inputMessages.push({ + role: "tool", + content: item.output || "", + }); + } + } + } + } + + const reqIdResult = await checkReqId(reqId, { + apiKeyId: apiKeyRecord.id, + model: body.model, + modelId: candidates[0]?.model.id, + prompt: { + messages: inputMessages, + extraHeaders, + }, + apiFormat, + endpoint: "/v1/responses", + isStream, + }); + + // Handle cache hit - return cached response + if (reqIdResult.type === "cache_hit") { + const sourceCompletion = reqIdResult.completion; + await recordCacheHit(sourceCompletion, apiKeyRecord.id); + + if (sourceCompletion.cachedResponse) { + return sourceCompletion.cachedResponse.body as Record; + } + + // Fallback: reconstruct Response API response + return { + id: `resp-cache-${sourceCompletion.id}`, + object: "response", + created_at: Math.floor(sourceCompletion.createdAt.getTime() / 1000), + model: sourceCompletion.model, + output: sourceCompletion.completion.map((c) => ({ + type: "message", + role: c.role || "assistant", + content: [{ type: "output_text", text: c.content || "" }], + })), + usage: { + input_tokens: sourceCompletion.promptTokens, + output_tokens: sourceCompletion.completionTokens, + total_tokens: sourceCompletion.promptTokens + sourceCompletion.completionTokens, + }, + }; + } + + // Handle in-flight - return 409 Conflict + if (reqIdResult.type === "in_flight") { + set.status = 409; + set.headers["Retry-After"] = String(reqIdResult.retryAfter); + return buildInFlightErrorResponse( + reqId!, + reqIdResult.inFlight, + reqIdResult.retryAfter, + apiFormat, + ); + } + + // For new_request, we have a pre-created completionId + const preCreatedCompletionId = reqIdResult.type === "new_request" + ? reqIdResult.completionId + : null; + // Parse request using Response API adapter const requestAdapter = getRequestAdapter("openai-responses"); const internalRequest = requestAdapter.parse( @@ -484,7 +648,7 @@ export const responsesApi = new Elysia({ // Handle streaming vs non-streaming if (internalRequest.stream) { - // Streaming request - use yield for streaming responses + // Streaming request - return an async generator const result = await executeWithFailover( candidates, buildRequestForProvider, @@ -542,26 +706,42 @@ export const responsesApi = new Elysia({ extraHeaders, ); - try { - yield* processStreamingResponse( - result.response, - completion, - bearer, - providerType, - apiKeyRecord ?? null, - begin, - request.signal, - ); - } catch (error) { - // Don't log error if it's due to client abort - if (!request.signal.aborted) { - logger.error("Stream processing error", error); - set.status = 500; - yield `event: error\ndata: ${JSON.stringify({ type: "error", error: { code: "internal_error", message: "Stream processing error", param: null, help_url: null } })}\n\n`; + // Build ReqId context if we have a pre-created completion + const streamReqIdContext = preCreatedCompletionId && reqId + ? { + reqId, + apiKeyId: apiKeyRecord.id, + preCreatedCompletionId, + apiFormat, + } + : undefined; + + // Return an async generator for streaming + const streamResponse = result.response; + const streamSignal = request.signal; + return (async function* () { + try { + yield* processStreamingResponse( + streamResponse, + completion, + bearer, + providerType, + apiKeyRecord ?? null, + begin, + streamSignal, + streamReqIdContext, + ); + } catch (error) { + // Don't log error if it's due to client abort + if (!streamSignal.aborted) { + logger.error("Stream processing error", error); + set.status = 500; + yield `event: error\ndata: ${JSON.stringify({ type: "error", error: { code: "internal_error", message: "Stream processing error", param: null, help_url: null } })}\n\n`; + } } - } + })(); } else { - // Non-streaming request - use return for normal JSON response + // Non-streaming request - return JSON response directly const result = await executeWithFailover( candidates, buildRequestForProvider, @@ -611,6 +791,16 @@ export const responsesApi = new Elysia({ extraHeaders, ); + // Build ReqId context if we have a pre-created completion + const nonStreamReqIdContext = preCreatedCompletionId && reqId + ? { + reqId, + apiKeyId: apiKeyRecord.id, + preCreatedCompletionId, + apiFormat, + } + : undefined; + try { const response = await processNonStreamingResponse( result.response, @@ -620,6 +810,7 @@ export const responsesApi = new Elysia({ apiKeyRecord ?? null, begin, request.signal, + nonStreamReqIdContext, ); // Return parsed JSON object for proper content-type return JSON.parse(response) as Record; diff --git a/backend/src/db/index.ts b/backend/src/db/index.ts index d51bd5b..a5567f1 100644 --- a/backend/src/db/index.ts +++ b/backend/src/db/index.ts @@ -1257,3 +1257,84 @@ export async function getEmbeddingsTimeSeries( avg_duration: string; }[]; } + +// ============================================ +// ReqId Deduplication Operations +// ============================================ + +/** + * Find a completion by ReqId (for cache hit detection) + * Only returns completions that are not pending (completed, failed, aborted, cache_hit) + * @param apiKeyId the API key ID (ReqId is scoped per API key) + * @param reqId the client-provided request ID + * @returns completion record if found and not pending, null otherwise + */ +export async function findCompletionByReqId( + apiKeyId: number, + reqId: string, +): Promise { + logger.debug("findCompletionByReqId", apiKeyId, reqId); + const r = await db + .select() + .from(schema.CompletionsTable) + .where( + and( + eq(schema.CompletionsTable.apiKeyId, apiKeyId), + eq(schema.CompletionsTable.reqId, reqId), + not(schema.CompletionsTable.deleted), + // Only return non-pending completions (completed, failed, aborted, cache_hit) + not(eq(schema.CompletionsTable.status, "pending")), + ), + ) + .limit(1); + const [first] = r; + return first ?? null; +} + +/** + * Create a pending completion record with ReqId + * Used to reserve the ReqId before making the upstream request + * @param c completion data including reqId + * @returns the created completion record, null if ReqId already exists (unique constraint violation) + */ +export async function createPendingCompletion( + c: CompletionInsert, +): Promise { + logger.debug("createPendingCompletion", c.model, c.reqId); + try { + const r = await db + .insert(schema.CompletionsTable) + .values(c) + .returning(); + const [first] = r; + return first ?? null; + } catch (error) { + // Handle unique constraint violation (duplicate ReqId) + if (error instanceof Error && error.message.includes("unique")) { + logger.warn("Duplicate ReqId detected", c.reqId); + return null; + } + throw error; + } +} + +/** + * Update a completion record + * Used to update pending completions after upstream request completes + * @param id completion ID + * @param updates partial completion data to update + * @returns updated completion record, null if not found + */ +export async function updateCompletion( + id: number, + updates: Partial, +): Promise { + logger.debug("updateCompletion", id); + const r = await db + .update(schema.CompletionsTable) + .set({ ...updates, updatedAt: new Date() }) + .where(eq(schema.CompletionsTable.id, id)) + .returning(); + const [first] = r; + return first ?? null; +} diff --git a/backend/src/db/schema.ts b/backend/src/db/schema.ts index 5ea0561..e70d6d6 100644 --- a/backend/src/db/schema.ts +++ b/backend/src/db/schema.ts @@ -151,8 +151,18 @@ export const CompletionsStatusEnum = pgEnum("completions_status", [ "completed", "failed", "aborted", + "cache_hit", ]); -export type CompletionsStatusEnumType = "pending" | "completed" | "failed" | "aborted"; +export type CompletionsStatusEnumType = "pending" | "completed" | "failed" | "aborted" | "cache_hit"; + +/** + * Cached response type for ReqId deduplication + * Stores the serialized response for cache_hit returns + */ +export type CachedResponseType = { + body: unknown; + format: "openai-chat" | "openai-responses" | "anthropic"; +}; export const CompletionsTable = pgTable("completions", { id: integer("id").primaryKey().generatedAlwaysAsIdentity().unique(), @@ -176,6 +186,13 @@ export const CompletionsTable = pgTable("completions", { updatedAt: timestamp("updated_at").notNull().defaultNow(), deleted: boolean("deleted").notNull().default(false), rating: real("rating"), + // ReqId deduplication fields + reqId: varchar("req_id", { length: 127 }), + sourceCompletionId: integer("source_completion_id").references( + (): AnyPgColumn => CompletionsTable.id, + ), + apiFormat: varchar("api_format", { length: 31 }), + cachedResponse: jsonb("cached_response").$type(), }); export const SrvLogsLevelEnum = pgEnum("srv_logs_level", [ diff --git a/backend/src/utils/redisClient.ts b/backend/src/utils/redisClient.ts index dc3d086..2a8d3f8 100644 --- a/backend/src/utils/redisClient.ts +++ b/backend/src/utils/redisClient.ts @@ -98,6 +98,34 @@ class RedisClient { } } + /** + * Set a value in Redis only if the key does not exist (atomic SETNX) + * @param {string} key - Key to set + * @param {string | number} value - Value to store + * @param {number} ttlSeconds - Time to live in seconds + * @returns {Promise} true if the key was set, false if it already existed + */ + public async setnx( + key: string, + value: string | number, + ttlSeconds: number, + ): Promise { + try { + // SET key value EX ttl NX - sets only if key doesn't exist + const result = await this.client.set( + key, + value.toString(), + "EX", + ttlSeconds, + "NX", + ); + return result === "OK"; + } catch (error) { + logger.error(`Redis setnx error: ${(error as Error).message}`); + return false; + } + } + /** * Execute a Lua script atomically * @param {string} script - Lua script to execute diff --git a/backend/src/utils/reqIdCache.ts b/backend/src/utils/reqIdCache.ts new file mode 100644 index 0000000..91336ad --- /dev/null +++ b/backend/src/utils/reqIdCache.ts @@ -0,0 +1,159 @@ +/** + * Redis-based In-Flight request tracking for ReqId deduplication + * + * Tracks requests that are currently being processed to prevent duplicate + * concurrent requests with the same ReqId. + */ + +import { consola } from "consola"; +import { redisClient } from "./redisClient"; + +const logger = consola.withTag("reqIdCache"); + +/** + * In-flight request data stored in Redis + */ +export interface InFlightRequest { + completionId: number; + startTime: number; // Unix timestamp in milliseconds + isStream: boolean; + endpoint: string; +} + +// Redis key prefix for in-flight requests +const KEY_PREFIX = "reqid:inflight"; + +// TTL for in-flight markers (10 minutes) +// This prevents orphan keys if the server crashes during request processing +const IN_FLIGHT_TTL_SECONDS = 600; + +// Estimated request durations for Retry-After calculation +const ESTIMATED_STREAM_DURATION_MS = 60000; // 60 seconds for streaming +const ESTIMATED_NON_STREAM_DURATION_MS = 30000; // 30 seconds for non-streaming + +/** + * Build the Redis key for an in-flight request + */ +function buildKey(apiKeyId: number, reqId: string): string { + return `${KEY_PREFIX}:${apiKeyId}:${reqId}`; +} + +/** + * Mark a request as in-flight (atomically using SETNX) + * + * @param apiKeyId - The API key ID + * @param reqId - The client-provided request ID + * @param completionId - The database completion ID + * @param endpoint - The API endpoint being called + * @param isStream - Whether this is a streaming request + * @returns true if successfully marked (request is new), false if already in-flight + */ +export async function markInFlight( + apiKeyId: number, + reqId: string, + completionId: number, + endpoint: string, + isStream: boolean, +): Promise { + const key = buildKey(apiKeyId, reqId); + const data: InFlightRequest = { + completionId, + startTime: Date.now(), + isStream, + endpoint, + }; + + try { + const success = await redisClient.setnx( + key, + JSON.stringify(data), + IN_FLIGHT_TTL_SECONDS, + ); + + if (success) { + logger.debug("Marked request as in-flight", { apiKeyId, reqId, completionId }); + } else { + logger.debug("Request already in-flight", { apiKeyId, reqId }); + } + + return success; + } catch (error) { + logger.error("Failed to mark request as in-flight", error); + // Return false to be safe - treat as if already in-flight + return false; + } +} + +/** + * Get in-flight request data + * + * @param apiKeyId - The API key ID + * @param reqId - The client-provided request ID + * @returns The in-flight request data, or null if not in-flight + */ +export async function getInFlight( + apiKeyId: number, + reqId: string, +): Promise { + const key = buildKey(apiKeyId, reqId); + + try { + const data = await redisClient.get(key); + if (!data) { + return null; + } + + return JSON.parse(data) as InFlightRequest; + } catch (error) { + logger.error("Failed to get in-flight request", error); + return null; + } +} + +/** + * Clear the in-flight marker for a request + * + * Should be called when a request completes (successfully or with error) + * + * @param apiKeyId - The API key ID + * @param reqId - The client-provided request ID + */ +export async function clearInFlight( + apiKeyId: number, + reqId: string, +): Promise { + const key = buildKey(apiKeyId, reqId); + + try { + await redisClient.del(key); + logger.debug("Cleared in-flight marker", { apiKeyId, reqId }); + } catch (error) { + logger.error("Failed to clear in-flight marker", error); + // Non-critical - the TTL will eventually expire the key + } +} + +/** + * Calculate the recommended Retry-After value based on in-flight request state + * + * @param inFlight - The in-flight request data + * @returns Retry-After value in seconds (minimum 1) + */ +export function calculateRetryAfter(inFlight: InFlightRequest): number { + const elapsed = Date.now() - inFlight.startTime; + const estimatedTotal = inFlight.isStream + ? ESTIMATED_STREAM_DURATION_MS + : ESTIMATED_NON_STREAM_DURATION_MS; + + const remainingMs = Math.max(estimatedTotal - elapsed, 1000); + return Math.ceil(remainingMs / 1000); +} + +/** + * Check if Redis is available for in-flight tracking + * + * @returns true if Redis is connected and ready + */ +export function isRedisAvailable(): boolean { + return redisClient.isConnected(); +} diff --git a/backend/src/utils/reqIdHandler.ts b/backend/src/utils/reqIdHandler.ts new file mode 100644 index 0000000..83c54a7 --- /dev/null +++ b/backend/src/utils/reqIdHandler.ts @@ -0,0 +1,325 @@ +/** + * ReqId Handler - Main logic for request deduplication + * + * Handles the full lifecycle of ReqId-based request deduplication: + * 1. Check if request is a cache hit (completed/failed/aborted) + * 2. Check if request is currently in-flight + * 3. Create new pending requests + * 4. Finalize requests after completion + */ + +import { consola } from "consola"; +import { + findCompletionByReqId, + createPendingCompletion, + updateCompletion, + insertCompletion, + type Completion, + type CompletionInsert, +} from "@/db"; +import { + markInFlight, + getInFlight, + clearInFlight, + calculateRetryAfter, + isRedisAvailable, + type InFlightRequest, +} from "./reqIdCache"; +import type { CachedResponseType } from "@/db/schema"; + +const logger = consola.withTag("reqIdHandler"); + +/** + * HTTP header name for client-provided request ID + */ +export const REQID_HEADER = "x-nexusgate-reqid"; + +/** + * API format types + */ +export type ApiFormat = "openai-chat" | "openai-responses" | "anthropic"; + +/** + * Result types for ReqId check + */ +export type ReqIdCheckResult = + | { type: "cache_hit"; completion: Completion } + | { type: "in_flight"; inFlight: InFlightRequest; retryAfter: number } + | { type: "new_request"; completionId: number } + | { type: "no_reqid" }; // No ReqId provided - proceed normally + +/** + * Data needed to create a pending completion + */ +export interface PendingCompletionData { + apiKeyId: number; + model: string; + modelId?: number; + prompt: CompletionInsert["prompt"]; + apiFormat: ApiFormat; + endpoint: string; + isStream: boolean; +} + +/** + * Check ReqId status and determine how to handle the request + * + * Flow: + * 1. If no ReqId provided, return no_reqid (proceed normally) + * 2. Check database for completed request with this ReqId + * 3. Check Redis for in-flight request with this ReqId + * 4. Create new pending request and mark as in-flight + * + * @param reqId - The client-provided request ID (from header) + * @param data - Pending completion data + * @returns Check result indicating how to proceed + */ +export async function checkReqId( + reqId: string | null, + data: PendingCompletionData, +): Promise { + // No ReqId provided - proceed with normal request flow + if (!reqId) { + return { type: "no_reqid" }; + } + + const { apiKeyId, model, modelId, prompt, apiFormat, endpoint, isStream } = data; + + // Step 1: Check database for existing completed request + const existingCompletion = await findCompletionByReqId(apiKeyId, reqId); + if (existingCompletion) { + logger.info("Cache hit for ReqId", { reqId, completionId: existingCompletion.id }); + return { type: "cache_hit", completion: existingCompletion }; + } + + // Step 2: Check Redis for in-flight request + if (isRedisAvailable()) { + const inFlight = await getInFlight(apiKeyId, reqId); + if (inFlight) { + const retryAfter = calculateRetryAfter(inFlight); + logger.info("Request in-flight for ReqId", { reqId, retryAfter }); + return { type: "in_flight", inFlight, retryAfter }; + } + } else { + logger.warn("Redis unavailable, skipping in-flight check"); + } + + // Step 3: Create pending completion and mark as in-flight + const pendingData: CompletionInsert = { + apiKeyId, + model, + modelId, + prompt, + promptTokens: -1, + completion: [], + completionTokens: -1, + status: "pending", + ttft: -1, + duration: -1, + reqId, + apiFormat, + }; + + const newCompletion = await createPendingCompletion(pendingData); + + if (!newCompletion) { + // Unique constraint violation - another request beat us to it + // Re-check database (might have completed) or Redis (might be in-flight) + logger.warn("Failed to create pending completion, re-checking state", { reqId }); + + const recheck = await findCompletionByReqId(apiKeyId, reqId); + if (recheck) { + return { type: "cache_hit", completion: recheck }; + } + + if (isRedisAvailable()) { + const inFlight = await getInFlight(apiKeyId, reqId); + if (inFlight) { + const retryAfter = calculateRetryAfter(inFlight); + return { type: "in_flight", inFlight, retryAfter }; + } + } + + // Shouldn't happen, but treat as in-flight with default retry + logger.error("Unexpected state: ReqId exists but not found", { reqId }); + return { + type: "in_flight", + inFlight: { + completionId: 0, + startTime: Date.now(), + isStream, + endpoint, + }, + retryAfter: 5, + }; + } + + // Step 4: Mark as in-flight in Redis + if (isRedisAvailable()) { + const marked = await markInFlight( + apiKeyId, + reqId, + newCompletion.id, + endpoint, + isStream, + ); + + if (!marked) { + // Another process beat us - this shouldn't happen since we have DB unique constraint + // But handle gracefully + logger.warn("Failed to mark in-flight after DB insert", { reqId }); + } + } + + logger.debug("Created new pending request", { reqId, completionId: newCompletion.id }); + return { type: "new_request", completionId: newCompletion.id }; +} + +/** + * Finalize a request after it completes + * + * Updates the completion record and clears the in-flight marker + * + * @param apiKeyId - The API key ID + * @param reqId - The client-provided request ID + * @param completionId - The completion ID to update + * @param updates - Completion updates (status, tokens, response, etc.) + */ +export async function finalizeReqId( + apiKeyId: number, + reqId: string, + completionId: number, + updates: Partial & { cachedResponse?: CachedResponseType }, +): Promise { + try { + // Update completion record + await updateCompletion(completionId, updates); + + // Clear in-flight marker + if (isRedisAvailable()) { + await clearInFlight(apiKeyId, reqId); + } + + logger.debug("Finalized request", { reqId, completionId, status: updates.status }); + } catch (error) { + logger.error("Failed to finalize request", { reqId, completionId, error }); + // Still try to clear in-flight marker + if (isRedisAvailable()) { + await clearInFlight(apiKeyId, reqId); + } + throw error; + } +} + +/** + * Build a cache_hit completion record + * + * Creates a new completion record that references the source completion + * + * @param sourceCompletion - The original completion that was cached + * @param apiKeyId - The API key ID for the new request + * @returns CompletionInsert for the cache_hit record + */ +export function buildCacheHitRecord( + sourceCompletion: Completion, + apiKeyId: number, +): CompletionInsert { + return { + apiKeyId, + model: sourceCompletion.model, + modelId: sourceCompletion.modelId, + upstreamId: sourceCompletion.upstreamId, + prompt: sourceCompletion.prompt, + promptTokens: 0, // Cache hit doesn't consume tokens + completion: sourceCompletion.completion, + completionTokens: 0, // Cache hit doesn't consume tokens + status: "cache_hit", + ttft: 0, + duration: 0, + reqId: sourceCompletion.reqId, + sourceCompletionId: sourceCompletion.id, + apiFormat: sourceCompletion.apiFormat, + }; +} + +/** + * Record a cache hit in the database + * + * @param sourceCompletion - The original completion that was cached + * @param apiKeyId - The API key ID for the new request + * @returns The created cache_hit completion record + */ +export async function recordCacheHit( + sourceCompletion: Completion, + apiKeyId: number, +): Promise { + const record = buildCacheHitRecord(sourceCompletion, apiKeyId); + // Use insertCompletion instead of createPendingCompletion to avoid ReqId conflicts + // since cache_hit records share the same ReqId as the source + return await insertCompletion({ + ...record, + reqId: null, // Don't set ReqId on cache_hit records to avoid unique constraint + }); +} + +/** + * Build the 409 Conflict error response for in-flight requests + * + * @param reqId - The client-provided request ID + * @param inFlight - The in-flight request data + * @param retryAfter - Retry-After value in seconds + * @param format - The API format for response formatting + * @returns Error response object + */ +export function buildInFlightErrorResponse( + reqId: string, + inFlight: InFlightRequest, + retryAfter: number, + format: ApiFormat, +): Record { + const startedAt = new Date(inFlight.startTime).toISOString(); + + if (format === "anthropic") { + return { + type: "error", + error: { + type: "conflict", + message: "A request with this X-NexusGate-ReqId is already being processed", + req_id: reqId, + retry_after: retryAfter, + started_at: startedAt, + }, + }; + } + + // OpenAI format (openai-chat, openai-responses) + return { + error: { + code: "request_in_flight", + message: "A request with this X-NexusGate-ReqId is already being processed", + type: "conflict", + req_id: reqId, + retry_after: retryAfter, + started_at: startedAt, + }, + }; +} + +/** + * Extract ReqId from request headers + * + * @param headers - Request headers + * @returns The ReqId value or null if not present + */ +export function extractReqId(headers: Headers): string | null { + const reqId = headers.get(REQID_HEADER); + if (!reqId || reqId.trim() === "") { + return null; + } + // Validate ReqId length (max 127 chars as per schema) + if (reqId.length > 127) { + logger.warn("ReqId too long, truncating", { length: reqId.length }); + return reqId.substring(0, 127); + } + return reqId.trim(); +} diff --git a/backend/src/utils/streaming-context.ts b/backend/src/utils/streaming-context.ts index 4554f46..e30f01e 100644 --- a/backend/src/utils/streaming-context.ts +++ b/backend/src/utils/streaming-context.ts @@ -4,12 +4,26 @@ */ import type { + CachedResponseType, CompletionsStatusEnumType, ToolCallType, } from "@/db/schema"; import { addCompletions, type Completion } from "@/utils/completions"; import { consumeTokens } from "@/plugins/apiKeyRateLimitPlugin"; import type { ApiKey } from "@/plugins/apiKeyPlugin"; +import { finalizeReqId } from "@/utils/reqIdHandler"; + +/** + * ReqId context for request deduplication + */ +export interface StreamingReqIdContext { + reqId: string; + apiKeyId: number; + preCreatedCompletionId: number; + apiFormat: "openai-chat" | "openai-responses" | "anthropic"; + /** Function to build the cached response from accumulated data */ + buildCachedResponse?: (completion: Completion) => CachedResponseType; +} /** * StreamingContext manages the state of a streaming response. @@ -22,6 +36,7 @@ export class StreamingContext { private begin: number; private saved = false; private signal?: AbortSignal; + private reqIdContext?: StreamingReqIdContext; // Accumulated data during streaming textParts: string[] = []; @@ -41,12 +56,14 @@ export class StreamingContext { apiKeyRecord: ApiKey | null, begin: number, signal?: AbortSignal, + reqIdContext?: StreamingReqIdContext, ) { this.completion = completion; this.bearer = bearer; this.apiKeyRecord = apiKeyRecord; this.begin = begin; this.signal = signal; + this.reqIdContext = reqIdContext; // Note: We don't save immediately on abort anymore. // Instead, we continue processing chunks from upstream and save the full @@ -112,8 +129,21 @@ export class StreamingContext { this.completion.ttft = this.ttft; this.completion.duration = Date.now() - this.begin; - // Save to database - if (error) { + // Save to database - use finalizeReqId if ReqId context is present + if (this.reqIdContext) { + // Build cached response if callback is provided + const cachedResponse = this.reqIdContext.buildCachedResponse?.(this.completion); + + await finalizeReqId( + this.reqIdContext.apiKeyId, + this.reqIdContext.reqId, + this.reqIdContext.preCreatedCompletionId, + { + ...this.completion, + cachedResponse, + }, + ); + } else if (error) { await addCompletions(this.completion, this.bearer, { level: status === "aborted" ? "info" : "error", message: `Stream ${status}: ${error}`, diff --git a/frontend/src/i18n/locales/en-US.json b/frontend/src/i18n/locales/en-US.json index 8611ccc..6a3d98a 100644 --- a/frontend/src/i18n/locales/en-US.json +++ b/frontend/src/i18n/locales/en-US.json @@ -94,6 +94,7 @@ "pages.requests.columns.Completed": "Completed", "pages.requests.columns.Failed": "Failed", "pages.requests.columns.Aborted": "Aborted", + "pages.requests.columns.CacheHit": "Cache Hit", "pages.requests.columns.Model": "Model", "pages.requests.columns.TTFT": "TTFT", "pages.requests.columns.TimeToFirstToken": "Time To First Token", @@ -120,6 +121,7 @@ "pages.requests.detail-panel.header.Completed": "Completed", "pages.requests.detail-panel.header.Failed": "Failed", "pages.requests.detail-panel.header.Aborted": "Aborted", + "pages.requests.detail-panel.header.CacheHit": "Cache Hit", "pages.requests.detail-panel.header.ClosePanel": "Close panel", "pages.requests.detail-panel.index.Close": "Close", "pages.requests.detail-panel.index.Retry": "Retry", diff --git a/frontend/src/i18n/locales/zh-CN.json b/frontend/src/i18n/locales/zh-CN.json index 61780ac..d2b3064 100644 --- a/frontend/src/i18n/locales/zh-CN.json +++ b/frontend/src/i18n/locales/zh-CN.json @@ -95,6 +95,7 @@ "pages.requests.columns.Completed": "已完成", "pages.requests.columns.Failed": "失败", "pages.requests.columns.Aborted": "已中止", + "pages.requests.columns.CacheHit": "缓存命中", "pages.requests.columns.Model": "模型", "pages.requests.columns.TTFT": "TTFT", "pages.requests.columns.TimeToFirstToken": "首 Token 返回时间", @@ -121,6 +122,7 @@ "pages.requests.detail-panel.header.Completed": "已完成", "pages.requests.detail-panel.header.Failed": "失败", "pages.requests.detail-panel.header.Aborted": "已中止", + "pages.requests.detail-panel.header.CacheHit": "缓存命中", "pages.requests.detail-panel.header.ClosePanel": "关闭面板", "pages.requests.detail-panel.index.Close": "关闭", "pages.requests.detail-panel.index.Retry": "重试", diff --git a/frontend/src/pages/requests/columns.tsx b/frontend/src/pages/requests/columns.tsx index d6eb2f5..1df9a03 100644 --- a/frontend/src/pages/requests/columns.tsx +++ b/frontend/src/pages/requests/columns.tsx @@ -44,6 +44,9 @@ export const columns: ColumnDef[] = [ .with('aborted', () => ( {i18n.t('pages.requests.columns.Aborted')} )) + .with('cache_hit', () => ( + {i18n.t('pages.requests.columns.CacheHit')} + )) .exhaustive() return (
diff --git a/frontend/src/pages/requests/detail-panel/header.tsx b/frontend/src/pages/requests/detail-panel/header.tsx index 587b65d..f44f5e8 100644 --- a/frontend/src/pages/requests/detail-panel/header.tsx +++ b/frontend/src/pages/requests/detail-panel/header.tsx @@ -67,6 +67,11 @@ function StatusIndicator({ status }: { status: ChatRequest['status'] }) { {t('pages.requests.detail-panel.header.Aborted')} )) + .with('cache_hit', () => ( + + {t('pages.requests.detail-panel.header.CacheHit')} + + )) .exhaustive() } From 506d717368d47d6879cc05b8ad80dc63aa175168 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= Date: Sun, 25 Jan 2026 02:20:49 +0800 Subject: [PATCH 2/9] fix: address PR review feedback - Simplify buildCacheHitRecord by removing reqId field (was immediately overwritten) - Fix extractReqId to trim before length validation - Remove ineffective set.status=500 in streaming error handlers (status cannot change after streaming starts) Co-Authored-By: Claude Opus 4.5 --- backend/src/api/v1/completions.ts | 2 +- backend/src/api/v1/messages.ts | 2 +- backend/src/api/v1/responses.ts | 2 +- backend/src/utils/reqIdHandler.ts | 25 +++++++++++++------------ 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/backend/src/api/v1/completions.ts b/backend/src/api/v1/completions.ts index 41e32e1..53ba482 100644 --- a/backend/src/api/v1/completions.ts +++ b/backend/src/api/v1/completions.ts @@ -737,7 +737,7 @@ export const completionsApi = new Elysia({ // Don't log error if it's due to client abort if (!streamSignal.aborted) { logger.error("Stream processing error", error); - set.status = 500; + // Note: HTTP status cannot be changed after streaming has started yield JSON.stringify({ error: "Stream processing error" }); } } diff --git a/backend/src/api/v1/messages.ts b/backend/src/api/v1/messages.ts index 64024f7..81d614f 100644 --- a/backend/src/api/v1/messages.ts +++ b/backend/src/api/v1/messages.ts @@ -697,7 +697,7 @@ export const messagesApi = new Elysia({ // Don't log error if it's due to client abort if (!streamSignal.aborted) { logger.error("Stream processing error", error); - set.status = 500; + // Note: HTTP status cannot be changed after streaming has started yield `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "server_error", message: "Stream processing error" } })}\n\n`; } } diff --git a/backend/src/api/v1/responses.ts b/backend/src/api/v1/responses.ts index 345c205..4c63b47 100644 --- a/backend/src/api/v1/responses.ts +++ b/backend/src/api/v1/responses.ts @@ -735,7 +735,7 @@ export const responsesApi = new Elysia({ // Don't log error if it's due to client abort if (!streamSignal.aborted) { logger.error("Stream processing error", error); - set.status = 500; + // Note: HTTP status cannot be changed after streaming has started yield `event: error\ndata: ${JSON.stringify({ type: "error", error: { code: "internal_error", message: "Stream processing error", param: null, help_url: null } })}\n\n`; } } diff --git a/backend/src/utils/reqIdHandler.ts b/backend/src/utils/reqIdHandler.ts index 83c54a7..52832ce 100644 --- a/backend/src/utils/reqIdHandler.ts +++ b/backend/src/utils/reqIdHandler.ts @@ -236,7 +236,8 @@ export function buildCacheHitRecord( status: "cache_hit", ttft: 0, duration: 0, - reqId: sourceCompletion.reqId, + // Note: reqId is intentionally omitted to avoid unique constraint violations + // cache_hit records don't need their own reqId since they reference sourceCompletionId sourceCompletionId: sourceCompletion.id, apiFormat: sourceCompletion.apiFormat, }; @@ -254,12 +255,7 @@ export async function recordCacheHit( apiKeyId: number, ): Promise { const record = buildCacheHitRecord(sourceCompletion, apiKeyId); - // Use insertCompletion instead of createPendingCompletion to avoid ReqId conflicts - // since cache_hit records share the same ReqId as the source - return await insertCompletion({ - ...record, - reqId: null, // Don't set ReqId on cache_hit records to avoid unique constraint - }); + return await insertCompletion(record); } /** @@ -313,13 +309,18 @@ export function buildInFlightErrorResponse( */ export function extractReqId(headers: Headers): string | null { const reqId = headers.get(REQID_HEADER); - if (!reqId || reqId.trim() === "") { + if (!reqId) { + return null; + } + // Trim first, then validate + const trimmedReqId = reqId.trim(); + if (trimmedReqId === "") { return null; } // Validate ReqId length (max 127 chars as per schema) - if (reqId.length > 127) { - logger.warn("ReqId too long, truncating", { length: reqId.length }); - return reqId.substring(0, 127); + if (trimmedReqId.length > 127) { + logger.warn("ReqId too long, truncating", { length: trimmedReqId.length }); + return trimmedReqId.substring(0, 127); } - return reqId.trim(); + return trimmedReqId; } From b4f301310d8ac9d4aea1a8ca50e88be3832fb6ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= Date: Sun, 25 Jan 2026 02:39:53 +0800 Subject: [PATCH 3/9] fix(api): finalize pre-created completion on failure paths When ReqId is used and a pending completion is pre-created, failure paths (failover error, internal error, no response body) must finalize the completion to prevent ReqId from being stuck in pending/in-flight state. This ensures subsequent requests with the same ReqId can succeed. Co-Authored-By: Claude Opus 4.5 --- backend/src/api/v1/completions.ts | 57 +++++++++++++++++++++++++++++++ backend/src/api/v1/messages.ts | 57 +++++++++++++++++++++++++++++++ backend/src/api/v1/responses.ts | 57 +++++++++++++++++++++++++++++++ 3 files changed, 171 insertions(+) diff --git a/backend/src/api/v1/completions.ts b/backend/src/api/v1/completions.ts index 53ba482..abd500e 100644 --- a/backend/src/api/v1/completions.ts +++ b/backend/src/api/v1/completions.ts @@ -672,6 +672,18 @@ export const completionsApi = new Elysia({ const errorResult = await processFailoverError(result, completion, bearer, "streaming"); + // Finalize pre-created completion if ReqId was used + if (preCreatedCompletionId && reqId && apiKeyRecord) { + await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { + status: "failed", + promptTokens: 0, + completionTokens: 0, + completion: [], + ttft: -1, + duration: Date.now() - begin, + }); + } + if (errorResult.type === "upstream_error") { set.status = errorResult.status; return JSON.parse(errorResult.body) as Record; @@ -688,11 +700,33 @@ export const completionsApi = new Elysia({ } if (!result.response || !result.provider) { + // Finalize pre-created completion if ReqId was used + if (preCreatedCompletionId && reqId && apiKeyRecord) { + await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { + status: "failed", + promptTokens: 0, + completionTokens: 0, + completion: [], + ttft: -1, + duration: Date.now() - begin, + }); + } set.status = 500; return { error: "Internal server error" }; } if (!result.response.body) { + // Finalize pre-created completion if ReqId was used + if (preCreatedCompletionId && reqId && apiKeyRecord) { + await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { + status: "failed", + promptTokens: 0, + completionTokens: 0, + completion: [], + ttft: -1, + duration: Date.now() - begin, + }); + } set.status = 500; return { error: "No body in response" }; } @@ -763,6 +797,18 @@ export const completionsApi = new Elysia({ const errorResult = await processFailoverError(result, completion, bearer, "non-streaming"); + // Finalize pre-created completion if ReqId was used + if (preCreatedCompletionId && reqId && apiKeyRecord) { + await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { + status: "failed", + promptTokens: 0, + completionTokens: 0, + completion: [], + ttft: -1, + duration: Date.now() - begin, + }); + } + if (errorResult.type === "upstream_error") { set.status = errorResult.status; return JSON.parse(errorResult.body) as Record; @@ -779,6 +825,17 @@ export const completionsApi = new Elysia({ } if (!result.response || !result.provider) { + // Finalize pre-created completion if ReqId was used + if (preCreatedCompletionId && reqId && apiKeyRecord) { + await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { + status: "failed", + promptTokens: 0, + completionTokens: 0, + completion: [], + ttft: -1, + duration: Date.now() - begin, + }); + } set.status = 500; return { error: "Internal server error" }; } diff --git a/backend/src/api/v1/messages.ts b/backend/src/api/v1/messages.ts index 81d614f..67678d1 100644 --- a/backend/src/api/v1/messages.ts +++ b/backend/src/api/v1/messages.ts @@ -628,6 +628,18 @@ export const messagesApi = new Elysia({ const errorResult = await processFailoverError(result, completion, bearer, "streaming"); + // Finalize pre-created completion if ReqId was used + if (preCreatedCompletionId && reqId && apiKeyRecord) { + await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { + status: "failed", + promptTokens: 0, + completionTokens: 0, + completion: [], + ttft: -1, + duration: Date.now() - begin, + }); + } + if (errorResult.type === "upstream_error") { set.status = errorResult.status; return JSON.parse(errorResult.body) as Record; @@ -644,6 +656,17 @@ export const messagesApi = new Elysia({ } if (!result.response || !result.provider) { + // Finalize pre-created completion if ReqId was used + if (preCreatedCompletionId && reqId && apiKeyRecord) { + await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { + status: "failed", + promptTokens: 0, + completionTokens: 0, + completion: [], + ttft: -1, + duration: Date.now() - begin, + }); + } set.status = 500; return { type: "error", @@ -652,6 +675,17 @@ export const messagesApi = new Elysia({ } if (!result.response.body) { + // Finalize pre-created completion if ReqId was used + if (preCreatedCompletionId && reqId && apiKeyRecord) { + await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { + status: "failed", + promptTokens: 0, + completionTokens: 0, + completion: [], + ttft: -1, + duration: Date.now() - begin, + }); + } set.status = 500; return { type: "error", @@ -721,6 +755,18 @@ export const messagesApi = new Elysia({ const errorResult = await processFailoverError(result, completion, bearer, "non-streaming"); + // Finalize pre-created completion if ReqId was used + if (preCreatedCompletionId && reqId && apiKeyRecord) { + await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { + status: "failed", + promptTokens: 0, + completionTokens: 0, + completion: [], + ttft: -1, + duration: Date.now() - begin, + }); + } + if (errorResult.type === "upstream_error") { set.status = errorResult.status; return JSON.parse(errorResult.body) as Record; @@ -737,6 +783,17 @@ export const messagesApi = new Elysia({ } if (!result.response || !result.provider) { + // Finalize pre-created completion if ReqId was used + if (preCreatedCompletionId && reqId && apiKeyRecord) { + await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { + status: "failed", + promptTokens: 0, + completionTokens: 0, + completion: [], + ttft: -1, + duration: Date.now() - begin, + }); + } set.status = 500; return { type: "error", diff --git a/backend/src/api/v1/responses.ts b/backend/src/api/v1/responses.ts index 4c63b47..8341d7e 100644 --- a/backend/src/api/v1/responses.ts +++ b/backend/src/api/v1/responses.ts @@ -666,6 +666,18 @@ export const responsesApi = new Elysia({ const errorResult = await processFailoverError(result, completion, bearer, "streaming"); + // Finalize pre-created completion if ReqId was used + if (preCreatedCompletionId && reqId && apiKeyRecord) { + await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { + status: "failed", + promptTokens: 0, + completionTokens: 0, + completion: [], + ttft: -1, + duration: Date.now() - begin, + }); + } + if (errorResult.type === "upstream_error") { set.status = errorResult.status; return JSON.parse(errorResult.body) as Record; @@ -682,6 +694,17 @@ export const responsesApi = new Elysia({ } if (!result.response || !result.provider) { + // Finalize pre-created completion if ReqId was used + if (preCreatedCompletionId && reqId && apiKeyRecord) { + await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { + status: "failed", + promptTokens: 0, + completionTokens: 0, + completion: [], + ttft: -1, + duration: Date.now() - begin, + }); + } set.status = 500; return { object: "error", @@ -690,6 +713,17 @@ export const responsesApi = new Elysia({ } if (!result.response.body) { + // Finalize pre-created completion if ReqId was used + if (preCreatedCompletionId && reqId && apiKeyRecord) { + await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { + status: "failed", + promptTokens: 0, + completionTokens: 0, + completion: [], + ttft: -1, + duration: Date.now() - begin, + }); + } set.status = 500; return { object: "error", @@ -759,6 +793,18 @@ export const responsesApi = new Elysia({ const errorResult = await processFailoverError(result, completion, bearer, "non-streaming"); + // Finalize pre-created completion if ReqId was used + if (preCreatedCompletionId && reqId && apiKeyRecord) { + await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { + status: "failed", + promptTokens: 0, + completionTokens: 0, + completion: [], + ttft: -1, + duration: Date.now() - begin, + }); + } + if (errorResult.type === "upstream_error") { set.status = errorResult.status; return JSON.parse(errorResult.body) as Record; @@ -775,6 +821,17 @@ export const responsesApi = new Elysia({ } if (!result.response || !result.provider) { + // Finalize pre-created completion if ReqId was used + if (preCreatedCompletionId && reqId && apiKeyRecord) { + await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { + status: "failed", + promptTokens: 0, + completionTokens: 0, + completion: [], + ttft: -1, + duration: Date.now() - begin, + }); + } set.status = 500; return { object: "error", From a30105d961147fc6df8ca0bd9a43c4d0eb2836bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= Date: Sun, 25 Jan 2026 02:48:40 +0800 Subject: [PATCH 4/9] fix(api): improve ReqId handling robustness - Wrap recordCacheHit in try-catch to prevent cache recording failures from blocking cached response delivery - Replace reqId! non-null assertions with explicit null checks and invariant error throws for better type safety - Add documentation about race conditions when Redis is unavailable Co-Authored-By: Claude Opus 4.5 --- backend/src/api/v1/completions.ts | 14 +++++++++++--- backend/src/api/v1/messages.ts | 13 +++++++++++-- backend/src/api/v1/responses.ts | 13 +++++++++++-- backend/src/utils/reqIdHandler.ts | 9 ++++++++- 4 files changed, 41 insertions(+), 8 deletions(-) diff --git a/backend/src/api/v1/completions.ts b/backend/src/api/v1/completions.ts index abd500e..719c63b 100644 --- a/backend/src/api/v1/completions.ts +++ b/backend/src/api/v1/completions.ts @@ -570,8 +570,12 @@ export const completionsApi = new Elysia({ // Handle cache hit - return cached response if (reqIdResult.type === "cache_hit") { const sourceCompletion = reqIdResult.completion; - // Record the cache hit - await recordCacheHit(sourceCompletion, apiKeyRecord.id); + // Record the cache hit (best-effort; do not block returning cached response) + try { + await recordCacheHit(sourceCompletion, apiKeyRecord.id); + } catch (error) { + logger.error("Failed to record cache hit", error); + } // Return cached response if (sourceCompletion.cachedResponse) { @@ -604,10 +608,14 @@ export const completionsApi = new Elysia({ // Handle in-flight - return 409 Conflict if (reqIdResult.type === "in_flight") { + // reqId is guaranteed non-null here since checkReqId only returns in_flight when reqId is provided + if (!reqId) { + throw new Error("Invariant violated: reqId is null for in_flight result"); + } set.status = 409; set.headers["Retry-After"] = String(reqIdResult.retryAfter); return buildInFlightErrorResponse( - reqId!, + reqId, reqIdResult.inFlight, reqIdResult.retryAfter, apiFormat, diff --git a/backend/src/api/v1/messages.ts b/backend/src/api/v1/messages.ts index 67678d1..c0575e5 100644 --- a/backend/src/api/v1/messages.ts +++ b/backend/src/api/v1/messages.ts @@ -542,7 +542,12 @@ export const messagesApi = new Elysia({ // Handle cache hit - return cached response if (reqIdResult.type === "cache_hit") { const sourceCompletion = reqIdResult.completion; - await recordCacheHit(sourceCompletion, apiKeyRecord.id); + // Record the cache hit (best-effort; do not block returning cached response) + try { + await recordCacheHit(sourceCompletion, apiKeyRecord.id); + } catch (error) { + logger.warn("Failed to record cache hit", error); + } if (sourceCompletion.cachedResponse) { return sourceCompletion.cachedResponse.body as Record; @@ -568,10 +573,14 @@ export const messagesApi = new Elysia({ // Handle in-flight - return 409 Conflict if (reqIdResult.type === "in_flight") { + // reqId is guaranteed non-null here since checkReqId only returns in_flight when reqId is provided + if (!reqId) { + throw new Error("Invariant violated: reqId is null for in_flight result"); + } set.status = 409; set.headers["Retry-After"] = String(reqIdResult.retryAfter); return buildInFlightErrorResponse( - reqId!, + reqId, reqIdResult.inFlight, reqIdResult.retryAfter, apiFormat, diff --git a/backend/src/api/v1/responses.ts b/backend/src/api/v1/responses.ts index 8341d7e..d04d60e 100644 --- a/backend/src/api/v1/responses.ts +++ b/backend/src/api/v1/responses.ts @@ -579,7 +579,12 @@ export const responsesApi = new Elysia({ // Handle cache hit - return cached response if (reqIdResult.type === "cache_hit") { const sourceCompletion = reqIdResult.completion; - await recordCacheHit(sourceCompletion, apiKeyRecord.id); + // Record the cache hit (best-effort; do not block returning cached response) + try { + await recordCacheHit(sourceCompletion, apiKeyRecord.id); + } catch (error) { + logger.error("Failed to record cache hit", error); + } if (sourceCompletion.cachedResponse) { return sourceCompletion.cachedResponse.body as Record; @@ -606,10 +611,14 @@ export const responsesApi = new Elysia({ // Handle in-flight - return 409 Conflict if (reqIdResult.type === "in_flight") { + // reqId is guaranteed non-null here since checkReqId only returns in_flight when reqId is provided + if (!reqId) { + throw new Error("Invariant violated: reqId is null for in_flight result"); + } set.status = 409; set.headers["Retry-After"] = String(reqIdResult.retryAfter); return buildInFlightErrorResponse( - reqId!, + reqId, reqIdResult.inFlight, reqIdResult.retryAfter, apiFormat, diff --git a/backend/src/utils/reqIdHandler.ts b/backend/src/utils/reqIdHandler.ts index 52832ce..a536018 100644 --- a/backend/src/utils/reqIdHandler.ts +++ b/backend/src/utils/reqIdHandler.ts @@ -93,6 +93,13 @@ export async function checkReqId( } // Step 2: Check Redis for in-flight request + // Note on race conditions when Redis is unavailable: + // Without Redis, concurrent requests with the same ReqId will both proceed to step 3 + // (create pending completion). The database unique constraint on (api_key_id, req_id) + // will catch this - one request succeeds and the other fails with a constraint violation, + // triggering a re-check (lines 127-154). This means duplicate processing may occur briefly + // until one request claims the ReqId in the database. Redis provides faster in-flight + // detection but is not required for correctness. if (isRedisAvailable()) { const inFlight = await getInFlight(apiKeyId, reqId); if (inFlight) { @@ -101,7 +108,7 @@ export async function checkReqId( return { type: "in_flight", inFlight, retryAfter }; } } else { - logger.warn("Redis unavailable, skipping in-flight check"); + logger.warn("Redis unavailable, skipping in-flight check - race conditions possible until DB constraint catches duplicates"); } // Step 3: Create pending completion and mark as in-flight From 9cd6aadce096d20d3d3b1163e652f0fc22e8ddd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= Date: Sun, 25 Jan 2026 02:52:39 +0800 Subject: [PATCH 5/9] fix(api): return 400 error when ReqId exceeds max length Instead of silently truncating ReqId values longer than 127 characters, now returns a 400 Bad Request error with a clear message indicating the length constraint. This provides explicit feedback to API consumers. Co-Authored-By: Claude Opus 4.5 --- backend/src/api/v1/completions.ts | 14 +++++++++++++- backend/src/api/v1/messages.ts | 14 +++++++++++++- backend/src/api/v1/responses.ts | 15 ++++++++++++++- backend/src/utils/reqIdHandler.ts | 31 ++++++++++++++++++++++--------- 4 files changed, 62 insertions(+), 12 deletions(-) diff --git a/backend/src/api/v1/completions.ts b/backend/src/api/v1/completions.ts index 719c63b..1106c55 100644 --- a/backend/src/api/v1/completions.ts +++ b/backend/src/api/v1/completions.ts @@ -43,6 +43,7 @@ import { recordCacheHit, buildInFlightErrorResponse, extractReqId, + REQID_MAX_LENGTH, type ApiFormat, } from "@/utils/reqIdHandler"; @@ -497,7 +498,18 @@ export const completionsApi = new Elysia({ const begin = Date.now(); // Extract ReqId for request deduplication - const reqId = extractReqId(reqHeaders); + const reqIdExtraction = extractReqId(reqHeaders); + if (reqIdExtraction.type === "too_long") { + set.status = 400; + return { + error: { + message: `X-NexusGate-ReqId exceeds maximum length of ${REQID_MAX_LENGTH} characters (got ${reqIdExtraction.length})`, + type: "invalid_request_error", + code: "reqid_too_long", + }, + }; + } + const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null; const apiFormat: ApiFormat = "openai-chat"; // Parse model@provider format and extract provider from header diff --git a/backend/src/api/v1/messages.ts b/backend/src/api/v1/messages.ts index c0575e5..a2f138d 100644 --- a/backend/src/api/v1/messages.ts +++ b/backend/src/api/v1/messages.ts @@ -36,6 +36,7 @@ import { recordCacheHit, buildInFlightErrorResponse, extractReqId, + REQID_MAX_LENGTH, type ApiFormat, } from "@/utils/reqIdHandler"; import type { CachedResponseType } from "@/db/schema"; @@ -468,7 +469,18 @@ export const messagesApi = new Elysia({ const begin = Date.now(); // Extract ReqId for request deduplication - const reqId = extractReqId(reqHeaders); + const reqIdExtraction = extractReqId(reqHeaders); + if (reqIdExtraction.type === "too_long") { + set.status = 400; + return { + type: "error", + error: { + type: "invalid_request_error", + message: `X-NexusGate-ReqId exceeds maximum length of ${REQID_MAX_LENGTH} characters (got ${reqIdExtraction.length})`, + }, + }; + } + const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null; const apiFormat: ApiFormat = "anthropic"; // Parse model@provider format and extract provider from header diff --git a/backend/src/api/v1/responses.ts b/backend/src/api/v1/responses.ts index d04d60e..5e3adfd 100644 --- a/backend/src/api/v1/responses.ts +++ b/backend/src/api/v1/responses.ts @@ -36,6 +36,7 @@ import { recordCacheHit, buildInFlightErrorResponse, extractReqId, + REQID_MAX_LENGTH, type ApiFormat, } from "@/utils/reqIdHandler"; import type { CachedResponseType } from "@/db/schema"; @@ -485,7 +486,19 @@ export const responsesApi = new Elysia({ const begin = Date.now(); // Extract ReqId for request deduplication - const reqId = extractReqId(reqHeaders); + const reqIdExtraction = extractReqId(reqHeaders); + if (reqIdExtraction.type === "too_long") { + set.status = 400; + return { + object: "error", + error: { + type: "invalid_request_error", + message: `X-NexusGate-ReqId exceeds maximum length of ${REQID_MAX_LENGTH} characters (got ${reqIdExtraction.length})`, + code: "reqid_too_long", + }, + }; + } + const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null; const apiFormat: ApiFormat = "openai-responses"; // Parse model@provider format and extract provider from header diff --git a/backend/src/utils/reqIdHandler.ts b/backend/src/utils/reqIdHandler.ts index a536018..dc8fd03 100644 --- a/backend/src/utils/reqIdHandler.ts +++ b/backend/src/utils/reqIdHandler.ts @@ -309,25 +309,38 @@ export function buildInFlightErrorResponse( } /** - * Extract ReqId from request headers + * Maximum length for ReqId (database schema constraint) + */ +export const REQID_MAX_LENGTH = 127; + +/** + * Result type for extractReqId + */ +export type ExtractReqIdResult = + | { type: "valid"; value: string } + | { type: "empty" } + | { type: "too_long"; length: number }; + +/** + * Extract and validate ReqId from request headers * * @param headers - Request headers - * @returns The ReqId value or null if not present + * @returns Extraction result indicating valid value, empty, or error */ -export function extractReqId(headers: Headers): string | null { +export function extractReqId(headers: Headers): ExtractReqIdResult { const reqId = headers.get(REQID_HEADER); if (!reqId) { - return null; + return { type: "empty" }; } // Trim first, then validate const trimmedReqId = reqId.trim(); if (trimmedReqId === "") { - return null; + return { type: "empty" }; } // Validate ReqId length (max 127 chars as per schema) - if (trimmedReqId.length > 127) { - logger.warn("ReqId too long, truncating", { length: trimmedReqId.length }); - return trimmedReqId.substring(0, 127); + if (trimmedReqId.length > REQID_MAX_LENGTH) { + logger.warn("ReqId too long", { length: trimmedReqId.length, maxLength: REQID_MAX_LENGTH }); + return { type: "too_long", length: trimmedReqId.length }; } - return trimmedReqId; + return { type: "valid", value: trimmedReqId }; } From e849a88a858f0658646bb82b7376fa71c34f5e7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= Date: Sun, 25 Jan 2026 02:59:04 +0800 Subject: [PATCH 6/9] fix(api): fix SSE error format and preserve tool_calls in cached responses - Fix completions.ts SSE error format to use `data: {...}\n\n` prefix - Fix messages.ts buildCachedResponse to include tool_use blocks from completion.tool_calls - Fix responses.ts buildCachedResponse to include function_call output items from completion.tool_calls - Update fallback response reconstruction to also handle tool_calls Co-Authored-By: Claude Opus 4.5 --- backend/src/api/v1/completions.ts | 3 +- backend/src/api/v1/messages.ts | 53 ++++++++++++++++++----- backend/src/api/v1/responses.ts | 71 +++++++++++++++++++++++++++---- 3 files changed, 108 insertions(+), 19 deletions(-) diff --git a/backend/src/api/v1/completions.ts b/backend/src/api/v1/completions.ts index 1106c55..f135a68 100644 --- a/backend/src/api/v1/completions.ts +++ b/backend/src/api/v1/completions.ts @@ -792,7 +792,8 @@ export const completionsApi = new Elysia({ if (!streamSignal.aborted) { logger.error("Stream processing error", error); // Note: HTTP status cannot be changed after streaming has started - yield JSON.stringify({ error: "Stream processing error" }); + // Use SSE format for error: data: {...}\n\n + yield `data: ${JSON.stringify({ error: { message: "Stream processing error", type: "server_error", code: "stream_error" } })}\n\n`; } } })(); diff --git a/backend/src/api/v1/messages.ts b/backend/src/api/v1/messages.ts index a2f138d..82308de 100644 --- a/backend/src/api/v1/messages.ts +++ b/backend/src/api/v1/messages.ts @@ -286,17 +286,35 @@ async function* processStreamingResponse( apiFormat: reqIdContext.apiFormat, buildCachedResponse: (comp: Completion): CachedResponseType => { // For streaming, build a complete non-streaming Anthropic response for cache + // Build content blocks including both text and tool_use + const contentBlocks: Array> = []; + for (const c of comp.completion) { + // Add text content if present + if (c.content) { + contentBlocks.push({ type: "text", text: c.content }); + } + // Add tool_use blocks if present + if (c.tool_calls) { + for (const tc of c.tool_calls) { + contentBlocks.push({ + type: "tool_use", + id: tc.id, + name: tc.function.name, + input: JSON.parse(tc.function.arguments || "{}"), + }); + } + } + } + // Determine stop_reason based on content + const hasToolUse = contentBlocks.some((b) => b.type === "tool_use"); return { body: { id: `msg-cache-${reqIdContext.preCreatedCompletionId}`, type: "message", role: "assistant", - content: comp.completion.map((c) => ({ - type: "text", - text: c.content || "", - })), + content: contentBlocks.length > 0 ? contentBlocks : [{ type: "text", text: "" }], model: comp.model, - stop_reason: "end_turn", + stop_reason: hasToolUse ? "tool_use" : "end_turn", usage: { input_tokens: comp.promptTokens, output_tokens: comp.completionTokens, @@ -566,16 +584,31 @@ export const messagesApi = new Elysia({ } // Fallback: reconstruct Anthropic response + // Build content blocks including both text and tool_use + const contentBlocks: Array> = []; + for (const c of sourceCompletion.completion) { + if (c.content) { + contentBlocks.push({ type: "text", text: c.content }); + } + if (c.tool_calls) { + for (const tc of c.tool_calls) { + contentBlocks.push({ + type: "tool_use", + id: tc.id, + name: tc.function.name, + input: JSON.parse(tc.function.arguments || "{}"), + }); + } + } + } + const hasToolUse = contentBlocks.some((b) => b.type === "tool_use"); return { id: `msg-cache-${sourceCompletion.id}`, type: "message", role: "assistant", - content: sourceCompletion.completion.map((c) => ({ - type: "text", - text: c.content || "", - })), + content: contentBlocks.length > 0 ? contentBlocks : [{ type: "text", text: "" }], model: sourceCompletion.model, - stop_reason: "end_turn", + stop_reason: hasToolUse ? "tool_use" : "end_turn", usage: { input_tokens: sourceCompletion.promptTokens, output_tokens: sourceCompletion.completionTokens, diff --git a/backend/src/api/v1/responses.ts b/backend/src/api/v1/responses.ts index 5e3adfd..68651ae 100644 --- a/backend/src/api/v1/responses.ts +++ b/backend/src/api/v1/responses.ts @@ -297,17 +297,46 @@ async function* processStreamingResponse( apiFormat: reqIdContext.apiFormat, buildCachedResponse: (comp: Completion): CachedResponseType => { // For streaming, build a complete non-streaming Response API response for cache + // Build output items including both messages and function_call + const outputItems: Array> = []; + for (const c of comp.completion) { + // Build content array for message + const content: Array> = []; + if (c.content) { + content.push({ type: "output_text", text: c.content }); + } + // Add message output item if there's text content + if (content.length > 0) { + outputItems.push({ + type: "message", + role: c.role || "assistant", + content, + }); + } + // Add function_call output items for tool_calls + if (c.tool_calls) { + for (const tc of c.tool_calls) { + outputItems.push({ + type: "function_call", + id: tc.id, + call_id: tc.id, + name: tc.function.name, + arguments: tc.function.arguments || "{}", + }); + } + } + } return { body: { id: `resp-cache-${reqIdContext.preCreatedCompletionId}`, object: "response", created_at: Math.floor(Date.now() / 1000), model: comp.model, - output: comp.completion.map((c) => ({ + output: outputItems.length > 0 ? outputItems : [{ type: "message", - role: c.role || "assistant", - content: [{ type: "output_text", text: c.content || "" }], - })), + role: "assistant", + content: [{ type: "output_text", text: "" }], + }], usage: { input_tokens: comp.promptTokens, output_tokens: comp.completionTokens, @@ -604,16 +633,42 @@ export const responsesApi = new Elysia({ } // Fallback: reconstruct Response API response + // Build output items including both messages and function_call + const outputItems: Array> = []; + for (const c of sourceCompletion.completion) { + const content: Array> = []; + if (c.content) { + content.push({ type: "output_text", text: c.content }); + } + if (content.length > 0) { + outputItems.push({ + type: "message", + role: c.role || "assistant", + content, + }); + } + if (c.tool_calls) { + for (const tc of c.tool_calls) { + outputItems.push({ + type: "function_call", + id: tc.id, + call_id: tc.id, + name: tc.function.name, + arguments: tc.function.arguments || "{}", + }); + } + } + } return { id: `resp-cache-${sourceCompletion.id}`, object: "response", created_at: Math.floor(sourceCompletion.createdAt.getTime() / 1000), model: sourceCompletion.model, - output: sourceCompletion.completion.map((c) => ({ + output: outputItems.length > 0 ? outputItems : [{ type: "message", - role: c.role || "assistant", - content: [{ type: "output_text", text: c.content || "" }], - })), + role: "assistant", + content: [{ type: "output_text", text: "" }], + }], usage: { input_tokens: sourceCompletion.promptTokens, output_tokens: sourceCompletion.completionTokens, From b0443562c9039c144771c5d35ad605a343eefda2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= Date: Sun, 25 Jan 2026 03:33:19 +0800 Subject: [PATCH 7/9] refactor(api): reduce ReqId handling code duplication and improve validation - Extract shared utilities to reqIdHandler.ts: - finalizeReqIdOnError() helper for error path finalization - Response builders for each API format (OpenAI Chat, Anthropic, OpenAI Responses) - buildReqIdValidationErrorResponse() for format-appropriate errors - Export ReqIdContext type for endpoint files - Enhance ReqId validation: - Add regex pattern to reject control characters and special characters - Only allow alphanumeric, hyphens, underscores, dots, colons, forward slashes - Add "invalid_characters" error type - Fix inconsistent logging: - Use warn level for non-critical recordCacheHit failures - Reduce code duplication: - Replace 6+ error finalization blocks per endpoint with single function calls - Replace ~40 lines of response reconstruction per endpoint with shared builders Co-Authored-By: Claude Opus 4.5 --- backend/src/api/v1/completions.ts | 151 ++++------------- backend/src/api/v1/messages.ts | 157 +++--------------- backend/src/api/v1/responses.ts | 172 ++++--------------- backend/src/utils/reqIdHandler.ts | 265 +++++++++++++++++++++++++++++- 4 files changed, 347 insertions(+), 398 deletions(-) diff --git a/backend/src/api/v1/completions.ts b/backend/src/api/v1/completions.ts index f135a68..559d5c5 100644 --- a/backend/src/api/v1/completions.ts +++ b/backend/src/api/v1/completions.ts @@ -40,11 +40,14 @@ import { import { checkReqId, finalizeReqId, + finalizeReqIdOnError, recordCacheHit, buildInFlightErrorResponse, extractReqId, - REQID_MAX_LENGTH, + buildReqIdValidationErrorResponse, + buildCachedResponseByFormat, type ApiFormat, + type ReqIdContext, } from "@/utils/reqIdHandler"; const logger = consola.withTag("completionsApi"); @@ -170,15 +173,7 @@ function buildCompletionRecord( }; } -/** - * ReqId context for request deduplication - */ -interface ReqIdContext { - reqId: string; - apiKeyId: number; - preCreatedCompletionId: number; - apiFormat: ApiFormat; -} +// ReqIdContext is imported from reqIdHandler /** * Process a successful non-streaming response @@ -498,19 +493,14 @@ export const completionsApi = new Elysia({ const begin = Date.now(); // Extract ReqId for request deduplication + const apiFormat: ApiFormat = "openai-chat"; const reqIdExtraction = extractReqId(reqHeaders); - if (reqIdExtraction.type === "too_long") { - set.status = 400; - return { - error: { - message: `X-NexusGate-ReqId exceeds maximum length of ${REQID_MAX_LENGTH} characters (got ${reqIdExtraction.length})`, - type: "invalid_request_error", - code: "reqid_too_long", - }, - }; + if (reqIdExtraction.type === "too_long" || reqIdExtraction.type === "invalid_characters") { + const errorResponse = buildReqIdValidationErrorResponse(reqIdExtraction, apiFormat); + set.status = errorResponse.status; + return errorResponse.body; } const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null; - const apiFormat: ApiFormat = "openai-chat"; // Parse model@provider format and extract provider from header const { systemName, targetProvider } = parseModelProvider( @@ -586,36 +576,14 @@ export const completionsApi = new Elysia({ try { await recordCacheHit(sourceCompletion, apiKeyRecord.id); } catch (error) { - logger.error("Failed to record cache hit", error); + logger.warn("Failed to record cache hit", error); } - // Return cached response + // Return cached response if available, otherwise reconstruct if (sourceCompletion.cachedResponse) { return sourceCompletion.cachedResponse.body as Record; } - - // Fallback: reconstruct response from completion data - const reconstructed = { - id: `chatcmpl-cache-${sourceCompletion.id}`, - object: "chat.completion", - created: Math.floor(sourceCompletion.createdAt.getTime() / 1000), - model: sourceCompletion.model, - choices: sourceCompletion.completion.map((c, i) => ({ - index: i, - message: { - role: c.role || "assistant", - content: c.content, - tool_calls: c.tool_calls, - }, - finish_reason: c.tool_calls?.length ? "tool_calls" : "stop", - })), - usage: { - prompt_tokens: sourceCompletion.promptTokens, - completion_tokens: sourceCompletion.completionTokens, - total_tokens: sourceCompletion.promptTokens + sourceCompletion.completionTokens, - }, - }; - return reconstructed; + return buildCachedResponseByFormat(sourceCompletion, apiFormat); } // Handle in-flight - return 409 Conflict @@ -634,9 +602,14 @@ export const completionsApi = new Elysia({ ); } - // For new_request, we have a pre-created completionId - const preCreatedCompletionId = reqIdResult.type === "new_request" - ? reqIdResult.completionId + // For new_request, we have a pre-created completionId - build ReqId context + const reqIdContext: ReqIdContext | null = (reqIdResult.type === "new_request" && reqId) + ? { + reqId, + apiKeyId: apiKeyRecord.id, + preCreatedCompletionId: reqIdResult.completionId, + apiFormat, + } : null; // Parse request using adapter @@ -693,16 +666,7 @@ export const completionsApi = new Elysia({ const errorResult = await processFailoverError(result, completion, bearer, "streaming"); // Finalize pre-created completion if ReqId was used - if (preCreatedCompletionId && reqId && apiKeyRecord) { - await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { - status: "failed", - promptTokens: 0, - completionTokens: 0, - completion: [], - ttft: -1, - duration: Date.now() - begin, - }); - } + await finalizeReqIdOnError(reqIdContext, begin); if (errorResult.type === "upstream_error") { set.status = errorResult.status; @@ -720,33 +684,13 @@ export const completionsApi = new Elysia({ } if (!result.response || !result.provider) { - // Finalize pre-created completion if ReqId was used - if (preCreatedCompletionId && reqId && apiKeyRecord) { - await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { - status: "failed", - promptTokens: 0, - completionTokens: 0, - completion: [], - ttft: -1, - duration: Date.now() - begin, - }); - } + await finalizeReqIdOnError(reqIdContext, begin); set.status = 500; return { error: "Internal server error" }; } if (!result.response.body) { - // Finalize pre-created completion if ReqId was used - if (preCreatedCompletionId && reqId && apiKeyRecord) { - await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { - status: "failed", - promptTokens: 0, - completionTokens: 0, - completion: [], - ttft: -1, - duration: Date.now() - begin, - }); - } + await finalizeReqIdOnError(reqIdContext, begin); set.status = 500; return { error: "No body in response" }; } @@ -762,16 +706,6 @@ export const completionsApi = new Elysia({ extraHeaders, ); - // Build ReqId context if we have a pre-created completion - const streamReqIdContext = preCreatedCompletionId && reqId - ? { - reqId, - apiKeyId: apiKeyRecord.id, - preCreatedCompletionId, - apiFormat, - } - : undefined; - // Return an async generator for streaming const streamResponse = result.response; const streamSignal = request.signal; @@ -785,7 +719,7 @@ export const completionsApi = new Elysia({ apiKeyRecord ?? null, begin, streamSignal, - streamReqIdContext, + reqIdContext ?? undefined, ); } catch (error) { // Don't log error if it's due to client abort @@ -819,16 +753,7 @@ export const completionsApi = new Elysia({ const errorResult = await processFailoverError(result, completion, bearer, "non-streaming"); // Finalize pre-created completion if ReqId was used - if (preCreatedCompletionId && reqId && apiKeyRecord) { - await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { - status: "failed", - promptTokens: 0, - completionTokens: 0, - completion: [], - ttft: -1, - duration: Date.now() - begin, - }); - } + await finalizeReqIdOnError(reqIdContext, begin); if (errorResult.type === "upstream_error") { set.status = errorResult.status; @@ -846,17 +771,7 @@ export const completionsApi = new Elysia({ } if (!result.response || !result.provider) { - // Finalize pre-created completion if ReqId was used - if (preCreatedCompletionId && reqId && apiKeyRecord) { - await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { - status: "failed", - promptTokens: 0, - completionTokens: 0, - completion: [], - ttft: -1, - duration: Date.now() - begin, - }); - } + await finalizeReqIdOnError(reqIdContext, begin); set.status = 500; return { error: "Internal server error" }; } @@ -872,16 +787,6 @@ export const completionsApi = new Elysia({ extraHeaders, ); - // Build ReqId context if we have a pre-created completion - const nonStreamReqIdContext = preCreatedCompletionId && reqId - ? { - reqId, - apiKeyId: apiKeyRecord.id, - preCreatedCompletionId, - apiFormat, - } - : undefined; - try { const response = await processNonStreamingResponse( result.response, @@ -891,7 +796,7 @@ export const completionsApi = new Elysia({ apiKeyRecord ?? null, begin, request.signal, - nonStreamReqIdContext, + reqIdContext ?? undefined, ); // Return parsed JSON object for proper content-type return JSON.parse(response) as Record; diff --git a/backend/src/api/v1/messages.ts b/backend/src/api/v1/messages.ts index 82308de..1cbeece 100644 --- a/backend/src/api/v1/messages.ts +++ b/backend/src/api/v1/messages.ts @@ -33,11 +33,14 @@ import { import { checkReqId, finalizeReqId, + finalizeReqIdOnError, recordCacheHit, buildInFlightErrorResponse, extractReqId, - REQID_MAX_LENGTH, + buildReqIdValidationErrorResponse, + buildCachedResponseByFormat, type ApiFormat, + type ReqIdContext, } from "@/utils/reqIdHandler"; import type { CachedResponseType } from "@/db/schema"; @@ -162,15 +165,7 @@ function buildCompletionRecord( }; } -/** - * ReqId context for request deduplication - */ -interface ReqIdContext { - reqId: string; - apiKeyId: number; - preCreatedCompletionId: number; - apiFormat: ApiFormat; -} +// ReqIdContext is imported from reqIdHandler /** * Process a successful non-streaming message response @@ -487,19 +482,14 @@ export const messagesApi = new Elysia({ const begin = Date.now(); // Extract ReqId for request deduplication + const apiFormat: ApiFormat = "anthropic"; const reqIdExtraction = extractReqId(reqHeaders); - if (reqIdExtraction.type === "too_long") { - set.status = 400; - return { - type: "error", - error: { - type: "invalid_request_error", - message: `X-NexusGate-ReqId exceeds maximum length of ${REQID_MAX_LENGTH} characters (got ${reqIdExtraction.length})`, - }, - }; + if (reqIdExtraction.type === "too_long" || reqIdExtraction.type === "invalid_characters") { + const errorResponse = buildReqIdValidationErrorResponse(reqIdExtraction, apiFormat); + set.status = errorResponse.status; + return errorResponse.body; } const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null; - const apiFormat: ApiFormat = "anthropic"; // Parse model@provider format and extract provider from header const { systemName, targetProvider } = parseModelProvider( @@ -579,41 +569,11 @@ export const messagesApi = new Elysia({ logger.warn("Failed to record cache hit", error); } + // Return cached response if available, otherwise reconstruct if (sourceCompletion.cachedResponse) { return sourceCompletion.cachedResponse.body as Record; } - - // Fallback: reconstruct Anthropic response - // Build content blocks including both text and tool_use - const contentBlocks: Array> = []; - for (const c of sourceCompletion.completion) { - if (c.content) { - contentBlocks.push({ type: "text", text: c.content }); - } - if (c.tool_calls) { - for (const tc of c.tool_calls) { - contentBlocks.push({ - type: "tool_use", - id: tc.id, - name: tc.function.name, - input: JSON.parse(tc.function.arguments || "{}"), - }); - } - } - } - const hasToolUse = contentBlocks.some((b) => b.type === "tool_use"); - return { - id: `msg-cache-${sourceCompletion.id}`, - type: "message", - role: "assistant", - content: contentBlocks.length > 0 ? contentBlocks : [{ type: "text", text: "" }], - model: sourceCompletion.model, - stop_reason: hasToolUse ? "tool_use" : "end_turn", - usage: { - input_tokens: sourceCompletion.promptTokens, - output_tokens: sourceCompletion.completionTokens, - }, - }; + return buildCachedResponseByFormat(sourceCompletion, apiFormat); } // Handle in-flight - return 409 Conflict @@ -632,9 +592,14 @@ export const messagesApi = new Elysia({ ); } - // For new_request, we have a pre-created completionId - const preCreatedCompletionId = reqIdResult.type === "new_request" - ? reqIdResult.completionId + // For new_request, we have a pre-created completionId - build ReqId context + const reqIdContext: ReqIdContext | null = (reqIdResult.type === "new_request" && reqId) + ? { + reqId, + apiKeyId: apiKeyRecord.id, + preCreatedCompletionId: reqIdResult.completionId, + apiFormat, + } : null; // Parse request using Anthropic adapter @@ -683,16 +648,7 @@ export const messagesApi = new Elysia({ const errorResult = await processFailoverError(result, completion, bearer, "streaming"); // Finalize pre-created completion if ReqId was used - if (preCreatedCompletionId && reqId && apiKeyRecord) { - await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { - status: "failed", - promptTokens: 0, - completionTokens: 0, - completion: [], - ttft: -1, - duration: Date.now() - begin, - }); - } + await finalizeReqIdOnError(reqIdContext, begin); if (errorResult.type === "upstream_error") { set.status = errorResult.status; @@ -710,17 +666,7 @@ export const messagesApi = new Elysia({ } if (!result.response || !result.provider) { - // Finalize pre-created completion if ReqId was used - if (preCreatedCompletionId && reqId && apiKeyRecord) { - await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { - status: "failed", - promptTokens: 0, - completionTokens: 0, - completion: [], - ttft: -1, - duration: Date.now() - begin, - }); - } + await finalizeReqIdOnError(reqIdContext, begin); set.status = 500; return { type: "error", @@ -729,17 +675,7 @@ export const messagesApi = new Elysia({ } if (!result.response.body) { - // Finalize pre-created completion if ReqId was used - if (preCreatedCompletionId && reqId && apiKeyRecord) { - await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { - status: "failed", - promptTokens: 0, - completionTokens: 0, - completion: [], - ttft: -1, - duration: Date.now() - begin, - }); - } + await finalizeReqIdOnError(reqIdContext, begin); set.status = 500; return { type: "error", @@ -756,16 +692,6 @@ export const messagesApi = new Elysia({ extraHeaders, ); - // Build ReqId context if we have a pre-created completion - const streamReqIdContext = preCreatedCompletionId && reqId - ? { - reqId, - apiKeyId: apiKeyRecord.id, - preCreatedCompletionId, - apiFormat, - } - : undefined; - // Return an async generator for streaming const streamResponse = result.response; const streamSignal = request.signal; @@ -779,7 +705,7 @@ export const messagesApi = new Elysia({ apiKeyRecord ?? null, begin, streamSignal, - streamReqIdContext, + reqIdContext ?? undefined, ); } catch (error) { // Don't log error if it's due to client abort @@ -810,16 +736,7 @@ export const messagesApi = new Elysia({ const errorResult = await processFailoverError(result, completion, bearer, "non-streaming"); // Finalize pre-created completion if ReqId was used - if (preCreatedCompletionId && reqId && apiKeyRecord) { - await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { - status: "failed", - promptTokens: 0, - completionTokens: 0, - completion: [], - ttft: -1, - duration: Date.now() - begin, - }); - } + await finalizeReqIdOnError(reqIdContext, begin); if (errorResult.type === "upstream_error") { set.status = errorResult.status; @@ -837,17 +754,7 @@ export const messagesApi = new Elysia({ } if (!result.response || !result.provider) { - // Finalize pre-created completion if ReqId was used - if (preCreatedCompletionId && reqId && apiKeyRecord) { - await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { - status: "failed", - promptTokens: 0, - completionTokens: 0, - completion: [], - ttft: -1, - duration: Date.now() - begin, - }); - } + await finalizeReqIdOnError(reqIdContext, begin); set.status = 500; return { type: "error", @@ -864,16 +771,6 @@ export const messagesApi = new Elysia({ extraHeaders, ); - // Build ReqId context if we have a pre-created completion - const nonStreamReqIdContext = preCreatedCompletionId && reqId - ? { - reqId, - apiKeyId: apiKeyRecord.id, - preCreatedCompletionId, - apiFormat, - } - : undefined; - try { const response = await processNonStreamingResponse( result.response, @@ -883,7 +780,7 @@ export const messagesApi = new Elysia({ apiKeyRecord ?? null, begin, request.signal, - nonStreamReqIdContext, + reqIdContext ?? undefined, ); // Return parsed JSON object for proper content-type return JSON.parse(response) as Record; diff --git a/backend/src/api/v1/responses.ts b/backend/src/api/v1/responses.ts index 68651ae..0932895 100644 --- a/backend/src/api/v1/responses.ts +++ b/backend/src/api/v1/responses.ts @@ -33,11 +33,14 @@ import { import { checkReqId, finalizeReqId, + finalizeReqIdOnError, recordCacheHit, buildInFlightErrorResponse, extractReqId, - REQID_MAX_LENGTH, + buildReqIdValidationErrorResponse, + buildCachedResponseByFormat, type ApiFormat, + type ReqIdContext, } from "@/utils/reqIdHandler"; import type { CachedResponseType } from "@/db/schema"; @@ -173,15 +176,7 @@ function buildCompletionRecord( }; } -/** - * ReqId context for request deduplication - */ -interface ReqIdContext { - reqId: string; - apiKeyId: number; - preCreatedCompletionId: number; - apiFormat: ApiFormat; -} +// ReqIdContext is imported from reqIdHandler /** * Process a successful non-streaming response @@ -515,20 +510,14 @@ export const responsesApi = new Elysia({ const begin = Date.now(); // Extract ReqId for request deduplication + const apiFormat: ApiFormat = "openai-responses"; const reqIdExtraction = extractReqId(reqHeaders); - if (reqIdExtraction.type === "too_long") { - set.status = 400; - return { - object: "error", - error: { - type: "invalid_request_error", - message: `X-NexusGate-ReqId exceeds maximum length of ${REQID_MAX_LENGTH} characters (got ${reqIdExtraction.length})`, - code: "reqid_too_long", - }, - }; + if (reqIdExtraction.type === "too_long" || reqIdExtraction.type === "invalid_characters") { + const errorResponse = buildReqIdValidationErrorResponse(reqIdExtraction, apiFormat); + set.status = errorResponse.status; + return errorResponse.body; } const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null; - const apiFormat: ApiFormat = "openai-responses"; // Parse model@provider format and extract provider from header const { systemName, targetProvider } = parseModelProvider( @@ -625,56 +614,14 @@ export const responsesApi = new Elysia({ try { await recordCacheHit(sourceCompletion, apiKeyRecord.id); } catch (error) { - logger.error("Failed to record cache hit", error); + logger.warn("Failed to record cache hit", error); } + // Return cached response if available, otherwise reconstruct if (sourceCompletion.cachedResponse) { return sourceCompletion.cachedResponse.body as Record; } - - // Fallback: reconstruct Response API response - // Build output items including both messages and function_call - const outputItems: Array> = []; - for (const c of sourceCompletion.completion) { - const content: Array> = []; - if (c.content) { - content.push({ type: "output_text", text: c.content }); - } - if (content.length > 0) { - outputItems.push({ - type: "message", - role: c.role || "assistant", - content, - }); - } - if (c.tool_calls) { - for (const tc of c.tool_calls) { - outputItems.push({ - type: "function_call", - id: tc.id, - call_id: tc.id, - name: tc.function.name, - arguments: tc.function.arguments || "{}", - }); - } - } - } - return { - id: `resp-cache-${sourceCompletion.id}`, - object: "response", - created_at: Math.floor(sourceCompletion.createdAt.getTime() / 1000), - model: sourceCompletion.model, - output: outputItems.length > 0 ? outputItems : [{ - type: "message", - role: "assistant", - content: [{ type: "output_text", text: "" }], - }], - usage: { - input_tokens: sourceCompletion.promptTokens, - output_tokens: sourceCompletion.completionTokens, - total_tokens: sourceCompletion.promptTokens + sourceCompletion.completionTokens, - }, - }; + return buildCachedResponseByFormat(sourceCompletion, apiFormat); } // Handle in-flight - return 409 Conflict @@ -693,9 +640,14 @@ export const responsesApi = new Elysia({ ); } - // For new_request, we have a pre-created completionId - const preCreatedCompletionId = reqIdResult.type === "new_request" - ? reqIdResult.completionId + // For new_request, we have a pre-created completionId - build ReqId context + const reqIdContext: ReqIdContext | null = (reqIdResult.type === "new_request" && reqId) + ? { + reqId, + apiKeyId: apiKeyRecord.id, + preCreatedCompletionId: reqIdResult.completionId, + apiFormat, + } : null; // Parse request using Response API adapter @@ -744,16 +696,7 @@ export const responsesApi = new Elysia({ const errorResult = await processFailoverError(result, completion, bearer, "streaming"); // Finalize pre-created completion if ReqId was used - if (preCreatedCompletionId && reqId && apiKeyRecord) { - await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { - status: "failed", - promptTokens: 0, - completionTokens: 0, - completion: [], - ttft: -1, - duration: Date.now() - begin, - }); - } + await finalizeReqIdOnError(reqIdContext, begin); if (errorResult.type === "upstream_error") { set.status = errorResult.status; @@ -771,17 +714,7 @@ export const responsesApi = new Elysia({ } if (!result.response || !result.provider) { - // Finalize pre-created completion if ReqId was used - if (preCreatedCompletionId && reqId && apiKeyRecord) { - await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { - status: "failed", - promptTokens: 0, - completionTokens: 0, - completion: [], - ttft: -1, - duration: Date.now() - begin, - }); - } + await finalizeReqIdOnError(reqIdContext, begin); set.status = 500; return { object: "error", @@ -790,17 +723,7 @@ export const responsesApi = new Elysia({ } if (!result.response.body) { - // Finalize pre-created completion if ReqId was used - if (preCreatedCompletionId && reqId && apiKeyRecord) { - await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { - status: "failed", - promptTokens: 0, - completionTokens: 0, - completion: [], - ttft: -1, - duration: Date.now() - begin, - }); - } + await finalizeReqIdOnError(reqIdContext, begin); set.status = 500; return { object: "error", @@ -817,16 +740,6 @@ export const responsesApi = new Elysia({ extraHeaders, ); - // Build ReqId context if we have a pre-created completion - const streamReqIdContext = preCreatedCompletionId && reqId - ? { - reqId, - apiKeyId: apiKeyRecord.id, - preCreatedCompletionId, - apiFormat, - } - : undefined; - // Return an async generator for streaming const streamResponse = result.response; const streamSignal = request.signal; @@ -840,7 +753,7 @@ export const responsesApi = new Elysia({ apiKeyRecord ?? null, begin, streamSignal, - streamReqIdContext, + reqIdContext ?? undefined, ); } catch (error) { // Don't log error if it's due to client abort @@ -871,16 +784,7 @@ export const responsesApi = new Elysia({ const errorResult = await processFailoverError(result, completion, bearer, "non-streaming"); // Finalize pre-created completion if ReqId was used - if (preCreatedCompletionId && reqId && apiKeyRecord) { - await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { - status: "failed", - promptTokens: 0, - completionTokens: 0, - completion: [], - ttft: -1, - duration: Date.now() - begin, - }); - } + await finalizeReqIdOnError(reqIdContext, begin); if (errorResult.type === "upstream_error") { set.status = errorResult.status; @@ -898,17 +802,7 @@ export const responsesApi = new Elysia({ } if (!result.response || !result.provider) { - // Finalize pre-created completion if ReqId was used - if (preCreatedCompletionId && reqId && apiKeyRecord) { - await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, { - status: "failed", - promptTokens: 0, - completionTokens: 0, - completion: [], - ttft: -1, - duration: Date.now() - begin, - }); - } + await finalizeReqIdOnError(reqIdContext, begin); set.status = 500; return { object: "error", @@ -925,16 +819,6 @@ export const responsesApi = new Elysia({ extraHeaders, ); - // Build ReqId context if we have a pre-created completion - const nonStreamReqIdContext = preCreatedCompletionId && reqId - ? { - reqId, - apiKeyId: apiKeyRecord.id, - preCreatedCompletionId, - apiFormat, - } - : undefined; - try { const response = await processNonStreamingResponse( result.response, @@ -944,7 +828,7 @@ export const responsesApi = new Elysia({ apiKeyRecord ?? null, begin, request.signal, - nonStreamReqIdContext, + reqIdContext ?? undefined, ); // Return parsed JSON object for proper content-type return JSON.parse(response) as Record; diff --git a/backend/src/utils/reqIdHandler.ts b/backend/src/utils/reqIdHandler.ts index dc8fd03..d5dc5de 100644 --- a/backend/src/utils/reqIdHandler.ts +++ b/backend/src/utils/reqIdHandler.ts @@ -313,13 +313,21 @@ export function buildInFlightErrorResponse( */ export const REQID_MAX_LENGTH = 127; +/** + * Regex pattern for valid ReqId characters + * Allows alphanumeric, hyphens, underscores, dots, colons, and forward slashes + * This prevents control characters, null bytes, and other potentially problematic characters + */ +const REQID_VALID_PATTERN = /^[\w\-.:/]+$/; + /** * Result type for extractReqId */ export type ExtractReqIdResult = | { type: "valid"; value: string } | { type: "empty" } - | { type: "too_long"; length: number }; + | { type: "too_long"; length: number } + | { type: "invalid_characters" }; /** * Extract and validate ReqId from request headers @@ -342,5 +350,260 @@ export function extractReqId(headers: Headers): ExtractReqIdResult { logger.warn("ReqId too long", { length: trimmedReqId.length, maxLength: REQID_MAX_LENGTH }); return { type: "too_long", length: trimmedReqId.length }; } + // Validate ReqId contains only allowed characters + if (!REQID_VALID_PATTERN.test(trimmedReqId)) { + logger.warn("ReqId contains invalid characters", { reqId: trimmedReqId }); + return { type: "invalid_characters" }; + } return { type: "valid", value: trimmedReqId }; } + +// ============================================================================= +// Response Builders for Cache Hits +// ============================================================================= + +/** + * Build OpenAI Chat Completion format response from cached completion + */ +export function buildOpenAIChatResponse(completion: Completion): Record { + return { + id: `chatcmpl-cache-${completion.id}`, + object: "chat.completion", + created: Math.floor(completion.createdAt.getTime() / 1000), + model: completion.model, + choices: completion.completion.map((c, i) => ({ + index: i, + message: { + role: c.role || "assistant", + content: c.content, + tool_calls: c.tool_calls, + }, + finish_reason: c.tool_calls?.length ? "tool_calls" : "stop", + })), + usage: { + prompt_tokens: completion.promptTokens, + completion_tokens: completion.completionTokens, + total_tokens: completion.promptTokens + completion.completionTokens, + }, + }; +} + +/** + * Build Anthropic Messages format response from cached completion + */ +export function buildAnthropicResponse(completion: Completion): Record { + // Build content blocks including both text and tool_use + const contentBlocks: Array> = []; + for (const c of completion.completion) { + // Add text content if present + if (c.content) { + contentBlocks.push({ type: "text", text: c.content }); + } + // Add tool_use blocks if present + if (c.tool_calls) { + for (const tc of c.tool_calls) { + contentBlocks.push({ + type: "tool_use", + id: tc.id, + name: tc.function.name, + input: JSON.parse(tc.function.arguments || "{}"), + }); + } + } + } + // Determine stop_reason based on content + const hasToolUse = contentBlocks.some((b) => b.type === "tool_use"); + return { + id: `msg-cache-${completion.id}`, + type: "message", + role: "assistant", + content: contentBlocks.length > 0 ? contentBlocks : [{ type: "text", text: "" }], + model: completion.model, + stop_reason: hasToolUse ? "tool_use" : "end_turn", + usage: { + input_tokens: completion.promptTokens, + output_tokens: completion.completionTokens, + }, + }; +} + +/** + * Build OpenAI Responses API format response from cached completion + */ +export function buildOpenAIResponsesResponse(completion: Completion): Record { + // Build output items including both messages and function_call + const outputItems: Array> = []; + for (const c of completion.completion) { + // Build content array for message + const content: Array> = []; + if (c.content) { + content.push({ type: "output_text", text: c.content }); + } + // Add message output item if there's text content + if (content.length > 0) { + outputItems.push({ + type: "message", + role: c.role || "assistant", + content, + }); + } + // Add function_call output items for tool_calls + if (c.tool_calls) { + for (const tc of c.tool_calls) { + outputItems.push({ + type: "function_call", + id: tc.id, + call_id: tc.id, + name: tc.function.name, + arguments: tc.function.arguments || "{}", + }); + } + } + } + return { + id: `resp-cache-${completion.id}`, + object: "response", + created_at: Math.floor(completion.createdAt.getTime() / 1000), + model: completion.model, + output: outputItems.length > 0 ? outputItems : [{ + type: "message", + role: "assistant", + content: [{ type: "output_text", text: "" }], + }], + usage: { + input_tokens: completion.promptTokens, + output_tokens: completion.completionTokens, + total_tokens: completion.promptTokens + completion.completionTokens, + }, + }; +} + +/** + * Build cached response based on API format + */ +export function buildCachedResponseByFormat( + completion: Completion, + format: ApiFormat, +): Record { + switch (format) { + case "openai-chat": + return buildOpenAIChatResponse(completion); + case "anthropic": + return buildAnthropicResponse(completion); + case "openai-responses": + return buildOpenAIResponsesResponse(completion); + } +} + +// ============================================================================= +// Error Finalization Helper +// ============================================================================= + +/** + * Context for ReqId request handling + */ +export interface ReqIdContext { + reqId: string; + apiKeyId: number; + preCreatedCompletionId: number; + apiFormat: ApiFormat; +} + +/** + * Finalize a pre-created completion on error + * + * Helper function to reduce duplication in error handling paths + * + * @param context - ReqId context (or null if no ReqId) + * @param begin - Request start timestamp + */ +export async function finalizeReqIdOnError( + context: ReqIdContext | null | undefined, + begin: number, +): Promise { + if (!context) { + return; + } + + await finalizeReqId(context.apiKeyId, context.reqId, context.preCreatedCompletionId, { + status: "failed", + promptTokens: 0, + completionTokens: 0, + completion: [], + ttft: -1, + duration: Date.now() - begin, + }); +} + +// ============================================================================= +// ReqId Validation Error Responses +// ============================================================================= + +/** + * Build error response for invalid ReqId (too long or invalid characters) + */ +export function buildReqIdValidationErrorResponse( + extraction: ExtractReqIdResult, + format: ApiFormat, +): { status: number; body: Record } { + if (extraction.type === "too_long") { + const message = `X-NexusGate-ReqId exceeds maximum length of ${REQID_MAX_LENGTH} characters (got ${extraction.length})`; + return { + status: 400, + body: buildValidationErrorBody(message, "reqid_too_long", format), + }; + } + + if (extraction.type === "invalid_characters") { + const message = "X-NexusGate-ReqId contains invalid characters. Only alphanumeric characters, hyphens, underscores, dots, colons, and forward slashes are allowed."; + return { + status: 400, + body: buildValidationErrorBody(message, "reqid_invalid_characters", format), + }; + } + + // Should not reach here, but provide a fallback + return { + status: 400, + body: buildValidationErrorBody("Invalid X-NexusGate-ReqId", "reqid_invalid", format), + }; +} + +/** + * Build validation error body in the appropriate format + */ +function buildValidationErrorBody( + message: string, + code: string, + format: ApiFormat, +): Record { + if (format === "anthropic") { + return { + type: "error", + error: { + type: "invalid_request_error", + message, + }, + }; + } + + if (format === "openai-responses") { + return { + object: "error", + error: { + type: "invalid_request_error", + message, + code, + }, + }; + } + + // openai-chat format + return { + error: { + message, + type: "invalid_request_error", + code, + }, + }; +} From 60b78425ddd01804ee2467f79adc3e8e238a3dec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= Date: Sun, 25 Jan 2026 06:18:25 +0800 Subject: [PATCH 8/9] refactor(api): consolidate ReqId handling with helper functions - Add extractAndValidateReqId() to combine extraction, validation, and error response building - Add handleReqIdResult() to consolidate cache_hit, in_flight, and context building logic - Update completions, messages, and responses endpoints to use new helpers - Reduces ~24 lines of duplicated code per endpoint (~72 lines total) Co-Authored-By: Claude Opus 4.5 --- backend/src/api/v1/completions.ts | 71 ++++++------------- backend/src/api/v1/messages.ts | 71 ++++++------------- backend/src/api/v1/responses.ts | 71 ++++++------------- backend/src/utils/reqIdHandler.ts | 112 +++++++++++++++++++++++++++++- 4 files changed, 177 insertions(+), 148 deletions(-) diff --git a/backend/src/api/v1/completions.ts b/backend/src/api/v1/completions.ts index 559d5c5..eb1486f 100644 --- a/backend/src/api/v1/completions.ts +++ b/backend/src/api/v1/completions.ts @@ -41,11 +41,8 @@ import { checkReqId, finalizeReqId, finalizeReqIdOnError, - recordCacheHit, - buildInFlightErrorResponse, - extractReqId, - buildReqIdValidationErrorResponse, - buildCachedResponseByFormat, + extractAndValidateReqId, + handleReqIdResult, type ApiFormat, type ReqIdContext, } from "@/utils/reqIdHandler"; @@ -492,15 +489,14 @@ export const completionsApi = new Elysia({ const reqHeaders = request.headers; const begin = Date.now(); - // Extract ReqId for request deduplication + // Extract and validate ReqId for request deduplication const apiFormat: ApiFormat = "openai-chat"; - const reqIdExtraction = extractReqId(reqHeaders); - if (reqIdExtraction.type === "too_long" || reqIdExtraction.type === "invalid_characters") { - const errorResponse = buildReqIdValidationErrorResponse(reqIdExtraction, apiFormat); - set.status = errorResponse.status; - return errorResponse.body; + const reqIdExtraction = extractAndValidateReqId(reqHeaders, apiFormat); + if (reqIdExtraction.type === "error") { + set.status = reqIdExtraction.status; + return reqIdExtraction.body; } - const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null; + const reqId = reqIdExtraction.reqId; // Parse model@provider format and extract provider from header const { systemName, targetProvider } = parseModelProvider( @@ -569,48 +565,25 @@ export const completionsApi = new Elysia({ isStream, }); - // Handle cache hit - return cached response - if (reqIdResult.type === "cache_hit") { - const sourceCompletion = reqIdResult.completion; - // Record the cache hit (best-effort; do not block returning cached response) - try { - await recordCacheHit(sourceCompletion, apiKeyRecord.id); - } catch (error) { - logger.warn("Failed to record cache hit", error); - } + // Handle ReqId result (cache_hit, in_flight, or continue) + const reqIdHandleResult = await handleReqIdResult( + reqIdResult, + reqId, + apiKeyRecord.id, + apiFormat, + ); - // Return cached response if available, otherwise reconstruct - if (sourceCompletion.cachedResponse) { - return sourceCompletion.cachedResponse.body as Record; - } - return buildCachedResponseByFormat(sourceCompletion, apiFormat); + if (reqIdHandleResult.type === "cache_hit") { + return reqIdHandleResult.response; } - // Handle in-flight - return 409 Conflict - if (reqIdResult.type === "in_flight") { - // reqId is guaranteed non-null here since checkReqId only returns in_flight when reqId is provided - if (!reqId) { - throw new Error("Invariant violated: reqId is null for in_flight result"); - } - set.status = 409; - set.headers["Retry-After"] = String(reqIdResult.retryAfter); - return buildInFlightErrorResponse( - reqId, - reqIdResult.inFlight, - reqIdResult.retryAfter, - apiFormat, - ); + if (reqIdHandleResult.type === "in_flight") { + set.status = reqIdHandleResult.status; + set.headers["Retry-After"] = String(reqIdHandleResult.retryAfter); + return reqIdHandleResult.response; } - // For new_request, we have a pre-created completionId - build ReqId context - const reqIdContext: ReqIdContext | null = (reqIdResult.type === "new_request" && reqId) - ? { - reqId, - apiKeyId: apiKeyRecord.id, - preCreatedCompletionId: reqIdResult.completionId, - apiFormat, - } - : null; + const reqIdContext = reqIdHandleResult.context; // Parse request using adapter const requestAdapter = getRequestAdapter("openai-chat"); diff --git a/backend/src/api/v1/messages.ts b/backend/src/api/v1/messages.ts index 1cbeece..6a698f9 100644 --- a/backend/src/api/v1/messages.ts +++ b/backend/src/api/v1/messages.ts @@ -34,11 +34,8 @@ import { checkReqId, finalizeReqId, finalizeReqIdOnError, - recordCacheHit, - buildInFlightErrorResponse, - extractReqId, - buildReqIdValidationErrorResponse, - buildCachedResponseByFormat, + extractAndValidateReqId, + handleReqIdResult, type ApiFormat, type ReqIdContext, } from "@/utils/reqIdHandler"; @@ -481,15 +478,14 @@ export const messagesApi = new Elysia({ const reqHeaders = request.headers; const begin = Date.now(); - // Extract ReqId for request deduplication + // Extract and validate ReqId for request deduplication const apiFormat: ApiFormat = "anthropic"; - const reqIdExtraction = extractReqId(reqHeaders); - if (reqIdExtraction.type === "too_long" || reqIdExtraction.type === "invalid_characters") { - const errorResponse = buildReqIdValidationErrorResponse(reqIdExtraction, apiFormat); - set.status = errorResponse.status; - return errorResponse.body; + const reqIdExtraction = extractAndValidateReqId(reqHeaders, apiFormat); + if (reqIdExtraction.type === "error") { + set.status = reqIdExtraction.status; + return reqIdExtraction.body; } - const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null; + const reqId = reqIdExtraction.reqId; // Parse model@provider format and extract provider from header const { systemName, targetProvider } = parseModelProvider( @@ -559,48 +555,25 @@ export const messagesApi = new Elysia({ isStream, }); - // Handle cache hit - return cached response - if (reqIdResult.type === "cache_hit") { - const sourceCompletion = reqIdResult.completion; - // Record the cache hit (best-effort; do not block returning cached response) - try { - await recordCacheHit(sourceCompletion, apiKeyRecord.id); - } catch (error) { - logger.warn("Failed to record cache hit", error); - } + // Handle ReqId result (cache_hit, in_flight, or continue) + const reqIdHandleResult = await handleReqIdResult( + reqIdResult, + reqId, + apiKeyRecord.id, + apiFormat, + ); - // Return cached response if available, otherwise reconstruct - if (sourceCompletion.cachedResponse) { - return sourceCompletion.cachedResponse.body as Record; - } - return buildCachedResponseByFormat(sourceCompletion, apiFormat); + if (reqIdHandleResult.type === "cache_hit") { + return reqIdHandleResult.response; } - // Handle in-flight - return 409 Conflict - if (reqIdResult.type === "in_flight") { - // reqId is guaranteed non-null here since checkReqId only returns in_flight when reqId is provided - if (!reqId) { - throw new Error("Invariant violated: reqId is null for in_flight result"); - } - set.status = 409; - set.headers["Retry-After"] = String(reqIdResult.retryAfter); - return buildInFlightErrorResponse( - reqId, - reqIdResult.inFlight, - reqIdResult.retryAfter, - apiFormat, - ); + if (reqIdHandleResult.type === "in_flight") { + set.status = reqIdHandleResult.status; + set.headers["Retry-After"] = String(reqIdHandleResult.retryAfter); + return reqIdHandleResult.response; } - // For new_request, we have a pre-created completionId - build ReqId context - const reqIdContext: ReqIdContext | null = (reqIdResult.type === "new_request" && reqId) - ? { - reqId, - apiKeyId: apiKeyRecord.id, - preCreatedCompletionId: reqIdResult.completionId, - apiFormat, - } - : null; + const reqIdContext = reqIdHandleResult.context; // Parse request using Anthropic adapter const requestAdapter = getRequestAdapter("anthropic"); diff --git a/backend/src/api/v1/responses.ts b/backend/src/api/v1/responses.ts index 0932895..8cd7ed1 100644 --- a/backend/src/api/v1/responses.ts +++ b/backend/src/api/v1/responses.ts @@ -34,11 +34,8 @@ import { checkReqId, finalizeReqId, finalizeReqIdOnError, - recordCacheHit, - buildInFlightErrorResponse, - extractReqId, - buildReqIdValidationErrorResponse, - buildCachedResponseByFormat, + extractAndValidateReqId, + handleReqIdResult, type ApiFormat, type ReqIdContext, } from "@/utils/reqIdHandler"; @@ -509,15 +506,14 @@ export const responsesApi = new Elysia({ const reqHeaders = request.headers; const begin = Date.now(); - // Extract ReqId for request deduplication + // Extract and validate ReqId for request deduplication const apiFormat: ApiFormat = "openai-responses"; - const reqIdExtraction = extractReqId(reqHeaders); - if (reqIdExtraction.type === "too_long" || reqIdExtraction.type === "invalid_characters") { - const errorResponse = buildReqIdValidationErrorResponse(reqIdExtraction, apiFormat); - set.status = errorResponse.status; - return errorResponse.body; + const reqIdExtraction = extractAndValidateReqId(reqHeaders, apiFormat); + if (reqIdExtraction.type === "error") { + set.status = reqIdExtraction.status; + return reqIdExtraction.body; } - const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null; + const reqId = reqIdExtraction.reqId; // Parse model@provider format and extract provider from header const { systemName, targetProvider } = parseModelProvider( @@ -607,48 +603,25 @@ export const responsesApi = new Elysia({ isStream, }); - // Handle cache hit - return cached response - if (reqIdResult.type === "cache_hit") { - const sourceCompletion = reqIdResult.completion; - // Record the cache hit (best-effort; do not block returning cached response) - try { - await recordCacheHit(sourceCompletion, apiKeyRecord.id); - } catch (error) { - logger.warn("Failed to record cache hit", error); - } + // Handle ReqId result (cache_hit, in_flight, or continue) + const reqIdHandleResult = await handleReqIdResult( + reqIdResult, + reqId, + apiKeyRecord.id, + apiFormat, + ); - // Return cached response if available, otherwise reconstruct - if (sourceCompletion.cachedResponse) { - return sourceCompletion.cachedResponse.body as Record; - } - return buildCachedResponseByFormat(sourceCompletion, apiFormat); + if (reqIdHandleResult.type === "cache_hit") { + return reqIdHandleResult.response; } - // Handle in-flight - return 409 Conflict - if (reqIdResult.type === "in_flight") { - // reqId is guaranteed non-null here since checkReqId only returns in_flight when reqId is provided - if (!reqId) { - throw new Error("Invariant violated: reqId is null for in_flight result"); - } - set.status = 409; - set.headers["Retry-After"] = String(reqIdResult.retryAfter); - return buildInFlightErrorResponse( - reqId, - reqIdResult.inFlight, - reqIdResult.retryAfter, - apiFormat, - ); + if (reqIdHandleResult.type === "in_flight") { + set.status = reqIdHandleResult.status; + set.headers["Retry-After"] = String(reqIdHandleResult.retryAfter); + return reqIdHandleResult.response; } - // For new_request, we have a pre-created completionId - build ReqId context - const reqIdContext: ReqIdContext | null = (reqIdResult.type === "new_request" && reqId) - ? { - reqId, - apiKeyId: apiKeyRecord.id, - preCreatedCompletionId: reqIdResult.completionId, - apiFormat, - } - : null; + const reqIdContext = reqIdHandleResult.context; // Parse request using Response API adapter const requestAdapter = getRequestAdapter("openai-responses"); diff --git a/backend/src/utils/reqIdHandler.ts b/backend/src/utils/reqIdHandler.ts index d5dc5de..70fb415 100644 --- a/backend/src/utils/reqIdHandler.ts +++ b/backend/src/utils/reqIdHandler.ts @@ -535,6 +535,116 @@ export async function finalizeReqIdOnError( }); } +// ============================================================================= +// Consolidated ReqId Handling Helpers +// ============================================================================= + +/** + * Result of ReqId extraction and validation + */ +export type ReqIdExtractionResult = + | { type: "valid"; reqId: string } + | { type: "empty"; reqId: null } + | { type: "error"; status: number; body: Record }; + +/** + * Extract, validate, and return ReqId with proper error responses + * + * Consolidates the extraction + validation + error response building pattern + * + * @param headers - Request headers + * @param apiFormat - API format for error response formatting + * @returns Extraction result with reqId or error response + */ +export function extractAndValidateReqId( + headers: Headers, + apiFormat: ApiFormat, +): ReqIdExtractionResult { + const extraction = extractReqId(headers); + + if (extraction.type === "too_long" || extraction.type === "invalid_characters") { + const errorResponse = buildReqIdValidationErrorResponse(extraction, apiFormat); + return { type: "error", status: errorResponse.status, body: errorResponse.body }; + } + + if (extraction.type === "valid") { + return { type: "valid", reqId: extraction.value }; + } + + return { type: "empty", reqId: null }; +} + +/** + * Result of handling ReqId check result + */ +export type ReqIdHandleResult = + | { type: "cache_hit"; response: Record } + | { type: "in_flight"; status: 409; retryAfter: number; response: Record } + | { type: "continue"; context: ReqIdContext | null }; + +/** + * Handle ReqId check result - returns early response or context to continue + * + * Consolidates cache_hit handling, in_flight handling, and context building + * + * @param result - Result from checkReqId + * @param reqId - The extracted reqId (or null) + * @param apiKeyId - API key ID for the request + * @param apiFormat - API format for response formatting + * @returns Handle result indicating how to proceed + */ +export async function handleReqIdResult( + result: ReqIdCheckResult, + reqId: string | null, + apiKeyId: number, + apiFormat: ApiFormat, +): Promise { + // Handle cache hit - return cached response + if (result.type === "cache_hit") { + const sourceCompletion = result.completion; + + // Record the cache hit (best-effort) + try { + await recordCacheHit(sourceCompletion, apiKeyId); + } catch (error) { + logger.warn("Failed to record cache hit", error); + } + + // Return cached response if available, otherwise reconstruct + const response = sourceCompletion.cachedResponse + ? (sourceCompletion.cachedResponse.body as Record) + : buildCachedResponseByFormat(sourceCompletion, apiFormat); + + return { type: "cache_hit", response }; + } + + // Handle in-flight - return 409 Conflict + if (result.type === "in_flight") { + if (!reqId) { + throw new Error("Invariant violated: reqId is null for in_flight result"); + } + + return { + type: "in_flight", + status: 409, + retryAfter: result.retryAfter, + response: buildInFlightErrorResponse(reqId, result.inFlight, result.retryAfter, apiFormat), + }; + } + + // Build context for new_request or no_reqid + const context: ReqIdContext | null = (result.type === "new_request" && reqId) + ? { + reqId, + apiKeyId, + preCreatedCompletionId: result.completionId, + apiFormat, + } + : null; + + return { type: "continue", context }; +} + // ============================================================================= // ReqId Validation Error Responses // ============================================================================= @@ -542,7 +652,7 @@ export async function finalizeReqIdOnError( /** * Build error response for invalid ReqId (too long or invalid characters) */ -export function buildReqIdValidationErrorResponse( +function buildReqIdValidationErrorResponse( extraction: ExtractReqIdResult, format: ApiFormat, ): { status: number; body: Record } { From cdbefc4ace7b749d2403c0ee853f9d7bd5584387 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= Date: Sun, 25 Jan 2026 06:38:57 +0800 Subject: [PATCH 9/9] fix(db): use PostgreSQL error code for unique constraint detection Replace brittle string matching (error.message.includes("unique")) with PostgreSQL error code check (code === "23505") for detecting unique constraint violations in createPendingCompletion. This matches the pattern already used in apiKey.ts and is more reliable across different PostgreSQL versions and locales. Co-Authored-By: Claude Opus 4.5 --- backend/src/db/index.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/backend/src/db/index.ts b/backend/src/db/index.ts index a5567f1..72bb219 100644 --- a/backend/src/db/index.ts +++ b/backend/src/db/index.ts @@ -1310,7 +1310,12 @@ export async function createPendingCompletion( return first ?? null; } catch (error) { // Handle unique constraint violation (duplicate ReqId) - if (error instanceof Error && error.message.includes("unique")) { + // PostgreSQL error code 23505 = unique_violation + if ( + error instanceof Error && + "code" in error && + (error as { code: string }).code === "23505" + ) { logger.warn("Duplicate ReqId detected", c.reqId); return null; }