From 4336fffdabb7e9668dbed1098e9bb67fae308797 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= <admin@pescn.cn>
Date: Sun, 25 Jan 2026 02:08:11 +0800
Subject: [PATCH 1/9] feat(api): add X-NexusGate-ReqId request deduplication

Add support for request deduplication using X-NexusGate-ReqId header:

- Cache completed responses and return them for duplicate requests
- Return 409 Conflict with Retry-After for in-flight requests
- Support both streaming and non-streaming requests
- Scope ReqId by API key to prevent cross-user collisions
- Use Redis SETNX for atomic in-flight marking (600s TTL)
- Store cachedResponse in database for future cache hits
- Add cache_hit status to completions table and frontend UI

Database changes:
- Add req_id, source_completion_id, api_format, cached_response columns
- Add cache_hit status to completions_status enum
- Create partial unique index on (api_key_id, req_id)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .../drizzle/0011_outgoing_johnny_blaze.sql    |    7 +
 backend/drizzle/meta/0011_snapshot.json       | 1040 +++++++++++++++++
 backend/drizzle/meta/_journal.json            |    7 +
 backend/src/api/v1/completions.ts             |  251 +++-
 backend/src/api/v1/messages.ts                |  229 +++-
 backend/src/api/v1/responses.ts               |  251 +++-
 backend/src/db/index.ts                       |   81 ++
 backend/src/db/schema.ts                      |   19 +-
 backend/src/utils/redisClient.ts              |   28 +
 backend/src/utils/reqIdCache.ts               |  159 +++
 backend/src/utils/reqIdHandler.ts             |  325 ++++++
 backend/src/utils/streaming-context.ts        |   34 +-
 frontend/src/i18n/locales/en-US.json          |    2 +
 frontend/src/i18n/locales/zh-CN.json          |    2 +
 frontend/src/pages/requests/columns.tsx       |    3 +
 .../pages/requests/detail-panel/header.tsx    |    5 +
 16 files changed, 2349 insertions(+), 94 deletions(-)
 create mode 100644 backend/drizzle/0011_outgoing_johnny_blaze.sql
 create mode 100644 backend/drizzle/meta/0011_snapshot.json
 create mode 100644 backend/src/utils/reqIdCache.ts
 create mode 100644 backend/src/utils/reqIdHandler.ts

diff --git a/backend/drizzle/0011_outgoing_johnny_blaze.sql b/backend/drizzle/0011_outgoing_johnny_blaze.sql
new file mode 100644
index 0000000..76795fb
--- /dev/null
+++ b/backend/drizzle/0011_outgoing_johnny_blaze.sql
@@ -0,0 +1,7 @@
+ALTER TYPE "public"."completions_status" ADD VALUE 'cache_hit';--> statement-breakpoint
+ALTER TABLE "completions" ADD COLUMN "req_id" varchar(127);--> statement-breakpoint
+ALTER TABLE "completions" ADD COLUMN "source_completion_id" integer;--> statement-breakpoint
+ALTER TABLE "completions" ADD COLUMN "api_format" varchar(31);--> statement-breakpoint
+ALTER TABLE "completions" ADD COLUMN "cached_response" jsonb;--> statement-breakpoint
+ALTER TABLE "completions" ADD CONSTRAINT "completions_source_completion_id_completions_id_fk" FOREIGN KEY ("source_completion_id") REFERENCES "public"."completions"("id") ON DELETE no action ON UPDATE no action;--> statement-breakpoint
+CREATE UNIQUE INDEX "completions_api_key_req_id_unique" ON "completions" ("api_key_id", "req_id") WHERE "req_id" IS NOT NULL;
\ No newline at end of file
diff --git a/backend/drizzle/meta/0011_snapshot.json b/backend/drizzle/meta/0011_snapshot.json
new file mode 100644
index 0000000..2df7511
--- /dev/null
+++ b/backend/drizzle/meta/0011_snapshot.json
@@ -0,0 +1,1040 @@
+{
+  "id": "7d19c611-e3a5-4186-b863-f3a792c60de3",
+  "prevId": "6be63fd9-50d1-4d49-acea-e53401c54b2f",
+  "version": "7",
+  "dialect": "postgresql",
+  "tables": {
+    "public.api_keys": {
+      "name": "api_keys",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "integer",
+          "primaryKey": true,
+          "notNull": true,
+          "identity": {
+            "type": "always",
+            "name": "api_keys_id_seq",
+            "schema": "public",
+            "increment": "1",
+            "startWith": "1",
+            "minValue": "1",
+            "maxValue": "2147483647",
+            "cache": "1",
+            "cycle": false
+          }
+        },
+        "key": {
+          "name": "key",
+          "type": "varchar(63)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "comment": {
+          "name": "comment",
+          "type": "varchar",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "expires_at": {
+          "name": "expires_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "last_seen": {
+          "name": "last_seen",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "revoked": {
+          "name": "revoked",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "rpm_limit": {
+          "name": "rpm_limit",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 50
+        },
+        "tpm_limit": {
+          "name": "tpm_limit",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 50000
+        },
+        "external_id": {
+          "name": "external_id",
+          "type": "varchar(127)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "source": {
+          "name": "source",
+          "type": "api_key_source",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": false,
+          "default": "'manual'"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "api_keys_key_unique": {
+          "name": "api_keys_key_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "key"
+          ]
+        },
+        "api_keys_external_id_unique": {
+          "name": "api_keys_external_id_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "external_id"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.completions": {
+      "name": "completions",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "integer",
+          "primaryKey": true,
+          "notNull": true,
+          "identity": {
+            "type": "always",
+            "name": "completions_id_seq",
+            "schema": "public",
+            "increment": "1",
+            "startWith": "1",
+            "minValue": "1",
+            "maxValue": "2147483647",
+            "cache": "1",
+            "cycle": false
+          }
+        },
+        "api_key_id": {
+          "name": "api_key_id",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "upstream_id": {
+          "name": "upstream_id",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "model_id": {
+          "name": "model_id",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "model": {
+          "name": "model",
+          "type": "varchar",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "prompt": {
+          "name": "prompt",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "prompt_tokens": {
+          "name": "prompt_tokens",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "completion": {
+          "name": "completion",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "completion_tokens": {
+          "name": "completion_tokens",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "status": {
+          "name": "status",
+          "type": "completions_status",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'pending'"
+        },
+        "ttft": {
+          "name": "ttft",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "duration": {
+          "name": "duration",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "deleted": {
+          "name": "deleted",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "rating": {
+          "name": "rating",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "req_id": {
+          "name": "req_id",
+          "type": "varchar(127)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "source_completion_id": {
+          "name": "source_completion_id",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "api_format": {
+          "name": "api_format",
+          "type": "varchar(31)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "cached_response": {
+          "name": "cached_response",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "completions_api_key_id_api_keys_id_fk": {
+          "name": "completions_api_key_id_api_keys_id_fk",
+          "tableFrom": "completions",
+          "tableTo": "api_keys",
+          "columnsFrom": [
+            "api_key_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        },
+        "completions_upstream_id_upstreams_id_fk": {
+          "name": "completions_upstream_id_upstreams_id_fk",
+          "tableFrom": "completions",
+          "tableTo": "upstreams",
+          "columnsFrom": [
+            "upstream_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        },
+        "completions_source_completion_id_completions_id_fk": {
+          "name": "completions_source_completion_id_completions_id_fk",
+          "tableFrom": "completions",
+          "tableTo": "completions",
+          "columnsFrom": [
+            "source_completion_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "completions_id_unique": {
+          "name": "completions_id_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "id"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.embeddings": {
+      "name": "embeddings",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "integer",
+          "primaryKey": true,
+          "notNull": true,
+          "identity": {
+            "type": "always",
+            "name": "embeddings_id_seq",
+            "schema": "public",
+            "increment": "1",
+            "startWith": "1",
+            "minValue": "1",
+            "maxValue": "2147483647",
+            "cache": "1",
+            "cycle": false
+          }
+        },
+        "api_key_id": {
+          "name": "api_key_id",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "model_id": {
+          "name": "model_id",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "model": {
+          "name": "model",
+          "type": "varchar",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "input": {
+          "name": "input",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "input_tokens": {
+          "name": "input_tokens",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "embedding": {
+          "name": "embedding",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "dimensions": {
+          "name": "dimensions",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "status": {
+          "name": "status",
+          "type": "completions_status",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'pending'"
+        },
+        "duration": {
+          "name": "duration",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "deleted": {
+          "name": "deleted",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "embeddings_api_key_id_api_keys_id_fk": {
+          "name": "embeddings_api_key_id_api_keys_id_fk",
+          "tableFrom": "embeddings",
+          "tableTo": "api_keys",
+          "columnsFrom": [
+            "api_key_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        },
+        "embeddings_model_id_models_id_fk": {
+          "name": "embeddings_model_id_models_id_fk",
+          "tableFrom": "embeddings",
+          "tableTo": "models",
+          "columnsFrom": [
+            "model_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "embeddings_id_unique": {
+          "name": "embeddings_id_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "id"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.models": {
+      "name": "models",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "integer",
+          "primaryKey": true,
+          "notNull": true,
+          "identity": {
+            "type": "always",
+            "name": "models_id_seq",
+            "schema": "public",
+            "increment": "1",
+            "startWith": "1",
+            "minValue": "1",
+            "maxValue": "2147483647",
+            "cache": "1",
+            "cycle": false
+          }
+        },
+        "provider_id": {
+          "name": "provider_id",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "system_name": {
+          "name": "system_name",
+          "type": "varchar(63)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "remote_id": {
+          "name": "remote_id",
+          "type": "varchar(63)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "model_type": {
+          "name": "model_type",
+          "type": "model_type",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'chat'"
+        },
+        "context_length": {
+          "name": "context_length",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "input_price": {
+          "name": "input_price",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "output_price": {
+          "name": "output_price",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "weight": {
+          "name": "weight",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 1
+        },
+        "comment": {
+          "name": "comment",
+          "type": "varchar",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "deleted": {
+          "name": "deleted",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "models_provider_id_providers_id_fk": {
+          "name": "models_provider_id_providers_id_fk",
+          "tableFrom": "models",
+          "tableTo": "providers",
+          "columnsFrom": [
+            "provider_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "models_provider_system_name_unique": {
+          "name": "models_provider_system_name_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "provider_id",
+            "system_name"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.providers": {
+      "name": "providers",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "integer",
+          "primaryKey": true,
+          "notNull": true,
+          "identity": {
+            "type": "always",
+            "name": "providers_id_seq",
+            "schema": "public",
+            "increment": "1",
+            "startWith": "1",
+            "minValue": "1",
+            "maxValue": "2147483647",
+            "cache": "1",
+            "cycle": false
+          }
+        },
+        "name": {
+          "name": "name",
+          "type": "varchar(63)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "type": {
+          "name": "type",
+          "type": "provider_type",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "'openai'"
+        },
+        "base_url": {
+          "name": "base_url",
+          "type": "varchar(255)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "api_key": {
+          "name": "api_key",
+          "type": "varchar(255)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "api_version": {
+          "name": "api_version",
+          "type": "varchar(31)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "comment": {
+          "name": "comment",
+          "type": "varchar",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "deleted": {
+          "name": "deleted",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "providers_name_unique": {
+          "name": "providers_name_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "name"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.settings": {
+      "name": "settings",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "integer",
+          "primaryKey": true,
+          "notNull": true,
+          "identity": {
+            "type": "always",
+            "name": "settings_id_seq",
+            "schema": "public",
+            "increment": "1",
+            "startWith": "1",
+            "minValue": "1",
+            "maxValue": "2147483647",
+            "cache": "1",
+            "cycle": false
+          }
+        },
+        "key": {
+          "name": "key",
+          "type": "varchar(63)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "value": {
+          "name": "value",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "settings_key_unique": {
+          "name": "settings_key_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "key"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.srv_logs": {
+      "name": "srv_logs",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "integer",
+          "primaryKey": true,
+          "notNull": true,
+          "identity": {
+            "type": "always",
+            "name": "srv_logs_id_seq",
+            "schema": "public",
+            "increment": "1",
+            "startWith": "1",
+            "minValue": "1",
+            "maxValue": "2147483647",
+            "cache": "1",
+            "cycle": false
+          }
+        },
+        "related_api_key_id": {
+          "name": "related_api_key_id",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "related_upstream_id": {
+          "name": "related_upstream_id",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "related_completion_id": {
+          "name": "related_completion_id",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "message": {
+          "name": "message",
+          "type": "varchar",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "level": {
+          "name": "level",
+          "type": "srv_logs_level",
+          "typeSchema": "public",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "details": {
+          "name": "details",
+          "type": "jsonb",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "acknowledged": {
+          "name": "acknowledged",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        },
+        "ack_at": {
+          "name": "ack_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "srv_logs_related_api_key_id_api_keys_id_fk": {
+          "name": "srv_logs_related_api_key_id_api_keys_id_fk",
+          "tableFrom": "srv_logs",
+          "tableTo": "api_keys",
+          "columnsFrom": [
+            "related_api_key_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        },
+        "srv_logs_related_upstream_id_upstreams_id_fk": {
+          "name": "srv_logs_related_upstream_id_upstreams_id_fk",
+          "tableFrom": "srv_logs",
+          "tableTo": "upstreams",
+          "columnsFrom": [
+            "related_upstream_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        },
+        "srv_logs_related_completion_id_completions_id_fk": {
+          "name": "srv_logs_related_completion_id_completions_id_fk",
+          "tableFrom": "srv_logs",
+          "tableTo": "completions",
+          "columnsFrom": [
+            "related_completion_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "srv_logs_id_unique": {
+          "name": "srv_logs_id_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "id"
+          ]
+        }
+      },
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    },
+    "public.upstreams": {
+      "name": "upstreams",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "integer",
+          "primaryKey": true,
+          "notNull": true,
+          "identity": {
+            "type": "always",
+            "name": "upstreams_id_seq",
+            "schema": "public",
+            "increment": "1",
+            "startWith": "1",
+            "minValue": "1",
+            "maxValue": "2147483647",
+            "cache": "1",
+            "cycle": false
+          }
+        },
+        "name": {
+          "name": "name",
+          "type": "varchar(63)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "url": {
+          "name": "url",
+          "type": "varchar(255)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "model": {
+          "name": "model",
+          "type": "varchar(63)",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "upstream_model": {
+          "name": "upstream_model",
+          "type": "varchar(63)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "api_key": {
+          "name": "api_key",
+          "type": "varchar(255)",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "weight": {
+          "name": "weight",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "default": 1
+        },
+        "comment": {
+          "name": "comment",
+          "type": "varchar",
+          "primaryKey": false,
+          "notNull": false
+        },
+        "created_at": {
+          "name": "created_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "timestamp",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "now()"
+        },
+        "deleted": {
+          "name": "deleted",
+          "type": "boolean",
+          "primaryKey": false,
+          "notNull": true,
+          "default": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "policies": {},
+      "checkConstraints": {},
+      "isRLSEnabled": false
+    }
+  },
+  "enums": {
+    "public.api_key_source": {
+      "name": "api_key_source",
+      "schema": "public",
+      "values": [
+        "manual",
+        "operator",
+        "init"
+      ]
+    },
+    "public.completions_status": {
+      "name": "completions_status",
+      "schema": "public",
+      "values": [
+        "pending",
+        "completed",
+        "failed",
+        "aborted",
+        "cache_hit"
+      ]
+    },
+    "public.model_type": {
+      "name": "model_type",
+      "schema": "public",
+      "values": [
+        "chat",
+        "embedding"
+      ]
+    },
+    "public.provider_type": {
+      "name": "provider_type",
+      "schema": "public",
+      "values": [
+        "openai",
+        "openai-responses",
+        "anthropic",
+        "azure",
+        "ollama"
+      ]
+    },
+    "public.srv_logs_level": {
+      "name": "srv_logs_level",
+      "schema": "public",
+      "values": [
+        "unspecific",
+        "info",
+        "warn",
+        "error"
+      ]
+    }
+  },
+  "schemas": {},
+  "sequences": {},
+  "roles": {},
+  "policies": {},
+  "views": {},
+  "_meta": {
+    "columns": {},
+    "schemas": {},
+    "tables": {}
+  }
+}
\ No newline at end of file
diff --git a/backend/drizzle/meta/_journal.json b/backend/drizzle/meta/_journal.json
index 9f2d7e8..8c4593c 100644
--- a/backend/drizzle/meta/_journal.json
+++ b/backend/drizzle/meta/_journal.json
@@ -78,6 +78,13 @@
       "when": 1769256296714,
       "tag": "0010_noisy_deathbird",
       "breakpoints": true
+    },
+    {
+      "idx": 11,
+      "version": "7",
+      "when": 1769275632379,
+      "tag": "0011_outgoing_johnny_blaze",
+      "breakpoints": true
     }
   ]
 }
\ No newline at end of file
diff --git a/backend/src/api/v1/completions.ts b/backend/src/api/v1/completions.ts
index 97df736..41e32e1 100644
--- a/backend/src/api/v1/completions.ts
+++ b/backend/src/api/v1/completions.ts
@@ -19,6 +19,7 @@ import type {
   CompletionsMessageType,
   ToolDefinitionType,
   ToolChoiceType,
+  CachedResponseType,
 } from "@/db/schema";
 import {
   extractUpstreamHeaders,
@@ -36,6 +37,14 @@ import {
   selectMultipleCandidates,
   type FailoverConfig,
 } from "@/services/failover";
+import {
+  checkReqId,
+  finalizeReqId,
+  recordCacheHit,
+  buildInFlightErrorResponse,
+  extractReqId,
+  type ApiFormat,
+} from "@/utils/reqIdHandler";
 
 const logger = consola.withTag("completionsApi");
 
@@ -160,6 +169,16 @@ function buildCompletionRecord(
   };
 }
 
+/**
+ * ReqId context for request deduplication: the values this file forwards to finalizeReqId and StreamingContext
+ */
+interface ReqIdContext {
+  reqId: string; // value returned by extractReqId(request.headers)
+  apiKeyId: number; // apiKeyRecord.id of the calling API key
+  preCreatedCompletionId: number; // completionId taken from a "new_request" checkReqId result
+  apiFormat: ApiFormat; // always "openai-chat" in this file
+}
+
 /**
  * Process a successful non-streaming response
  * Ensures completion is saved to database before returning
@@ -172,6 +191,7 @@ async function processNonStreamingResponse(
   apiKeyRecord: ApiKey | null,
   begin: number,
   signal?: AbortSignal,
+  reqIdContext?: ReqIdContext,
 ): Promise<string> {
   // Parse response using upstream adapter
   const upstreamAdapter = getUpstreamAdapter(providerType);
@@ -197,21 +217,53 @@ async function processNonStreamingResponse(
     },
   ];
 
+  // Build cached response for ReqId deduplication
+  const cachedResponse: CachedResponseType = {
+    body: serialized,
+    format: "openai-chat",
+  };
+
   // Check if client disconnected during processing
   if (signal?.aborted) {
     completion.status = "aborted";
-    await addCompletions(completion, bearer, {
-      level: "info",
-      message: "Client disconnected during non-streaming response",
-      details: {
-        type: "completionError",
-        data: { type: "aborted", msg: "Client disconnected" },
-      },
-    });
+    if (reqIdContext) {
+      // Use finalizeReqId for ReqId requests
+      await finalizeReqId(
+        reqIdContext.apiKeyId,
+        reqIdContext.reqId,
+        reqIdContext.preCreatedCompletionId,
+        {
+          ...completion,
+          cachedResponse,
+        },
+      );
+    } else {
+      await addCompletions(completion, bearer, {
+        level: "info",
+        message: "Client disconnected during non-streaming response",
+        details: {
+          type: "completionError",
+          data: { type: "aborted", msg: "Client disconnected" },
+        },
+      });
+    }
   } else {
     completion.status = "completed";
-    // Use await to ensure database write completes before returning
-    await addCompletions(completion, bearer);
+    if (reqIdContext) {
+      // Use finalizeReqId for ReqId requests
+      await finalizeReqId(
+        reqIdContext.apiKeyId,
+        reqIdContext.reqId,
+        reqIdContext.preCreatedCompletionId,
+        {
+          ...completion,
+          cachedResponse,
+        },
+      );
+    } else {
+      // Use await to ensure database write completes before returning
+      await addCompletions(completion, bearer);
+    }
   }
 
   // Consume tokens for TPM rate limiting (post-flight)
@@ -237,6 +289,7 @@ async function* processStreamingResponse(
   apiKeyRecord: ApiKey | null,
   begin: number,
   signal?: AbortSignal,
+  reqIdContext?: ReqIdContext,
 ): AsyncGenerator<string, void, unknown> {
   // Get adapters
   const upstreamAdapter = getUpstreamAdapter(providerType);
@@ -244,8 +297,44 @@ async function* processStreamingResponse(
 
   logger.debug("parse stream completions response");
 
+  // Build streaming ReqId context if provided
+  const streamingReqIdContext = reqIdContext
+    ? {
+        reqId: reqIdContext.reqId,
+        apiKeyId: reqIdContext.apiKeyId,
+        preCreatedCompletionId: reqIdContext.preCreatedCompletionId,
+        apiFormat: reqIdContext.apiFormat,
+        buildCachedResponse: (comp: Completion): CachedResponseType => {
+          // For streaming, we build a complete non-streaming response for cache
+          return {
+            body: {
+              id: `chatcmpl-cache-${reqIdContext.preCreatedCompletionId}`,
+              object: "chat.completion",
+              created: Math.floor(Date.now() / 1000),
+              model: comp.model,
+              choices: comp.completion.map((c, i) => ({
+                index: i,
+                message: {
+                  role: c.role || "assistant",
+                  content: c.content,
+                  tool_calls: c.tool_calls,
+                },
+                finish_reason: c.tool_calls?.length ? "tool_calls" : "stop",
+              })),
+              usage: {
+                prompt_tokens: comp.promptTokens,
+                completion_tokens: comp.completionTokens,
+                total_tokens: comp.promptTokens + comp.completionTokens,
+              },
+            },
+            format: "openai-chat",
+          };
+        },
+      }
+    : undefined;
+
   // Create streaming context with abort handling
-  const ctx = new StreamingContext(completion, bearer, apiKeyRecord, begin, signal);
+  const ctx = new StreamingContext(completion, bearer, apiKeyRecord, begin, signal, streamingReqIdContext);
 
   // Track whether we've logged the client abort (to avoid duplicate logs)
   let loggedAbort = false;
@@ -398,7 +487,7 @@ export const completionsApi = new Elysia({
   .use(rateLimitPlugin)
   .post(
     "/completions",
-    async function* ({ body, set, bearer, request, apiKeyRecord }) {
+    async function ({ body, set, bearer, request, apiKeyRecord }) {
       if (bearer === undefined) {
         set.status = 500;
         return { error: "Internal server error" };
@@ -407,6 +496,10 @@ export const completionsApi = new Elysia({
       const reqHeaders = request.headers;
       const begin = Date.now();
 
+      // Extract ReqId for request deduplication
+      const reqId = extractReqId(reqHeaders);
+      const apiFormat: ApiFormat = "openai-chat";
+
       // Parse model@provider format and extract provider from header
       const { systemName, targetProvider } = parseModelProvider(
         body.model,
@@ -457,6 +550,84 @@ export const completionsApi = new Elysia({
       // Extract extra headers for passthrough
       const extraHeaders = extractUpstreamHeaders(reqHeaders);
 
+      // Check ReqId for deduplication (if provided)
+      const isStream = body.stream === true;
+
+      // Reject the unsupported stream + n > 1 combination before checkReqId runs.
+      // The 400 returned further down never calls finalizeReqId, so a ReqId registered
+      // first would presumably keep answering 409 until its in-flight mark expires.
+      if (isStream && body.n && body.n > 1) {
+        set.status = 400;
+        return { error: "Stream completions with n > 1 is not supported" };
+      }
+
+      const reqIdResult = await checkReqId(reqId, {
+        apiKeyId: apiKeyRecord.id,
+        model: body.model,
+        modelId: candidates[0]?.model.id,
+        prompt: {
+          messages: body.messages as CompletionsMessageType[],
+          tools: body.tools as ToolDefinitionType[] | undefined,
+          tool_choice: body.tool_choice as ToolChoiceType | undefined,
+          extraHeaders,
+        },
+        apiFormat,
+        endpoint: "/v1/chat/completions",
+        isStream,
+      });
+
+      // Handle cache hit - return cached response
+      if (reqIdResult.type === "cache_hit") {
+        const sourceCompletion = reqIdResult.completion;
+        // Record the cache hit
+        await recordCacheHit(sourceCompletion, apiKeyRecord.id);
+
+        // Return cached response
+        if (sourceCompletion.cachedResponse) {
+          return sourceCompletion.cachedResponse.body as Record<string, unknown>;
+        }
+
+        // Fallback: reconstruct response from completion data
+        const reconstructed = {
+          id: `chatcmpl-cache-${sourceCompletion.id}`,
+          object: "chat.completion",
+          created: Math.floor(sourceCompletion.createdAt.getTime() / 1000),
+          model: sourceCompletion.model,
+          choices: sourceCompletion.completion.map((c, i) => ({
+            index: i,
+            message: {
+              role: c.role || "assistant",
+              content: c.content,
+              tool_calls: c.tool_calls,
+            },
+            finish_reason: c.tool_calls?.length ? "tool_calls" : "stop",
+          })),
+          usage: {
+            prompt_tokens: sourceCompletion.promptTokens,
+            completion_tokens: sourceCompletion.completionTokens,
+            total_tokens: sourceCompletion.promptTokens + sourceCompletion.completionTokens,
+          },
+        };
+        return reconstructed;
+      }
+
+      // Handle in-flight - return 409 Conflict
+      if (reqIdResult.type === "in_flight") {
+        set.status = 409;
+        set.headers["Retry-After"] = String(reqIdResult.retryAfter);
+        return buildInFlightErrorResponse(
+          reqId!,
+          reqIdResult.inFlight,
+          reqIdResult.retryAfter,
+          apiFormat,
+        );
+      }
+
+      // For new_request, we have a pre-created completionId
+      const preCreatedCompletionId = reqIdResult.type === "new_request"
+        ? reqIdResult.completionId
+        : null;
+
       // Parse request using adapter
       const requestAdapter = getRequestAdapter("openai-chat");
       const internalRequest = requestAdapter.parse(
@@ -484,7 +646,7 @@ export const completionsApi = new Elysia({
 
       // Handle streaming vs non-streaming
       if (internalRequest.stream) {
-        // Streaming request - use yield for streaming responses
+        // Streaming request - return an async generator
         if (body.n && body.n > 1) {
           set.status = 400;
           return { error: "Stream completions with n > 1 is not supported" };
@@ -546,26 +708,42 @@ export const completionsApi = new Elysia({
           extraHeaders,
         );
 
-        try {
-          yield* processStreamingResponse(
-            result.response,
-            completion,
-            bearer,
-            providerType,
-            apiKeyRecord ?? null,
-            begin,
-            request.signal,
-          );
-        } catch (error) {
-          // Don't log error if it's due to client abort
-          if (!request.signal.aborted) {
-            logger.error("Stream processing error", error);
-            set.status = 500;
-            yield JSON.stringify({ error: "Stream processing error" });
+        // Build ReqId context if we have a pre-created completion
+        const streamReqIdContext = preCreatedCompletionId && reqId
+          ? {
+              reqId,
+              apiKeyId: apiKeyRecord.id,
+              preCreatedCompletionId,
+              apiFormat,
+            }
+          : undefined;
+
+        // Return an async generator for streaming
+        const streamResponse = result.response;
+        const streamSignal = request.signal;
+        return (async function* () {
+          try {
+            yield* processStreamingResponse(
+              streamResponse,
+              completion,
+              bearer,
+              providerType,
+              apiKeyRecord ?? null,
+              begin,
+              streamSignal,
+              streamReqIdContext,
+            );
+          } catch (error) {
+            // Don't log error if it's due to client abort
+            if (!streamSignal.aborted) {
+              logger.error("Stream processing error", error);
+              set.status = 500;
+              yield JSON.stringify({ error: "Stream processing error" });
+            }
           }
-        }
+        })();
       } else {
-        // Non-streaming request - use return for normal JSON response
+        // Non-streaming request - return JSON response directly
         const result = await executeWithFailover(
           candidates,
           buildRequestForProvider,
@@ -616,6 +794,16 @@ export const completionsApi = new Elysia({
           extraHeaders,
         );
 
+        // Build ReqId context if we have a pre-created completion
+        const nonStreamReqIdContext = preCreatedCompletionId && reqId
+          ? {
+              reqId,
+              apiKeyId: apiKeyRecord.id,
+              preCreatedCompletionId,
+              apiFormat,
+            }
+          : undefined;
+
         try {
           const response = await processNonStreamingResponse(
             result.response,
@@ -625,6 +813,7 @@ export const completionsApi = new Elysia({
             apiKeyRecord ?? null,
             begin,
             request.signal,
+            nonStreamReqIdContext,
           );
           // Return parsed JSON object for proper content-type
           return JSON.parse(response) as Record<string, unknown>;
diff --git a/backend/src/api/v1/messages.ts b/backend/src/api/v1/messages.ts
index 9eb6a36..64024f7 100644
--- a/backend/src/api/v1/messages.ts
+++ b/backend/src/api/v1/messages.ts
@@ -30,6 +30,15 @@ import {
   selectMultipleCandidates,
   type FailoverConfig,
 } from "@/services/failover";
+import {
+  checkReqId,
+  finalizeReqId,
+  recordCacheHit,
+  buildInFlightErrorResponse,
+  extractReqId,
+  type ApiFormat,
+} from "@/utils/reqIdHandler";
+import type { CachedResponseType } from "@/db/schema";
 
 const logger = consola.withTag("messagesApi");
 
@@ -152,6 +161,16 @@ function buildCompletionRecord(
   };
 }
 
+/**
+ * ReqId context for request deduplication: the values this file forwards to finalizeReqId and StreamingContext
+ */
+interface ReqIdContext {
+  reqId: string; // value returned by extractReqId(request.headers)
+  apiKeyId: number; // apiKeyRecord.id of the calling API key
+  preCreatedCompletionId: number; // completionId taken from a "new_request" checkReqId result
+  apiFormat: ApiFormat; // always "anthropic" in this file
+}
+
 /**
  * Process a successful non-streaming message response
  * Ensures completion is saved to database before returning
@@ -164,6 +183,7 @@ async function processNonStreamingResponse(
   apiKeyRecord: ApiKey | null,
   begin: number,
   signal?: AbortSignal,
+  reqIdContext?: ReqIdContext,
 ): Promise<string> {
   // Parse response using upstream adapter
   const upstreamAdapter = getUpstreamAdapter(providerType);
@@ -185,20 +205,44 @@ async function processNonStreamingResponse(
     },
   ];
 
+  // Build cached response for ReqId deduplication
+  const cachedResponse: CachedResponseType = {
+    body: serialized,
+    format: "anthropic",
+  };
+
   // Check if client disconnected during processing
   if (signal?.aborted) {
     completion.status = "aborted";
-    await addCompletions(completion, bearer, {
-      level: "info",
-      message: "Client disconnected during non-streaming response",
-      details: {
-        type: "completionError",
-        data: { type: "aborted", msg: "Client disconnected" },
-      },
-    });
+    if (reqIdContext) {
+      await finalizeReqId(
+        reqIdContext.apiKeyId,
+        reqIdContext.reqId,
+        reqIdContext.preCreatedCompletionId,
+        { ...completion, cachedResponse },
+      );
+    } else {
+      await addCompletions(completion, bearer, {
+        level: "info",
+        message: "Client disconnected during non-streaming response",
+        details: {
+          type: "completionError",
+          data: { type: "aborted", msg: "Client disconnected" },
+        },
+      });
+    }
   } else {
     completion.status = "completed";
-    await addCompletions(completion, bearer);
+    if (reqIdContext) {
+      await finalizeReqId(
+        reqIdContext.apiKeyId,
+        reqIdContext.reqId,
+        reqIdContext.preCreatedCompletionId,
+        { ...completion, cachedResponse },
+      );
+    } else {
+      await addCompletions(completion, bearer);
+    }
   }
 
   // Consume tokens for TPM rate limiting (post-flight)
@@ -224,6 +268,7 @@ async function* processStreamingResponse(
   apiKeyRecord: ApiKey | null,
   begin: number,
   signal?: AbortSignal,
+  reqIdContext?: ReqIdContext,
 ): AsyncGenerator<string, void, unknown> {
   // Get adapters
   const upstreamAdapter = getUpstreamAdapter(providerType);
@@ -231,8 +276,39 @@ async function* processStreamingResponse(
 
   logger.debug("parse stream messages response");
 
+  // Build streaming ReqId context if provided
+  const streamingReqIdContext = reqIdContext
+    ? {
+        reqId: reqIdContext.reqId,
+        apiKeyId: reqIdContext.apiKeyId,
+        preCreatedCompletionId: reqIdContext.preCreatedCompletionId,
+        apiFormat: reqIdContext.apiFormat,
+        buildCachedResponse: (comp: Completion): CachedResponseType => {
+          // For streaming, build a complete non-streaming Anthropic response for cache
+          return {
+            body: {
+              id: `msg-cache-${reqIdContext.preCreatedCompletionId}`,
+              type: "message",
+              role: "assistant",
+              content: comp.completion.map((c) => ({
+                type: "text",
+                text: c.content || "",
+              })),
+              model: comp.model,
+              stop_reason: "end_turn",
+              usage: {
+                input_tokens: comp.promptTokens,
+                output_tokens: comp.completionTokens,
+              },
+            },
+            format: "anthropic",
+          };
+        },
+      }
+    : undefined;
+
   // Create streaming context with abort handling
-  const ctx = new StreamingContext(completion, bearer, apiKeyRecord, begin, signal);
+  const ctx = new StreamingContext(completion, bearer, apiKeyRecord, begin, signal, streamingReqIdContext);
 
   // Track whether we've logged the client abort (to avoid duplicate logs)
   let loggedAbort = false;
@@ -379,7 +455,7 @@ export const messagesApi = new Elysia({
   .use(rateLimitPlugin)
   .post(
     "/messages",
-    async function* ({ body, set, bearer, request, apiKeyRecord }) {
+    async function ({ body, set, bearer, request, apiKeyRecord }) {
       if (bearer === undefined) {
         set.status = 500;
         return {
@@ -391,6 +467,10 @@ export const messagesApi = new Elysia({
       const reqHeaders = request.headers;
       const begin = Date.now();
 
+      // Extract ReqId for request deduplication
+      const reqId = extractReqId(reqHeaders);
+      const apiFormat: ApiFormat = "anthropic";
+
       // Parse model@provider format and extract provider from header
       const { systemName, targetProvider } = parseModelProvider(
         body.model,
@@ -441,6 +521,68 @@ export const messagesApi = new Elysia({
       // Extract extra headers for passthrough
       const extraHeaders = extractUpstreamHeaders(reqHeaders);
 
+      // Check ReqId for deduplication (if provided)
+      const isStream = body.stream === true;
+      const reqIdResult = await checkReqId(reqId, {
+        apiKeyId: apiKeyRecord.id,
+        model: body.model,
+        modelId: candidates[0]?.model.id,
+        prompt: {
+          messages: body.messages.map((m: { role: string; content: unknown }) => ({
+            role: m.role,
+            content: typeof m.content === "string" ? m.content : JSON.stringify(m.content),
+          })),
+          extraHeaders,
+        },
+        apiFormat,
+        endpoint: "/v1/messages",
+        isStream,
+      });
+
+      // Handle cache hit - return cached response
+      if (reqIdResult.type === "cache_hit") {
+        const sourceCompletion = reqIdResult.completion;
+        await recordCacheHit(sourceCompletion, apiKeyRecord.id);
+
+        if (sourceCompletion.cachedResponse) {
+          return sourceCompletion.cachedResponse.body as Record<string, unknown>;
+        }
+
+        // Fallback: reconstruct Anthropic response
+        return {
+          id: `msg-cache-${sourceCompletion.id}`,
+          type: "message",
+          role: "assistant",
+          content: sourceCompletion.completion.map((c) => ({
+            type: "text",
+            text: c.content || "",
+          })),
+          model: sourceCompletion.model,
+          stop_reason: "end_turn",
+          usage: {
+            input_tokens: sourceCompletion.promptTokens,
+            output_tokens: sourceCompletion.completionTokens,
+          },
+        };
+      }
+
+      // Handle in-flight - return 409 Conflict
+      if (reqIdResult.type === "in_flight") {
+        set.status = 409;
+        set.headers["Retry-After"] = String(reqIdResult.retryAfter);
+        return buildInFlightErrorResponse(
+          reqId!,
+          reqIdResult.inFlight,
+          reqIdResult.retryAfter,
+          apiFormat,
+        );
+      }
+
+      // For new_request, we have a pre-created completionId
+      const preCreatedCompletionId = reqIdResult.type === "new_request"
+        ? reqIdResult.completionId
+        : null;
+
       // Parse request using Anthropic adapter
       const requestAdapter = getRequestAdapter("anthropic");
       const internalRequest = requestAdapter.parse(
@@ -468,7 +610,7 @@ export const messagesApi = new Elysia({
 
       // Handle streaming vs non-streaming
       if (internalRequest.stream) {
-        // Streaming request - use yield for streaming responses
+        // Streaming request - return an async generator
         const result = await executeWithFailover(
           candidates,
           buildRequestForProvider,
@@ -526,26 +668,42 @@ export const messagesApi = new Elysia({
           extraHeaders,
         );
 
-        try {
-          yield* processStreamingResponse(
-            result.response,
-            completion,
-            bearer,
-            providerType,
-            apiKeyRecord ?? null,
-            begin,
-            request.signal,
-          );
-        } catch (error) {
-          // Don't log error if it's due to client abort
-          if (!request.signal.aborted) {
-            logger.error("Stream processing error", error);
-            set.status = 500;
-            yield `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "server_error", message: "Stream processing error" } })}\n\n`;
+        // Build ReqId context if we have a pre-created completion
+        const streamReqIdContext = preCreatedCompletionId && reqId
+          ? {
+              reqId,
+              apiKeyId: apiKeyRecord.id,
+              preCreatedCompletionId,
+              apiFormat,
+            }
+          : undefined;
+
+        // Return an async generator for streaming
+        const streamResponse = result.response;
+        const streamSignal = request.signal;
+        return (async function* () {
+          try {
+            yield* processStreamingResponse(
+              streamResponse,
+              completion,
+              bearer,
+              providerType,
+              apiKeyRecord ?? null,
+              begin,
+              streamSignal,
+              streamReqIdContext,
+            );
+          } catch (error) {
+            // Don't log error if it's due to client abort
+            if (!streamSignal.aborted) {
+              logger.error("Stream processing error", error);
+              set.status = 500;
+              yield `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "server_error", message: "Stream processing error" } })}\n\n`;
+            }
           }
-        }
+        })();
       } else {
-        // Non-streaming request - use return for normal JSON response
+        // Non-streaming request - return JSON response directly
         const result = await executeWithFailover(
           candidates,
           buildRequestForProvider,
@@ -595,6 +753,16 @@ export const messagesApi = new Elysia({
           extraHeaders,
         );
 
+        // Build ReqId context if we have a pre-created completion
+        const nonStreamReqIdContext = preCreatedCompletionId && reqId
+          ? {
+              reqId,
+              apiKeyId: apiKeyRecord.id,
+              preCreatedCompletionId,
+              apiFormat,
+            }
+          : undefined;
+
         try {
           const response = await processNonStreamingResponse(
             result.response,
@@ -604,6 +772,7 @@ export const messagesApi = new Elysia({
             apiKeyRecord ?? null,
             begin,
             request.signal,
+            nonStreamReqIdContext,
           );
           // Return parsed JSON object for proper content-type
           return JSON.parse(response) as Record<string, unknown>;
diff --git a/backend/src/api/v1/responses.ts b/backend/src/api/v1/responses.ts
index d86b44f..345c205 100644
--- a/backend/src/api/v1/responses.ts
+++ b/backend/src/api/v1/responses.ts
@@ -30,6 +30,15 @@ import {
   selectMultipleCandidates,
   type FailoverConfig,
 } from "@/services/failover";
+import {
+  checkReqId,
+  finalizeReqId,
+  recordCacheHit,
+  buildInFlightErrorResponse,
+  extractReqId,
+  type ApiFormat,
+} from "@/utils/reqIdHandler";
+import type { CachedResponseType } from "@/db/schema";
 
 const logger = consola.withTag("responsesApi");
 
@@ -163,6 +172,16 @@ function buildCompletionRecord(
   };
 }
 
+/**
+ * ReqId context for request deduplication: the values this file forwards to finalizeReqId and StreamingContext
+ */
+interface ReqIdContext {
+  reqId: string; // value returned by extractReqId(request.headers)
+  apiKeyId: number; // apiKeyRecord.id of the calling API key
+  preCreatedCompletionId: number; // completionId taken from a "new_request" checkReqId result
+  apiFormat: ApiFormat; // always "openai-responses" in this file
+}
+
 /**
  * Process a successful non-streaming response
  * Ensures completion is saved to database before returning
@@ -175,6 +194,7 @@ async function processNonStreamingResponse(
   apiKeyRecord: ApiKey | null,
   begin: number,
   signal?: AbortSignal,
+  reqIdContext?: ReqIdContext,
 ): Promise<string> {
   // Parse response using upstream adapter
   const upstreamAdapter = getUpstreamAdapter(providerType);
@@ -196,20 +216,44 @@ async function processNonStreamingResponse(
     },
   ];
 
+  // Build cached response for ReqId deduplication
+  const cachedResponse: CachedResponseType = {
+    body: serialized,
+    format: "openai-responses",
+  };
+
   // Check if client disconnected during processing
   if (signal?.aborted) {
     completion.status = "aborted";
-    await addCompletions(completion, bearer, {
-      level: "info",
-      message: "Client disconnected during non-streaming response",
-      details: {
-        type: "completionError",
-        data: { type: "aborted", msg: "Client disconnected" },
-      },
-    });
+    if (reqIdContext) {
+      await finalizeReqId(
+        reqIdContext.apiKeyId,
+        reqIdContext.reqId,
+        reqIdContext.preCreatedCompletionId,
+        { ...completion, cachedResponse },
+      );
+    } else {
+      await addCompletions(completion, bearer, {
+        level: "info",
+        message: "Client disconnected during non-streaming response",
+        details: {
+          type: "completionError",
+          data: { type: "aborted", msg: "Client disconnected" },
+        },
+      });
+    }
   } else {
     completion.status = "completed";
-    await addCompletions(completion, bearer);
+    if (reqIdContext) {
+      await finalizeReqId(
+        reqIdContext.apiKeyId,
+        reqIdContext.reqId,
+        reqIdContext.preCreatedCompletionId,
+        { ...completion, cachedResponse },
+      );
+    } else {
+      await addCompletions(completion, bearer);
+    }
   }
 
   // Consume tokens for TPM rate limiting (post-flight)
@@ -235,6 +279,7 @@ async function* processStreamingResponse(
   apiKeyRecord: ApiKey | null,
   begin: number,
   signal?: AbortSignal,
+  reqIdContext?: ReqIdContext,
 ): AsyncGenerator<string, void, unknown> {
   // Get adapters
   const upstreamAdapter = getUpstreamAdapter(providerType);
@@ -242,8 +287,40 @@ async function* processStreamingResponse(
 
   logger.debug("parse stream responses");
 
+  // Build streaming ReqId context if provided
+  const streamingReqIdContext = reqIdContext
+    ? {
+        reqId: reqIdContext.reqId,
+        apiKeyId: reqIdContext.apiKeyId,
+        preCreatedCompletionId: reqIdContext.preCreatedCompletionId,
+        apiFormat: reqIdContext.apiFormat,
+        buildCachedResponse: (comp: Completion): CachedResponseType => {
+          // For streaming, build a complete non-streaming Response API response for cache
+          return {
+            body: {
+              id: `resp-cache-${reqIdContext.preCreatedCompletionId}`,
+              object: "response",
+              created_at: Math.floor(Date.now() / 1000),
+              model: comp.model,
+              output: comp.completion.map((c) => ({
+                type: "message",
+                role: c.role || "assistant",
+                content: [{ type: "output_text", text: c.content || "" }],
+              })),
+              usage: {
+                input_tokens: comp.promptTokens,
+                output_tokens: comp.completionTokens,
+                total_tokens: comp.promptTokens + comp.completionTokens,
+              },
+            },
+            format: "openai-responses",
+          };
+        },
+      }
+    : undefined;
+
   // Create streaming context with abort handling
-  const ctx = new StreamingContext(completion, bearer, apiKeyRecord, begin, signal);
+  const ctx = new StreamingContext(completion, bearer, apiKeyRecord, begin, signal, streamingReqIdContext);
 
   // Track whether we've logged the client abort (to avoid duplicate logs)
   let loggedAbort = false;
@@ -395,7 +472,7 @@ export const responsesApi = new Elysia({
   .use(rateLimitPlugin)
   .post(
     "/responses",
-    async function* ({ body, set, bearer, request, apiKeyRecord }) {
+    async function ({ body, set, bearer, request, apiKeyRecord }) {
       if (bearer === undefined) {
         set.status = 500;
         return {
@@ -407,6 +484,10 @@ export const responsesApi = new Elysia({
       const reqHeaders = request.headers;
       const begin = Date.now();
 
+      // Extract ReqId for request deduplication
+      const reqId = extractReqId(reqHeaders);
+      const apiFormat: ApiFormat = "openai-responses";
+
       // Parse model@provider format and extract provider from header
       const { systemName, targetProvider } = parseModelProvider(
         body.model,
@@ -457,6 +538,89 @@ export const responsesApi = new Elysia({
       // Extract extra headers for passthrough
       const extraHeaders = extractUpstreamHeaders(reqHeaders);
 
+      // Check ReqId for deduplication (if provided)
+      const isStream = body.stream === true;
+
+      // Convert input to messages format for storage
+      const inputMessages: Array<{ role: string; content: string }> = [];
+      if (typeof body.input === "string") {
+        inputMessages.push({ role: "user", content: body.input });
+      } else if (Array.isArray(body.input)) {
+        for (const item of body.input) {
+          if (typeof item === "object" && item !== null) {
+            if (item.type === "message") {
+              inputMessages.push({
+                role: item.role || "user",
+                content: typeof item.content === "string" ? item.content : JSON.stringify(item.content),
+              });
+            } else if (item.type === "function_call_output") {
+              inputMessages.push({
+                role: "tool",
+                content: item.output || "",
+              });
+            }
+          }
+        }
+      }
+
+      const reqIdResult = await checkReqId(reqId, {
+        apiKeyId: apiKeyRecord.id,
+        model: body.model,
+        modelId: candidates[0]?.model.id,
+        prompt: {
+          messages: inputMessages,
+          extraHeaders,
+        },
+        apiFormat,
+        endpoint: "/v1/responses",
+        isStream,
+      });
+
+      // Handle cache hit - return cached response
+      if (reqIdResult.type === "cache_hit") {
+        const sourceCompletion = reqIdResult.completion;
+        await recordCacheHit(sourceCompletion, apiKeyRecord.id);
+
+        if (sourceCompletion.cachedResponse) {
+          return sourceCompletion.cachedResponse.body as Record<string, unknown>;
+        }
+
+        // Fallback: reconstruct Response API response
+        return {
+          id: `resp-cache-${sourceCompletion.id}`,
+          object: "response",
+          created_at: Math.floor(sourceCompletion.createdAt.getTime() / 1000),
+          model: sourceCompletion.model,
+          output: sourceCompletion.completion.map((c) => ({
+            type: "message",
+            role: c.role || "assistant",
+            content: [{ type: "output_text", text: c.content || "" }],
+          })),
+          usage: {
+            input_tokens: sourceCompletion.promptTokens,
+            output_tokens: sourceCompletion.completionTokens,
+            total_tokens: sourceCompletion.promptTokens + sourceCompletion.completionTokens,
+          },
+        };
+      }
+
+      // Handle in-flight - return 409 Conflict
+      if (reqIdResult.type === "in_flight") {
+        set.status = 409;
+        set.headers["Retry-After"] = String(reqIdResult.retryAfter);
+        return buildInFlightErrorResponse(
+          reqId!,
+          reqIdResult.inFlight,
+          reqIdResult.retryAfter,
+          apiFormat,
+        );
+      }
+
+      // For new_request, we have a pre-created completionId
+      const preCreatedCompletionId = reqIdResult.type === "new_request"
+        ? reqIdResult.completionId
+        : null;
+
       // Parse request using Response API adapter
       const requestAdapter = getRequestAdapter("openai-responses");
       const internalRequest = requestAdapter.parse(
@@ -484,7 +648,7 @@ export const responsesApi = new Elysia({
 
       // Handle streaming vs non-streaming
       if (internalRequest.stream) {
-        // Streaming request - use yield for streaming responses
+        // Streaming request - return an async generator
         const result = await executeWithFailover(
           candidates,
           buildRequestForProvider,
@@ -542,26 +706,42 @@ export const responsesApi = new Elysia({
           extraHeaders,
         );
 
-        try {
-          yield* processStreamingResponse(
-            result.response,
-            completion,
-            bearer,
-            providerType,
-            apiKeyRecord ?? null,
-            begin,
-            request.signal,
-          );
-        } catch (error) {
-          // Don't log error if it's due to client abort
-          if (!request.signal.aborted) {
-            logger.error("Stream processing error", error);
-            set.status = 500;
-            yield `event: error\ndata: ${JSON.stringify({ type: "error", error: { code: "internal_error", message: "Stream processing error", param: null, help_url: null } })}\n\n`;
+        // Build ReqId context if we have a pre-created completion
+        const streamReqIdContext = preCreatedCompletionId && reqId
+          ? {
+              reqId,
+              apiKeyId: apiKeyRecord.id,
+              preCreatedCompletionId,
+              apiFormat,
+            }
+          : undefined;
+
+        // Return an async generator for streaming
+        const streamResponse = result.response;
+        const streamSignal = request.signal;
+        return (async function* () {
+          try {
+            yield* processStreamingResponse(
+              streamResponse,
+              completion,
+              bearer,
+              providerType,
+              apiKeyRecord ?? null,
+              begin,
+              streamSignal,
+              streamReqIdContext,
+            );
+          } catch (error) {
+            // Don't log error if it's due to client abort
+            if (!streamSignal.aborted) {
+              logger.error("Stream processing error", error);
+              set.status = 500;
+              yield `event: error\ndata: ${JSON.stringify({ type: "error", error: { code: "internal_error", message: "Stream processing error", param: null, help_url: null } })}\n\n`;
+            }
           }
-        }
+        })();
       } else {
-        // Non-streaming request - use return for normal JSON response
+        // Non-streaming request - return JSON response directly
         const result = await executeWithFailover(
           candidates,
           buildRequestForProvider,
@@ -611,6 +791,16 @@ export const responsesApi = new Elysia({
           extraHeaders,
         );
 
+        // Build ReqId context if we have a pre-created completion
+        const nonStreamReqIdContext = preCreatedCompletionId && reqId
+          ? {
+              reqId,
+              apiKeyId: apiKeyRecord.id,
+              preCreatedCompletionId,
+              apiFormat,
+            }
+          : undefined;
+
         try {
           const response = await processNonStreamingResponse(
             result.response,
@@ -620,6 +810,7 @@ export const responsesApi = new Elysia({
             apiKeyRecord ?? null,
             begin,
             request.signal,
+            nonStreamReqIdContext,
           );
           // Return parsed JSON object for proper content-type
           return JSON.parse(response) as Record<string, unknown>;
diff --git a/backend/src/db/index.ts b/backend/src/db/index.ts
index d51bd5b..a5567f1 100644
--- a/backend/src/db/index.ts
+++ b/backend/src/db/index.ts
@@ -1257,3 +1257,84 @@ export async function getEmbeddingsTimeSeries(
     avg_duration: string;
   }[];
 }
+
+// ============================================
+// ReqId Deduplication Operations
+// ============================================
+
+/**
+ * Look up the finished completion recorded under a client-supplied ReqId.
+ * Rows still in the "pending" state and soft-deleted rows are never returned,
+ * which is what lets checkReqId treat a non-null result as a cache hit.
+ * @param apiKeyId the API key ID (ReqId is scoped per API key)
+ * @param reqId the client-provided request ID
+ * @returns the matching non-pending completion, or null when there is none
+ */
+export async function findCompletionByReqId(
+  apiKeyId: number,
+  reqId: string,
+): Promise<Completion | null> {
+  logger.debug("findCompletionByReqId", apiKeyId, reqId);
+  const completions = schema.CompletionsTable;
+  // Same key + same ReqId, not soft-deleted, and already past "pending"
+  const settledMatch = and(
+    eq(completions.apiKeyId, apiKeyId),
+    eq(completions.reqId, reqId),
+    not(completions.deleted),
+    not(eq(completions.status, "pending")),
+  );
+  const rows = await db
+    .select()
+    .from(completions)
+    .where(settledMatch)
+    .limit(1);
+  return rows[0] ?? null;
+}
+
+/**
+ * Insert a "pending" completion row, reserving its ReqId before the upstream call.
+ * @param c completion data including reqId
+ * @returns the created record, or null when the insert hit a unique constraint
+ * @throws any insert error that is not a unique-constraint violation
+ */
+export async function createPendingCompletion(
+  c: CompletionInsert,
+): Promise<Completion | null> {
+  logger.debug("createPendingCompletion", c.model, c.reqId);
+  try {
+    const [first] = await db.insert(schema.CompletionsTable).values(c).returning();
+    return first ?? null;
+  } catch (error) {
+    // PostgreSQL flags unique violations with SQLSTATE 23505. The code may sit on the
+    // thrown error or on a wrapped `cause`, so walk a few hops; message text is a fallback.
+    for (let e: unknown = error, hops = 0; e instanceof Error && hops < 4; hops++) {
+      if ((e as { code?: unknown }).code === "23505" || e.message.includes("unique")) {
+        logger.warn("Duplicate ReqId detected", c.reqId);
+        return null;
+      }
+      e = (e as { cause?: unknown }).cause;
+    }
+    throw error;
+  }
+}
+
+/**
+ * Apply a partial update to one completion row and stamp `updatedAt` with the
+ * current time (an `updatedAt` passed inside `updates` is overridden).
+ * @param id completion ID
+ * @param updates partial completion data to update
+ * @returns the row as returned after the update, or null if no row has this id
+ */
+export async function updateCompletion(
+  id: number,
+  updates: Partial<CompletionInsert>,
+): Promise<Completion | null> {
+  logger.debug("updateCompletion", id);
+  const changes = { ...updates, updatedAt: new Date() };
+  const rows = await db
+    .update(schema.CompletionsTable)
+    .set(changes)
+    .where(eq(schema.CompletionsTable.id, id))
+    .returning();
+  return rows[0] ?? null;
+}
diff --git a/backend/src/db/schema.ts b/backend/src/db/schema.ts
index 5ea0561..e70d6d6 100644
--- a/backend/src/db/schema.ts
+++ b/backend/src/db/schema.ts
@@ -151,8 +151,18 @@ export const CompletionsStatusEnum = pgEnum("completions_status", [
   "completed",
   "failed",
   "aborted",
+  "cache_hit",
 ]);
-export type CompletionsStatusEnumType = "pending" | "completed" | "failed" | "aborted";
+export type CompletionsStatusEnumType = "pending" | "completed" | "failed" | "aborted" | "cache_hit";
+
+/**
+ * Cached response type for ReqId deduplication
+ * Stores the serialized response so a later cache_hit can return it unchanged
+ */
+export type CachedResponseType = {
+  body: unknown; // Response payload; handlers return it as-is on a cache hit
+  format: "openai-chat" | "openai-responses" | "anthropic"; // API format of `body`
+};
 
 export const CompletionsTable = pgTable("completions", {
   id: integer("id").primaryKey().generatedAlwaysAsIdentity().unique(),
@@ -176,6 +186,13 @@ export const CompletionsTable = pgTable("completions", {
   updatedAt: timestamp("updated_at").notNull().defaultNow(),
   deleted: boolean("deleted").notNull().default(false),
   rating: real("rating"),
+  // ReqId deduplication fields
+  reqId: varchar("req_id", { length: 127 }),
+  sourceCompletionId: integer("source_completion_id").references(
+    (): AnyPgColumn => CompletionsTable.id,
+  ),
+  apiFormat: varchar("api_format", { length: 31 }),
+  cachedResponse: jsonb("cached_response").$type<CachedResponseType>(),
 });
 
 export const SrvLogsLevelEnum = pgEnum("srv_logs_level", [
diff --git a/backend/src/utils/redisClient.ts b/backend/src/utils/redisClient.ts
index dc3d086..2a8d3f8 100644
--- a/backend/src/utils/redisClient.ts
+++ b/backend/src/utils/redisClient.ts
@@ -98,6 +98,34 @@ class RedisClient {
     }
   }
 
+  /**
+   * Store a value under `key` with an expiry, but only when the key is absent.
+   * Issues a single `SET key value EX ttl NX`, so check-and-set is one command.
+   * @param {string} key - Key to set
+   * @param {string | number} value - Value to store (stringified before sending)
+   * @param {number} ttlSeconds - Time to live in seconds
+   * @returns {Promise<boolean>} true only if Redis answered "OK"; false when the
+   *   key already existed and also when the command failed (the error is logged)
+   */
+  public async setnx(
+    key: string,
+    value: string | number,
+    ttlSeconds: number,
+  ): Promise<boolean> {
+    // Numbers are serialized up front; the command is sent with a string value
+    const payload = String(value);
+    let reply: unknown;
+    try {
+      // NX makes the write conditional; EX attaches the TTL in the same command
+      reply = await this.client.set(key, payload, "EX", ttlSeconds, "NX");
+    } catch (error) {
+      const { message } = error as Error;
+      logger.error(`Redis setnx error: ${message}`);
+      return false;
+    }
+    return reply === "OK";
+  }
+
   /**
    * Execute a Lua script atomically
    * @param {string} script - Lua script to execute
diff --git a/backend/src/utils/reqIdCache.ts b/backend/src/utils/reqIdCache.ts
new file mode 100644
index 0000000..91336ad
--- /dev/null
+++ b/backend/src/utils/reqIdCache.ts
@@ -0,0 +1,159 @@
+/**
+ * Redis-based In-Flight request tracking for ReqId deduplication
+ *
+ * Tracks requests that are currently being processed to prevent duplicate
+ * concurrent requests with the same ReqId.
+ */
+
+import { consola } from "consola";
+import { redisClient } from "./redisClient";
+
+const logger = consola.withTag("reqIdCache");
+
+/**
+ * In-flight request data stored in Redis (JSON-serialized by markInFlight)
+ */
+export interface InFlightRequest {
+  completionId: number; // ID of the pending completion row for this request
+  startTime: number; // Unix timestamp in milliseconds
+  isStream: boolean; // Whether the original request asked for streaming
+  endpoint: string; // API endpoint being called, e.g. "/v1/responses"
+}
+
+// Redis key prefix for in-flight requests; buildKey appends ":<apiKeyId>:<reqId>"
+const KEY_PREFIX = "reqid:inflight";
+
+// TTL for in-flight markers (10 minutes), applied when markInFlight writes the key
+// This prevents orphan keys if the server crashes during request processing
+const IN_FLIGHT_TTL_SECONDS = 600;
+
+// Estimated total request durations, used only by calculateRetryAfter
+const ESTIMATED_STREAM_DURATION_MS = 60000; // 60 seconds for streaming
+const ESTIMATED_NON_STREAM_DURATION_MS = 30000; // 30 seconds for non-streaming
+
+/**
+ * Compose the Redis key of one in-flight marker: prefix, API key ID, ReqId
+ */
+function buildKey(apiKeyId: number, reqId: string): string {
+  return [KEY_PREFIX, apiKeyId, reqId].join(":");
+}
+
+/**
+ * Claim the in-flight slot for a ReqId with one atomic set-if-absent write
+ *
+ * @param apiKeyId - The API key ID
+ * @param reqId - The client-provided request ID
+ * @param completionId - The database completion ID
+ * @param endpoint - The API endpoint being called
+ * @param isStream - Whether this is a streaming request
+ * @returns true if this call created the marker, false if one already exists or the write failed
+ */
+export async function markInFlight(
+  apiKeyId: number,
+  reqId: string,
+  completionId: number,
+  endpoint: string,
+  isStream: boolean,
+): Promise<boolean> {
+  // The marker payload records which completion owns the slot and when it was taken
+  const marker: InFlightRequest = {
+    completionId,
+    startTime: Date.now(),
+    isStream,
+    endpoint,
+  };
+
+  let claimed: boolean;
+  try {
+    claimed = await redisClient.setnx(
+      buildKey(apiKeyId, reqId),
+      JSON.stringify(marker),
+      IN_FLIGHT_TTL_SECONDS,
+    );
+  } catch (error) {
+    logger.error("Failed to mark request as in-flight", error);
+    // Return false to be safe - treat as if already in-flight
+    return false;
+  }
+
+  if (claimed) {
+    logger.debug("Marked request as in-flight", { apiKeyId, reqId, completionId });
+    return true;
+  }
+  logger.debug("Request already in-flight", { apiKeyId, reqId });
+  return false;
+}
+
+/**
+ * Read the in-flight marker stored for a ReqId, if there is one
+ *
+ * @param apiKeyId - The API key ID
+ * @param reqId - The client-provided request ID
+ * @returns The parsed marker; null when no marker is stored or the lookup/parse failed
+ */
+export async function getInFlight(
+  apiKeyId: number,
+  reqId: string,
+): Promise<InFlightRequest | null> {
+  const key = buildKey(apiKeyId, reqId);
+
+  try {
+    const raw = await redisClient.get(key);
+    // A missing key (or empty value) means nothing is in flight.
+    // The stored JSON is trusted as-is; its shape is not validated here.
+    const marker = raw ? (JSON.parse(raw) as InFlightRequest) : null;
+    return marker;
+  } catch (error) {
+    // Redis failures and malformed JSON both end up here
+    logger.error("Failed to get in-flight request", error);
+    return null;
+  }
+}
+
+/**
+ * Remove the in-flight marker of a request (best effort)
+ *
+ * Meant to run once the request has finished, whether it succeeded or failed.
+ * Redis errors are logged and swallowed rather than propagated.
+ *
+ * @param apiKeyId - The API key ID
+ * @param reqId - The client-provided request ID
+ */
+export async function clearInFlight(
+  apiKeyId: number,
+  reqId: string,
+): Promise<void> {
+  try {
+    await redisClient.del(buildKey(apiKeyId, reqId));
+  } catch (error) {
+    // Best effort only: a marker left behind still expires through its TTL
+    logger.error("Failed to clear in-flight marker", error);
+    return;
+  }
+  logger.debug("Cleared in-flight marker", { apiKeyId, reqId });
+}
+
+/**
+ * Calculate the recommended Retry-After value based on in-flight request state
+ *
+ * @param inFlight - The in-flight request data
+ * @returns Retry-After value in whole seconds (never below 1, never NaN)
+ */
+export function calculateRetryAfter(inFlight: InFlightRequest): number {
+  const estimatedTotal = inFlight.isStream
+    ? ESTIMATED_STREAM_DURATION_MS
+    : ESTIMATED_NON_STREAM_DURATION_MS;
+  // Clamp: a future-dated or malformed startTime must not inflate or NaN the header
+  const rawElapsed = Date.now() - inFlight.startTime;
+  const elapsed = Number.isFinite(rawElapsed) ? Math.max(rawElapsed, 0) : 0;
+  return Math.ceil(Math.max(estimatedTotal - elapsed, 1000) / 1000);
+}
+
+/**
+ * Report whether in-flight tracking can be used right now
+ *
+ * @returns whatever `redisClient.isConnected()` reports for the shared Redis client
+ */
+export function isRedisAvailable(): boolean {
+  return redisClient.isConnected();
+}
diff --git a/backend/src/utils/reqIdHandler.ts b/backend/src/utils/reqIdHandler.ts
new file mode 100644
index 0000000..83c54a7
--- /dev/null
+++ b/backend/src/utils/reqIdHandler.ts
@@ -0,0 +1,325 @@
+/**
+ * ReqId Handler - Main logic for request deduplication
+ *
+ * Handles the full lifecycle of ReqId-based request deduplication:
+ * 1. Check if request is a cache hit (completed/failed/aborted)
+ * 2. Check if request is currently in-flight
+ * 3. Create new pending requests
+ * 4. Finalize requests after completion
+ */
+
+import { consola } from "consola";
+import {
+  findCompletionByReqId,
+  createPendingCompletion,
+  updateCompletion,
+  insertCompletion,
+  type Completion,
+  type CompletionInsert,
+} from "@/db";
+import {
+  markInFlight,
+  getInFlight,
+  clearInFlight,
+  calculateRetryAfter,
+  isRedisAvailable,
+  type InFlightRequest,
+} from "./reqIdCache";
+import type { CachedResponseType } from "@/db/schema";
+
+const logger = consola.withTag("reqIdHandler");
+
+/**
+ * HTTP header name for client-provided request ID (read via `Headers.get`)
+ */
+export const REQID_HEADER = "x-nexusgate-reqid";
+
+/**
+ * API format types; selects the error envelope in buildInFlightErrorResponse
+ */
+export type ApiFormat = "openai-chat" | "openai-responses" | "anthropic";
+
+/**
+ * Result types for ReqId check, discriminated by `type`
+ */
+export type ReqIdCheckResult =
+  | { type: "cache_hit"; completion: Completion } // A finished completion already exists
+  | { type: "in_flight"; inFlight: InFlightRequest; retryAfter: number } // retryAfter in seconds
+  | { type: "new_request"; completionId: number } // ID of the freshly inserted pending row
+  | { type: "no_reqid" }; // No ReqId provided - proceed normally
+
+/**
+ * Data needed to create a pending completion and its in-flight marker
+ */
+export interface PendingCompletionData {
+  apiKeyId: number;
+  model: string;
+  modelId?: number;
+  prompt: CompletionInsert["prompt"];
+  apiFormat: ApiFormat;
+  endpoint: string; // Passed to the in-flight marker, not written to the pending row
+  isStream: boolean; // Passed to the in-flight marker, not written to the pending row
+}
+
+/**
+ * Check ReqId status and determine how to handle the request
+ *
+ * Flow:
+ * 1. If no ReqId provided, return no_reqid (proceed normally)
+ * 2. Check database for a finished (non-pending) request with this ReqId -> cache_hit
+ * 3. Check Redis for in-flight request with this ReqId -> in_flight (skipped if Redis is down)
+ * 4. Insert a pending row, then mark it in-flight in Redis -> new_request
+ *
+ * @param reqId - The client-provided request ID (from header)
+ * @param data - Pending completion data
+ * @returns Check result indicating how to proceed
+ */
+export async function checkReqId(
+  reqId: string | null,
+  data: PendingCompletionData,
+): Promise<ReqIdCheckResult> {
+  // No ReqId provided (null or empty string) - proceed with normal request flow
+  if (!reqId) {
+    return { type: "no_reqid" };
+  }
+
+  const { apiKeyId, model, modelId, prompt, apiFormat, endpoint, isStream } = data;
+
+  // Step 1: Check database for an existing non-pending request
+  const existingCompletion = await findCompletionByReqId(apiKeyId, reqId);
+  if (existingCompletion) {
+    logger.info("Cache hit for ReqId", { reqId, completionId: existingCompletion.id });
+    return { type: "cache_hit", completion: existingCompletion };
+  }
+
+  // Step 2: Check Redis for in-flight request
+  if (isRedisAvailable()) {
+    const inFlight = await getInFlight(apiKeyId, reqId);
+    if (inFlight) {
+      const retryAfter = calculateRetryAfter(inFlight);
+      logger.info("Request in-flight for ReqId", { reqId, retryAfter });
+      return { type: "in_flight", inFlight, retryAfter };
+    }
+  } else {
+    logger.warn("Redis unavailable, skipping in-flight check");
+  }
+
+  // Step 3: Insert the pending row that reserves this ReqId in the database
+  const pendingData: CompletionInsert = {
+    apiKeyId,
+    model,
+    modelId,
+    prompt,
+    promptTokens: -1, // -1 values are placeholders; presumably overwritten on finalize
+    completion: [],
+    completionTokens: -1,
+    status: "pending",
+    ttft: -1,
+    duration: -1,
+    reqId,
+    apiFormat,
+  };
+
+  const newCompletion = await createPendingCompletion(pendingData);
+
+  if (!newCompletion) {
+    // null means the insert did not produce a row (unique constraint) - another request holds this ReqId
+    // Re-check database (might have completed) or Redis (might be in-flight)
+    logger.warn("Failed to create pending completion, re-checking state", { reqId });
+
+    const recheck = await findCompletionByReqId(apiKeyId, reqId);
+    if (recheck) {
+      return { type: "cache_hit", completion: recheck };
+    }
+
+    if (isRedisAvailable()) {
+      const inFlight = await getInFlight(apiKeyId, reqId);
+      if (inFlight) {
+        const retryAfter = calculateRetryAfter(inFlight);
+        return { type: "in_flight", inFlight, retryAfter };
+      }
+    }
+
+    // No finished row and no visible marker (Redis down, marker expired, or not written yet): synthesize one
+    logger.error("Unexpected state: ReqId exists but not found", { reqId });
+    return {
+      type: "in_flight",
+      inFlight: {
+        completionId: 0, // Placeholder: the real pending row ID is not known here
+        startTime: Date.now(),
+        isStream,
+        endpoint,
+      },
+      retryAfter: 5, // Fixed fallback, in seconds
+    };
+  }
+
+  // Step 4: Mark as in-flight in Redis
+  if (isRedisAvailable()) {
+    const marked = await markInFlight(
+      apiKeyId,
+      reqId,
+      newCompletion.id,
+      endpoint,
+      isStream,
+    );
+
+    if (!marked) {
+      // markInFlight returns false when a marker already exists or the Redis write failed.
+      // The pending row is already inserted, so only log and continue as a new request.
+      logger.warn("Failed to mark in-flight after DB insert", { reqId });
+    }
+  }
+
+  logger.debug("Created new pending request", { reqId, completionId: newCompletion.id });
+  return { type: "new_request", completionId: newCompletion.id };
+}
+
+/**
+ * Persist the final state of a ReqId-tracked request and release its marker
+ *
+ * The in-flight marker is cleared whether or not the database update succeeds;
+ * a failed update is logged and then rethrown to the caller.
+ *
+ * @param apiKeyId - The API key ID
+ * @param reqId - The client-provided request ID
+ * @param completionId - The completion ID to update
+ * @param updates - Completion updates (status, tokens, response, etc.)
+ */
+export async function finalizeReqId(
+  apiKeyId: number,
+  reqId: string,
+  completionId: number,
+  updates: Partial<CompletionInsert> & { cachedResponse?: CachedResponseType },
+): Promise<void> {
+  // Shared cleanup: drop the Redis marker when Redis is reachable
+  const releaseMarker = async (): Promise<void> => {
+    if (isRedisAvailable()) {
+      await clearInFlight(apiKeyId, reqId);
+    }
+  };
+
+  try {
+    await updateCompletion(completionId, updates);
+    await releaseMarker();
+    logger.debug("Finalized request", { reqId, completionId, status: updates.status });
+  } catch (error) {
+    logger.error("Failed to finalize request", { reqId, completionId, error });
+    // The update failed: still release the marker before propagating
+    await releaseMarker();
+    throw error;
+  }
+}
+
+/**
+ * Build the insert payload for a cache_hit completion record
+ *
+ * Copies model, prompt and completion from the source, zeroes tokens and timings,
+ * and links back to the source through sourceCompletionId. Nothing is written here.
+ * @param sourceCompletion - The original completion that was cached
+ * @param apiKeyId - The API key ID for the new request
+ * @returns CompletionInsert for the cache_hit record
+ */
+export function buildCacheHitRecord(
+  sourceCompletion: Completion,
+  apiKeyId: number,
+): CompletionInsert {
+  return {
+    apiKeyId,
+    model: sourceCompletion.model,
+    modelId: sourceCompletion.modelId,
+    upstreamId: sourceCompletion.upstreamId,
+    prompt: sourceCompletion.prompt,
+    promptTokens: 0, // Cache hit doesn't consume tokens
+    completion: sourceCompletion.completion,
+    completionTokens: 0, // Cache hit doesn't consume tokens
+    status: "cache_hit",
+    ttft: 0,
+    duration: 0,
+    reqId: sourceCompletion.reqId,
+    sourceCompletionId: sourceCompletion.id,
+    apiFormat: sourceCompletion.apiFormat,
+  };
+}
+
+/**
+ * Record a cache hit in the database as a new row whose reqId is null
+ *
+ * @param sourceCompletion - The original completion that was cached
+ * @param apiKeyId - The API key ID for the new request
+ * @returns Whatever insertCompletion returns for the cache_hit row (typed as nullable)
+ */
+export async function recordCacheHit(
+  sourceCompletion: Completion,
+  apiKeyId: number,
+): Promise<Completion | null> {
+  const record = buildCacheHitRecord(sourceCompletion, apiKeyId);
+  // Use insertCompletion instead of createPendingCompletion to avoid ReqId conflicts
+  // since cache_hit records share the same ReqId as the source
+  return await insertCompletion({
+    ...record,
+    reqId: null, // Don't set ReqId on cache_hit records to avoid unique constraint
+  });
+}
+
+/**
+ * Create the JSON body returned with 409 Conflict while a ReqId is still in flight
+ *
+ * @param reqId - The client-provided request ID
+ * @param inFlight - The in-flight request data
+ * @param retryAfter - Retry-After value in seconds
+ * @param format - The API format for response formatting
+ * @returns Error response object
+ */
+export function buildInFlightErrorResponse(
+  reqId: string,
+  inFlight: InFlightRequest,
+  retryAfter: number,
+  format: ApiFormat,
+): Record<string, unknown> {
+  const message = "A request with this X-NexusGate-ReqId is already being processed";
+  // Fields shared by both envelopes; spread last so key order stays unchanged
+  const details = {
+    req_id: reqId,
+    retry_after: retryAfter,
+    started_at: new Date(inFlight.startTime).toISOString(),
+  };
+
+  switch (format) {
+    case "anthropic":
+      // Anthropic envelope: top-level type plus a typed error object
+      return {
+        type: "error",
+        error: { type: "conflict", message, ...details },
+      };
+    default:
+      // OpenAI format (openai-chat, openai-responses)
+      return {
+        error: {
+          code: "request_in_flight",
+          message,
+          type: "conflict",
+          ...details,
+        },
+      };
+  }
+}
+
+/**
+ * Extract ReqId from request headers
+ * NOTE(review): the 127-char check runs before trimming; a truncated value is returned untrimmed
+ * @param headers - Request headers
+ * @returns The ReqId (at most 127 chars), or null if the header is missing or blank
+ */
+export function extractReqId(headers: Headers): string | null {
+  const reqId = headers.get(REQID_HEADER);
+  if (!reqId || reqId.trim() === "") {
+    return null;
+  }
+  // Validate ReqId length (max 127 chars as per schema)
+  if (reqId.length > 127) {
+    logger.warn("ReqId too long, truncating", { length: reqId.length });
+    return reqId.substring(0, 127);
+  }
+  return reqId.trim();
+}
diff --git a/backend/src/utils/streaming-context.ts b/backend/src/utils/streaming-context.ts
index 4554f46..e30f01e 100644
--- a/backend/src/utils/streaming-context.ts
+++ b/backend/src/utils/streaming-context.ts
@@ -4,12 +4,26 @@
  */
 
 import type {
+  CachedResponseType,
   CompletionsStatusEnumType,
   ToolCallType,
 } from "@/db/schema";
 import { addCompletions, type Completion } from "@/utils/completions";
 import { consumeTokens } from "@/plugins/apiKeyRateLimitPlugin";
 import type { ApiKey } from "@/plugins/apiKeyPlugin";
+import { finalizeReqId } from "@/utils/reqIdHandler";
+
+/**
+ * ReqId context for request deduplication; when set, results are saved via finalizeReqId
+ */
+export interface StreamingReqIdContext {
+  reqId: string; // Client-provided request ID
+  apiKeyId: number; // API key the ReqId is scoped to
+  preCreatedCompletionId: number; // ID of the pending row to update on completion
+  apiFormat: "openai-chat" | "openai-responses" | "anthropic";
+  /** Optional builder for the response to cache; when absent no cachedResponse is produced */
+  buildCachedResponse?: (completion: Completion) => CachedResponseType;
+}
 
 /**
  * StreamingContext manages the state of a streaming response.
@@ -22,6 +36,7 @@ export class StreamingContext {
   private begin: number;
   private saved = false;
   private signal?: AbortSignal;
+  private reqIdContext?: StreamingReqIdContext;
 
   // Accumulated data during streaming
   textParts: string[] = [];
@@ -41,12 +56,14 @@ export class StreamingContext {
     apiKeyRecord: ApiKey | null,
     begin: number,
     signal?: AbortSignal,
+    reqIdContext?: StreamingReqIdContext,
   ) {
     this.completion = completion;
     this.bearer = bearer;
     this.apiKeyRecord = apiKeyRecord;
     this.begin = begin;
     this.signal = signal;
+    this.reqIdContext = reqIdContext;
 
     // Note: We don't save immediately on abort anymore.
     // Instead, we continue processing chunks from upstream and save the full
@@ -112,8 +129,21 @@ export class StreamingContext {
     this.completion.ttft = this.ttft;
     this.completion.duration = Date.now() - this.begin;
 
-    // Save to database
-    if (error) {
+    // Save to database - use finalizeReqId if ReqId context is present
+    if (this.reqIdContext) {
+      // Build cached response if callback is provided
+      const cachedResponse = this.reqIdContext.buildCachedResponse?.(this.completion);
+
+      await finalizeReqId(
+        this.reqIdContext.apiKeyId,
+        this.reqIdContext.reqId,
+        this.reqIdContext.preCreatedCompletionId,
+        {
+          ...this.completion,
+          cachedResponse,
+        },
+      );
+    } else if (error) {
       await addCompletions(this.completion, this.bearer, {
         level: status === "aborted" ? "info" : "error",
         message: `Stream ${status}: ${error}`,
diff --git a/frontend/src/i18n/locales/en-US.json b/frontend/src/i18n/locales/en-US.json
index 8611ccc..6a3d98a 100644
--- a/frontend/src/i18n/locales/en-US.json
+++ b/frontend/src/i18n/locales/en-US.json
@@ -94,6 +94,7 @@
   "pages.requests.columns.Completed": "Completed",
   "pages.requests.columns.Failed": "Failed",
   "pages.requests.columns.Aborted": "Aborted",
+  "pages.requests.columns.CacheHit": "Cache Hit",
   "pages.requests.columns.Model": "Model",
   "pages.requests.columns.TTFT": "TTFT",
   "pages.requests.columns.TimeToFirstToken": "Time To First Token",
@@ -120,6 +121,7 @@
   "pages.requests.detail-panel.header.Completed": "Completed",
   "pages.requests.detail-panel.header.Failed": "Failed",
   "pages.requests.detail-panel.header.Aborted": "Aborted",
+  "pages.requests.detail-panel.header.CacheHit": "Cache Hit",
   "pages.requests.detail-panel.header.ClosePanel": "Close panel",
   "pages.requests.detail-panel.index.Close": "Close",
   "pages.requests.detail-panel.index.Retry": "Retry",
diff --git a/frontend/src/i18n/locales/zh-CN.json b/frontend/src/i18n/locales/zh-CN.json
index 61780ac..d2b3064 100644
--- a/frontend/src/i18n/locales/zh-CN.json
+++ b/frontend/src/i18n/locales/zh-CN.json
@@ -95,6 +95,7 @@
   "pages.requests.columns.Completed": "已完成",
   "pages.requests.columns.Failed": "失败",
   "pages.requests.columns.Aborted": "已中止",
+  "pages.requests.columns.CacheHit": "缓存命中",
   "pages.requests.columns.Model": "模型",
   "pages.requests.columns.TTFT": "TTFT",
   "pages.requests.columns.TimeToFirstToken": "首 Token 返回时间",
@@ -121,6 +122,7 @@
   "pages.requests.detail-panel.header.Completed": "已完成",
   "pages.requests.detail-panel.header.Failed": "失败",
   "pages.requests.detail-panel.header.Aborted": "已中止",
+  "pages.requests.detail-panel.header.CacheHit": "缓存命中",
   "pages.requests.detail-panel.header.ClosePanel": "关闭面板",
   "pages.requests.detail-panel.index.Close": "关闭",
   "pages.requests.detail-panel.index.Retry": "重试",
diff --git a/frontend/src/pages/requests/columns.tsx b/frontend/src/pages/requests/columns.tsx
index d6eb2f5..1df9a03 100644
--- a/frontend/src/pages/requests/columns.tsx
+++ b/frontend/src/pages/requests/columns.tsx
@@ -44,6 +44,9 @@ export const columns: ColumnDef<ChatRequest>[] = [
         .with('aborted', () => (
           <MiniIndicatorBadge className="bg-amber-500">{i18n.t('pages.requests.columns.Aborted')}</MiniIndicatorBadge>
         ))
+        .with('cache_hit', () => (
+          <MiniIndicatorBadge className="bg-blue-500">{i18n.t('pages.requests.columns.CacheHit')}</MiniIndicatorBadge>
+        ))
         .exhaustive()
       return (
         <div className="flex items-center gap-2.5">
diff --git a/frontend/src/pages/requests/detail-panel/header.tsx b/frontend/src/pages/requests/detail-panel/header.tsx
index 587b65d..f44f5e8 100644
--- a/frontend/src/pages/requests/detail-panel/header.tsx
+++ b/frontend/src/pages/requests/detail-panel/header.tsx
@@ -67,6 +67,11 @@ function StatusIndicator({ status }: { status: ChatRequest['status'] }) {
         {t('pages.requests.detail-panel.header.Aborted')}
       </IndicatorBadge>
     ))
+    .with('cache_hit', () => (
+      <IndicatorBadge className="bg-blue-500/15 text-blue-800 dark:text-blue-200">
+        {t('pages.requests.detail-panel.header.CacheHit')}
+      </IndicatorBadge>
+    ))
     .exhaustive()
 }
 

From 506d717368d47d6879cc05b8ad80dc63aa175168 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= <admin@pescn.cn>
Date: Sun, 25 Jan 2026 02:20:49 +0800
Subject: [PATCH 2/9] fix: address PR review feedback

- Simplify buildCacheHitRecord by removing reqId field (was immediately overwritten)
- Fix extractReqId to trim before length validation
- Remove ineffective set.status=500 in streaming error handlers (status cannot change after streaming starts)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 backend/src/api/v1/completions.ts |  2 +-
 backend/src/api/v1/messages.ts    |  2 +-
 backend/src/api/v1/responses.ts   |  2 +-
 backend/src/utils/reqIdHandler.ts | 25 +++++++++++++------------
 4 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/backend/src/api/v1/completions.ts b/backend/src/api/v1/completions.ts
index 41e32e1..53ba482 100644
--- a/backend/src/api/v1/completions.ts
+++ b/backend/src/api/v1/completions.ts
@@ -737,7 +737,7 @@ export const completionsApi = new Elysia({
             // Don't log error if it's due to client abort
             if (!streamSignal.aborted) {
               logger.error("Stream processing error", error);
-              set.status = 500;
+              // Note: HTTP status cannot be changed after streaming has started
               yield JSON.stringify({ error: "Stream processing error" });
             }
           }
diff --git a/backend/src/api/v1/messages.ts b/backend/src/api/v1/messages.ts
index 64024f7..81d614f 100644
--- a/backend/src/api/v1/messages.ts
+++ b/backend/src/api/v1/messages.ts
@@ -697,7 +697,7 @@ export const messagesApi = new Elysia({
             // Don't log error if it's due to client abort
             if (!streamSignal.aborted) {
               logger.error("Stream processing error", error);
-              set.status = 500;
+              // Note: HTTP status cannot be changed after streaming has started
               yield `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "server_error", message: "Stream processing error" } })}\n\n`;
             }
           }
diff --git a/backend/src/api/v1/responses.ts b/backend/src/api/v1/responses.ts
index 345c205..4c63b47 100644
--- a/backend/src/api/v1/responses.ts
+++ b/backend/src/api/v1/responses.ts
@@ -735,7 +735,7 @@ export const responsesApi = new Elysia({
             // Don't log error if it's due to client abort
             if (!streamSignal.aborted) {
               logger.error("Stream processing error", error);
-              set.status = 500;
+              // Note: HTTP status cannot be changed after streaming has started
               yield `event: error\ndata: ${JSON.stringify({ type: "error", error: { code: "internal_error", message: "Stream processing error", param: null, help_url: null } })}\n\n`;
             }
           }
diff --git a/backend/src/utils/reqIdHandler.ts b/backend/src/utils/reqIdHandler.ts
index 83c54a7..52832ce 100644
--- a/backend/src/utils/reqIdHandler.ts
+++ b/backend/src/utils/reqIdHandler.ts
@@ -236,7 +236,8 @@ export function buildCacheHitRecord(
     status: "cache_hit",
     ttft: 0,
     duration: 0,
-    reqId: sourceCompletion.reqId,
+    // Note: reqId is intentionally omitted to avoid unique constraint violations
+    // cache_hit records don't need their own reqId since they reference sourceCompletionId
     sourceCompletionId: sourceCompletion.id,
     apiFormat: sourceCompletion.apiFormat,
   };
@@ -254,12 +255,7 @@ export async function recordCacheHit(
   apiKeyId: number,
 ): Promise<Completion | null> {
   const record = buildCacheHitRecord(sourceCompletion, apiKeyId);
-  // Use insertCompletion instead of createPendingCompletion to avoid ReqId conflicts
-  // since cache_hit records share the same ReqId as the source
-  return await insertCompletion({
-    ...record,
-    reqId: null, // Don't set ReqId on cache_hit records to avoid unique constraint
-  });
+  return await insertCompletion(record);
 }
 
 /**
@@ -313,13 +309,18 @@ export function buildInFlightErrorResponse(
  */
 export function extractReqId(headers: Headers): string | null {
   const reqId = headers.get(REQID_HEADER);
-  if (!reqId || reqId.trim() === "") {
+  if (!reqId) {
+    return null;
+  }
+  // Trim first, then validate
+  const trimmedReqId = reqId.trim();
+  if (trimmedReqId === "") {
     return null;
   }
   // Validate ReqId length (max 127 chars as per schema)
-  if (reqId.length > 127) {
-    logger.warn("ReqId too long, truncating", { length: reqId.length });
-    return reqId.substring(0, 127);
+  if (trimmedReqId.length > 127) {
+    logger.warn("ReqId too long, truncating", { length: trimmedReqId.length });
+    return trimmedReqId.substring(0, 127);
   }
-  return reqId.trim();
+  return trimmedReqId;
 }

From b4f301310d8ac9d4aea1a8ca50e88be3832fb6ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= <admin@pescn.cn>
Date: Sun, 25 Jan 2026 02:39:53 +0800
Subject: [PATCH 3/9] fix(api): finalize pre-created completion on failure
 paths

When ReqId is used and a pending completion is pre-created, failure
paths (failover error, internal error, no response body) must finalize
the completion to prevent ReqId from being stuck in pending/in-flight
state. This ensures subsequent requests with the same ReqId can succeed.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 backend/src/api/v1/completions.ts | 57 +++++++++++++++++++++++++++++++
 backend/src/api/v1/messages.ts    | 57 +++++++++++++++++++++++++++++++
 backend/src/api/v1/responses.ts   | 57 +++++++++++++++++++++++++++++++
 3 files changed, 171 insertions(+)

diff --git a/backend/src/api/v1/completions.ts b/backend/src/api/v1/completions.ts
index 53ba482..abd500e 100644
--- a/backend/src/api/v1/completions.ts
+++ b/backend/src/api/v1/completions.ts
@@ -672,6 +672,18 @@ export const completionsApi = new Elysia({
 
           const errorResult = await processFailoverError(result, completion, bearer, "streaming");
 
+          // Finalize pre-created completion if ReqId was used
+          if (preCreatedCompletionId && reqId && apiKeyRecord) {
+            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
+              status: "failed",
+              promptTokens: 0,
+              completionTokens: 0,
+              completion: [],
+              ttft: -1,
+              duration: Date.now() - begin,
+            });
+          }
+
           if (errorResult.type === "upstream_error") {
             set.status = errorResult.status;
             return JSON.parse(errorResult.body) as Record<string, unknown>;
@@ -688,11 +700,33 @@ export const completionsApi = new Elysia({
         }
 
         if (!result.response || !result.provider) {
+          // Finalize pre-created completion if ReqId was used
+          if (preCreatedCompletionId && reqId && apiKeyRecord) {
+            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
+              status: "failed",
+              promptTokens: 0,
+              completionTokens: 0,
+              completion: [],
+              ttft: -1,
+              duration: Date.now() - begin,
+            });
+          }
           set.status = 500;
           return { error: "Internal server error" };
         }
 
         if (!result.response.body) {
+          // Finalize pre-created completion if ReqId was used
+          if (preCreatedCompletionId && reqId && apiKeyRecord) {
+            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
+              status: "failed",
+              promptTokens: 0,
+              completionTokens: 0,
+              completion: [],
+              ttft: -1,
+              duration: Date.now() - begin,
+            });
+          }
           set.status = 500;
           return { error: "No body in response" };
         }
@@ -763,6 +797,18 @@ export const completionsApi = new Elysia({
 
           const errorResult = await processFailoverError(result, completion, bearer, "non-streaming");
 
+          // Finalize pre-created completion if ReqId was used
+          if (preCreatedCompletionId && reqId && apiKeyRecord) {
+            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
+              status: "failed",
+              promptTokens: 0,
+              completionTokens: 0,
+              completion: [],
+              ttft: -1,
+              duration: Date.now() - begin,
+            });
+          }
+
           if (errorResult.type === "upstream_error") {
             set.status = errorResult.status;
             return JSON.parse(errorResult.body) as Record<string, unknown>;
@@ -779,6 +825,17 @@ export const completionsApi = new Elysia({
         }
 
         if (!result.response || !result.provider) {
+          // Finalize pre-created completion if ReqId was used
+          if (preCreatedCompletionId && reqId && apiKeyRecord) {
+            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
+              status: "failed",
+              promptTokens: 0,
+              completionTokens: 0,
+              completion: [],
+              ttft: -1,
+              duration: Date.now() - begin,
+            });
+          }
           set.status = 500;
           return { error: "Internal server error" };
         }
diff --git a/backend/src/api/v1/messages.ts b/backend/src/api/v1/messages.ts
index 81d614f..67678d1 100644
--- a/backend/src/api/v1/messages.ts
+++ b/backend/src/api/v1/messages.ts
@@ -628,6 +628,18 @@ export const messagesApi = new Elysia({
 
           const errorResult = await processFailoverError(result, completion, bearer, "streaming");
 
+          // Finalize pre-created completion if ReqId was used
+          if (preCreatedCompletionId && reqId && apiKeyRecord) {
+            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
+              status: "failed",
+              promptTokens: 0,
+              completionTokens: 0,
+              completion: [],
+              ttft: -1,
+              duration: Date.now() - begin,
+            });
+          }
+
           if (errorResult.type === "upstream_error") {
             set.status = errorResult.status;
             return JSON.parse(errorResult.body) as Record<string, unknown>;
@@ -644,6 +656,17 @@ export const messagesApi = new Elysia({
         }
 
         if (!result.response || !result.provider) {
+          // Finalize pre-created completion if ReqId was used
+          if (preCreatedCompletionId && reqId && apiKeyRecord) {
+            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
+              status: "failed",
+              promptTokens: 0,
+              completionTokens: 0,
+              completion: [],
+              ttft: -1,
+              duration: Date.now() - begin,
+            });
+          }
           set.status = 500;
           return {
             type: "error",
@@ -652,6 +675,17 @@ export const messagesApi = new Elysia({
         }
 
         if (!result.response.body) {
+          // Finalize pre-created completion if ReqId was used
+          if (preCreatedCompletionId && reqId && apiKeyRecord) {
+            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
+              status: "failed",
+              promptTokens: 0,
+              completionTokens: 0,
+              completion: [],
+              ttft: -1,
+              duration: Date.now() - begin,
+            });
+          }
           set.status = 500;
           return {
             type: "error",
@@ -721,6 +755,18 @@ export const messagesApi = new Elysia({
 
           const errorResult = await processFailoverError(result, completion, bearer, "non-streaming");
 
+          // Finalize pre-created completion if ReqId was used
+          if (preCreatedCompletionId && reqId && apiKeyRecord) {
+            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
+              status: "failed",
+              promptTokens: 0,
+              completionTokens: 0,
+              completion: [],
+              ttft: -1,
+              duration: Date.now() - begin,
+            });
+          }
+
           if (errorResult.type === "upstream_error") {
             set.status = errorResult.status;
             return JSON.parse(errorResult.body) as Record<string, unknown>;
@@ -737,6 +783,17 @@ export const messagesApi = new Elysia({
         }
 
         if (!result.response || !result.provider) {
+          // Finalize pre-created completion if ReqId was used
+          if (preCreatedCompletionId && reqId && apiKeyRecord) {
+            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
+              status: "failed",
+              promptTokens: 0,
+              completionTokens: 0,
+              completion: [],
+              ttft: -1,
+              duration: Date.now() - begin,
+            });
+          }
           set.status = 500;
           return {
             type: "error",
diff --git a/backend/src/api/v1/responses.ts b/backend/src/api/v1/responses.ts
index 4c63b47..8341d7e 100644
--- a/backend/src/api/v1/responses.ts
+++ b/backend/src/api/v1/responses.ts
@@ -666,6 +666,18 @@ export const responsesApi = new Elysia({
 
           const errorResult = await processFailoverError(result, completion, bearer, "streaming");
 
+          // Finalize pre-created completion if ReqId was used
+          if (preCreatedCompletionId && reqId && apiKeyRecord) {
+            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
+              status: "failed",
+              promptTokens: 0,
+              completionTokens: 0,
+              completion: [],
+              ttft: -1,
+              duration: Date.now() - begin,
+            });
+          }
+
           if (errorResult.type === "upstream_error") {
             set.status = errorResult.status;
             return JSON.parse(errorResult.body) as Record<string, unknown>;
@@ -682,6 +694,17 @@ export const responsesApi = new Elysia({
         }
 
         if (!result.response || !result.provider) {
+          // Finalize pre-created completion if ReqId was used
+          if (preCreatedCompletionId && reqId && apiKeyRecord) {
+            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
+              status: "failed",
+              promptTokens: 0,
+              completionTokens: 0,
+              completion: [],
+              ttft: -1,
+              duration: Date.now() - begin,
+            });
+          }
           set.status = 500;
           return {
             object: "error",
@@ -690,6 +713,17 @@ export const responsesApi = new Elysia({
         }
 
         if (!result.response.body) {
+          // Finalize pre-created completion if ReqId was used
+          if (preCreatedCompletionId && reqId && apiKeyRecord) {
+            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
+              status: "failed",
+              promptTokens: 0,
+              completionTokens: 0,
+              completion: [],
+              ttft: -1,
+              duration: Date.now() - begin,
+            });
+          }
           set.status = 500;
           return {
             object: "error",
@@ -759,6 +793,18 @@ export const responsesApi = new Elysia({
 
           const errorResult = await processFailoverError(result, completion, bearer, "non-streaming");
 
+          // Finalize pre-created completion if ReqId was used
+          if (preCreatedCompletionId && reqId && apiKeyRecord) {
+            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
+              status: "failed",
+              promptTokens: 0,
+              completionTokens: 0,
+              completion: [],
+              ttft: -1,
+              duration: Date.now() - begin,
+            });
+          }
+
           if (errorResult.type === "upstream_error") {
             set.status = errorResult.status;
             return JSON.parse(errorResult.body) as Record<string, unknown>;
@@ -775,6 +821,17 @@ export const responsesApi = new Elysia({
         }
 
         if (!result.response || !result.provider) {
+          // Finalize pre-created completion if ReqId was used
+          if (preCreatedCompletionId && reqId && apiKeyRecord) {
+            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
+              status: "failed",
+              promptTokens: 0,
+              completionTokens: 0,
+              completion: [],
+              ttft: -1,
+              duration: Date.now() - begin,
+            });
+          }
           set.status = 500;
           return {
             object: "error",

From a30105d961147fc6df8ca0bd9a43c4d0eb2836bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= <admin@pescn.cn>
Date: Sun, 25 Jan 2026 02:48:40 +0800
Subject: [PATCH 4/9] fix(api): improve ReqId handling robustness

- Wrap recordCacheHit in try-catch to prevent cache recording failures
  from blocking cached response delivery
- Replace reqId! non-null assertions with explicit null checks and
  invariant error throws for better type safety
- Add documentation about race conditions when Redis is unavailable

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 backend/src/api/v1/completions.ts | 14 +++++++++++---
 backend/src/api/v1/messages.ts    | 13 +++++++++++--
 backend/src/api/v1/responses.ts   | 13 +++++++++++--
 backend/src/utils/reqIdHandler.ts |  9 ++++++++-
 4 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/backend/src/api/v1/completions.ts b/backend/src/api/v1/completions.ts
index abd500e..719c63b 100644
--- a/backend/src/api/v1/completions.ts
+++ b/backend/src/api/v1/completions.ts
@@ -570,8 +570,12 @@ export const completionsApi = new Elysia({
       // Handle cache hit - return cached response
       if (reqIdResult.type === "cache_hit") {
         const sourceCompletion = reqIdResult.completion;
-        // Record the cache hit
-        await recordCacheHit(sourceCompletion, apiKeyRecord.id);
+        // Record the cache hit (best-effort; do not block returning cached response)
+        try {
+          await recordCacheHit(sourceCompletion, apiKeyRecord.id);
+        } catch (error) {
+          logger.error("Failed to record cache hit", error);
+        }
 
         // Return cached response
         if (sourceCompletion.cachedResponse) {
@@ -604,10 +608,14 @@ export const completionsApi = new Elysia({
 
       // Handle in-flight - return 409 Conflict
       if (reqIdResult.type === "in_flight") {
+        // reqId is guaranteed non-null here since checkReqId only returns in_flight when reqId is provided
+        if (!reqId) {
+          throw new Error("Invariant violated: reqId is null for in_flight result");
+        }
         set.status = 409;
         set.headers["Retry-After"] = String(reqIdResult.retryAfter);
         return buildInFlightErrorResponse(
-          reqId!,
+          reqId,
           reqIdResult.inFlight,
           reqIdResult.retryAfter,
           apiFormat,
diff --git a/backend/src/api/v1/messages.ts b/backend/src/api/v1/messages.ts
index 67678d1..c0575e5 100644
--- a/backend/src/api/v1/messages.ts
+++ b/backend/src/api/v1/messages.ts
@@ -542,7 +542,12 @@ export const messagesApi = new Elysia({
       // Handle cache hit - return cached response
       if (reqIdResult.type === "cache_hit") {
         const sourceCompletion = reqIdResult.completion;
-        await recordCacheHit(sourceCompletion, apiKeyRecord.id);
+        // Record the cache hit (best-effort; do not block returning cached response)
+        try {
+          await recordCacheHit(sourceCompletion, apiKeyRecord.id);
+        } catch (error) {
+          logger.warn("Failed to record cache hit", error);
+        }
 
         if (sourceCompletion.cachedResponse) {
           return sourceCompletion.cachedResponse.body as Record<string, unknown>;
@@ -568,10 +573,14 @@ export const messagesApi = new Elysia({
 
       // Handle in-flight - return 409 Conflict
       if (reqIdResult.type === "in_flight") {
+        // reqId is guaranteed non-null here since checkReqId only returns in_flight when reqId is provided
+        if (!reqId) {
+          throw new Error("Invariant violated: reqId is null for in_flight result");
+        }
         set.status = 409;
         set.headers["Retry-After"] = String(reqIdResult.retryAfter);
         return buildInFlightErrorResponse(
-          reqId!,
+          reqId,
           reqIdResult.inFlight,
           reqIdResult.retryAfter,
           apiFormat,
diff --git a/backend/src/api/v1/responses.ts b/backend/src/api/v1/responses.ts
index 8341d7e..d04d60e 100644
--- a/backend/src/api/v1/responses.ts
+++ b/backend/src/api/v1/responses.ts
@@ -579,7 +579,12 @@ export const responsesApi = new Elysia({
       // Handle cache hit - return cached response
       if (reqIdResult.type === "cache_hit") {
         const sourceCompletion = reqIdResult.completion;
-        await recordCacheHit(sourceCompletion, apiKeyRecord.id);
+        // Record the cache hit (best-effort; do not block returning cached response)
+        try {
+          await recordCacheHit(sourceCompletion, apiKeyRecord.id);
+        } catch (error) {
+          logger.error("Failed to record cache hit", error);
+        }
 
         if (sourceCompletion.cachedResponse) {
           return sourceCompletion.cachedResponse.body as Record<string, unknown>;
@@ -606,10 +611,14 @@ export const responsesApi = new Elysia({
 
       // Handle in-flight - return 409 Conflict
       if (reqIdResult.type === "in_flight") {
+        // reqId is guaranteed non-null here since checkReqId only returns in_flight when reqId is provided
+        if (!reqId) {
+          throw new Error("Invariant violated: reqId is null for in_flight result");
+        }
         set.status = 409;
         set.headers["Retry-After"] = String(reqIdResult.retryAfter);
         return buildInFlightErrorResponse(
-          reqId!,
+          reqId,
           reqIdResult.inFlight,
           reqIdResult.retryAfter,
           apiFormat,
diff --git a/backend/src/utils/reqIdHandler.ts b/backend/src/utils/reqIdHandler.ts
index 52832ce..a536018 100644
--- a/backend/src/utils/reqIdHandler.ts
+++ b/backend/src/utils/reqIdHandler.ts
@@ -93,6 +93,13 @@ export async function checkReqId(
   }
 
   // Step 2: Check Redis for in-flight request
+  // Note on race conditions when Redis is unavailable:
+  // Without Redis, concurrent requests with the same ReqId will both proceed to step 3
+  // (create pending completion). The database unique constraint on (api_key_id, req_id)
+  // will catch this - one request succeeds and the other fails with a constraint violation,
+  // triggering a re-check (lines 127-154). This means duplicate processing may occur briefly
+  // until one request claims the ReqId in the database. Redis provides faster in-flight
+  // detection but is not required for correctness.
   if (isRedisAvailable()) {
     const inFlight = await getInFlight(apiKeyId, reqId);
     if (inFlight) {
@@ -101,7 +108,7 @@ export async function checkReqId(
       return { type: "in_flight", inFlight, retryAfter };
     }
   } else {
-    logger.warn("Redis unavailable, skipping in-flight check");
+    logger.warn("Redis unavailable, skipping in-flight check - race conditions possible until DB constraint catches duplicates");
   }
 
   // Step 3: Create pending completion and mark as in-flight

From 9cd6aadce096d20d3d3b1163e652f0fc22e8ddd6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= <admin@pescn.cn>
Date: Sun, 25 Jan 2026 02:52:39 +0800
Subject: [PATCH 5/9] fix(api): return 400 error when ReqId exceeds max length

Instead of silently truncating ReqId values longer than 127 characters,
the API now returns a 400 Bad Request error whose message states the
maximum length and the length that was received (measured after
trimming). This gives API consumers explicit feedback.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 backend/src/api/v1/completions.ts | 14 +++++++++++++-
 backend/src/api/v1/messages.ts    | 14 +++++++++++++-
 backend/src/api/v1/responses.ts   | 15 ++++++++++++++-
 backend/src/utils/reqIdHandler.ts | 31 ++++++++++++++++++++++---------
 4 files changed, 62 insertions(+), 12 deletions(-)

diff --git a/backend/src/api/v1/completions.ts b/backend/src/api/v1/completions.ts
index 719c63b..1106c55 100644
--- a/backend/src/api/v1/completions.ts
+++ b/backend/src/api/v1/completions.ts
@@ -43,6 +43,7 @@ import {
   recordCacheHit,
   buildInFlightErrorResponse,
   extractReqId,
+  REQID_MAX_LENGTH,
   type ApiFormat,
 } from "@/utils/reqIdHandler";
 
@@ -497,7 +498,18 @@ export const completionsApi = new Elysia({
       const begin = Date.now();
 
       // Extract ReqId for request deduplication
-      const reqId = extractReqId(reqHeaders);
+      const reqIdExtraction = extractReqId(reqHeaders);
+      if (reqIdExtraction.type === "too_long") {
+        set.status = 400;
+        return {
+          error: {
+            message: `X-NexusGate-ReqId exceeds maximum length of ${REQID_MAX_LENGTH} characters (got ${reqIdExtraction.length})`,
+            type: "invalid_request_error",
+            code: "reqid_too_long",
+          },
+        };
+      }
+      const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null;
       const apiFormat: ApiFormat = "openai-chat";
 
       // Parse model@provider format and extract provider from header
diff --git a/backend/src/api/v1/messages.ts b/backend/src/api/v1/messages.ts
index c0575e5..a2f138d 100644
--- a/backend/src/api/v1/messages.ts
+++ b/backend/src/api/v1/messages.ts
@@ -36,6 +36,7 @@ import {
   recordCacheHit,
   buildInFlightErrorResponse,
   extractReqId,
+  REQID_MAX_LENGTH,
   type ApiFormat,
 } from "@/utils/reqIdHandler";
 import type { CachedResponseType } from "@/db/schema";
@@ -468,7 +469,18 @@ export const messagesApi = new Elysia({
       const begin = Date.now();
 
       // Extract ReqId for request deduplication
-      const reqId = extractReqId(reqHeaders);
+      const reqIdExtraction = extractReqId(reqHeaders);
+      if (reqIdExtraction.type === "too_long") {
+        set.status = 400;
+        return {
+          type: "error",
+          error: {
+            type: "invalid_request_error",
+            message: `X-NexusGate-ReqId exceeds maximum length of ${REQID_MAX_LENGTH} characters (got ${reqIdExtraction.length})`,
+          },
+        };
+      }
+      const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null;
       const apiFormat: ApiFormat = "anthropic";
 
       // Parse model@provider format and extract provider from header
diff --git a/backend/src/api/v1/responses.ts b/backend/src/api/v1/responses.ts
index d04d60e..5e3adfd 100644
--- a/backend/src/api/v1/responses.ts
+++ b/backend/src/api/v1/responses.ts
@@ -36,6 +36,7 @@ import {
   recordCacheHit,
   buildInFlightErrorResponse,
   extractReqId,
+  REQID_MAX_LENGTH,
   type ApiFormat,
 } from "@/utils/reqIdHandler";
 import type { CachedResponseType } from "@/db/schema";
@@ -485,7 +486,19 @@ export const responsesApi = new Elysia({
       const begin = Date.now();
 
       // Extract ReqId for request deduplication
-      const reqId = extractReqId(reqHeaders);
+      const reqIdExtraction = extractReqId(reqHeaders);
+      if (reqIdExtraction.type === "too_long") {
+        set.status = 400;
+        return {
+          object: "error",
+          error: {
+            type: "invalid_request_error",
+            message: `X-NexusGate-ReqId exceeds maximum length of ${REQID_MAX_LENGTH} characters (got ${reqIdExtraction.length})`,
+            code: "reqid_too_long",
+          },
+        };
+      }
+      const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null;
       const apiFormat: ApiFormat = "openai-responses";
 
       // Parse model@provider format and extract provider from header
diff --git a/backend/src/utils/reqIdHandler.ts b/backend/src/utils/reqIdHandler.ts
index a536018..dc8fd03 100644
--- a/backend/src/utils/reqIdHandler.ts
+++ b/backend/src/utils/reqIdHandler.ts
@@ -309,25 +309,38 @@ export function buildInFlightErrorResponse(
 }
 
 /**
- * Extract ReqId from request headers
+ * Maximum length for ReqId (database schema constraint)
+ */
+export const REQID_MAX_LENGTH = 127;
+
+/**
+ * Result type for extractReqId
+ */
+export type ExtractReqIdResult =
+  | { type: "valid"; value: string }
+  | { type: "empty" }
+  | { type: "too_long"; length: number };
+
+/**
+ * Extract and validate ReqId from request headers
  *
  * @param headers - Request headers
- * @returns The ReqId value or null if not present
+ * @returns Extraction result indicating valid value, empty, or error
  */
-export function extractReqId(headers: Headers): string | null {
+export function extractReqId(headers: Headers): ExtractReqIdResult {
   const reqId = headers.get(REQID_HEADER);
   if (!reqId) {
-    return null;
+    return { type: "empty" };
   }
   // Trim first, then validate
   const trimmedReqId = reqId.trim();
   if (trimmedReqId === "") {
-    return null;
+    return { type: "empty" };
   }
   // Validate ReqId length (max 127 chars as per schema)
-  if (trimmedReqId.length > 127) {
-    logger.warn("ReqId too long, truncating", { length: trimmedReqId.length });
-    return trimmedReqId.substring(0, 127);
+  if (trimmedReqId.length > REQID_MAX_LENGTH) {
+    logger.warn("ReqId too long", { length: trimmedReqId.length, maxLength: REQID_MAX_LENGTH });
+    return { type: "too_long", length: trimmedReqId.length };
   }
-  return trimmedReqId;
+  return { type: "valid", value: trimmedReqId };
 }

From e849a88a858f0658646bb82b7376fa71c34f5e7f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= <admin@pescn.cn>
Date: Sun, 25 Jan 2026 02:59:04 +0800
Subject: [PATCH 6/9] fix(api): fix SSE error format and preserve tool_calls in
 cached responses

- Fix completions.ts SSE error format to use `data: {...}\n\n` prefix
- Fix messages.ts buildCachedResponse to include tool_use blocks from completion.tool_calls
- Fix responses.ts buildCachedResponse to include function_call output items from completion.tool_calls
- Update fallback response reconstruction to also handle tool_calls

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 backend/src/api/v1/completions.ts |  3 +-
 backend/src/api/v1/messages.ts    | 53 ++++++++++++++++++-----
 backend/src/api/v1/responses.ts   | 71 +++++++++++++++++++++++++++----
 3 files changed, 108 insertions(+), 19 deletions(-)

diff --git a/backend/src/api/v1/completions.ts b/backend/src/api/v1/completions.ts
index 1106c55..f135a68 100644
--- a/backend/src/api/v1/completions.ts
+++ b/backend/src/api/v1/completions.ts
@@ -792,7 +792,8 @@ export const completionsApi = new Elysia({
             if (!streamSignal.aborted) {
               logger.error("Stream processing error", error);
               // Note: HTTP status cannot be changed after streaming has started
-              yield JSON.stringify({ error: "Stream processing error" });
+              // Use SSE format for error: data: {...}\n\n
+              yield `data: ${JSON.stringify({ error: { message: "Stream processing error", type: "server_error", code: "stream_error" } })}\n\n`;
             }
           }
         })();
diff --git a/backend/src/api/v1/messages.ts b/backend/src/api/v1/messages.ts
index a2f138d..82308de 100644
--- a/backend/src/api/v1/messages.ts
+++ b/backend/src/api/v1/messages.ts
@@ -286,17 +286,35 @@ async function* processStreamingResponse(
         apiFormat: reqIdContext.apiFormat,
         buildCachedResponse: (comp: Completion): CachedResponseType => {
           // For streaming, build a complete non-streaming Anthropic response for cache
+          // Build content blocks including both text and tool_use
+          const contentBlocks: Array<Record<string, unknown>> = [];
+          for (const c of comp.completion) {
+            // Add text content if present
+            if (c.content) {
+              contentBlocks.push({ type: "text", text: c.content });
+            }
+            // Add tool_use blocks if present
+            if (c.tool_calls) {
+              for (const tc of c.tool_calls) {
+                contentBlocks.push({
+                  type: "tool_use",
+                  id: tc.id,
+                  name: tc.function.name,
+                  input: JSON.parse(tc.function.arguments || "{}"),
+                });
+              }
+            }
+          }
+          // Determine stop_reason based on content
+          const hasToolUse = contentBlocks.some((b) => b.type === "tool_use");
           return {
             body: {
               id: `msg-cache-${reqIdContext.preCreatedCompletionId}`,
               type: "message",
               role: "assistant",
-              content: comp.completion.map((c) => ({
-                type: "text",
-                text: c.content || "",
-              })),
+              content: contentBlocks.length > 0 ? contentBlocks : [{ type: "text", text: "" }],
               model: comp.model,
-              stop_reason: "end_turn",
+              stop_reason: hasToolUse ? "tool_use" : "end_turn",
               usage: {
                 input_tokens: comp.promptTokens,
                 output_tokens: comp.completionTokens,
@@ -566,16 +584,31 @@ export const messagesApi = new Elysia({
         }
 
         // Fallback: reconstruct Anthropic response
+        // Build content blocks including both text and tool_use
+        const contentBlocks: Array<Record<string, unknown>> = [];
+        for (const c of sourceCompletion.completion) {
+          if (c.content) {
+            contentBlocks.push({ type: "text", text: c.content });
+          }
+          if (c.tool_calls) {
+            for (const tc of c.tool_calls) {
+              contentBlocks.push({
+                type: "tool_use",
+                id: tc.id,
+                name: tc.function.name,
+                input: JSON.parse(tc.function.arguments || "{}"),
+              });
+            }
+          }
+        }
+        const hasToolUse = contentBlocks.some((b) => b.type === "tool_use");
         return {
           id: `msg-cache-${sourceCompletion.id}`,
           type: "message",
           role: "assistant",
-          content: sourceCompletion.completion.map((c) => ({
-            type: "text",
-            text: c.content || "",
-          })),
+          content: contentBlocks.length > 0 ? contentBlocks : [{ type: "text", text: "" }],
           model: sourceCompletion.model,
-          stop_reason: "end_turn",
+          stop_reason: hasToolUse ? "tool_use" : "end_turn",
           usage: {
             input_tokens: sourceCompletion.promptTokens,
             output_tokens: sourceCompletion.completionTokens,
diff --git a/backend/src/api/v1/responses.ts b/backend/src/api/v1/responses.ts
index 5e3adfd..68651ae 100644
--- a/backend/src/api/v1/responses.ts
+++ b/backend/src/api/v1/responses.ts
@@ -297,17 +297,46 @@ async function* processStreamingResponse(
         apiFormat: reqIdContext.apiFormat,
         buildCachedResponse: (comp: Completion): CachedResponseType => {
           // For streaming, build a complete non-streaming Response API response for cache
+          // Build output items including both messages and function_call
+          const outputItems: Array<Record<string, unknown>> = [];
+          for (const c of comp.completion) {
+            // Build content array for message
+            const content: Array<Record<string, unknown>> = [];
+            if (c.content) {
+              content.push({ type: "output_text", text: c.content });
+            }
+            // Add message output item if there's text content
+            if (content.length > 0) {
+              outputItems.push({
+                type: "message",
+                role: c.role || "assistant",
+                content,
+              });
+            }
+            // Add function_call output items for tool_calls
+            if (c.tool_calls) {
+              for (const tc of c.tool_calls) {
+                outputItems.push({
+                  type: "function_call",
+                  id: tc.id,
+                  call_id: tc.id,
+                  name: tc.function.name,
+                  arguments: tc.function.arguments || "{}",
+                });
+              }
+            }
+          }
           return {
             body: {
               id: `resp-cache-${reqIdContext.preCreatedCompletionId}`,
               object: "response",
               created_at: Math.floor(Date.now() / 1000),
               model: comp.model,
-              output: comp.completion.map((c) => ({
+              output: outputItems.length > 0 ? outputItems : [{
                 type: "message",
-                role: c.role || "assistant",
-                content: [{ type: "output_text", text: c.content || "" }],
-              })),
+                role: "assistant",
+                content: [{ type: "output_text", text: "" }],
+              }],
               usage: {
                 input_tokens: comp.promptTokens,
                 output_tokens: comp.completionTokens,
@@ -604,16 +633,42 @@ export const responsesApi = new Elysia({
         }
 
         // Fallback: reconstruct Response API response
+        // Build output items including both messages and function_call
+        const outputItems: Array<Record<string, unknown>> = [];
+        for (const c of sourceCompletion.completion) {
+          const content: Array<Record<string, unknown>> = [];
+          if (c.content) {
+            content.push({ type: "output_text", text: c.content });
+          }
+          if (content.length > 0) {
+            outputItems.push({
+              type: "message",
+              role: c.role || "assistant",
+              content,
+            });
+          }
+          if (c.tool_calls) {
+            for (const tc of c.tool_calls) {
+              outputItems.push({
+                type: "function_call",
+                id: tc.id,
+                call_id: tc.id,
+                name: tc.function.name,
+                arguments: tc.function.arguments || "{}",
+              });
+            }
+          }
+        }
         return {
           id: `resp-cache-${sourceCompletion.id}`,
           object: "response",
           created_at: Math.floor(sourceCompletion.createdAt.getTime() / 1000),
           model: sourceCompletion.model,
-          output: sourceCompletion.completion.map((c) => ({
+          output: outputItems.length > 0 ? outputItems : [{
             type: "message",
-            role: c.role || "assistant",
-            content: [{ type: "output_text", text: c.content || "" }],
-          })),
+            role: "assistant",
+            content: [{ type: "output_text", text: "" }],
+          }],
           usage: {
             input_tokens: sourceCompletion.promptTokens,
             output_tokens: sourceCompletion.completionTokens,

From b0443562c9039c144771c5d35ad605a343eefda2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= <admin@pescn.cn>
Date: Sun, 25 Jan 2026 03:33:19 +0800
Subject: [PATCH 7/9] refactor(api): reduce ReqId handling code duplication and
 improve validation

- Extract shared utilities to reqIdHandler.ts:
  - finalizeReqIdOnError() helper for error path finalization
  - Response builders for each API format (OpenAI Chat, Anthropic, OpenAI Responses)
  - buildReqIdValidationErrorResponse() for format-appropriate errors
  - Export ReqIdContext type for endpoint files

- Enhance ReqId validation:
  - Add regex pattern to reject control characters and special characters
  - Only allow alphanumeric, hyphens, underscores, dots, colons, forward slashes
  - Add "invalid_characters" error type

- Fix inconsistent logging:
  - Use warn level for non-critical recordCacheHit failures

- Reduce code duplication:
  - Replace 6+ error finalization blocks per endpoint with single function calls
  - Replace ~40 lines of response reconstruction per endpoint with shared builders

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 backend/src/api/v1/completions.ts | 151 ++++-------------
 backend/src/api/v1/messages.ts    | 157 +++---------------
 backend/src/api/v1/responses.ts   | 172 ++++---------------
 backend/src/utils/reqIdHandler.ts | 265 +++++++++++++++++++++++++++++-
 4 files changed, 347 insertions(+), 398 deletions(-)

diff --git a/backend/src/api/v1/completions.ts b/backend/src/api/v1/completions.ts
index f135a68..559d5c5 100644
--- a/backend/src/api/v1/completions.ts
+++ b/backend/src/api/v1/completions.ts
@@ -40,11 +40,14 @@ import {
 import {
   checkReqId,
   finalizeReqId,
+  finalizeReqIdOnError,
   recordCacheHit,
   buildInFlightErrorResponse,
   extractReqId,
-  REQID_MAX_LENGTH,
+  buildReqIdValidationErrorResponse,
+  buildCachedResponseByFormat,
   type ApiFormat,
+  type ReqIdContext,
 } from "@/utils/reqIdHandler";
 
 const logger = consola.withTag("completionsApi");
@@ -170,15 +173,7 @@ function buildCompletionRecord(
   };
 }
 
-/**
- * ReqId context for request deduplication
- */
-interface ReqIdContext {
-  reqId: string;
-  apiKeyId: number;
-  preCreatedCompletionId: number;
-  apiFormat: ApiFormat;
-}
+// ReqIdContext is imported from reqIdHandler
 
 /**
  * Process a successful non-streaming response
@@ -498,19 +493,14 @@ export const completionsApi = new Elysia({
       const begin = Date.now();
 
       // Extract ReqId for request deduplication
+      const apiFormat: ApiFormat = "openai-chat";
       const reqIdExtraction = extractReqId(reqHeaders);
-      if (reqIdExtraction.type === "too_long") {
-        set.status = 400;
-        return {
-          error: {
-            message: `X-NexusGate-ReqId exceeds maximum length of ${REQID_MAX_LENGTH} characters (got ${reqIdExtraction.length})`,
-            type: "invalid_request_error",
-            code: "reqid_too_long",
-          },
-        };
+      if (reqIdExtraction.type === "too_long" || reqIdExtraction.type === "invalid_characters") {
+        const errorResponse = buildReqIdValidationErrorResponse(reqIdExtraction, apiFormat);
+        set.status = errorResponse.status;
+        return errorResponse.body;
       }
       const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null;
-      const apiFormat: ApiFormat = "openai-chat";
 
       // Parse model@provider format and extract provider from header
       const { systemName, targetProvider } = parseModelProvider(
@@ -586,36 +576,14 @@ export const completionsApi = new Elysia({
         try {
           await recordCacheHit(sourceCompletion, apiKeyRecord.id);
         } catch (error) {
-          logger.error("Failed to record cache hit", error);
+          logger.warn("Failed to record cache hit", error);
         }
 
-        // Return cached response
+        // Return cached response if available, otherwise reconstruct
         if (sourceCompletion.cachedResponse) {
           return sourceCompletion.cachedResponse.body as Record<string, unknown>;
         }
-
-        // Fallback: reconstruct response from completion data
-        const reconstructed = {
-          id: `chatcmpl-cache-${sourceCompletion.id}`,
-          object: "chat.completion",
-          created: Math.floor(sourceCompletion.createdAt.getTime() / 1000),
-          model: sourceCompletion.model,
-          choices: sourceCompletion.completion.map((c, i) => ({
-            index: i,
-            message: {
-              role: c.role || "assistant",
-              content: c.content,
-              tool_calls: c.tool_calls,
-            },
-            finish_reason: c.tool_calls?.length ? "tool_calls" : "stop",
-          })),
-          usage: {
-            prompt_tokens: sourceCompletion.promptTokens,
-            completion_tokens: sourceCompletion.completionTokens,
-            total_tokens: sourceCompletion.promptTokens + sourceCompletion.completionTokens,
-          },
-        };
-        return reconstructed;
+        return buildCachedResponseByFormat(sourceCompletion, apiFormat);
       }
 
       // Handle in-flight - return 409 Conflict
@@ -634,9 +602,14 @@ export const completionsApi = new Elysia({
         );
       }
 
-      // For new_request, we have a pre-created completionId
-      const preCreatedCompletionId = reqIdResult.type === "new_request"
-        ? reqIdResult.completionId
+      // For new_request, we have a pre-created completionId - build ReqId context
+      const reqIdContext: ReqIdContext | null = (reqIdResult.type === "new_request" && reqId)
+        ? {
+            reqId,
+            apiKeyId: apiKeyRecord.id,
+            preCreatedCompletionId: reqIdResult.completionId,
+            apiFormat,
+          }
         : null;
 
       // Parse request using adapter
@@ -693,16 +666,7 @@ export const completionsApi = new Elysia({
           const errorResult = await processFailoverError(result, completion, bearer, "streaming");
 
           // Finalize pre-created completion if ReqId was used
-          if (preCreatedCompletionId && reqId && apiKeyRecord) {
-            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
-              status: "failed",
-              promptTokens: 0,
-              completionTokens: 0,
-              completion: [],
-              ttft: -1,
-              duration: Date.now() - begin,
-            });
-          }
+          await finalizeReqIdOnError(reqIdContext, begin);
 
           if (errorResult.type === "upstream_error") {
             set.status = errorResult.status;
@@ -720,33 +684,13 @@ export const completionsApi = new Elysia({
         }
 
         if (!result.response || !result.provider) {
-          // Finalize pre-created completion if ReqId was used
-          if (preCreatedCompletionId && reqId && apiKeyRecord) {
-            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
-              status: "failed",
-              promptTokens: 0,
-              completionTokens: 0,
-              completion: [],
-              ttft: -1,
-              duration: Date.now() - begin,
-            });
-          }
+          await finalizeReqIdOnError(reqIdContext, begin);
           set.status = 500;
           return { error: "Internal server error" };
         }
 
         if (!result.response.body) {
-          // Finalize pre-created completion if ReqId was used
-          if (preCreatedCompletionId && reqId && apiKeyRecord) {
-            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
-              status: "failed",
-              promptTokens: 0,
-              completionTokens: 0,
-              completion: [],
-              ttft: -1,
-              duration: Date.now() - begin,
-            });
-          }
+          await finalizeReqIdOnError(reqIdContext, begin);
           set.status = 500;
           return { error: "No body in response" };
         }
@@ -762,16 +706,6 @@ export const completionsApi = new Elysia({
           extraHeaders,
         );
 
-        // Build ReqId context if we have a pre-created completion
-        const streamReqIdContext = preCreatedCompletionId && reqId
-          ? {
-              reqId,
-              apiKeyId: apiKeyRecord.id,
-              preCreatedCompletionId,
-              apiFormat,
-            }
-          : undefined;
-
         // Return an async generator for streaming
         const streamResponse = result.response;
         const streamSignal = request.signal;
@@ -785,7 +719,7 @@ export const completionsApi = new Elysia({
               apiKeyRecord ?? null,
               begin,
               streamSignal,
-              streamReqIdContext,
+              reqIdContext ?? undefined,
             );
           } catch (error) {
             // Don't log error if it's due to client abort
@@ -819,16 +753,7 @@ export const completionsApi = new Elysia({
           const errorResult = await processFailoverError(result, completion, bearer, "non-streaming");
 
           // Finalize pre-created completion if ReqId was used
-          if (preCreatedCompletionId && reqId && apiKeyRecord) {
-            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
-              status: "failed",
-              promptTokens: 0,
-              completionTokens: 0,
-              completion: [],
-              ttft: -1,
-              duration: Date.now() - begin,
-            });
-          }
+          await finalizeReqIdOnError(reqIdContext, begin);
 
           if (errorResult.type === "upstream_error") {
             set.status = errorResult.status;
@@ -846,17 +771,7 @@ export const completionsApi = new Elysia({
         }
 
         if (!result.response || !result.provider) {
-          // Finalize pre-created completion if ReqId was used
-          if (preCreatedCompletionId && reqId && apiKeyRecord) {
-            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
-              status: "failed",
-              promptTokens: 0,
-              completionTokens: 0,
-              completion: [],
-              ttft: -1,
-              duration: Date.now() - begin,
-            });
-          }
+          await finalizeReqIdOnError(reqIdContext, begin);
           set.status = 500;
           return { error: "Internal server error" };
         }
@@ -872,16 +787,6 @@ export const completionsApi = new Elysia({
           extraHeaders,
         );
 
-        // Build ReqId context if we have a pre-created completion
-        const nonStreamReqIdContext = preCreatedCompletionId && reqId
-          ? {
-              reqId,
-              apiKeyId: apiKeyRecord.id,
-              preCreatedCompletionId,
-              apiFormat,
-            }
-          : undefined;
-
         try {
           const response = await processNonStreamingResponse(
             result.response,
@@ -891,7 +796,7 @@ export const completionsApi = new Elysia({
             apiKeyRecord ?? null,
             begin,
             request.signal,
-            nonStreamReqIdContext,
+            reqIdContext ?? undefined,
           );
           // Return parsed JSON object for proper content-type
           return JSON.parse(response) as Record<string, unknown>;
diff --git a/backend/src/api/v1/messages.ts b/backend/src/api/v1/messages.ts
index 82308de..1cbeece 100644
--- a/backend/src/api/v1/messages.ts
+++ b/backend/src/api/v1/messages.ts
@@ -33,11 +33,14 @@ import {
 import {
   checkReqId,
   finalizeReqId,
+  finalizeReqIdOnError,
   recordCacheHit,
   buildInFlightErrorResponse,
   extractReqId,
-  REQID_MAX_LENGTH,
+  buildReqIdValidationErrorResponse,
+  buildCachedResponseByFormat,
   type ApiFormat,
+  type ReqIdContext,
 } from "@/utils/reqIdHandler";
 import type { CachedResponseType } from "@/db/schema";
 
@@ -162,15 +165,7 @@ function buildCompletionRecord(
   };
 }
 
-/**
- * ReqId context for request deduplication
- */
-interface ReqIdContext {
-  reqId: string;
-  apiKeyId: number;
-  preCreatedCompletionId: number;
-  apiFormat: ApiFormat;
-}
+// ReqIdContext is imported from reqIdHandler
 
 /**
  * Process a successful non-streaming message response
@@ -487,19 +482,14 @@ export const messagesApi = new Elysia({
       const begin = Date.now();
 
       // Extract ReqId for request deduplication
+      const apiFormat: ApiFormat = "anthropic";
       const reqIdExtraction = extractReqId(reqHeaders);
-      if (reqIdExtraction.type === "too_long") {
-        set.status = 400;
-        return {
-          type: "error",
-          error: {
-            type: "invalid_request_error",
-            message: `X-NexusGate-ReqId exceeds maximum length of ${REQID_MAX_LENGTH} characters (got ${reqIdExtraction.length})`,
-          },
-        };
+      if (reqIdExtraction.type === "too_long" || reqIdExtraction.type === "invalid_characters") {
+        const errorResponse = buildReqIdValidationErrorResponse(reqIdExtraction, apiFormat);
+        set.status = errorResponse.status;
+        return errorResponse.body;
       }
       const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null;
-      const apiFormat: ApiFormat = "anthropic";
 
       // Parse model@provider format and extract provider from header
       const { systemName, targetProvider } = parseModelProvider(
@@ -579,41 +569,11 @@ export const messagesApi = new Elysia({
           logger.warn("Failed to record cache hit", error);
         }
 
+        // Return cached response if available, otherwise reconstruct
         if (sourceCompletion.cachedResponse) {
           return sourceCompletion.cachedResponse.body as Record<string, unknown>;
         }
-
-        // Fallback: reconstruct Anthropic response
-        // Build content blocks including both text and tool_use
-        const contentBlocks: Array<Record<string, unknown>> = [];
-        for (const c of sourceCompletion.completion) {
-          if (c.content) {
-            contentBlocks.push({ type: "text", text: c.content });
-          }
-          if (c.tool_calls) {
-            for (const tc of c.tool_calls) {
-              contentBlocks.push({
-                type: "tool_use",
-                id: tc.id,
-                name: tc.function.name,
-                input: JSON.parse(tc.function.arguments || "{}"),
-              });
-            }
-          }
-        }
-        const hasToolUse = contentBlocks.some((b) => b.type === "tool_use");
-        return {
-          id: `msg-cache-${sourceCompletion.id}`,
-          type: "message",
-          role: "assistant",
-          content: contentBlocks.length > 0 ? contentBlocks : [{ type: "text", text: "" }],
-          model: sourceCompletion.model,
-          stop_reason: hasToolUse ? "tool_use" : "end_turn",
-          usage: {
-            input_tokens: sourceCompletion.promptTokens,
-            output_tokens: sourceCompletion.completionTokens,
-          },
-        };
+        return buildCachedResponseByFormat(sourceCompletion, apiFormat);
       }
 
       // Handle in-flight - return 409 Conflict
@@ -632,9 +592,14 @@ export const messagesApi = new Elysia({
         );
       }
 
-      // For new_request, we have a pre-created completionId
-      const preCreatedCompletionId = reqIdResult.type === "new_request"
-        ? reqIdResult.completionId
+      // For new_request, we have a pre-created completionId - build ReqId context
+      const reqIdContext: ReqIdContext | null = (reqIdResult.type === "new_request" && reqId)
+        ? {
+            reqId,
+            apiKeyId: apiKeyRecord.id,
+            preCreatedCompletionId: reqIdResult.completionId,
+            apiFormat,
+          }
         : null;
 
       // Parse request using Anthropic adapter
@@ -683,16 +648,7 @@ export const messagesApi = new Elysia({
           const errorResult = await processFailoverError(result, completion, bearer, "streaming");
 
           // Finalize pre-created completion if ReqId was used
-          if (preCreatedCompletionId && reqId && apiKeyRecord) {
-            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
-              status: "failed",
-              promptTokens: 0,
-              completionTokens: 0,
-              completion: [],
-              ttft: -1,
-              duration: Date.now() - begin,
-            });
-          }
+          await finalizeReqIdOnError(reqIdContext, begin);
 
           if (errorResult.type === "upstream_error") {
             set.status = errorResult.status;
@@ -710,17 +666,7 @@ export const messagesApi = new Elysia({
         }
 
         if (!result.response || !result.provider) {
-          // Finalize pre-created completion if ReqId was used
-          if (preCreatedCompletionId && reqId && apiKeyRecord) {
-            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
-              status: "failed",
-              promptTokens: 0,
-              completionTokens: 0,
-              completion: [],
-              ttft: -1,
-              duration: Date.now() - begin,
-            });
-          }
+          await finalizeReqIdOnError(reqIdContext, begin);
           set.status = 500;
           return {
             type: "error",
@@ -729,17 +675,7 @@ export const messagesApi = new Elysia({
         }
 
         if (!result.response.body) {
-          // Finalize pre-created completion if ReqId was used
-          if (preCreatedCompletionId && reqId && apiKeyRecord) {
-            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
-              status: "failed",
-              promptTokens: 0,
-              completionTokens: 0,
-              completion: [],
-              ttft: -1,
-              duration: Date.now() - begin,
-            });
-          }
+          await finalizeReqIdOnError(reqIdContext, begin);
           set.status = 500;
           return {
             type: "error",
@@ -756,16 +692,6 @@ export const messagesApi = new Elysia({
           extraHeaders,
         );
 
-        // Build ReqId context if we have a pre-created completion
-        const streamReqIdContext = preCreatedCompletionId && reqId
-          ? {
-              reqId,
-              apiKeyId: apiKeyRecord.id,
-              preCreatedCompletionId,
-              apiFormat,
-            }
-          : undefined;
-
         // Return an async generator for streaming
         const streamResponse = result.response;
         const streamSignal = request.signal;
@@ -779,7 +705,7 @@ export const messagesApi = new Elysia({
               apiKeyRecord ?? null,
               begin,
               streamSignal,
-              streamReqIdContext,
+              reqIdContext ?? undefined,
             );
           } catch (error) {
             // Don't log error if it's due to client abort
@@ -810,16 +736,7 @@ export const messagesApi = new Elysia({
           const errorResult = await processFailoverError(result, completion, bearer, "non-streaming");
 
           // Finalize pre-created completion if ReqId was used
-          if (preCreatedCompletionId && reqId && apiKeyRecord) {
-            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
-              status: "failed",
-              promptTokens: 0,
-              completionTokens: 0,
-              completion: [],
-              ttft: -1,
-              duration: Date.now() - begin,
-            });
-          }
+          await finalizeReqIdOnError(reqIdContext, begin);
 
           if (errorResult.type === "upstream_error") {
             set.status = errorResult.status;
@@ -837,17 +754,7 @@ export const messagesApi = new Elysia({
         }
 
         if (!result.response || !result.provider) {
-          // Finalize pre-created completion if ReqId was used
-          if (preCreatedCompletionId && reqId && apiKeyRecord) {
-            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
-              status: "failed",
-              promptTokens: 0,
-              completionTokens: 0,
-              completion: [],
-              ttft: -1,
-              duration: Date.now() - begin,
-            });
-          }
+          await finalizeReqIdOnError(reqIdContext, begin);
           set.status = 500;
           return {
             type: "error",
@@ -864,16 +771,6 @@ export const messagesApi = new Elysia({
           extraHeaders,
         );
 
-        // Build ReqId context if we have a pre-created completion
-        const nonStreamReqIdContext = preCreatedCompletionId && reqId
-          ? {
-              reqId,
-              apiKeyId: apiKeyRecord.id,
-              preCreatedCompletionId,
-              apiFormat,
-            }
-          : undefined;
-
         try {
           const response = await processNonStreamingResponse(
             result.response,
@@ -883,7 +780,7 @@ export const messagesApi = new Elysia({
             apiKeyRecord ?? null,
             begin,
             request.signal,
-            nonStreamReqIdContext,
+            reqIdContext ?? undefined,
           );
           // Return parsed JSON object for proper content-type
           return JSON.parse(response) as Record<string, unknown>;
diff --git a/backend/src/api/v1/responses.ts b/backend/src/api/v1/responses.ts
index 68651ae..0932895 100644
--- a/backend/src/api/v1/responses.ts
+++ b/backend/src/api/v1/responses.ts
@@ -33,11 +33,14 @@ import {
 import {
   checkReqId,
   finalizeReqId,
+  finalizeReqIdOnError,
   recordCacheHit,
   buildInFlightErrorResponse,
   extractReqId,
-  REQID_MAX_LENGTH,
+  buildReqIdValidationErrorResponse,
+  buildCachedResponseByFormat,
   type ApiFormat,
+  type ReqIdContext,
 } from "@/utils/reqIdHandler";
 import type { CachedResponseType } from "@/db/schema";
 
@@ -173,15 +176,7 @@ function buildCompletionRecord(
   };
 }
 
-/**
- * ReqId context for request deduplication
- */
-interface ReqIdContext {
-  reqId: string;
-  apiKeyId: number;
-  preCreatedCompletionId: number;
-  apiFormat: ApiFormat;
-}
+// ReqIdContext is imported from reqIdHandler
 
 /**
  * Process a successful non-streaming response
@@ -515,20 +510,14 @@ export const responsesApi = new Elysia({
       const begin = Date.now();
 
       // Extract ReqId for request deduplication
+      const apiFormat: ApiFormat = "openai-responses";
       const reqIdExtraction = extractReqId(reqHeaders);
-      if (reqIdExtraction.type === "too_long") {
-        set.status = 400;
-        return {
-          object: "error",
-          error: {
-            type: "invalid_request_error",
-            message: `X-NexusGate-ReqId exceeds maximum length of ${REQID_MAX_LENGTH} characters (got ${reqIdExtraction.length})`,
-            code: "reqid_too_long",
-          },
-        };
+      if (reqIdExtraction.type === "too_long" || reqIdExtraction.type === "invalid_characters") {
+        const errorResponse = buildReqIdValidationErrorResponse(reqIdExtraction, apiFormat);
+        set.status = errorResponse.status;
+        return errorResponse.body;
       }
       const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null;
-      const apiFormat: ApiFormat = "openai-responses";
 
       // Parse model@provider format and extract provider from header
       const { systemName, targetProvider } = parseModelProvider(
@@ -625,56 +614,14 @@ export const responsesApi = new Elysia({
         try {
           await recordCacheHit(sourceCompletion, apiKeyRecord.id);
         } catch (error) {
-          logger.error("Failed to record cache hit", error);
+          logger.warn("Failed to record cache hit", error);
         }
 
+        // Return cached response if available, otherwise reconstruct
         if (sourceCompletion.cachedResponse) {
           return sourceCompletion.cachedResponse.body as Record<string, unknown>;
         }
-
-        // Fallback: reconstruct Response API response
-        // Build output items including both messages and function_call
-        const outputItems: Array<Record<string, unknown>> = [];
-        for (const c of sourceCompletion.completion) {
-          const content: Array<Record<string, unknown>> = [];
-          if (c.content) {
-            content.push({ type: "output_text", text: c.content });
-          }
-          if (content.length > 0) {
-            outputItems.push({
-              type: "message",
-              role: c.role || "assistant",
-              content,
-            });
-          }
-          if (c.tool_calls) {
-            for (const tc of c.tool_calls) {
-              outputItems.push({
-                type: "function_call",
-                id: tc.id,
-                call_id: tc.id,
-                name: tc.function.name,
-                arguments: tc.function.arguments || "{}",
-              });
-            }
-          }
-        }
-        return {
-          id: `resp-cache-${sourceCompletion.id}`,
-          object: "response",
-          created_at: Math.floor(sourceCompletion.createdAt.getTime() / 1000),
-          model: sourceCompletion.model,
-          output: outputItems.length > 0 ? outputItems : [{
-            type: "message",
-            role: "assistant",
-            content: [{ type: "output_text", text: "" }],
-          }],
-          usage: {
-            input_tokens: sourceCompletion.promptTokens,
-            output_tokens: sourceCompletion.completionTokens,
-            total_tokens: sourceCompletion.promptTokens + sourceCompletion.completionTokens,
-          },
-        };
+        return buildCachedResponseByFormat(sourceCompletion, apiFormat);
       }
 
       // Handle in-flight - return 409 Conflict
@@ -693,9 +640,14 @@ export const responsesApi = new Elysia({
         );
       }
 
-      // For new_request, we have a pre-created completionId
-      const preCreatedCompletionId = reqIdResult.type === "new_request"
-        ? reqIdResult.completionId
+      // For new_request, we have a pre-created completionId - build ReqId context
+      const reqIdContext: ReqIdContext | null = (reqIdResult.type === "new_request" && reqId)
+        ? {
+            reqId,
+            apiKeyId: apiKeyRecord.id,
+            preCreatedCompletionId: reqIdResult.completionId,
+            apiFormat,
+          }
         : null;
 
       // Parse request using Response API adapter
@@ -744,16 +696,7 @@ export const responsesApi = new Elysia({
           const errorResult = await processFailoverError(result, completion, bearer, "streaming");
 
           // Finalize pre-created completion if ReqId was used
-          if (preCreatedCompletionId && reqId && apiKeyRecord) {
-            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
-              status: "failed",
-              promptTokens: 0,
-              completionTokens: 0,
-              completion: [],
-              ttft: -1,
-              duration: Date.now() - begin,
-            });
-          }
+          await finalizeReqIdOnError(reqIdContext, begin);
 
           if (errorResult.type === "upstream_error") {
             set.status = errorResult.status;
@@ -771,17 +714,7 @@ export const responsesApi = new Elysia({
         }
 
         if (!result.response || !result.provider) {
-          // Finalize pre-created completion if ReqId was used
-          if (preCreatedCompletionId && reqId && apiKeyRecord) {
-            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
-              status: "failed",
-              promptTokens: 0,
-              completionTokens: 0,
-              completion: [],
-              ttft: -1,
-              duration: Date.now() - begin,
-            });
-          }
+          await finalizeReqIdOnError(reqIdContext, begin);
           set.status = 500;
           return {
             object: "error",
@@ -790,17 +723,7 @@ export const responsesApi = new Elysia({
         }
 
         if (!result.response.body) {
-          // Finalize pre-created completion if ReqId was used
-          if (preCreatedCompletionId && reqId && apiKeyRecord) {
-            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
-              status: "failed",
-              promptTokens: 0,
-              completionTokens: 0,
-              completion: [],
-              ttft: -1,
-              duration: Date.now() - begin,
-            });
-          }
+          await finalizeReqIdOnError(reqIdContext, begin);
           set.status = 500;
           return {
             object: "error",
@@ -817,16 +740,6 @@ export const responsesApi = new Elysia({
           extraHeaders,
         );
 
-        // Build ReqId context if we have a pre-created completion
-        const streamReqIdContext = preCreatedCompletionId && reqId
-          ? {
-              reqId,
-              apiKeyId: apiKeyRecord.id,
-              preCreatedCompletionId,
-              apiFormat,
-            }
-          : undefined;
-
         // Return an async generator for streaming
         const streamResponse = result.response;
         const streamSignal = request.signal;
@@ -840,7 +753,7 @@ export const responsesApi = new Elysia({
               apiKeyRecord ?? null,
               begin,
               streamSignal,
-              streamReqIdContext,
+              reqIdContext ?? undefined,
             );
           } catch (error) {
             // Don't log error if it's due to client abort
@@ -871,16 +784,7 @@ export const responsesApi = new Elysia({
           const errorResult = await processFailoverError(result, completion, bearer, "non-streaming");
 
           // Finalize pre-created completion if ReqId was used
-          if (preCreatedCompletionId && reqId && apiKeyRecord) {
-            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
-              status: "failed",
-              promptTokens: 0,
-              completionTokens: 0,
-              completion: [],
-              ttft: -1,
-              duration: Date.now() - begin,
-            });
-          }
+          await finalizeReqIdOnError(reqIdContext, begin);
 
           if (errorResult.type === "upstream_error") {
             set.status = errorResult.status;
@@ -898,17 +802,7 @@ export const responsesApi = new Elysia({
         }
 
         if (!result.response || !result.provider) {
-          // Finalize pre-created completion if ReqId was used
-          if (preCreatedCompletionId && reqId && apiKeyRecord) {
-            await finalizeReqId(apiKeyRecord.id, reqId, preCreatedCompletionId, {
-              status: "failed",
-              promptTokens: 0,
-              completionTokens: 0,
-              completion: [],
-              ttft: -1,
-              duration: Date.now() - begin,
-            });
-          }
+          await finalizeReqIdOnError(reqIdContext, begin);
           set.status = 500;
           return {
             object: "error",
@@ -925,16 +819,6 @@ export const responsesApi = new Elysia({
           extraHeaders,
         );
 
-        // Build ReqId context if we have a pre-created completion
-        const nonStreamReqIdContext = preCreatedCompletionId && reqId
-          ? {
-              reqId,
-              apiKeyId: apiKeyRecord.id,
-              preCreatedCompletionId,
-              apiFormat,
-            }
-          : undefined;
-
         try {
           const response = await processNonStreamingResponse(
             result.response,
@@ -944,7 +828,7 @@ export const responsesApi = new Elysia({
             apiKeyRecord ?? null,
             begin,
             request.signal,
-            nonStreamReqIdContext,
+            reqIdContext ?? undefined,
           );
           // Return parsed JSON object for proper content-type
           return JSON.parse(response) as Record<string, unknown>;
diff --git a/backend/src/utils/reqIdHandler.ts b/backend/src/utils/reqIdHandler.ts
index dc8fd03..d5dc5de 100644
--- a/backend/src/utils/reqIdHandler.ts
+++ b/backend/src/utils/reqIdHandler.ts
@@ -313,13 +313,21 @@ export function buildInFlightErrorResponse(
  */
 export const REQID_MAX_LENGTH = 127;
 
+/**
+ * Whitelist regex for ReqId values; anchored with ^ and $, so the entire string must match.
+ * Accepts one or more of: ASCII letters, digits, underscores (`\w`), hyphens, dots, colons, forward slashes.
+ * Everything else (empty string, whitespace, control characters, null bytes, non-ASCII text) fails the match.
+ */
+const REQID_VALID_PATTERN = /^[\w\-.:/]+$/;
+
 /**
  * Result type for extractReqId
  */
 export type ExtractReqIdResult =
   | { type: "valid"; value: string }
   | { type: "empty" }
-  | { type: "too_long"; length: number };
+  | { type: "too_long"; length: number }
+  | { type: "invalid_characters" };
 
 /**
  * Extract and validate ReqId from request headers
@@ -342,5 +350,260 @@ export function extractReqId(headers: Headers): ExtractReqIdResult {
     logger.warn("ReqId too long", { length: trimmedReqId.length, maxLength: REQID_MAX_LENGTH });
     return { type: "too_long", length: trimmedReqId.length };
   }
+  // Validate ReqId contains only allowed characters
+  if (!REQID_VALID_PATTERN.test(trimmedReqId)) {
+    logger.warn("ReqId contains invalid characters", { reqId: trimmedReqId });
+    return { type: "invalid_characters" };
+  }
   return { type: "valid", value: trimmedReqId };
 }
+
+// =============================================================================
+// Response Builders for Cache Hits
+// =============================================================================
+
+/**
+ * Reconstruct a Chat Completions ("chat.completion") payload from a stored completion.
+ * Each stored message becomes one choice; the id is prefixed with `chatcmpl-cache-`.
+ */
+export function buildOpenAIChatResponse(completion: Completion): Record<string, unknown> {
+  const { promptTokens, completionTokens } = completion;
+  // A message that carries tool calls finishes with "tool_calls", anything else with "stop"
+  const choices = completion.completion.map((message, index) => ({
+    index,
+    message: { role: message.role || "assistant", content: message.content, tool_calls: message.tool_calls },
+    finish_reason: message.tool_calls?.length ? "tool_calls" : "stop",
+  }));
+  return {
+    id: `chatcmpl-cache-${completion.id}`,
+    object: "chat.completion",
+    created: Math.floor(completion.createdAt.getTime() / 1000),
+    model: completion.model,
+    choices,
+    usage: {
+      prompt_tokens: promptTokens,
+      completion_tokens: completionTokens,
+      total_tokens: promptTokens + completionTokens,
+    },
+  };
+}
+
+/**
+ * Build Anthropic Messages format response from cached completion.
+ * Stored tool-call arguments that are not valid JSON are replayed as an empty `input`
+ * (with a warning) instead of throwing and failing the whole cache hit.
+ */
+export function buildAnthropicResponse(completion: Completion): Record<string, unknown> {
+  // Build content blocks including both text and tool_use
+  const contentBlocks: Array<Record<string, unknown>> = [];
+  for (const c of completion.completion) {
+    if (c.content) {
+      contentBlocks.push({ type: "text", text: c.content });
+    }
+    // One tool_use block per stored tool call; arguments are a JSON string in storage
+    for (const tc of c.tool_calls ?? []) {
+      let input: unknown = {};
+      try {
+        input = JSON.parse(tc.function.arguments || "{}");
+      } catch (error) {
+        logger.warn("Cached tool_call arguments are not valid JSON", { toolCallId: tc.id, error });
+      }
+      contentBlocks.push({ type: "tool_use", id: tc.id, name: tc.function.name, input });
+    }
+  }
+  // Determine stop_reason based on content
+  const hasToolUse = contentBlocks.some((b) => b.type === "tool_use");
+  return {
+    id: `msg-cache-${completion.id}`,
+    type: "message",
+    role: "assistant",
+    content: contentBlocks.length > 0 ? contentBlocks : [{ type: "text", text: "" }],
+    model: completion.model,
+    stop_reason: hasToolUse ? "tool_use" : "end_turn",
+    usage: {
+      input_tokens: completion.promptTokens,
+      output_tokens: completion.completionTokens,
+    },
+  };
+}
+
+/**
+ * Reconstruct a Responses API ("response") payload from a stored completion.
+ *
+ * Every stored message with non-empty text yields a "message" output item, followed by
+ * one "function_call" item per tool call. If nothing is produced, a single assistant
+ * message with empty text is emitted so that `output` is never empty.
+ */
+export function buildOpenAIResponsesResponse(completion: Completion): Record<string, unknown> {
+  const { promptTokens, completionTokens } = completion;
+  const emptyMessage = {
+    type: "message",
+    role: "assistant",
+    content: [{ type: "output_text", text: "" }],
+  };
+
+  const output = completion.completion.flatMap((entry) => {
+    const items: Array<Record<string, unknown>> = [];
+    if (entry.content) {
+      // Text part: wrapped in a message item carrying a single output_text block
+      items.push({
+        type: "message",
+        role: entry.role || "assistant",
+        content: [{ type: "output_text", text: entry.content }],
+      });
+    }
+    // Tool calls: the stored id is used for both `id` and `call_id`
+    for (const call of entry.tool_calls || []) {
+      items.push({
+        type: "function_call",
+        id: call.id,
+        call_id: call.id,
+        name: call.function.name,
+        arguments: call.function.arguments || "{}",
+      });
+    }
+    return items;
+  });
+  return {
+    id: `resp-cache-${completion.id}`,
+    object: "response",
+    created_at: Math.floor(completion.createdAt.getTime() / 1000),
+    model: completion.model,
+    output: output.length > 0 ? output : [emptyMessage],
+    usage: {
+      input_tokens: promptTokens,
+      output_tokens: completionTokens,
+      total_tokens: promptTokens + completionTokens,
+    },
+  };
+}
+
+/**
+ * Rebuild a response from a stored completion by dispatching on `format` to the matching builder (openai-chat, anthropic, openai-responses)
+ */
+export function buildCachedResponseByFormat(
+  completion: Completion,
+  format: ApiFormat,
+): Record<string, unknown> {
+  switch (format) {
+    case "openai-chat":
+      return buildOpenAIChatResponse(completion);
+    case "anthropic":
+      return buildAnthropicResponse(completion);
+    case "openai-responses":
+      return buildOpenAIResponsesResponse(completion);
+  }
+}
+
+// =============================================================================
+// Error Finalization Helper
+// =============================================================================
+
+/**
+ * Context carried through a request that supplied a ReqId and had a completion pre-created for it
+ */
+export interface ReqIdContext {
+  reqId: string; // ReqId value supplied with the request
+  apiKeyId: number; // id of the API key record making the request
+  preCreatedCompletionId: number; // id of the completion created up front; handed to finalizeReqId when the request ends
+  apiFormat: ApiFormat; // API format of the endpoint handling the request
+}
+
+/**
+ * Mark the pre-created completion of a ReqId request as failed.
+ * Shared by the endpoint error paths: delegates to finalizeReqId with zero token counts,
+ * an empty completion, a ttft of -1 and the time elapsed since `begin` as duration.
+ *
+ * @param context - ReqId context; null or undefined turns the call into a no-op
+ * @param begin - Request start timestamp in Date.now() milliseconds
+ */
+export async function finalizeReqIdOnError(
+  context: ReqIdContext | null | undefined,
+  begin: number,
+): Promise<void> {
+  // Requests without a ReqId context have nothing to finalize
+  if (!context) return;
+  const { apiKeyId, reqId, preCreatedCompletionId } = context;
+
+  await finalizeReqId(apiKeyId, reqId, preCreatedCompletionId, {
+    status: "failed",
+    promptTokens: 0,
+    completionTokens: 0,
+    completion: [],
+    ttft: -1,
+    duration: Date.now() - begin,
+  });
+}
+
+// =============================================================================
+// ReqId Validation Error Responses
+// =============================================================================
+
+/**
+ * Build error response for invalid ReqId (too long or invalid characters)
+ */
+export function buildReqIdValidationErrorResponse(
+  extraction: ExtractReqIdResult,
+  format: ApiFormat,
+): { status: number; body: Record<string, unknown> } {
+  if (extraction.type === "too_long") {
+    const message = `X-NexusGate-ReqId exceeds maximum length of ${REQID_MAX_LENGTH} characters (got ${extraction.length})`;
+    return {
+      status: 400,
+      body: buildValidationErrorBody(message, "reqid_too_long", format),
+    };
+  }
+
+  if (extraction.type === "invalid_characters") {
+    const message = "X-NexusGate-ReqId contains invalid characters. Only alphanumeric characters, hyphens, underscores, dots, colons, and forward slashes are allowed.";
+    return {
+      status: 400,
+      body: buildValidationErrorBody(message, "reqid_invalid_characters", format),
+    };
+  }
+
+  // Only "valid" / "empty" extractions fall through to here; callers are expected to filter those out, so this is a defensive generic 400
+  return {
+    status: 400,
+    body: buildValidationErrorBody("Invalid X-NexusGate-ReqId", "reqid_invalid", format),
+  };
+}
+
+/**
+ * Wrap a validation message in the error envelope expected by the given API format.
+ *
+ * Every envelope uses the error type "invalid_request_error". The Anthropic envelope
+ * has no `code` field, so `code` is only emitted for the two OpenAI-style formats.
+ */
+function buildValidationErrorBody(
+  message: string,
+  code: string,
+  format: ApiFormat,
+): Record<string, unknown> {
+  const type = "invalid_request_error";
+
+  switch (format) {
+    case "anthropic":
+      // Anthropic: top-level type marker plus a nested error object
+      return {
+        type: "error",
+        error: {
+          type,
+          message,
+        },
+      };
+
+    case "openai-responses":
+      // Same error fields as Anthropic plus the machine-readable code
+      return {
+        object: "error",
+        error: { type, message, code },
+      };
+
+    default:
+      // openai-chat format
+      return {
+        error: { message, type, code },
+      };
+  }
+}

From 60b78425ddd01804ee2467f79adc3e8e238a3dec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= <admin@pescn.cn>
Date: Sun, 25 Jan 2026 06:18:25 +0800
Subject: [PATCH 8/9] refactor(api): consolidate ReqId handling with helper
 functions

- Add extractAndValidateReqId() to combine extraction, validation, and error response building
- Add handleReqIdResult() to consolidate cache_hit, in_flight, and context building logic
- Update completions, messages, and responses endpoints to use new helpers
- Reduces ~24 lines of duplicated code per endpoint (~72 lines total)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 backend/src/api/v1/completions.ts |  71 ++++++-------------
 backend/src/api/v1/messages.ts    |  71 ++++++-------------
 backend/src/api/v1/responses.ts   |  71 ++++++-------------
 backend/src/utils/reqIdHandler.ts | 112 +++++++++++++++++++++++++++++-
 4 files changed, 177 insertions(+), 148 deletions(-)

diff --git a/backend/src/api/v1/completions.ts b/backend/src/api/v1/completions.ts
index 559d5c5..eb1486f 100644
--- a/backend/src/api/v1/completions.ts
+++ b/backend/src/api/v1/completions.ts
@@ -41,11 +41,8 @@ import {
   checkReqId,
   finalizeReqId,
   finalizeReqIdOnError,
-  recordCacheHit,
-  buildInFlightErrorResponse,
-  extractReqId,
-  buildReqIdValidationErrorResponse,
-  buildCachedResponseByFormat,
+  extractAndValidateReqId,
+  handleReqIdResult,
   type ApiFormat,
   type ReqIdContext,
 } from "@/utils/reqIdHandler";
@@ -492,15 +489,14 @@ export const completionsApi = new Elysia({
       const reqHeaders = request.headers;
       const begin = Date.now();
 
-      // Extract ReqId for request deduplication
+      // Extract and validate ReqId for request deduplication
       const apiFormat: ApiFormat = "openai-chat";
-      const reqIdExtraction = extractReqId(reqHeaders);
-      if (reqIdExtraction.type === "too_long" || reqIdExtraction.type === "invalid_characters") {
-        const errorResponse = buildReqIdValidationErrorResponse(reqIdExtraction, apiFormat);
-        set.status = errorResponse.status;
-        return errorResponse.body;
+      const reqIdExtraction = extractAndValidateReqId(reqHeaders, apiFormat);
+      if (reqIdExtraction.type === "error") {
+        set.status = reqIdExtraction.status;
+        return reqIdExtraction.body;
       }
-      const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null;
+      const reqId = reqIdExtraction.reqId;
 
       // Parse model@provider format and extract provider from header
       const { systemName, targetProvider } = parseModelProvider(
@@ -569,48 +565,25 @@ export const completionsApi = new Elysia({
         isStream,
       });
 
-      // Handle cache hit - return cached response
-      if (reqIdResult.type === "cache_hit") {
-        const sourceCompletion = reqIdResult.completion;
-        // Record the cache hit (best-effort; do not block returning cached response)
-        try {
-          await recordCacheHit(sourceCompletion, apiKeyRecord.id);
-        } catch (error) {
-          logger.warn("Failed to record cache hit", error);
-        }
+      // Handle ReqId result (cache_hit, in_flight, or continue)
+      const reqIdHandleResult = await handleReqIdResult(
+        reqIdResult,
+        reqId,
+        apiKeyRecord.id,
+        apiFormat,
+      );
 
-        // Return cached response if available, otherwise reconstruct
-        if (sourceCompletion.cachedResponse) {
-          return sourceCompletion.cachedResponse.body as Record<string, unknown>;
-        }
-        return buildCachedResponseByFormat(sourceCompletion, apiFormat);
+      if (reqIdHandleResult.type === "cache_hit") {
+        return reqIdHandleResult.response;
       }
 
-      // Handle in-flight - return 409 Conflict
-      if (reqIdResult.type === "in_flight") {
-        // reqId is guaranteed non-null here since checkReqId only returns in_flight when reqId is provided
-        if (!reqId) {
-          throw new Error("Invariant violated: reqId is null for in_flight result");
-        }
-        set.status = 409;
-        set.headers["Retry-After"] = String(reqIdResult.retryAfter);
-        return buildInFlightErrorResponse(
-          reqId,
-          reqIdResult.inFlight,
-          reqIdResult.retryAfter,
-          apiFormat,
-        );
+      if (reqIdHandleResult.type === "in_flight") {
+        set.status = reqIdHandleResult.status;
+        set.headers["Retry-After"] = String(reqIdHandleResult.retryAfter);
+        return reqIdHandleResult.response;
       }
 
-      // For new_request, we have a pre-created completionId - build ReqId context
-      const reqIdContext: ReqIdContext | null = (reqIdResult.type === "new_request" && reqId)
-        ? {
-            reqId,
-            apiKeyId: apiKeyRecord.id,
-            preCreatedCompletionId: reqIdResult.completionId,
-            apiFormat,
-          }
-        : null;
+      const reqIdContext = reqIdHandleResult.context;
 
       // Parse request using adapter
       const requestAdapter = getRequestAdapter("openai-chat");
diff --git a/backend/src/api/v1/messages.ts b/backend/src/api/v1/messages.ts
index 1cbeece..6a698f9 100644
--- a/backend/src/api/v1/messages.ts
+++ b/backend/src/api/v1/messages.ts
@@ -34,11 +34,8 @@ import {
   checkReqId,
   finalizeReqId,
   finalizeReqIdOnError,
-  recordCacheHit,
-  buildInFlightErrorResponse,
-  extractReqId,
-  buildReqIdValidationErrorResponse,
-  buildCachedResponseByFormat,
+  extractAndValidateReqId,
+  handleReqIdResult,
   type ApiFormat,
   type ReqIdContext,
 } from "@/utils/reqIdHandler";
@@ -481,15 +478,14 @@ export const messagesApi = new Elysia({
       const reqHeaders = request.headers;
       const begin = Date.now();
 
-      // Extract ReqId for request deduplication
+      // Extract and validate ReqId for request deduplication
       const apiFormat: ApiFormat = "anthropic";
-      const reqIdExtraction = extractReqId(reqHeaders);
-      if (reqIdExtraction.type === "too_long" || reqIdExtraction.type === "invalid_characters") {
-        const errorResponse = buildReqIdValidationErrorResponse(reqIdExtraction, apiFormat);
-        set.status = errorResponse.status;
-        return errorResponse.body;
+      const reqIdExtraction = extractAndValidateReqId(reqHeaders, apiFormat);
+      if (reqIdExtraction.type === "error") {
+        set.status = reqIdExtraction.status;
+        return reqIdExtraction.body;
       }
-      const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null;
+      const reqId = reqIdExtraction.reqId;
 
       // Parse model@provider format and extract provider from header
       const { systemName, targetProvider } = parseModelProvider(
@@ -559,48 +555,25 @@ export const messagesApi = new Elysia({
         isStream,
       });
 
-      // Handle cache hit - return cached response
-      if (reqIdResult.type === "cache_hit") {
-        const sourceCompletion = reqIdResult.completion;
-        // Record the cache hit (best-effort; do not block returning cached response)
-        try {
-          await recordCacheHit(sourceCompletion, apiKeyRecord.id);
-        } catch (error) {
-          logger.warn("Failed to record cache hit", error);
-        }
+      // Handle ReqId result (cache_hit, in_flight, or continue)
+      const reqIdHandleResult = await handleReqIdResult(
+        reqIdResult,
+        reqId,
+        apiKeyRecord.id,
+        apiFormat,
+      );
 
-        // Return cached response if available, otherwise reconstruct
-        if (sourceCompletion.cachedResponse) {
-          return sourceCompletion.cachedResponse.body as Record<string, unknown>;
-        }
-        return buildCachedResponseByFormat(sourceCompletion, apiFormat);
+      if (reqIdHandleResult.type === "cache_hit") {
+        return reqIdHandleResult.response;
       }
 
-      // Handle in-flight - return 409 Conflict
-      if (reqIdResult.type === "in_flight") {
-        // reqId is guaranteed non-null here since checkReqId only returns in_flight when reqId is provided
-        if (!reqId) {
-          throw new Error("Invariant violated: reqId is null for in_flight result");
-        }
-        set.status = 409;
-        set.headers["Retry-After"] = String(reqIdResult.retryAfter);
-        return buildInFlightErrorResponse(
-          reqId,
-          reqIdResult.inFlight,
-          reqIdResult.retryAfter,
-          apiFormat,
-        );
+      if (reqIdHandleResult.type === "in_flight") {
+        set.status = reqIdHandleResult.status;
+        set.headers["Retry-After"] = String(reqIdHandleResult.retryAfter);
+        return reqIdHandleResult.response;
       }
 
-      // For new_request, we have a pre-created completionId - build ReqId context
-      const reqIdContext: ReqIdContext | null = (reqIdResult.type === "new_request" && reqId)
-        ? {
-            reqId,
-            apiKeyId: apiKeyRecord.id,
-            preCreatedCompletionId: reqIdResult.completionId,
-            apiFormat,
-          }
-        : null;
+      const reqIdContext = reqIdHandleResult.context;
 
       // Parse request using Anthropic adapter
       const requestAdapter = getRequestAdapter("anthropic");
diff --git a/backend/src/api/v1/responses.ts b/backend/src/api/v1/responses.ts
index 0932895..8cd7ed1 100644
--- a/backend/src/api/v1/responses.ts
+++ b/backend/src/api/v1/responses.ts
@@ -34,11 +34,8 @@ import {
   checkReqId,
   finalizeReqId,
   finalizeReqIdOnError,
-  recordCacheHit,
-  buildInFlightErrorResponse,
-  extractReqId,
-  buildReqIdValidationErrorResponse,
-  buildCachedResponseByFormat,
+  extractAndValidateReqId,
+  handleReqIdResult,
   type ApiFormat,
   type ReqIdContext,
 } from "@/utils/reqIdHandler";
@@ -509,15 +506,14 @@ export const responsesApi = new Elysia({
       const reqHeaders = request.headers;
       const begin = Date.now();
 
-      // Extract ReqId for request deduplication
+      // Extract and validate ReqId for request deduplication
       const apiFormat: ApiFormat = "openai-responses";
-      const reqIdExtraction = extractReqId(reqHeaders);
-      if (reqIdExtraction.type === "too_long" || reqIdExtraction.type === "invalid_characters") {
-        const errorResponse = buildReqIdValidationErrorResponse(reqIdExtraction, apiFormat);
-        set.status = errorResponse.status;
-        return errorResponse.body;
+      const reqIdExtraction = extractAndValidateReqId(reqHeaders, apiFormat);
+      if (reqIdExtraction.type === "error") {
+        set.status = reqIdExtraction.status;
+        return reqIdExtraction.body;
       }
-      const reqId = reqIdExtraction.type === "valid" ? reqIdExtraction.value : null;
+      const reqId = reqIdExtraction.reqId;
 
       // Parse model@provider format and extract provider from header
       const { systemName, targetProvider } = parseModelProvider(
@@ -607,48 +603,25 @@ export const responsesApi = new Elysia({
         isStream,
       });
 
-      // Handle cache hit - return cached response
-      if (reqIdResult.type === "cache_hit") {
-        const sourceCompletion = reqIdResult.completion;
-        // Record the cache hit (best-effort; do not block returning cached response)
-        try {
-          await recordCacheHit(sourceCompletion, apiKeyRecord.id);
-        } catch (error) {
-          logger.warn("Failed to record cache hit", error);
-        }
+      // Handle ReqId result (cache_hit, in_flight, or continue)
+      const reqIdHandleResult = await handleReqIdResult(
+        reqIdResult,
+        reqId,
+        apiKeyRecord.id,
+        apiFormat,
+      );
 
-        // Return cached response if available, otherwise reconstruct
-        if (sourceCompletion.cachedResponse) {
-          return sourceCompletion.cachedResponse.body as Record<string, unknown>;
-        }
-        return buildCachedResponseByFormat(sourceCompletion, apiFormat);
+      if (reqIdHandleResult.type === "cache_hit") {
+        return reqIdHandleResult.response;
       }
 
-      // Handle in-flight - return 409 Conflict
-      if (reqIdResult.type === "in_flight") {
-        // reqId is guaranteed non-null here since checkReqId only returns in_flight when reqId is provided
-        if (!reqId) {
-          throw new Error("Invariant violated: reqId is null for in_flight result");
-        }
-        set.status = 409;
-        set.headers["Retry-After"] = String(reqIdResult.retryAfter);
-        return buildInFlightErrorResponse(
-          reqId,
-          reqIdResult.inFlight,
-          reqIdResult.retryAfter,
-          apiFormat,
-        );
+      if (reqIdHandleResult.type === "in_flight") {
+        set.status = reqIdHandleResult.status;
+        set.headers["Retry-After"] = String(reqIdHandleResult.retryAfter);
+        return reqIdHandleResult.response;
       }
 
-      // For new_request, we have a pre-created completionId - build ReqId context
-      const reqIdContext: ReqIdContext | null = (reqIdResult.type === "new_request" && reqId)
-        ? {
-            reqId,
-            apiKeyId: apiKeyRecord.id,
-            preCreatedCompletionId: reqIdResult.completionId,
-            apiFormat,
-          }
-        : null;
+      const reqIdContext = reqIdHandleResult.context;
 
       // Parse request using Response API adapter
       const requestAdapter = getRequestAdapter("openai-responses");
diff --git a/backend/src/utils/reqIdHandler.ts b/backend/src/utils/reqIdHandler.ts
index d5dc5de..70fb415 100644
--- a/backend/src/utils/reqIdHandler.ts
+++ b/backend/src/utils/reqIdHandler.ts
@@ -535,6 +535,116 @@ export async function finalizeReqIdOnError(
   });
 }
 
+// =============================================================================
+// Consolidated ReqId Handling Helpers
+// =============================================================================
+
+/**
+ * Outcome of extractAndValidateReqId: a usable ReqId, no ReqId, or a ready-made error response
+ */
+export type ReqIdExtractionResult =
+  | { type: "valid"; reqId: string } // extractReqId accepted the value
+  | { type: "empty"; reqId: null } // extractReqId reported "empty"; the request carries no ReqId
+  | { type: "error"; status: number; body: Record<string, unknown> }; // too long or invalid characters; status/body are sent back as-is
+
+/**
+ * Read the ReqId from the headers and translate the raw extraction into a caller-friendly result.
+ * Rejections ("too_long", "invalid_characters") come back as a ready-to-send error
+ * (status + body in `apiFormat`); otherwise the ReqId, or null when none was supplied.
+ *
+ * @param headers - Request headers
+ * @param apiFormat - API format for error response formatting
+ * @returns Extraction result with reqId or error response
+ */
+export function extractAndValidateReqId(
+  headers: Headers,
+  apiFormat: ApiFormat,
+): ReqIdExtractionResult {
+  const extraction = extractReqId(headers);
+  switch (extraction.type) {
+    case "valid":
+      return { type: "valid", reqId: extraction.value };
+    case "too_long":
+    case "invalid_characters": {
+      const { status, body } = buildReqIdValidationErrorResponse(extraction, apiFormat);
+      return { type: "error", status, body };
+    }
+    default:
+      return { type: "empty", reqId: null };
+  }
+}
+
+/**
+ * Outcome of handleReqIdResult: an early response to send (cache_hit / in_flight) or the go-ahead to process the request
+ */
+export type ReqIdHandleResult =
+  | { type: "cache_hit"; response: Record<string, unknown> } // stored or rebuilt body of the earlier completion
+  | { type: "in_flight"; status: 409; retryAfter: number; response: Record<string, unknown> } // same ReqId is still being processed
+  | { type: "continue"; context: ReqIdContext | null }; // context is non-null only for a new_request that has a ReqId
+
+/**
+ * Turn the outcome of checkReqId into either an early response or a context to continue with.
+ *
+ * Covers the three cases shared by all endpoints: replaying a cache hit, rejecting a
+ * duplicate of an in-flight request, and building the ReqId context for a new request.
+ *
+ * @param result - Result from checkReqId
+ * @param reqId - The extracted reqId (or null)
+ * @param apiKeyId - API key ID for the request
+ * @param apiFormat - API format for response formatting
+ * @returns Handle result indicating how to proceed
+ */
+export async function handleReqIdResult(
+  result: ReqIdCheckResult,
+  reqId: string | null,
+  apiKeyId: number,
+  apiFormat: ApiFormat,
+): Promise<ReqIdHandleResult> {
+  switch (result.type) {
+    case "cache_hit": {
+      const source = result.completion;
+
+      // Bookkeeping only: a failure to record the hit must not block the replay
+      try {
+        await recordCacheHit(source, apiKeyId);
+      } catch (error) {
+        logger.warn("Failed to record cache hit", error);
+      }
+
+      // Prefer the stored response body; rebuild from the completion otherwise
+      if (source.cachedResponse) {
+        const stored = source.cachedResponse.body as Record<string, unknown>;
+        return { type: "cache_hit", response: stored };
+      }
+      return {
+        type: "cache_hit",
+        response: buildCachedResponseByFormat(source, apiFormat),
+      };
+    }
+    case "in_flight": {
+      // checkReqId is only expected to report in_flight when a reqId was provided
+      if (!reqId) {
+        throw new Error("Invariant violated: reqId is null for in_flight result");
+      }
+      const { retryAfter } = result;
+      const response = buildInFlightErrorResponse(reqId, result.inFlight, retryAfter, apiFormat);
+      return { type: "in_flight", status: 409, retryAfter, response };
+    }
+    default: {
+      // Only a new_request with a ReqId has a pre-created completion to track
+      if (result.type !== "new_request" || !reqId) {
+        return { type: "continue", context: null };
+      }
+      const context: ReqIdContext = {
+        reqId,
+        apiKeyId,
+        preCreatedCompletionId: result.completionId,
+        apiFormat,
+      };
+      return { type: "continue", context };
+    }
+  }
+}
+
 // =============================================================================
 // ReqId Validation Error Responses
 // =============================================================================
@@ -542,7 +652,7 @@ export async function finalizeReqIdOnError(
 /**
  * Build error response for invalid ReqId (too long or invalid characters)
  */
-export function buildReqIdValidationErrorResponse(
+function buildReqIdValidationErrorResponse(
   extraction: ExtractReqIdResult,
   format: ApiFormat,
 ): { status: number; body: Record<string, unknown> } {

From cdbefc4ace7b749d2403c0ee853f9d7bd5584387 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?= <admin@pescn.cn>
Date: Sun, 25 Jan 2026 06:38:57 +0800
Subject: [PATCH 9/9] fix(db): use PostgreSQL error code for unique constraint
 detection

Replace brittle string matching (error.message.includes("unique")) with
PostgreSQL error code check (code === "23505") for detecting unique
constraint violations in createPendingCompletion.

This matches the pattern already used in apiKey.ts and is more reliable
across different PostgreSQL versions and locales.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 backend/src/db/index.ts | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/backend/src/db/index.ts b/backend/src/db/index.ts
index a5567f1..72bb219 100644
--- a/backend/src/db/index.ts
+++ b/backend/src/db/index.ts
@@ -1310,7 +1310,12 @@ export async function createPendingCompletion(
     return first ?? null;
   } catch (error) {
     // Handle unique constraint violation (duplicate ReqId)
-    if (error instanceof Error && error.message.includes("unique")) {
+    // PostgreSQL error code 23505 = unique_violation
+    if (
+      error instanceof Error &&
+      "code" in error &&
+      (error as { code: string }).code === "23505"
+    ) {
       logger.warn("Duplicate ReqId detected", c.reqId);
       return null;
     }