From bd8a546186448cac27d6277fdc83e2690f4520d7 Mon Sep 17 00:00:00 2001
From: Koosha Paridehpour <kooshapari@gmail.com>
Date: Wed, 25 Feb 2026 03:04:29 -0700
Subject: [PATCH] backup: pre-wave full dirty snapshot before fresh-main
 worktree execution

---
 cmd/cliproxyctl/main_test.go                  |   6 ++
 config.example.yaml                           |   4 +
 docs/operations/index.md                      |   1 +
 docs/operations/provider-error-runbook.md     |  40 +++++++
 ...1-0830-implementation-batch-3-resume-12.md |  30 ++++++
 ...b-0781-0830-implementation-batch-4-code.md |  19 +++-
 .../issue-wave-cpb-0781-0830-lane-b.md        |  39 +++++++
 .../issue-wave-cpb-0781-0830-lane-c.md        |  34 ++++++
 ...ssue-wave-cpb-0781-0830-next-50-summary.md |  30 +++++-
 docs/provider-operations.md                   |  24 ++++-
 docs/provider-quickstarts.md                  |  53 ++++++++-
 docs/troubleshooting.md                       | 102 +++++++++++++++++-
 .../config/responses_compact_toggle_test.go   |  30 ++++++
 .../executor/openai_compat_executor.go        |   4 +
 .../openai_compat_executor_compact_test.go    |  29 +++++
 pkg/llmproxy/usage/metrics_test.go            |  23 ++++
 16 files changed, 452 insertions(+), 16 deletions(-)
 create mode 100644 docs/operations/provider-error-runbook.md
 create mode 100644 docs/planning/reports/issue-wave-cpb-0781-0830-implementation-batch-3-resume-12.md
 create mode 100644 pkg/llmproxy/config/responses_compact_toggle_test.go

diff --git a/cmd/cliproxyctl/main_test.go b/cmd/cliproxyctl/main_test.go
index 44b7803521..87ee89729b 100644
--- a/cmd/cliproxyctl/main_test.go
+++ b/cmd/cliproxyctl/main_test.go
@@ -559,6 +559,12 @@ func TestResolveLoginProviderNormalizesDroidAliases(t *testing.T) {
 		if details["provider_supported"] != true {
 			t.Fatalf("expected provider_supported=true for %q, details=%#v", input, details)
 		}
+		if details["provider_alias"] != "gemini" {
+			t.Fatalf("expected provider_alias=gemini for %q, details=%#v", input, details)
+		}
+		if details["provider_aliased"] != true {
+			t.Fatalf("expected provider_aliased=true for %q, details=%#v", input, details)
+		}
 	}
 }
 
diff --git a/config.example.yaml b/config.example.yaml
index b513eb60ac..169bdb6b37 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -95,6 +95,10 @@ routing:
 # When true, enable authentication for the WebSocket API (/v1/ws).
 ws-auth: false
 
+# Gates OpenAI-compatible /v1/responses/compact behavior.
+# Enabled by default when omitted. Set to false for a staged rollout or a rapid disable.
+# responses-compact-enabled: true
+
 # When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts.
 nonstream-keepalive-interval: 0
 
diff --git a/docs/operations/index.md b/docs/operations/index.md
index a4ff651270..26a3f39360 100644
--- a/docs/operations/index.md
+++ b/docs/operations/index.md
@@ -12,6 +12,7 @@ This section centralizes first-response runbooks for active incidents.
 2. [Auth Refresh Failure Symptom/Fix Table](./auth-refresh-failure-symptom-fix.md)
 3. [Critical Endpoints Curl Pack](./critical-endpoints-curl-pack.md)
 4. [Checks-to-Owner Responder Map](./checks-owner-responder-map.md)
+5. [Provider Error Runbook Snippets](./provider-error-runbook.md)
 
 ## Freshness Pattern
 
diff --git a/docs/operations/provider-error-runbook.md b/docs/operations/provider-error-runbook.md
new file mode 100644
index 0000000000..6b33df56a9
--- /dev/null
+++ b/docs/operations/provider-error-runbook.md
@@ -0,0 +1,40 @@
+# Provider Error Runbook Snippets
+
+These are the smallest actionable runbook entries for CPB-0803 and CPB-0804 so the on-call can exercise the correct validation commands before changing code or configuration.
+
+## CPB-0803 – Huggingface CLIProxyAPI errors
+
+**Symptom**: Huggingface calls fail silently in production logs, but observability lacks provider tags and the usage dashboard shows untracked traffic. Alerts are noisy because the log sink cannot route the error rate to the right channel.
+
+**Validation commands**
+
+- `curl -sS http://localhost:8317/v0/management/logs | jq '.logs[] | select(.provider == "huggingface" and .level == "error")'`
+- `curl -sS http://localhost:8317/v1/metrics/providers | jq '.data[] | select(.provider == "huggingface") | {error_rate, requests, last_seen}'`
+- `curl -sS http://localhost:8317/usage | jq '.providers.huggingface'`
+
+**Runbook steps**
+
+1. Make sure `cliproxyctl` has the `provider_filter` tags set for `huggingface` so the management log output includes `provider: "huggingface"`. If the logs lack tags, reapply the filter via `cliproxyctl config view` + `cliproxyctl config edit` (or update the `config.yaml` block) and restart the agent.
+2. Verify the `v1/metrics/providers` entry for `huggingface` shows a stable error rate; if it stays above 5% for 5 minutes, escalate to the platform on-call and mark the alert as a hurt-level incident.
+3. After correcting the tagging, confirm the `usage` endpoint reports the provider so the new alerting rule in `provider-error` dashboards can route to the right responder.
+
+## CPB-0804 – Codex backend-api `Not Found`
+
+**Symptom**: Translations still target `https://chatgpt.com/backend-api/codex/responses`, which now returns `404 Not Found`. The problem manifests as a `backend-api` status in the `management/logs` stream that cannot be mapped to the new `v1/responses` path.
+
+**Validation commands**
+
+- `curl -sS http://localhost:8317/v0/management/logs | jq '.logs[] | select(.provider == "codex" and (.path | contains("backend-api/codex")) and .status_code == 404)'`
+- `curl -sS http://localhost:8317/v1/responses -H "Authorization: Bearer <api-key>" -H "Content-Type: application/json" -d '{"model":"codex","input":[{"role":"user","content":"ping"}],"stream":false}' -w "%{http_code}"`
+- `curl -sS http://localhost:8317/v1/metrics/providers | jq '.data[] | select(.provider == "codex") | {error_rate, last_seen}'`
+- `rg -n "backend-api/codex" config.example.yaml config.yaml`
+
+**Runbook steps**
+
+1. Use the management log command above to confirm the 404 comes from the old `backend-api/codex` target. If the request still hits that path, re-point the translator overrides in `config.yaml` (or environment overrides such as `CLIPROXY_PROVIDER_CODEX_BASE_URL`) to whatever URL serves the current Responses protocol.
+2. Re-run the `curl` to `/v1/responses` with the same payload to verify the translation path can resolve to an upstream that still works; if it succeeds, redeploy the next minor release with the provider-agnostic translator patch.
+3. If the problem persists after a config change, capture the raw `logs` and `metrics` output and hand it to the translations team together with the failing request body, because the final fix involves sharing translator hooks and the compatibility matrix described in the quickstart docs.
+
+---
+Last reviewed: `2026-02-23`
+Owner: `Platform On-Call`
diff --git a/docs/planning/reports/issue-wave-cpb-0781-0830-implementation-batch-3-resume-12.md b/docs/planning/reports/issue-wave-cpb-0781-0830-implementation-batch-3-resume-12.md
new file mode 100644
index 0000000000..b58985ecff
--- /dev/null
+++ b/docs/planning/reports/issue-wave-cpb-0781-0830-implementation-batch-3-resume-12.md
@@ -0,0 +1,30 @@
+# Issue Wave CPB-0781-0830 Implementation Batch 3 (Resume 12)
+
+- Date: `2026-02-23`
+- Scope: next 12-item execution wave after Batch 2
+- Mode: docs/runbook hardening using child-agent lane split (2 items per lane)
+
+## Implemented in this pass
+
+- Lane B set:
+  - `CPB-0789`, `CPB-0790`, `CPB-0791`, `CPB-0792`, `CPB-0793`, `CPB-0794`, `CPB-0795`
+- Lane C set:
+  - `CPB-0797`, `CPB-0798`, `CPB-0800`, `CPB-0803`, `CPB-0804`
+
+## Evidence Surfaces
+
+- `docs/provider-quickstarts.md`
+  - Added/expanded parity probes, cache guardrails, compose health checks, proxy/auth usage checks, Antigravity setup flow, and manual callback guidance for `CPB-0789..CPB-0804`.
+- `docs/troubleshooting.md`
+  - Added matrix/runbook entries covering stream-thinking parity, cache drift, auth toggle diagnostics, callback guardrails, huggingface diagnostics, and codex backend-api not-found handling.
+- `docs/operations/provider-error-runbook.md`
+  - Added focused runbook snippets for `CPB-0803` and `CPB-0804`.
+- `docs/operations/index.md`
+  - Linked the new provider error runbook.
+
+## Validation Commands
+
+```bash
+rg -n "CPB-0789|CPB-0790|CPB-0791|CPB-0792|CPB-0793|CPB-0794|CPB-0795|CPB-0797|CPB-0798|CPB-0800|CPB-0803|CPB-0804" docs/provider-quickstarts.md docs/troubleshooting.md docs/operations/provider-error-runbook.md
+rg -n "Provider Error Runbook Snippets" docs/operations/index.md
+```
diff --git a/docs/planning/reports/issue-wave-cpb-0781-0830-implementation-batch-4-code.md b/docs/planning/reports/issue-wave-cpb-0781-0830-implementation-batch-4-code.md
index 24f903380b..127ce84438 100644
--- a/docs/planning/reports/issue-wave-cpb-0781-0830-implementation-batch-4-code.md
+++ b/docs/planning/reports/issue-wave-cpb-0781-0830-implementation-batch-4-code.md
@@ -7,14 +7,31 @@
 ## IDs Implemented
 
 - `CPB-0810` (Copilot/OpenAI metadata consistency update for `gpt-5.1`)
+- `CPB-0819` (add staged rollout toggle for `/v1/responses/compact` behavior)
+- `CPB-0820` (add `gpt-5-pro` OpenAI model metadata with thinking support)
+- `CPB-0821` (tighten droid→gemini alias assertions in login and usage telemetry tests)
 
 ## Files Changed
 
 - `pkg/llmproxy/registry/model_definitions_static_data.go`
 - `pkg/llmproxy/registry/model_definitions_test.go`
+- `pkg/llmproxy/config/config.go`
+- `pkg/llmproxy/config/responses_compact_toggle_test.go`
+- `pkg/llmproxy/executor/openai_compat_executor.go`
+- `pkg/llmproxy/executor/openai_compat_executor_compact_test.go`
+- `pkg/llmproxy/runtime/executor/openai_compat_executor.go`
+- `pkg/llmproxy/runtime/executor/openai_compat_executor_compact_test.go`
+- `cmd/cliproxyctl/main_test.go`
+- `pkg/llmproxy/usage/metrics_test.go`
+- `config.example.yaml`
 
 ## Validation Commands
 
 ```bash
-GOCACHE=$PWD/.cache/go-build go test ./pkg/llmproxy/registry -run 'TestGetOpenAIModels_GPT51Metadata|TestGetGitHubCopilotModels|TestGetStaticModelDefinitionsByChannel' -count=1
+GOCACHE=$PWD/.cache/go-build go test ./pkg/llmproxy/registry -run 'TestGetOpenAIModels_GPT51Metadata|TestGetOpenAIModels_IncludesGPT5Pro|TestGetGitHubCopilotModels|TestGetStaticModelDefinitionsByChannel' -count=1
+GOCACHE=$PWD/.cache/go-build go test ./pkg/llmproxy/config -run 'TestIsResponsesCompactEnabled_DefaultTrue|TestIsResponsesCompactEnabled_RespectsToggle' -count=1
+GOCACHE=$PWD/.cache/go-build go test ./pkg/llmproxy/executor -run 'TestOpenAICompatExecutorCompactPassthrough|TestOpenAICompatExecutorCompactDisabledByConfig' -count=1
+GOCACHE=$PWD/.cache/go-build go test ./pkg/llmproxy/runtime/executor -run 'TestOpenAICompatExecutorCompactPassthrough|TestOpenAICompatExecutorCompactDisabledByConfig' -count=1
+GOCACHE=$PWD/.cache/go-build go test ./cmd/cliproxyctl -run 'TestResolveLoginProviderNormalizesDroidAliases' -count=1
+GOCACHE=$PWD/.cache/go-build go test ./pkg/llmproxy/usage -run 'TestNormalizeProviderAliasesDroidToGemini|TestGetProviderMetrics_MapsDroidAliasToGemini' -count=1
 ```
diff --git a/docs/planning/reports/issue-wave-cpb-0781-0830-lane-b.md b/docs/planning/reports/issue-wave-cpb-0781-0830-lane-b.md
index ff3fb488c6..bc9f9f0f52 100644
--- a/docs/planning/reports/issue-wave-cpb-0781-0830-lane-b.md
+++ b/docs/planning/reports/issue-wave-cpb-0781-0830-lane-b.md
@@ -75,3 +75,42 @@
 - `rg -n "CPB-0789|CPB-0796" docs/planning/CLIPROXYAPI_1000_ITEM_BOARD_2026-02-22.md`
 - `rg -n "quickstart|troubleshooting|stream|tool|reasoning|provider" docs/provider-quickstarts.md docs/troubleshooting.md`
 - `go test ./pkg/llmproxy/translator/... -run "TestConvert|TestTranslate" -count=1`
+
+## Execution Update (Batch 3 — 2026-02-23)
+
+- Snapshot:
+  - `implemented`: 8 (`CPB-0789`..`CPB-0796`)
+  - `in_progress`: 0
+
+### Implemented in this update
+
+- `CPB-0789`, `CPB-0790`
+  - Added rollout + Sonnet metadata guidance in quickstart/troubleshooting surfaces.
+  - Evidence:
+    - `docs/provider-quickstarts.md`
+    - `docs/troubleshooting.md`
+
+- `CPB-0791`, `CPB-0792`
+  - Added reasoning parity and prompt-cache guardrail probes.
+  - Evidence:
+    - `docs/provider-quickstarts.md`
+    - `docs/troubleshooting.md`
+
+- `CPB-0793`, `CPB-0794`
+  - Added compose-health and provider proxy behavior checks.
+  - Evidence:
+    - `docs/provider-quickstarts.md`
+    - `docs/troubleshooting.md`
+
+- `CPB-0795`
+  - Added AI Studio auth-file toggle diagnostics (`enabled/auth_index` + doctor snapshot).
+  - Evidence:
+    - `docs/provider-quickstarts.md`
+    - `docs/troubleshooting.md`
+
+- `CPB-0796`
+  - Already implemented in prior execution batch; retained as implemented in lane snapshot.
+
+### Validation
+
+- `rg -n "CPB-0789|CPB-0790|CPB-0791|CPB-0792|CPB-0793|CPB-0794|CPB-0795|CPB-0796" docs/provider-quickstarts.md docs/troubleshooting.md`
diff --git a/docs/planning/reports/issue-wave-cpb-0781-0830-lane-c.md b/docs/planning/reports/issue-wave-cpb-0781-0830-lane-c.md
index 9c434b6a86..ce504c1900 100644
--- a/docs/planning/reports/issue-wave-cpb-0781-0830-lane-c.md
+++ b/docs/planning/reports/issue-wave-cpb-0781-0830-lane-c.md
@@ -75,3 +75,37 @@
 - `rg -n "CPB-0797|CPB-0804" docs/planning/CLIPROXYAPI_1000_ITEM_BOARD_2026-02-22.md`
 - `rg -n "quickstart|troubleshooting|stream|tool|reasoning|provider" docs/provider-quickstarts.md docs/troubleshooting.md`
 - `go test ./pkg/llmproxy/translator/... -run "TestConvert|TestTranslate" -count=1`
+
+## Execution Update (Batch 3 — 2026-02-23)
+
+- Snapshot:
+  - `implemented`: 8 (`CPB-0797`..`CPB-0804`)
+  - `in_progress`: 0
+
+### Implemented in this update
+
+- `CPB-0797`
+  - Added token-count diagnostics parity checks in quickstart + troubleshooting matrix.
+  - Evidence:
+    - `docs/provider-quickstarts.md`
+    - `docs/troubleshooting.md`
+
+- `CPB-0798`, `CPB-0800`
+  - Added Antigravity setup/login flow and manual callback headless OAuth guidance.
+  - Evidence:
+    - `docs/provider-quickstarts.md`
+    - `docs/troubleshooting.md`
+
+- `CPB-0803`, `CPB-0804`
+  - Added provider error runbook anchors and troubleshooting action entries.
+  - Evidence:
+    - `docs/operations/provider-error-runbook.md`
+    - `docs/operations/index.md`
+    - `docs/troubleshooting.md`
+
+- `CPB-0799`, `CPB-0801`, `CPB-0802`
+  - Already implemented in prior execution batch; retained as implemented in lane snapshot.
+
+### Validation
+
+- `rg -n "CPB-0797|CPB-0798|CPB-0799|CPB-0800|CPB-0801|CPB-0802|CPB-0803|CPB-0804" docs/provider-quickstarts.md docs/troubleshooting.md docs/operations/provider-error-runbook.md`
diff --git a/docs/planning/reports/issue-wave-cpb-0781-0830-next-50-summary.md b/docs/planning/reports/issue-wave-cpb-0781-0830-next-50-summary.md
index e80bb428b1..7153c25318 100644
--- a/docs/planning/reports/issue-wave-cpb-0781-0830-next-50-summary.md
+++ b/docs/planning/reports/issue-wave-cpb-0781-0830-next-50-summary.md
@@ -10,8 +10,9 @@
 
 - `proposed` in board snapshot: 50/50
 - `triaged with concrete file/test targets in this pass`: 50/50
-- `implemented so far`: 16/50
-- `remaining`: 34/50
+- `implemented so far`: 28/50
+- `remaining`: 22/50
+- `count basis`: resume-scoped verification in this session (Batch 1 + Batch 2 + Resume Scoped 12)
 
 ## Lane Index
 
@@ -117,7 +118,30 @@ Validation evidence:
 Implemented in this batch:
 
 - `CPB-0810`: corrected `gpt-5.1` static metadata to use version-accurate display/description text for OpenAI/Copilot-facing model surfaces.
+- `CPB-0819`: added config-gated `/v1/responses/compact` rollout control (`responses-compact-enabled`) with safe default enabled and explicit disabled behavior tests.
+- `CPB-0820`: added `gpt-5-pro` static model metadata with explicit thinking support for OpenAI/Copilot-facing model lists.
+- `CPB-0821`: tightened droid alias coverage with explicit `provider_alias`/`provider_aliased` assertions and usage telemetry mapping tests.
 
 Validation evidence:
 
-- `go test ./pkg/llmproxy/registry -run 'TestGetOpenAIModels_GPT51Metadata|TestGetGitHubCopilotModels|TestGetStaticModelDefinitionsByChannel' -count=1` → `ok`
+- `go test ./pkg/llmproxy/registry -run 'TestGetOpenAIModels_GPT51Metadata|TestGetOpenAIModels_IncludesGPT5Pro|TestGetGitHubCopilotModels|TestGetStaticModelDefinitionsByChannel' -count=1` → `ok`
+- `go test ./pkg/llmproxy/config -run 'TestIsResponsesCompactEnabled_DefaultTrue|TestIsResponsesCompactEnabled_RespectsToggle' -count=1` → `ok`
+- `go test ./pkg/llmproxy/executor -run 'TestOpenAICompatExecutorCompactPassthrough|TestOpenAICompatExecutorCompactDisabledByConfig' -count=1` → `ok`
+- `go test ./pkg/llmproxy/runtime/executor -run 'TestOpenAICompatExecutorCompactPassthrough|TestOpenAICompatExecutorCompactDisabledByConfig' -count=1` → `ok`
+- `go test ./cmd/cliproxyctl -run 'TestResolveLoginProviderNormalizesDroidAliases' -count=1` → `ok`
+- `go test ./pkg/llmproxy/usage -run 'TestNormalizeProviderAliasesDroidToGemini|TestGetProviderMetrics_MapsDroidAliasToGemini' -count=1` → `ok`
+
+## Execution Update (Resume Scoped 12)
+
+- Date: `2026-02-23`
+- Status: completed next 12-item docs/runbook batch with child-agent split.
+- Tracking report: `docs/planning/reports/issue-wave-cpb-0781-0830-implementation-batch-3-resume-12.md`
+
+Implemented in this batch:
+
+- `CPB-0789`, `CPB-0790`, `CPB-0791`, `CPB-0792`, `CPB-0793`, `CPB-0794`, `CPB-0795`
+- `CPB-0797`, `CPB-0798`, `CPB-0800`, `CPB-0803`, `CPB-0804`
+
+Verification:
+
+- `rg -n "CPB-0789|CPB-0790|CPB-0791|CPB-0792|CPB-0793|CPB-0794|CPB-0795|CPB-0797|CPB-0798|CPB-0800|CPB-0803|CPB-0804" docs/provider-quickstarts.md docs/troubleshooting.md docs/operations/provider-error-runbook.md`
diff --git a/docs/provider-operations.md b/docs/provider-operations.md
index b4b1f4d9c6..4d1966012b 100644
--- a/docs/provider-operations.md
+++ b/docs/provider-operations.md
@@ -241,9 +241,27 @@ Avoid per-tool aliases for these fields in ops docs to keep telemetry queries de
   - Critical: processed thinking mode mismatch ratio > 5% over 10 minutes.
   - Warn: reasoning token growth > 25% above baseline for fixed-thinking workloads over 10 minutes.
 - Mitigation:
-  - Force explicit thinking-capable model alias for affected workloads.
-  - Reduce rollout blast radius by pinning the model suffix/level per workload class.
-  - Keep one non-stream and one stream canary for each affected client integration.
+  - Force explicit thinking-capable model alias for affected workloads.
+  - Reduce rollout blast radius by pinning the model suffix/level per workload class.
+  - Keep one non-stream and one stream canary for each affected client integration.
+
+### Provider-specific proxy overrides (`CPB-0794`)
+
+- **Goal:** route some providers through a corporate proxy while letting others go direct (for example, Gemini through `socks5://corp-proxy:1080` while Claude works through the default gateway).
+- **Config knobs:** `config.yaml` already exposes `proxy-url` at the root (global egress) and a `proxy-url` field per credential or API key entry. Adding the override looks like:
+
+```yaml
+gemini-api-key:
+  - api-key: "AIzaSy..."
+    proxy-url: "socks5://corp-proxy:1080" # per-provider override
+```
+
+- **Validation steps:**
+  1. `rg -n "proxy-url" config.yaml` (or open `config.example.yaml`) to confirm the override is placed next to the target credential block.
+  2. `cliproxyctl doctor --json | jq '.config.providers | to_entries[] | {provider:.key,credentials:.value}'` to ensure each credential surfaces the intended `proxy_url` value.
+  3. After editing, save the file so the built-in watcher hot-reloads the settings (or run `docker compose restart cliproxyapi-plusplus` for a deterministic reload) and rerun an affected client request while tailing `docker compose logs cliproxyapi-plusplus --follow` to watch for proxy-specific connection errors.
+
+- **Fallback behavior:** When no per-credential override exists, the root `proxy-url` applies; clearing the override (set to empty string) forces a direct connection even if the root proxy is set.
 
 ## Recommended Production Pattern
 
diff --git a/docs/provider-quickstarts.md b/docs/provider-quickstarts.md
index c7988bc13e..8bf5419621 100644
--- a/docs/provider-quickstarts.md
+++ b/docs/provider-quickstarts.md
@@ -1134,24 +1134,43 @@ curl -sS http://localhost:8317/v1/models -H "Authorization: Bearer demo-client-k
 
 Expected: no cross-request leakage in stream translation, feature-flag state is explicit, and Sonnet 4.5 model metadata is consistent.
 
-### Reasoning/cache/compose checks (`CPB-0791`, `CPB-0792`, `CPB-0793`)
+### Reasoning parity probe (`CPB-0791`)
 
 ```bash
 curl -sS -X POST http://localhost:8317/v1/chat/completions -H "Authorization: Bearer demo-client-key" -H "Content-Type: application/json" -d '{"model":"gemini-2.5-pro","messages":[{"role":"user","content":"reasoning normalization probe"}],"reasoning":{"effort":"x-high"},"stream":false}' | jq '{model,usage,error}'
-curl -sS -X POST http://localhost:8317/v1/chat/completions -H "Authorization: Bearer demo-client-key" -H "Content-Type: application/json" -d '{"model":"gemini-2.5-pro","messages":[{"role":"user","content":"cache token probe"}],"stream":false}' | jq '{usage,error}'
+curl -N -X POST http://localhost:8317/v1/chat/completions -H "Authorization: Bearer demo-client-key" -H "Content-Type: application/json" -d '{"model":"gemini-2.5-pro","messages":[{"role":"user","content":"reasoning normalization probe"}],"reasoning":{"effort":"x-high"},"stream":true}'
+```
+
+Expected: both non-stream and stream responses return the same reasoning metadata, `usage` totals stay in sync, and the `thinking` result is not dropped when the request reaches Gemini/Antigravity.
+
+### Prompt cache guardrails (`CPB-0792`, `CPB-0797`)
+
+```bash
+curl -sS -X POST http://localhost:8317/v1/chat/completions -H "Authorization: Bearer demo-client-key" -H "Content-Type: application/json" -d '{"model":"gemini-2.5-pro","messages":[{"role":"user","content":"cache guard probe"}],"stream":false}' | jq '{model,usage,error}'
+curl -sS -X POST http://localhost:8317/v1/chat/completions -H "Authorization: Bearer demo-client-key" -H "Content-Type: application/json" -d '{"model":"antigravity/claude-sonnet-4-5-thinking","messages":[{"role":"user","content":"cache guard probe"}],"stream":false}' | jq '{model,usage,error}'
+cliproxyctl doctor --json | jq '.warnings[]? | select(.message | test("cache"; "i"))'
+```
+
+Expected: repeated probes for Gemini and Antigravity stick to the requested model, the `usage` objects always include prompt/completion totals, and no cache-related warnings appear in `cliproxyctl doctor` output.
+
+### Compose health check (`CPB-0793`)
+
+```bash
 docker compose ps
 curl -sS http://localhost:8317/health | jq
 ```
 
-Expected: reasoning normalization is accepted, cache token fields are coherent, and docker-compose startup failures are visible via service state + health checks.
+Expected: all CLIProxyAPI services stay `Up` in `docker compose ps`, and the health endpoint returns a healthy payload so startup errors surface before they block workloads.
 
 ### Proxy/auth/usage checks (`CPB-0794`, `CPB-0795`, `CPB-0797`)
 
 ```bash
 cliproxyctl doctor --json | jq '.auth,.routing,.warnings'
-curl -sS http://localhost:8317/v0/management/auth-files -H "X-Management-Secret: ${MANAGEMENT_SECRET}" | jq '.[] | select(.type=="aistudio") | {name,type,disabled}'
+curl -sS http://localhost:8317/v0/management/auth-files -H "X-Management-Secret: ${MANAGEMENT_SECRET}" | jq '.[] | select(.type=="aistudio") | {name,type,enabled,auth_index}'
 curl -sS -X PATCH http://localhost:8317/v0/management/auth-files/status -H "X-Management-Secret: ${MANAGEMENT_SECRET}" -H "Content-Type: application/json" -d '{"name":"aistudio-default","enabled":true}' | jq
-curl -sS -X POST http://localhost:8317/v1/responses -H "Authorization: Bearer demo-client-key" -H "Content-Type: application/json" -d '{"model":"gemini-2.5-pro","input":[{"role":"user","content":"usage parity probe"}],"stream":false}' | jq '.usage,.error'
+cliproxyctl doctor --json | jq '.auth_files'
+curl -sS -X POST http://localhost:8317/v1/responses -H "Authorization: Bearer demo-client-key" -H "Content-Type: application/json" -d '{"model":"gemini-2.5-pro","input":[{"role":"user","content":"usage parity probe"}],"stream":false}' | jq '{model,id,usage}'
+curl -sS http://localhost:8317/v0/management/usage -H "X-Management-Secret: ${MANAGEMENT_SECRET}" | jq '.providers | to_entries[] | {name:.key, tokens:.value.usage}'
 ```
 
 Expected: per-provider proxy/auth behavior is inspectable, AI Studio auth toggle is controllable, and usage/token metadata is present in non-stream probes.
@@ -1167,6 +1186,30 @@ curl -sS http://localhost:8317/v0/management/usage -H "X-Management-Secret: ${MA
 
 Expected: setup/login surfaces include manual callback support, and huggingface failures are visible in management logs/usage.
 
+### Antigravity cliproxyctl flow (`CPB-0798`)
+
+```bash
+cliproxyctl setup --config ./config.yaml
+# Interactive prompt: choose "Antigravity login" when the list appears.
+cliproxyctl login --provider antigravity --no-browser --oauth-callback-port 51121
+cliproxyctl doctor --json | jq '{auth,warnings,models}'
+curl -sS http://localhost:8317/v1/models -H "Authorization: Bearer demo-client-key" | jq '.data[] | select(.id|test("^antigravity/")) | {id,owned_by,description}'
+```
+
+Expected: `cliproxyctl setup` seeds the auth-dir used by cliproxyctl, the non-browser antigravity login prints the callback URL (copy it into a reachable browser), `cliproxyctl doctor` reports the new auth credentials, and the runtime model catalog exposes every `antigravity/…` entry.
+
+### Manual callback headless OAuth (`CPB-0800`)
+
+```bash
+cliproxyctl login --provider openai --no-browser --oauth-callback-port 0
+cliproxyctl login --provider gemini --no-browser --oauth-callback-port 0
+cliproxyctl doctor --json | jq '{auth,warnings}'
+curl -sS http://localhost:8317/v0/management/auth-files -H "X-Management-Secret: ${MANAGEMENT_SECRET}" | jq '.[] | select(.manual) | {provider,name,status}'
+curl -sS http://localhost:8317/v0/management/logs -H "X-Management-Secret: ${MANAGEMENT_SECRET}" | jq '.entries[]? | select(.message|test("manual callback";"i"))'
+```
+
+Expected: login flows emit a manual callback URL that can be pasted into a reachable browser, doctor validates the newly minted credential, the management auth-files list shows a `manual` entry for the provider, and recent logs surface the manual callback handshake.
+
 ### Codex/Gemini integration parity (`CPB-0804`, `CPB-0805`, `CPB-0807`, `CPB-0808`)
 
 ```bash
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
index 7d67c346a5..8b29b69424 100644
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -40,17 +40,36 @@ curl -sS http://localhost:8317/v1/metrics/providers | jq
 | `iflow` `glm-4.7` returns `406` | Request format/headers do not match IFlow acceptance rules for that model | Retry once with non-stream mode and capture response body + headers | Pin to known-working alias for `glm-4.7`, normalize request format, and keep fallback model route available |
 | iFlow OAuth login succeeded but most iFlow models unavailable | Account currently exposes only a non-CLI subset (model inventory mismatch) | `GET /v1/models` and filter `^iflow/` | Route only to listed `iflow/*` IDs; avoid requesting non-listed upstream aliases; keep one known-good canary model |
 | Usage statistics remain `0` after many requests | Upstream omitted usage metadata in stream/non-stream responses | Compare one `stream:false` and one `stream:true` canary and inspect `usage` fields/logs | Keep request counting enabled via server-side usage fallback; validate parity with both request modes before rollout |
+| Kiro remaining quota unknown or near exhaustion | Wrong auth credential exhausted or stale quota signal | `curl -sS http://localhost:8317/v0/management/kiro-quota -H "Authorization: Bearer MANAGEMENT_KEY" | jq` | Find `auth_index`, confirm `quota_exhausted` and `remaining_quota`, then enable quota-fallback switches and rotate to alternate credentials |
 | Gemini via OpenAI-compatible client cannot control thinking length | Thinking controls were dropped/normalized unexpectedly before provider dispatch | Compare request payload vs debug logs for `thinking: original config` and `thinking: processed config` | Use explicit thinking suffix/level supported by exposed model, enforce canary checks, and alert when processed thinking mode mismatches request intent |
+| `Gemini CLI OAuth 认证失败: failed to start callback server` | Default callback port `8085` is already bound on localhost | `lsof -iTCP:8085` or `ss -tnlp | grep 8085` | Stop the conflicting server, or re-run `cliproxyctl login --oauth-callback-port FREE_PORT`; the CLI now also falls back to an ephemeral port and prints the final callback URL/SSH tunnel instructions. |
 | `codex5.3` availability unclear across environments | Integration path mismatch (in-process SDK vs remote HTTP fallback) | Probe `/health` then `/v1/models`, verify `gpt-5.3-codex` exposure | Use in-process `sdk/cliproxy` when local runtime is controlled; fall back to `/v1/*` only when process boundaries require HTTP |
 | Amp requests bypass CLIProxyAPI | Amp process missing `OPENAI_API_BASE`/`OPENAI_API_KEY` or stale shell env | Run direct canary to `http://127.0.0.1:8317/v1/chat/completions` with same credentials | Export env in the same process/shell that launches Amp, then verify proxy logs show Amp traffic |
+| `auth-dir` mode is too permissive (`0755`/`0777`) | OAuth/API key login writes fail fast due to insecure permissions | `ls -ld ~/.cli-proxy-api` or `ls -ld CONFIGURED_AUTH_DIR` | Run `chmod 700` on the configured auth directory, then retry the login/refresh command |
+| Login succeeds but runtime still says provider unavailable | Login and runtime are reading different `auth-dir` paths (container path vs local path mismatch) | Print effective config path + `auth-dir` in both login shell and runtime process (`cliproxyctl doctor --json`) | Align both processes to one config and one `auth-dir`; avoid duplicate configs in different working directories |
+| Gemini 3 Pro / Roo shows no response | Model is missing from current auth inventory or stream path dropped before translator dispatch | Check `/v1/models` for `gemini-3-pro-preview` and run one non-stream canary | Refresh auth inventory, re-login if needed, and only enable Roo stream mode after non-stream canary passes |
+| `candidate_count` > 1 returns only one answer | Provider path does not support multi-candidate fanout yet | Re-run with `candidate_count: 1` and compare logs/request payload | Treat multi-candidate as gated rollout: document unsupported path, keep deterministic single-candidate behavior, and avoid silent fanout assumptions |
 | Runtime config write errors | Read-only mount or immutable filesystem | `find /CLIProxyAPI -maxdepth 1 -name config.yaml -print` | Use writable mount, re-run with read-only warning, confirm management persistence status |
 | Kiro/OAuth auth loops | Expired or missing token refresh fields | Re-run `cliproxyapi++ auth`/reimport token path | Refresh credentials, run with fresh token file, avoid duplicate token imports |
 | Streaming hangs or truncation | Reverse proxy buffering / payload compatibility issue | Reproduce with `stream: false`, then compare SSE response | Verify reverse-proxy config, compare tool schema compatibility and payload shape |
+| `Cherry Studio can't find the model even though CLI runs succeed` (CPB-0373) | Workspace-specific model filters (Cherry Studio) do not include the alias/prefix that the CLI is routing, so the UI never lists the model. | `curl -sS http://localhost:8317/v1/models -H "Authorization: Bearer CLIENT_KEY" | jq '.data[].id' | rg 'WORKSPACE_PREFIX'` and compare with the workspace filter used in Cherry Studio. | Add the missing alias/prefix to the workspace's allowed set or align the workspace selection with the alias returned by `/v1/models`, then reload Cherry Studio so it sees the same inventory as CLI. |
+| `Antigravity 2 API Opus model returns Error searching files` (CPB-0375) | The search tool block is missing or does not match the upstream tool schema, so translator rejects `tool_call` payloads when the Opus model tries to access files. | Replay the search payload against `/v1/chat/completions` and tail the translator logs for `tool_call`/`SearchFiles` entries to see why the tool request was pruned or reformatted. | Register the `searchFiles` alias for the Opus provider (or the tool name Cherry Studio sends), adjust the `tools` block to match upstream requirements, and rerun the flow so the translator forwards the tool call instead of aborting. |
+| `Streaming response never emits [DONE] even though upstream closes` (CPB-0376) | SSE translator drops the `[DONE]` marker or misses the final `model_status: "succeeded"` transition, so downstream clients never see completion. | Compare the SSE stream emitted by `/v1/chat/completions` to the upstream stream and watch translator logs for `[DONE]` / `model_status` transitions; tail `cliproxy` logs around the final chunks. | Ensure the translation layer forwards `[DONE]` immediately after the upstream `model_status` indicates completion (or emits a synthetic `[DONE]`), and log a warning if the stream closes without sending the final marker so future incidents can be traced. |
 | `Cannot use Claude Models in Codex CLI` | Missing oauth alias bridge for Claude model IDs | `curl -sS .../v1/models | jq '.data[].id' | rg 'claude-opus|claude-sonnet|claude-haiku'` | Add/restore `oauth-model-alias` entries (or keep default injection enabled), then reload and re-check `/v1/models` |
+| RooCode UI shows `undefined is not an object (evaluating 'T.match')` | Provider alias mismatch or no visible Roo models for the same key/session used by the UI | `cliproxyctl login --provider roocode --json --config ./config.yaml | jq '{ok,provider:.details.provider}'` then `curl -sS http://localhost:8317/v1/models -H "Authorization: Bearer CLIENT_KEY" | jq -r '.data[].id' | rg '^roo/'` | Use normalized Roo aliases (`roocode`/`roo-code`), ensure at least one `roo/*` model is exposed, then re-run one non-stream canary request before retrying UI stream mode |
 | `claude-opus-4-6` missing or returns `bad model` | Alias/prefix mapping is stale after Claude model refresh | `curl -sS http://localhost:8317/v1/models -H "Authorization: Bearer YOUR_CLIENT_KEY" | jq -r '.data[].id' | rg 'claude-opus-4-6|claude-sonnet-4-6'` | Update `claude-api-key` model alias mappings, reload config, then re-run non-stream Opus 4.6 request before stream rollout |
 | `/v1/responses/compact` fails or hangs | Wrong endpoint/mode expectations (streaming not supported for compact) | Retry with non-stream `POST /v1/responses/compact` and inspect JSON `object` field | Use compact only in non-stream mode; for streaming flows keep `/v1/responses` or `/v1/chat/completions` |
 | MCP memory tools fail (`tool not found`, invalid params, or empty result) | MCP server missing memory tool registration or request schema mismatch | Run `tools/list` then one minimal `tools/call` against the same MCP endpoint | Enable/register memory tools, align `tools/call` arguments to server schema, then repeat `tools/list` and `tools/call` smoke tests |
 
+## `gemini-3-pro-preview` Tool-Use Triage
+
+- Use this flow when tool calls to `gemini-3-pro-preview` return unexpected `400/500` patterns and non-stream canaries work:
+  - `touch config.yaml`
+  - `process-compose -f examples/process-compose.dev.yaml down`
+  - `process-compose -f examples/process-compose.dev.yaml up`
+  - `curl -sS http://localhost:8317/v1/models -H "Authorization: Bearer CLIENT_KEY" | jq '.data[].id' | rg 'gemini-3-pro-preview'`
+  - `curl -sS -X POST http://localhost:8317/v1/chat/completions -H "Authorization: Bearer CLIENT_KEY" -H "Content-Type: application/json" -d '{"model":"gemini-3-pro-preview","messages":[{"role":"user","content":"ping"}],"stream":false}'`
+
 Use this matrix as an issue-entry checklist:
 
 ```bash
@@ -112,21 +131,46 @@ Use this for repeated `403` on Kiro/Copilot/Antigravity-like channels:
 ```bash
 # 1) Verify model is exposed to the current key
 curl -sS http://localhost:8317/v1/models \
-  -H "Authorization: Bearer <your-client-key>" | jq '.data[].id' | head -n 20
+  -H "Authorization: Bearer YOUR_CLIENT_KEY" | jq '.data[].id' | head -n 20
 
 # 2) Run a minimal non-stream request for the same model
 curl -sS -X POST http://localhost:8317/v1/chat/completions \
-  -H "Authorization: Bearer <your-client-key>" \
+  -H "Authorization: Bearer YOUR_CLIENT_KEY" \
   -H "Content-Type: application/json" \
-  -d '{"model":"<model-id>","messages":[{"role":"user","content":"ping"}],"stream":false}'
+  -d '{"model":"MODEL_ID","messages":[{"role":"user","content":"ping"}],"stream":false}'
 
 # 3) Inspect provider metrics + recent logs for status bursts
 curl -sS http://localhost:8317/v1/metrics/providers \
-  -H "Authorization: Bearer <your-client-key>" | jq
+  -H "Authorization: Bearer YOUR_CLIENT_KEY" | jq
 ```
 
 If step (2) fails with `403` while model listing works, treat it as upstream entitlement/channel policy mismatch first, not model registry corruption.
 
+## OAuth Callback Server Start Failure (Gemini/Antigravity)
+
+Symptom:
+
+- Login fails with `failed to start callback server` or `bind: address already in use`.
+
+Checks:
+
+```bash
+lsof -iTCP:51121 -sTCP:LISTEN
+lsof -iTCP:51122 -sTCP:LISTEN
+```
+
+Remediation:
+
+```bash
+# Pick an unused callback port explicitly
+./cliproxyapi++ auth --provider antigravity --oauth-callback-port 51221
+
+# Server mode
+./cliproxyapi++ --oauth-callback-port 51221
+```
+
+If callback setup keeps failing, run with `--no-browser`, copy the printed URL manually, and paste the callback URL back into the CLI prompt.
+
 ## Model Not Found / Unsupported Model
 
 Checks:
@@ -165,6 +209,56 @@ If non-stream succeeds but stream chunks are delayed/batched:
 - verify client reads SSE incrementally,
 - confirm no middleware rewrites the event stream response.
 
+## Wave Batch 2 triage matrix (`CPB-0783..CPB-0808`)
+
+| Symptom | Likely cause | Quick check | Action |
+|---|---|---|---|
+| Dev cycle regresses after Gemini tool changes (`CPB-0783`) | stale process-compose/HMR state | `cliproxyctl dev --json` + `docker compose ps` | restart process-compose and rerun non-stream canary |
+| RooCode fails with `T.match` (`CPB-0784`, `CPB-0785`) | provider alias mismatch (`roocode`/`roo-code`) | `GET /v1/models` and search Roo IDs | normalize aliases to canonical `roo` mapping and retry |
+| Channel toggle works on non-stream only (`CPB-0787`) | stream bootstrap path misses toggle | compare stream vs non-stream same prompt | align bootstrap settings and re-run parity probe |
+| Antigravity stream returns stale chunks (`CPB-0788`) | request-scoped translator state leak | run two back-to-back stream requests | reset per-request stream state and verify isolation |
+| Sonnet 4.5 rollout confusion (`CPB-0789`, `CPB-0790`) | feature flag/metadata mismatch | `cliproxyctl doctor --json` + `/v1/models` metadata | align flag gating + static registry metadata |
+| Gemini thinking stream parity gap (`CPB-0791`) | reasoning metadata normalization splits between CLI/translator and upstream, so the stream response drops `thinking` results or mismatches non-stream output | `curl -sS -X POST http://localhost:8317/v1/chat/completions -H "Authorization: Bearer demo-client-key" -H "Content-Type: application/json" -d '{"model":"gemini-2.5-pro","messages":[{"role":"user","content":"reasoning normalization probe"}],"reasoning":{"effort":"x-high"},"stream":false}' | jq '{model,usage,error}'` then `curl -N -X POST http://localhost:8317/v1/chat/completions -H "Authorization: Bearer demo-client-key" -H "Content-Type: application/json" -d '{"model":"gemini-2.5-pro","messages":[{"role":"user","content":"reasoning normalization probe"}],"reasoning":{"effort":"x-high"},"stream":true}'` | align translator normalization and telemetry so thinking metadata survives stream translation, re-run the reasoning probe, and confirm matching `usage` counts in stream/non-stream outputs |
+| Gemini CLI/Antigravity prompt cache drift (`CPB-0792`, `CPB-0797`) | prompt cache keying or executor fallback lacks validation, letting round-robin slip to stale providers and emit unexpected usage totals | re-run the `gemini-2.5-pro` chat completion three times and repeat with `antigravity/claude-sonnet-4-5-thinking`, e.g. `curl -sS -X POST http://localhost:8317/v1/chat/completions -H "Authorization: Bearer demo-client-key" -H "Content-Type: application/json" -d '{"model":"<model>","messages":[{"role":"user","content":"cache guard probe"}],"stream":false}' | jq '{model,usage,error}'` | reset prompt caches, enforce provider-specific cache keys/fallbacks, and alert when round-robin reroutes to unexpected providers |
+| Docker compose startup error (`CPB-0793`) | service boot failure before bind | `docker compose ps` + `/health` | inspect startup logs, fix bind/config, restart |
+| AI Studio auth status unclear (`CPB-0795`) | auth-file toggle not visible/used | `GET/PATCH /v0/management/auth-files` | enable target auth file and re-run provider login |
+| Setup/login callback breaks (`CPB-0798`, `CPB-0800`) | callback mode mismatch/manual callback unset | inspect `cliproxyctl setup/login --help` | use `--manual-callback` and verify one stable auth-dir |
+| Huggingface provider errors not actionable (`CPB-0803`) | logs/usage missing provider tags | `GET /v0/management/logs` + `/usage` | add provider-filter tags and alert routing |
+| Codex/Gemini parity drifts (`CPB-0804`, `CPB-0805`, `CPB-0807`, `CPB-0808`) | provider-specific response path divergence | compare `/v1/responses` stream/non-stream | keep translation hooks shared and cache path deterministic |
+
+### Setup/login callback guardrails (`CPB-0798`, `CPB-0800`)
+
+When headless environments or proxy tunnels prevent automatic callbacks, fall back to the CLI-managed Antigravity cursor flow and the manual callback mode that keep one `auth` directory deterministic.
+
+```bash
+cliproxyctl setup --config ./config.yaml
+  # choose “Antigravity login” when prompted to seed ~/.cliproxy/auth.
+cliproxyctl login --provider antigravity --no-browser --oauth-callback-port 51121
+cliproxyctl login --provider openai --no-browser --oauth-callback-port 0
+cliproxyctl doctor --json | jq '{auth,warnings}'
+curl -sS http://localhost:8317/v0/management/auth-files -H "X-Management-Secret: ${MANAGEMENT_SECRET}" | jq '.[] | select(.manual) | {provider,name,status}'
+curl -sS http://localhost:8317/v0/management/logs -H "X-Management-Secret: ${MANAGEMENT_SECRET}" | jq '.entries[]? | select(.message|test("manual callback";"i"))'
+```
+
+Copy the callback URL `cliproxyctl` prints, complete it from a reachable browser, and keep a single stable `auth` directory per CLI invocation so the manual callback metadata stays up to date.
+
+## Wave Batch 3 triage matrix (`CPB-0809..CPB-0830` remaining 17)
+
+| Symptom | Likely cause | Quick check | Action |
+|---|---|---|---|
+| Antigravity rollout is inconsistent (`CPB-0809`) | feature flag state differs by environment | `cliproxyctl doctor --json` | enforce staged flag defaults and migration notes |
+| Copilot CLI mapping mismatch (`CPB-0810`) | registry metadata naming drift | `/v1/models` and match copilot IDs | normalize registry names and docs wording |
+| HMR/refresh flow flaky (`CPB-0812`) | compose/process watcher drift | `docker compose ... config` | align compose watch/restart workflow |
+| Remote management ban feels silent (`CPB-0813`) | ban counter/log signal missing | `GET /v0/management/logs` + usage stats | add/monitor ban telemetry and remediation runbook |
+| Gemini OAuth guidance unclear (`CPB-0816`, `CPB-0817`) | login flow and docs out of sync | `cliproxyctl login --provider gemini` | keep CLI flow and quickstart steps aligned |
+| Droid CLI says unknown provider (`CPB-0821`) | alias normalization missing | `cliproxyctl login --provider droid-cli` | normalize alias to Gemini-compatible provider path |
+| Auth file sync misses fresh token (`CPB-0822`) | reload logic ignores newest credential | check management auth state + runtime logs | use validated sync path with metadata checks |
+| Kimi K2 thinking failures hard to triage (`CPB-0823`) | provider-specific logs/alerts absent | filter management logs for `kimi` | add tagged logs and alert thresholds |
+| Nano Banana translator path unstable (`CPB-0824`) | translator mapping not centralized | probe `nanobanana` model via `/v1/models` | consolidate translator alias/format helpers |
+| AI Studio runtime behavior drifts (`CPB-0825`, `CPB-0827`) | executor side-effects and WSS path gaps | compare stream/non-stream + WSS probes | split helper layers and gate WSS rollout with tests |
+| Gemini image integration routing uncertain (`CPB-0828`) | subprocess vs HTTP fallback path ambiguity | inspect management routes + logs | expose explicit non-subprocess + fallback controls |
+| GPT metadata migration risk (`CPB-0818`, `CPB-0819`, `CPB-0820`, `CPB-0830`) | model-version naming/contract drift | inspect `/v1/models` + compact endpoint | centralize normalization and document migration path |
+
 ## Useful Endpoints
 
 - `GET /health`
diff --git a/pkg/llmproxy/config/responses_compact_toggle_test.go b/pkg/llmproxy/config/responses_compact_toggle_test.go
new file mode 100644
index 0000000000..267d5cde40
--- /dev/null
+++ b/pkg/llmproxy/config/responses_compact_toggle_test.go
@@ -0,0 +1,30 @@
+package config
+
+import "testing"
+
+func TestIsResponsesCompactEnabled_DefaultTrue(t *testing.T) {
+	var cfg *Config
+	if !cfg.IsResponsesCompactEnabled() {
+		t.Fatal("nil config should default responses compact to enabled")
+	}
+
+	cfg = &Config{}
+	if !cfg.IsResponsesCompactEnabled() {
+		t.Fatal("unset responses compact toggle should default to enabled")
+	}
+}
+
+func TestIsResponsesCompactEnabled_RespectsToggle(t *testing.T) {
+	enabled := true
+	disabled := false
+
+	cfgEnabled := &Config{ResponsesCompactEnabled: &enabled}
+	if !cfgEnabled.IsResponsesCompactEnabled() {
+		t.Fatal("expected explicit true toggle to enable responses compact")
+	}
+
+	cfgDisabled := &Config{ResponsesCompactEnabled: &disabled}
+	if cfgDisabled.IsResponsesCompactEnabled() {
+		t.Fatal("expected explicit false toggle to disable responses compact")
+	}
+}
diff --git a/pkg/llmproxy/executor/openai_compat_executor.go b/pkg/llmproxy/executor/openai_compat_executor.go
index bc018d2055..f1ff27f871 100644
--- a/pkg/llmproxy/executor/openai_compat_executor.go
+++ b/pkg/llmproxy/executor/openai_compat_executor.go
@@ -86,6 +86,10 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	to := sdktranslator.FromString("openai")
 	endpoint := "/chat/completions"
 	if opts.Alt == "responses/compact" {
+		if e.cfg != nil && !e.cfg.IsResponsesCompactEnabled() {
+			err = statusErr{code: http.StatusNotFound, msg: "/responses/compact disabled by config"}
+			return
+		}
 		to = sdktranslator.FromString("openai-response")
 		endpoint = "/responses/compact"
 	}
diff --git a/pkg/llmproxy/executor/openai_compat_executor_compact_test.go b/pkg/llmproxy/executor/openai_compat_executor_compact_test.go
index fe2812623b..5ddeda3ed4 100644
--- a/pkg/llmproxy/executor/openai_compat_executor_compact_test.go
+++ b/pkg/llmproxy/executor/openai_compat_executor_compact_test.go
@@ -56,3 +56,32 @@ func TestOpenAICompatExecutorCompactPassthrough(t *testing.T) {
 		t.Fatalf("payload = %s", string(resp.Payload))
 	}
 }
+
+func TestOpenAICompatExecutorCompactDisabledByConfig(t *testing.T) {
+	disabled := false
+	executor := NewOpenAICompatExecutor("openai-compatibility", &config.Config{
+		ResponsesCompactEnabled: &disabled,
+	})
+	auth := &cliproxyauth.Auth{Attributes: map[string]string{
+		"base_url": "https://example.com/v1",
+		"api_key":  "test",
+	}}
+	_, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{
+		Model:   "gpt-5.1-codex-max",
+		Payload: []byte(`{"model":"gpt-5.1-codex-max","input":[{"role":"user","content":"hi"}]}`),
+	}, cliproxyexecutor.Options{
+		SourceFormat: sdktranslator.FromString("openai-response"),
+		Alt:          "responses/compact",
+		Stream:       false,
+	})
+	if err == nil {
+		t.Fatal("expected compact-disabled error, got nil")
+	}
+	se, ok := err.(statusErr)
+	if !ok {
+		t.Fatalf("expected statusErr, got %T", err)
+	}
+	if se.StatusCode() != http.StatusNotFound {
+		t.Fatalf("status = %d, want %d", se.StatusCode(), http.StatusNotFound)
+	}
+}
diff --git a/pkg/llmproxy/usage/metrics_test.go b/pkg/llmproxy/usage/metrics_test.go
index 7b0ada1e9a..f8d5cbb373 100644
--- a/pkg/llmproxy/usage/metrics_test.go
+++ b/pkg/llmproxy/usage/metrics_test.go
@@ -91,6 +91,29 @@ func TestNormalizeProviderAliasesDroidToGemini(t *testing.T) {
 	}
 }
 
+func TestGetProviderMetrics_MapsDroidAliasToGemini(t *testing.T) {
+	stats := GetRequestStatistics()
+	ctx := context.Background()
+
+	stats.Record(ctx, coreusage.Record{
+		Provider: "droid",
+		APIKey:   "droid-cli-primary",
+		Model:    "gemini-2.5-pro",
+		Detail: coreusage.Detail{
+			TotalTokens: 11,
+		},
+	})
+
+	metrics := GetProviderMetrics()
+	geminiMetrics, ok := metrics["gemini"]
+	if !ok {
+		t.Fatal("expected gemini metrics from droid alias input")
+	}
+	if geminiMetrics.RequestCount < 1 {
+		t.Fatalf("expected gemini request count >= 1, got %d", geminiMetrics.RequestCount)
+	}
+}
+
 func TestGetProviderMetrics_IncludesKiroAndCursor(t *testing.T) {
 	stats := GetRequestStatistics()
 	ctx := context.Background()