From f1ab68554b2dec351904ebf10b06d28024f56630 Mon Sep 17 00:00:00 2001 From: Koosha Paridehpour Date: Sun, 22 Feb 2026 21:23:05 -0700 Subject: [PATCH 1/7] fix(#253): support endpoint override for provider-pinned codex models --- sdk/api/handlers/openai/endpoint_compat.go | 40 ++++++++++++++++- .../handlers/openai/endpoint_compat_test.go | 45 +++++++++++++++++++ 2 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 sdk/api/handlers/openai/endpoint_compat_test.go diff --git a/sdk/api/handlers/openai/endpoint_compat.go b/sdk/api/handlers/openai/endpoint_compat.go index 27cc94fa34..4801ff33e5 100644 --- a/sdk/api/handlers/openai/endpoint_compat.go +++ b/sdk/api/handlers/openai/endpoint_compat.go @@ -1,6 +1,11 @@ package openai -import "github.com/router-for-me/CLIProxyAPI/v6/pkg/llmproxy/registry" +import ( + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/pkg/llmproxy/registry" + "github.com/router-for-me/CLIProxyAPI/v6/pkg/llmproxy/thinking" +) const ( openAIChatEndpoint = "/chat/completions" @@ -8,10 +13,11 @@ const ( ) func resolveEndpointOverride(modelName, requestedEndpoint string) (string, bool) { + modelName = strings.TrimSpace(modelName) if modelName == "" { return "", false } - info := registry.GetGlobalRegistry().GetModelInfo(modelName, "") + info := lookupModelInfoForEndpointOverride(modelName) if info == nil || len(info.SupportedEndpoints) == 0 { return "", false } @@ -27,6 +33,36 @@ func resolveEndpointOverride(modelName, requestedEndpoint string) (string, bool) return "", false } +func lookupModelInfoForEndpointOverride(modelName string) *registry.ModelInfo { + if info := registry.GetGlobalRegistry().GetModelInfo(modelName, ""); info != nil { + return info + } + + baseModel := strings.TrimSpace(thinking.ParseSuffix(modelName).ModelName) + if baseModel != "" && baseModel != modelName { + if info := registry.GetGlobalRegistry().GetModelInfo(baseModel, ""); info != nil { + return info + } + } + + providerPinnedModel := modelName + if 
slash := strings.IndexByte(modelName, '/'); slash > 0 && slash+1 < len(modelName) { + providerPinnedModel = strings.TrimSpace(modelName[slash+1:]) + } + if providerPinnedModel != "" && providerPinnedModel != modelName { + if info := registry.GetGlobalRegistry().GetModelInfo(providerPinnedModel, ""); info != nil { + return info + } + if providerPinnedBase := strings.TrimSpace(thinking.ParseSuffix(providerPinnedModel).ModelName); providerPinnedBase != "" && providerPinnedBase != providerPinnedModel { + if info := registry.GetGlobalRegistry().GetModelInfo(providerPinnedBase, ""); info != nil { + return info + } + } + } + + return nil +} + func endpointListContains(items []string, value string) bool { for _, item := range items { if item == value { diff --git a/sdk/api/handlers/openai/endpoint_compat_test.go b/sdk/api/handlers/openai/endpoint_compat_test.go new file mode 100644 index 0000000000..823c9806bf --- /dev/null +++ b/sdk/api/handlers/openai/endpoint_compat_test.go @@ -0,0 +1,45 @@ +package openai + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/pkg/llmproxy/registry" +) + +func TestResolveEndpointOverride_UsesRegisteredResponsesOnlyModel(t *testing.T) { + clientID := "endpoint-compat-test-client-1" + registry.GetGlobalRegistry().RegisterClient(clientID, "codex", []*registry.ModelInfo{{ + ID: "gpt-5.1-codex", + SupportedEndpoints: []string{openAIResponsesEndpoint}, + }}) + t.Cleanup(func() { + registry.GetGlobalRegistry().UnregisterClient(clientID) + }) + + override, ok := resolveEndpointOverride("gpt-5.1-codex", openAIChatEndpoint) + if !ok { + t.Fatal("expected endpoint override") + } + if override != openAIResponsesEndpoint { + t.Fatalf("override = %q, want %q", override, openAIResponsesEndpoint) + } +} + +func TestResolveEndpointOverride_UsesProviderPinnedSuffixedModel(t *testing.T) { + clientID := "endpoint-compat-test-client-2" + registry.GetGlobalRegistry().RegisterClient(clientID, "codex", []*registry.ModelInfo{{ + ID: 
"gpt-5.1-codex", + SupportedEndpoints: []string{openAIResponsesEndpoint}, + }}) + t.Cleanup(func() { + registry.GetGlobalRegistry().UnregisterClient(clientID) + }) + + override, ok := resolveEndpointOverride("codex/gpt-5.1-codex(high)", openAIChatEndpoint) + if !ok { + t.Fatal("expected endpoint override for provider-pinned model with suffix") + } + if override != openAIResponsesEndpoint { + t.Fatalf("override = %q, want %q", override, openAIResponsesEndpoint) + } +} From 05f894bf9981bddb4d031769799cc4ec38b295ab Mon Sep 17 00:00:00 2001 From: Koosha Paridehpour Date: Sun, 22 Feb 2026 21:23:14 -0700 Subject: [PATCH 2/7] fix(registry): enforce copilot context length 128K at registration (#241) --- .../reports/issue-wave-gh-next32-lane-2.md | 10 +++++ pkg/llmproxy/registry/model_registry.go | 16 ++++++++ .../registry/model_registry_hook_test.go | 41 +++++++++++++++++++ 3 files changed, 67 insertions(+) diff --git a/docs/planning/reports/issue-wave-gh-next32-lane-2.md b/docs/planning/reports/issue-wave-gh-next32-lane-2.md index 87ae0840cd..63a97b9e28 100644 --- a/docs/planning/reports/issue-wave-gh-next32-lane-2.md +++ b/docs/planning/reports/issue-wave-gh-next32-lane-2.md @@ -55,3 +55,13 @@ Worktree: `cliproxyapi-plusplus-wave-cpb-2` ## Blockers - #160 blocked on missing deterministic reproduction fixture for duplicate-output stream bug in current repo state. 
+ +## Wave2 Lane 2 Entry - #241 + +- Issue: `#241` copilot context length should always be `128K` +- Status: `implemented` +- Mapping: + - normalization at runtime registration: `pkg/llmproxy/registry/model_registry.go` + - regression coverage: `pkg/llmproxy/registry/model_registry_hook_test.go` +- Tests: + - `go test ./pkg/llmproxy/registry -run 'TestRegisterClient_NormalizesCopilotContextLength|TestGetGitHubCopilotModels' -count=1` diff --git a/pkg/llmproxy/registry/model_registry.go b/pkg/llmproxy/registry/model_registry.go index 2509afd260..dd4b0b335c 100644 --- a/pkg/llmproxy/registry/model_registry.go +++ b/pkg/llmproxy/registry/model_registry.go @@ -211,6 +211,9 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [ defer r.mutex.Unlock() provider := strings.ToLower(clientProvider) + if provider == "github-copilot" { + models = normalizeCopilotContextLength(models) + } uniqueModelIDs := make([]string, 0, len(models)) rawModelIDs := make([]string, 0, len(models)) newModels := make(map[string]*ModelInfo, len(models)) @@ -414,6 +417,19 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [ misc.LogCredentialSeparator() } +func normalizeCopilotContextLength(models []*ModelInfo) []*ModelInfo { + normalized := make([]*ModelInfo, 0, len(models)) + for _, model := range models { + if model == nil { + continue + } + copyModel := cloneModelInfo(model) + copyModel.ContextLength = 128000 + normalized = append(normalized, copyModel) + } + return normalized +} + func (r *ModelRegistry) addModelRegistration(modelID, provider string, model *ModelInfo, now time.Time) { if model == nil || modelID == "" { return diff --git a/pkg/llmproxy/registry/model_registry_hook_test.go b/pkg/llmproxy/registry/model_registry_hook_test.go index 70226b9eaf..3e023d8f87 100644 --- a/pkg/llmproxy/registry/model_registry_hook_test.go +++ b/pkg/llmproxy/registry/model_registry_hook_test.go @@ -202,3 +202,44 @@ func 
TestModelRegistryHook_PanicDoesNotAffectRegistry(t *testing.T) { t.Fatal("timeout waiting for OnModelsUnregistered hook call") } } + +func TestRegisterClient_NormalizesCopilotContextLength(t *testing.T) { + r := newTestModelRegistry() + hook := &capturingHook{ + registeredCh: make(chan registeredCall, 1), + unregisteredCh: make(chan unregisteredCall, 1), + } + r.SetHook(hook) + + r.RegisterClient("client-copilot", "github-copilot", []*ModelInfo{ + {ID: "gpt-5", ContextLength: 200000}, + {ID: "gpt-5-mini", ContextLength: 1048576}, + }) + + select { + case call := <-hook.registeredCh: + for _, model := range call.models { + if model.ContextLength != 128000 { + t.Fatalf("hook model %q context_length=%d, want 128000", model.ID, model.ContextLength) + } + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for OnModelsRegistered hook call") + } + + registration, ok := r.models["gpt-5"] + if !ok || registration == nil || registration.Info == nil { + t.Fatal("expected gpt-5 registration info") + } + if registration.Info.ContextLength != 128000 { + t.Fatalf("registry info context_length=%d, want 128000", registration.Info.ContextLength) + } + + clientInfo, ok := r.clientModelInfos["client-copilot"]["gpt-5-mini"] + if !ok || clientInfo == nil { + t.Fatal("expected client model info for gpt-5-mini") + } + if clientInfo.ContextLength != 128000 { + t.Fatalf("client model info context_length=%d, want 128000", clientInfo.ContextLength) + } +} From 947883cb550da9611d2ebcf5ab2b94580207f3b4 Mon Sep 17 00:00:00 2001 From: Koosha Paridehpour Date: Sun, 22 Feb 2026 21:23:03 -0700 Subject: [PATCH 3/7] fix(kiro): handle banned account 403 payloads (#221) --- .../reports/issue-wave-gh-next32-lane-3.md | 12 +++++- .../runtime/executor/kiro_executor.go | 20 ++++++++-- .../executor/kiro_executor_extra_test.go | 40 +++++++++++++++++++ 3 files changed, 67 insertions(+), 5 deletions(-) diff --git a/docs/planning/reports/issue-wave-gh-next32-lane-3.md 
b/docs/planning/reports/issue-wave-gh-next32-lane-3.md index 6292f73b40..180fdb8607 100644 --- a/docs/planning/reports/issue-wave-gh-next32-lane-3.md +++ b/docs/planning/reports/issue-wave-gh-next32-lane-3.md @@ -29,6 +29,17 @@ Worktree: `cliproxyapi-plusplus-wave-cpb-3` - Status: `pending` - Notes: lane-started +### Wave2 #221 - `kiro账号被封` +- Status: `implemented` +- Source mapping: + - Source issue: `router-for-me/CLIProxyAPIPlus#221` (Kiro account banned handling) + - Fix: broaden Kiro 403 suspension detection to case-insensitive suspended/banned signals so banned accounts consistently trigger cooldown + remediation messaging in both non-stream and stream paths. + - Code: `pkg/llmproxy/runtime/executor/kiro_executor.go` + - Tests: `pkg/llmproxy/runtime/executor/kiro_executor_extra_test.go` +- Test commands: + - `go test ./pkg/llmproxy/runtime/executor -run 'Test(IsKiroSuspendedOrBannedResponse|FormatKiroCooldownError|FormatKiroSuspendedStatusMessage)' -count=1` + - Result: blocked by pre-existing package build failures in `pkg/llmproxy/runtime/executor/codex_websockets_executor.go` (`unused imports`, `undefined: authID`, `undefined: wsURL`). + ## Focused Checks - `task quality:fmt:check` @@ -37,4 +48,3 @@ Worktree: `cliproxyapi-plusplus-wave-cpb-3` ## Blockers - None recorded yet; work is in planning state. 
- diff --git a/pkg/llmproxy/runtime/executor/kiro_executor.go b/pkg/llmproxy/runtime/executor/kiro_executor.go index ee62cedf67..c3215a29d0 100644 --- a/pkg/llmproxy/runtime/executor/kiro_executor.go +++ b/pkg/llmproxy/runtime/executor/kiro_executor.go @@ -638,6 +638,18 @@ func formatKiroSuspendedStatusMessage(respBody []byte) string { return "account suspended by upstream Kiro endpoint: " + string(respBody) + "; re-auth this Kiro entry or use another auth index" } +func isKiroSuspendedOrBannedResponse(respBody string) bool { + if strings.TrimSpace(respBody) == "" { + return false + } + lowerBody := strings.ToLower(respBody) + return strings.Contains(lowerBody, "temporarily_suspended") || + strings.Contains(lowerBody, "suspended") || + strings.Contains(lowerBody, "account_banned") || + strings.Contains(lowerBody, "account banned") || + strings.Contains(lowerBody, "banned") +} + // Execute sends the request to Kiro API and returns the response. // Supports automatic token refresh on 401/403 errors. func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { @@ -945,8 +957,8 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth. 
respBodyStr := string(respBody) - // Check for SUSPENDED status - return immediately without retry - if strings.Contains(respBodyStr, "SUSPENDED") || strings.Contains(respBodyStr, "TEMPORARILY_SUSPENDED") { + // Check for suspended/banned status - return immediately without retry + if isKiroSuspendedOrBannedResponse(respBodyStr) { // Set long cooldown for suspended accounts rateLimiter.CheckAndMarkSuspended(tokenKey, respBodyStr) cooldownMgr.SetCooldown(tokenKey, kiroauth.LongCooldown, kiroauth.CooldownReasonSuspended) @@ -1381,8 +1393,8 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox respBodyStr := string(respBody) - // Check for SUSPENDED status - return immediately without retry - if strings.Contains(respBodyStr, "SUSPENDED") || strings.Contains(respBodyStr, "TEMPORARILY_SUSPENDED") { + // Check for suspended/banned status - return immediately without retry + if isKiroSuspendedOrBannedResponse(respBodyStr) { // Set long cooldown for suspended accounts rateLimiter.CheckAndMarkSuspended(tokenKey, respBodyStr) cooldownMgr.SetCooldown(tokenKey, kiroauth.LongCooldown, kiroauth.CooldownReasonSuspended) diff --git a/pkg/llmproxy/runtime/executor/kiro_executor_extra_test.go b/pkg/llmproxy/runtime/executor/kiro_executor_extra_test.go index 60afeaa545..0efae05df4 100644 --- a/pkg/llmproxy/runtime/executor/kiro_executor_extra_test.go +++ b/pkg/llmproxy/runtime/executor/kiro_executor_extra_test.go @@ -105,3 +105,43 @@ func TestFormatKiroSuspendedStatusMessage(t *testing.T) { t.Fatalf("expected remediation text in message, got %q", msg) } } + +func TestIsKiroSuspendedOrBannedResponse(t *testing.T) { + tests := []struct { + name string + body string + want bool + }{ + { + name: "uppercase suspended token", + body: `{"status":"SUSPENDED"}`, + want: true, + }, + { + name: "lowercase banned sentence", + body: `{"message":"account banned due to abuse checks"}`, + want: true, + }, + { + name: "temporary suspended lowercase key", + body: 
`{"status":"temporarily_suspended"}`, + want: true, + }, + { + name: "token expired should not count as banned", + body: `{"error":"token expired"}`, + want: false, + }, + { + name: "empty body", + body: ` `, + want: false, + }, + } + + for _, tt := range tests { + if got := isKiroSuspendedOrBannedResponse(tt.body); got != tt.want { + t.Fatalf("%s: isKiroSuspendedOrBannedResponse(%q) = %v, want %v", tt.name, tt.body, got, tt.want) + } + } +} From 9fa8479d482f1cc32762f214420d868b1b51cbc9 Mon Sep 17 00:00:00 2001 From: Koosha Paridehpour Date: Sun, 22 Feb 2026 21:26:28 -0700 Subject: [PATCH 4/7] fix(kiro): broaden cmd alias handling for command tools (#210) --- .../reports/issue-wave-gh-next32-lane-4.md | 13 ++++++++++++ .../kiro/claude/truncation_detector.go | 4 ++-- .../kiro/claude/truncation_detector_test.go | 21 +++++++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/docs/planning/reports/issue-wave-gh-next32-lane-4.md b/docs/planning/reports/issue-wave-gh-next32-lane-4.md index 14117c9043..5d1dce3175 100644 --- a/docs/planning/reports/issue-wave-gh-next32-lane-4.md +++ b/docs/planning/reports/issue-wave-gh-next32-lane-4.md @@ -34,3 +34,16 @@ Worktree: `cliproxyapi-plusplus-wave-cpb-4` - None recorded yet; work is in planning state. +## Wave2 Updates + +### Wave2 Lane 4 - Issue #210 +- Issue: `#210` Kiro/Ampcode Bash tool parameter incompatibility +- Mapping: + - `pkg/llmproxy/translator/kiro/claude/truncation_detector.go` + - `pkg/llmproxy/translator/kiro/claude/truncation_detector_test.go` +- Change: + - Extended command-parameter alias compatibility so `execute` and `run_command` accept `cmd` in addition to `command`, matching existing Bash alias handling and preventing false truncation loops. 
+- Tests: + - `go test ./pkg/llmproxy/translator/kiro/claude -run 'TestDetectTruncation|TestBuildSoftFailureToolResult'` +- Quality gate: + - `task quality` failed due to pre-existing syntax errors in `pkg/llmproxy/executor/kiro_executor.go` (`expected '(' found kiroModelFingerprint`), unrelated to this issue scope. diff --git a/pkg/llmproxy/translator/kiro/claude/truncation_detector.go b/pkg/llmproxy/translator/kiro/claude/truncation_detector.go index e245e68bea..e0a1c133f9 100644 --- a/pkg/llmproxy/translator/kiro/claude/truncation_detector.go +++ b/pkg/llmproxy/translator/kiro/claude/truncation_detector.go @@ -66,8 +66,8 @@ var RequiredFieldsByTool = map[string][]string{ // Ampcode-compatible Bash tool uses "cmd", while other clients commonly use "command". // Accept either key to avoid false truncation detection loops. "Bash": {"command", "cmd"}, - "execute": {"command"}, - "run_command": {"command"}, + "execute": {"command", "cmd"}, + "run_command": {"command", "cmd"}, } // DetectTruncation checks if the tool use input appears to be truncated. diff --git a/pkg/llmproxy/translator/kiro/claude/truncation_detector_test.go b/pkg/llmproxy/translator/kiro/claude/truncation_detector_test.go index 7e60600612..f4f36275fa 100644 --- a/pkg/llmproxy/translator/kiro/claude/truncation_detector_test.go +++ b/pkg/llmproxy/translator/kiro/claude/truncation_detector_test.go @@ -51,6 +51,27 @@ func TestDetectTruncation(t *testing.T) { if info6.IsTruncated { t.Errorf("expected no truncation for Bash cmd alias, got %v", info6) } + + // 7. execute cmd alias compatibility + parsed7 := map[string]interface{}{"cmd": "ls -la"} + info7 := DetectTruncation("execute", "c3", `{"cmd":"ls -la"}`, parsed7) + if info7.IsTruncated { + t.Errorf("expected no truncation for execute cmd alias, got %v", info7) + } + + // 8. 
run_command cmd alias compatibility + parsed8 := map[string]interface{}{"cmd": "pwd"} + info8 := DetectTruncation("run_command", "c4", `{"cmd":"pwd"}`, parsed8) + if info8.IsTruncated { + t.Errorf("expected no truncation for run_command cmd alias, got %v", info8) + } + + // 9. command tool still truncates when both command aliases are missing + parsed9 := map[string]interface{}{"path": "/tmp"} + info9 := DetectTruncation("execute", "c5", `{"path":"/tmp"}`, parsed9) + if !info9.IsTruncated || info9.TruncationType != TruncationTypeMissingFields { + t.Errorf("expected missing_fields truncation when command aliases are absent, got %v", info9) + } } func TestBuildSoftFailureToolResult(t *testing.T) { From d921c09bfd6cd56bb31d779d8586183359890ac8 Mon Sep 17 00:00:00 2001 From: Koosha Paridehpour Date: Sun, 22 Feb 2026 21:25:56 -0700 Subject: [PATCH 5/7] fix(#200): honor Gemini quota reset durations for cooldown --- .../reports/issue-wave-gh-next32-lane-5.md | 14 ++++- pkg/llmproxy/executor/gemini_cli_executor.go | 9 ++-- .../gemini_cli_executor_retry_delay_test.go | 54 +++++++++++++++++++ .../runtime/executor/gemini_cli_executor.go | 9 ++-- .../gemini_cli_executor_retry_delay_test.go | 54 +++++++++++++++++++ 5 files changed, 129 insertions(+), 11 deletions(-) create mode 100644 pkg/llmproxy/executor/gemini_cli_executor_retry_delay_test.go create mode 100644 pkg/llmproxy/runtime/executor/gemini_cli_executor_retry_delay_test.go diff --git a/docs/planning/reports/issue-wave-gh-next32-lane-5.md b/docs/planning/reports/issue-wave-gh-next32-lane-5.md index 397906e2f2..cfa575fc90 100644 --- a/docs/planning/reports/issue-wave-gh-next32-lane-5.md +++ b/docs/planning/reports/issue-wave-gh-next32-lane-5.md @@ -30,7 +30,19 @@ Worktree: `cliproxyapi-plusplus-wave-cpb-5` - `task quality:fmt:check` - `QUALITY_PACKAGES='./pkg/llmproxy/api ./sdk/api/handlers/openai' task quality:quick` +## Wave2 Execution Entry + +### #200 +- Status: `done` +- Mapping: `router-for-me/CLIProxyAPIPlus 
issue#200` -> `CP2K-0020` -> Gemini quota auto disable/enable timing now honors fractional/unit retry hints from upstream quota messages. +- Code: + - `pkg/llmproxy/executor/gemini_cli_executor.go` + - `pkg/llmproxy/runtime/executor/gemini_cli_executor.go` +- Tests: + - `pkg/llmproxy/executor/gemini_cli_executor_retry_delay_test.go` + - `pkg/llmproxy/runtime/executor/gemini_cli_executor_retry_delay_test.go` + - `go test ./pkg/llmproxy/executor ./pkg/llmproxy/runtime/executor -run 'TestParseRetryDelay_(MessageDuration|MessageMilliseconds|PrefersRetryInfo)$'` + ## Blockers - None recorded yet; work is in planning state. - diff --git a/pkg/llmproxy/executor/gemini_cli_executor.go b/pkg/llmproxy/executor/gemini_cli_executor.go index 214373bbfc..c8421412ab 100644 --- a/pkg/llmproxy/executor/gemini_cli_executor.go +++ b/pkg/llmproxy/executor/gemini_cli_executor.go @@ -13,7 +13,6 @@ import ( "math/rand" "net/http" "regexp" - "strconv" "strings" "time" @@ -937,14 +936,14 @@ func parseRetryDelay(errorBody []byte) (*time.Duration, error) { } } - // Fallback: parse from error.message "Your quota will reset after Xs." 
+ // Fallback: parse from error.message (supports units like ms/s/m/h with optional decimals) message := gjson.GetBytes(errorBody, "error.message").String() if message != "" { - re := regexp.MustCompile(`after\s+(\d+)s\.?`) + re := regexp.MustCompile(`after\s+([0-9]+(?:\.[0-9]+)?(?:ms|s|m|h))\.?`) if matches := re.FindStringSubmatch(message); len(matches) > 1 { - seconds, err := strconv.Atoi(matches[1]) + duration, err := time.ParseDuration(matches[1]) if err == nil { - return new(time.Duration(seconds) * time.Second), nil + return &duration, nil } } } diff --git a/pkg/llmproxy/executor/gemini_cli_executor_retry_delay_test.go b/pkg/llmproxy/executor/gemini_cli_executor_retry_delay_test.go new file mode 100644 index 0000000000..f26c5a95e1 --- /dev/null +++ b/pkg/llmproxy/executor/gemini_cli_executor_retry_delay_test.go @@ -0,0 +1,54 @@ +package executor + +import ( + "testing" + "time" +) + +func TestParseRetryDelay_MessageDuration(t *testing.T) { + t.Parallel() + + body := []byte(`{"error":{"message":"Quota exceeded. 
Your quota will reset after 1.5s."}}`) + got, err := parseRetryDelay(body) + if err != nil { + t.Fatalf("parseRetryDelay returned error: %v", err) + } + if got == nil { + t.Fatal("parseRetryDelay returned nil duration") + } + if *got != 1500*time.Millisecond { + t.Fatalf("parseRetryDelay = %v, want %v", *got, 1500*time.Millisecond) + } +} + +func TestParseRetryDelay_MessageMilliseconds(t *testing.T) { + t.Parallel() + + body := []byte(`{"error":{"message":"Please retry after 250ms."}}`) + got, err := parseRetryDelay(body) + if err != nil { + t.Fatalf("parseRetryDelay returned error: %v", err) + } + if got == nil { + t.Fatal("parseRetryDelay returned nil duration") + } + if *got != 250*time.Millisecond { + t.Fatalf("parseRetryDelay = %v, want %v", *got, 250*time.Millisecond) + } +} + +func TestParseRetryDelay_PrefersRetryInfo(t *testing.T) { + t.Parallel() + + body := []byte(`{"error":{"message":"Your quota will reset after 99s.","details":[{"@type":"type.googleapis.com/google.rpc.RetryInfo","retryDelay":"2s"}]}}`) + got, err := parseRetryDelay(body) + if err != nil { + t.Fatalf("parseRetryDelay returned error: %v", err) + } + if got == nil { + t.Fatal("parseRetryDelay returned nil duration") + } + if *got != 2*time.Second { + t.Fatalf("parseRetryDelay = %v, want %v", *got, 2*time.Second) + } +} diff --git a/pkg/llmproxy/runtime/executor/gemini_cli_executor.go b/pkg/llmproxy/runtime/executor/gemini_cli_executor.go index fec47dddb9..47538569ee 100644 --- a/pkg/llmproxy/runtime/executor/gemini_cli_executor.go +++ b/pkg/llmproxy/runtime/executor/gemini_cli_executor.go @@ -13,7 +13,6 @@ import ( "math/rand" "net/http" "regexp" - "strconv" "strings" "time" @@ -937,14 +936,14 @@ func parseRetryDelay(errorBody []byte) (*time.Duration, error) { } } - // Fallback: parse from error.message "Your quota will reset after Xs." 
+ // Fallback: parse from error.message (supports units like ms/s/m/h with optional decimals) message := gjson.GetBytes(errorBody, "error.message").String() if message != "" { - re := regexp.MustCompile(`after\s+(\d+)s\.?`) + re := regexp.MustCompile(`after\s+([0-9]+(?:\.[0-9]+)?(?:ms|s|m|h))\.?`) if matches := re.FindStringSubmatch(message); len(matches) > 1 { - seconds, err := strconv.Atoi(matches[1]) + duration, err := time.ParseDuration(matches[1]) if err == nil { - return new(time.Duration(seconds) * time.Second), nil + return &duration, nil } } } diff --git a/pkg/llmproxy/runtime/executor/gemini_cli_executor_retry_delay_test.go b/pkg/llmproxy/runtime/executor/gemini_cli_executor_retry_delay_test.go new file mode 100644 index 0000000000..f26c5a95e1 --- /dev/null +++ b/pkg/llmproxy/runtime/executor/gemini_cli_executor_retry_delay_test.go @@ -0,0 +1,54 @@ +package executor + +import ( + "testing" + "time" +) + +func TestParseRetryDelay_MessageDuration(t *testing.T) { + t.Parallel() + + body := []byte(`{"error":{"message":"Quota exceeded. 
Your quota will reset after 1.5s."}}`) + got, err := parseRetryDelay(body) + if err != nil { + t.Fatalf("parseRetryDelay returned error: %v", err) + } + if got == nil { + t.Fatal("parseRetryDelay returned nil duration") + } + if *got != 1500*time.Millisecond { + t.Fatalf("parseRetryDelay = %v, want %v", *got, 1500*time.Millisecond) + } +} + +func TestParseRetryDelay_MessageMilliseconds(t *testing.T) { + t.Parallel() + + body := []byte(`{"error":{"message":"Please retry after 250ms."}}`) + got, err := parseRetryDelay(body) + if err != nil { + t.Fatalf("parseRetryDelay returned error: %v", err) + } + if got == nil { + t.Fatal("parseRetryDelay returned nil duration") + } + if *got != 250*time.Millisecond { + t.Fatalf("parseRetryDelay = %v, want %v", *got, 250*time.Millisecond) + } +} + +func TestParseRetryDelay_PrefersRetryInfo(t *testing.T) { + t.Parallel() + + body := []byte(`{"error":{"message":"Your quota will reset after 99s.","details":[{"@type":"type.googleapis.com/google.rpc.RetryInfo","retryDelay":"2s"}]}}`) + got, err := parseRetryDelay(body) + if err != nil { + t.Fatalf("parseRetryDelay returned error: %v", err) + } + if got == nil { + t.Fatal("parseRetryDelay returned nil duration") + } + if *got != 2*time.Second { + t.Fatalf("parseRetryDelay = %v, want %v", *got, 2*time.Second) + } +} From a2571c90fe001a8d8b1f75c07d4049ad3820d7a1 Mon Sep 17 00:00:00 2001 From: Koosha Paridehpour Date: Sun, 22 Feb 2026 21:26:03 -0700 Subject: [PATCH 6/7] fix(#179): honor openai-compat models-endpoint overrides --- .../reports/issue-wave-gh-next32-lane-6.md | 20 ++++++++++++ .../executor/openai_models_fetcher.go | 31 +++++++++++++++++++ .../executor/openai_models_fetcher_test.go | 16 ++++++++++ .../runtime/executor/openai_models_fetcher.go | 31 +++++++++++++++++++ .../executor/openai_models_fetcher_test.go | 16 ++++++++++ 5 files changed, 114 insertions(+) diff --git a/docs/planning/reports/issue-wave-gh-next32-lane-6.md 
b/docs/planning/reports/issue-wave-gh-next32-lane-6.md index a46a1a9372..7e4d67337a 100644 --- a/docs/planning/reports/issue-wave-gh-next32-lane-6.md +++ b/docs/planning/reports/issue-wave-gh-next32-lane-6.md @@ -34,3 +34,23 @@ Worktree: `cliproxyapi-plusplus-wave-cpb-6` - None recorded yet; work is in planning state. +## Wave2 Entries + +### 2026-02-23 - #179 OpenAI-MLX/vLLM-MLX support +- Status: `done` +- Mapping: + - Source issue: `router-for-me/CLIProxyAPIPlus#179` + - Implemented fix: OpenAI-compatible model discovery now honors `models_endpoint` auth attribute (emitted from `models-endpoint` config), including absolute URL and absolute path overrides. + - Why this is low risk: fallback/default `/v1/models` behavior is unchanged; only explicit override handling is added. +- Files: + - `pkg/llmproxy/executor/openai_models_fetcher.go` + - `pkg/llmproxy/executor/openai_models_fetcher_test.go` + - `pkg/llmproxy/runtime/executor/openai_models_fetcher.go` + - `pkg/llmproxy/runtime/executor/openai_models_fetcher_test.go` +- Tests: + - `go test pkg/llmproxy/executor/openai_models_fetcher.go pkg/llmproxy/executor/proxy_helpers.go pkg/llmproxy/executor/openai_models_fetcher_test.go` + - `go test pkg/llmproxy/runtime/executor/openai_models_fetcher.go pkg/llmproxy/runtime/executor/proxy_helpers.go pkg/llmproxy/runtime/executor/openai_models_fetcher_test.go` +- Verification notes: + - Added regression coverage for `models_endpoint` path override and absolute URL override in both mirrored executor test suites. +- Blockers: + - Package-level `go test ./pkg/llmproxy/executor` and `go test ./pkg/llmproxy/runtime/executor` are currently blocked by unrelated compile errors in existing lane files (`kiro_executor.go`, `codex_websockets_executor.go`). 
diff --git a/pkg/llmproxy/executor/openai_models_fetcher.go b/pkg/llmproxy/executor/openai_models_fetcher.go index 7ddf0d826d..48b62d7a4b 100644 --- a/pkg/llmproxy/executor/openai_models_fetcher.go +++ b/pkg/llmproxy/executor/openai_models_fetcher.go @@ -111,6 +111,9 @@ func resolveOpenAIModelsURL(baseURL string, attrs map[string]string) string { if modelsURL := strings.TrimSpace(attrs["models_url"]); modelsURL != "" { return modelsURL } + if modelsEndpoint := strings.TrimSpace(attrs["models_endpoint"]); modelsEndpoint != "" { + return resolveOpenAIModelsEndpointURL(baseURL, modelsEndpoint) + } } trimmedBaseURL := strings.TrimRight(strings.TrimSpace(baseURL), "/") @@ -134,6 +137,34 @@ func resolveOpenAIModelsURL(baseURL string, attrs map[string]string) string { return trimmedBaseURL + "/v1/models" } +func resolveOpenAIModelsEndpointURL(baseURL, modelsEndpoint string) string { + modelsEndpoint = strings.TrimSpace(modelsEndpoint) + if modelsEndpoint == "" { + return "" + } + if parsed, err := url.Parse(modelsEndpoint); err == nil && parsed.IsAbs() { + return modelsEndpoint + } + + trimmedBaseURL := strings.TrimRight(strings.TrimSpace(baseURL), "/") + if trimmedBaseURL == "" { + return modelsEndpoint + } + + if strings.HasPrefix(modelsEndpoint, "/") { + baseParsed, err := url.Parse(trimmedBaseURL) + if err == nil && baseParsed.Scheme != "" && baseParsed.Host != "" { + baseParsed.Path = modelsEndpoint + baseParsed.RawQuery = "" + baseParsed.Fragment = "" + return baseParsed.String() + } + return trimmedBaseURL + modelsEndpoint + } + + return trimmedBaseURL + "/" + strings.TrimLeft(modelsEndpoint, "/") +} + func isVersionSegment(segment string) bool { if len(segment) < 2 || segment[0] != 'v' { return false diff --git a/pkg/llmproxy/executor/openai_models_fetcher_test.go b/pkg/llmproxy/executor/openai_models_fetcher_test.go index b7957d1244..8b4e2ffb3f 100644 --- a/pkg/llmproxy/executor/openai_models_fetcher_test.go +++ 
b/pkg/llmproxy/executor/openai_models_fetcher_test.go @@ -35,6 +35,22 @@ func TestResolveOpenAIModelsURL(t *testing.T) { }, want: "https://custom.example.com/models", }, + { + name: "ModelsEndpointPathOverrideUsesBaseHost", + baseURL: "https://api.z.ai/api/coding/paas/v4", + attrs: map[string]string{ + "models_endpoint": "/api/coding/paas/v4/models", + }, + want: "https://api.z.ai/api/coding/paas/v4/models", + }, + { + name: "ModelsEndpointAbsoluteURLOverrideWins", + baseURL: "https://api.z.ai/api/coding/paas/v4", + attrs: map[string]string{ + "models_endpoint": "https://custom.example.com/models", + }, + want: "https://custom.example.com/models", + }, } for _, tc := range testCases { diff --git a/pkg/llmproxy/runtime/executor/openai_models_fetcher.go b/pkg/llmproxy/runtime/executor/openai_models_fetcher.go index 7ddf0d826d..48b62d7a4b 100644 --- a/pkg/llmproxy/runtime/executor/openai_models_fetcher.go +++ b/pkg/llmproxy/runtime/executor/openai_models_fetcher.go @@ -111,6 +111,9 @@ func resolveOpenAIModelsURL(baseURL string, attrs map[string]string) string { if modelsURL := strings.TrimSpace(attrs["models_url"]); modelsURL != "" { return modelsURL } + if modelsEndpoint := strings.TrimSpace(attrs["models_endpoint"]); modelsEndpoint != "" { + return resolveOpenAIModelsEndpointURL(baseURL, modelsEndpoint) + } } trimmedBaseURL := strings.TrimRight(strings.TrimSpace(baseURL), "/") @@ -134,6 +137,34 @@ func resolveOpenAIModelsURL(baseURL string, attrs map[string]string) string { return trimmedBaseURL + "/v1/models" } +func resolveOpenAIModelsEndpointURL(baseURL, modelsEndpoint string) string { + modelsEndpoint = strings.TrimSpace(modelsEndpoint) + if modelsEndpoint == "" { + return "" + } + if parsed, err := url.Parse(modelsEndpoint); err == nil && parsed.IsAbs() { + return modelsEndpoint + } + + trimmedBaseURL := strings.TrimRight(strings.TrimSpace(baseURL), "/") + if trimmedBaseURL == "" { + return modelsEndpoint + } + + if strings.HasPrefix(modelsEndpoint, "/") { + 
baseParsed, err := url.Parse(trimmedBaseURL) + if err == nil && baseParsed.Scheme != "" && baseParsed.Host != "" { + baseParsed.Path = modelsEndpoint + baseParsed.RawQuery = "" + baseParsed.Fragment = "" + return baseParsed.String() + } + return trimmedBaseURL + modelsEndpoint + } + + return trimmedBaseURL + "/" + strings.TrimLeft(modelsEndpoint, "/") +} + func isVersionSegment(segment string) bool { if len(segment) < 2 || segment[0] != 'v' { return false diff --git a/pkg/llmproxy/runtime/executor/openai_models_fetcher_test.go b/pkg/llmproxy/runtime/executor/openai_models_fetcher_test.go index b7957d1244..8b4e2ffb3f 100644 --- a/pkg/llmproxy/runtime/executor/openai_models_fetcher_test.go +++ b/pkg/llmproxy/runtime/executor/openai_models_fetcher_test.go @@ -35,6 +35,22 @@ func TestResolveOpenAIModelsURL(t *testing.T) { }, want: "https://custom.example.com/models", }, + { + name: "ModelsEndpointPathOverrideUsesBaseHost", + baseURL: "https://api.z.ai/api/coding/paas/v4", + attrs: map[string]string{ + "models_endpoint": "/api/coding/paas/v4/models", + }, + want: "https://api.z.ai/api/coding/paas/v4/models", + }, + { + name: "ModelsEndpointAbsoluteURLOverrideWins", + baseURL: "https://api.z.ai/api/coding/paas/v4", + attrs: map[string]string{ + "models_endpoint": "https://custom.example.com/models", + }, + want: "https://custom.example.com/models", + }, } for _, tc := range testCases { From 20bbbe479a1428a331eb44bcf1d741d072190590 Mon Sep 17 00:00:00 2001 From: Koosha Paridehpour Date: Sun, 22 Feb 2026 21:32:15 -0700 Subject: [PATCH 7/7] docs: add wave2 next32 merge mapping and validation report --- ...e-wave-gh-next32-merge-wave2-2026-02-23.md | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 docs/planning/reports/issue-wave-gh-next32-merge-wave2-2026-02-23.md diff --git a/docs/planning/reports/issue-wave-gh-next32-merge-wave2-2026-02-23.md b/docs/planning/reports/issue-wave-gh-next32-merge-wave2-2026-02-23.md new file mode 100644 index 
0000000000..2acd243997 --- /dev/null +++ b/docs/planning/reports/issue-wave-gh-next32-merge-wave2-2026-02-23.md @@ -0,0 +1,28 @@ +# Issue Wave GH Next32 Merge Report - Wave 2 (2026-02-23) + +## Scope +- Wave 2, one item per lane (6 lanes total). +- Base: `origin/main` @ `f7e56f05`. + +## Merged Commits +- `f1ab6855` - `fix(#253): support endpoint override for provider-pinned codex models` +- `05f894bf` - `fix(registry): enforce copilot context length 128K at registration (#241)` +- `947883cb` - `fix(kiro): handle banned account 403 payloads (#221)` +- `9fa8479d` - `fix(kiro): broaden cmd alias handling for command tools (#210)` +- `d921c09b` - `fix(#200): honor Gemini quota reset durations for cooldown` +- `a2571c90` - `fix(#179): honor openai-compat models-endpoint overrides` + +## Issue Mapping +- `#253` -> `f1ab6855` +- `#241` -> `05f894bf` +- `#221` -> `947883cb` +- `#210` -> `9fa8479d` +- `#200` -> `d921c09b` +- `#179` -> `a2571c90` + +## Validation +- `go test ./sdk/api/handlers/openai -run 'TestResolveEndpointOverride_' -count=1` +- `go test ./pkg/llmproxy/registry -run 'TestRegisterClient_NormalizesCopilotContextLength|TestGetGitHubCopilotModels' -count=1` +- `go test ./pkg/llmproxy/translator/kiro/claude -run 'TestDetectTruncation|TestBuildSoftFailureToolResult' -count=1` +- `go test pkg/llmproxy/executor/openai_models_fetcher.go pkg/llmproxy/executor/proxy_helpers.go pkg/llmproxy/executor/openai_models_fetcher_test.go -count=1` +- `go test pkg/llmproxy/runtime/executor/openai_models_fetcher.go pkg/llmproxy/runtime/executor/proxy_helpers.go pkg/llmproxy/runtime/executor/openai_models_fetcher_test.go -count=1`