From f1ab68554b2dec351904ebf10b06d28024f56630 Mon Sep 17 00:00:00 2001 From: Koosha Paridehpour Date: Sun, 22 Feb 2026 21:23:05 -0700 Subject: [PATCH 1/7] fix(#253): support endpoint override for provider-pinned codex models --- sdk/api/handlers/openai/endpoint_compat.go | 40 ++++++++++++++++- .../handlers/openai/endpoint_compat_test.go | 45 +++++++++++++++++++ 2 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 sdk/api/handlers/openai/endpoint_compat_test.go diff --git a/sdk/api/handlers/openai/endpoint_compat.go b/sdk/api/handlers/openai/endpoint_compat.go index 27cc94fa34..4801ff33e5 100644 --- a/sdk/api/handlers/openai/endpoint_compat.go +++ b/sdk/api/handlers/openai/endpoint_compat.go @@ -1,6 +1,11 @@ package openai -import "github.com/router-for-me/CLIProxyAPI/v6/pkg/llmproxy/registry" +import ( + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/pkg/llmproxy/registry" + "github.com/router-for-me/CLIProxyAPI/v6/pkg/llmproxy/thinking" +) const ( openAIChatEndpoint = "/chat/completions" @@ -8,10 +13,11 @@ const ( ) func resolveEndpointOverride(modelName, requestedEndpoint string) (string, bool) { + modelName = strings.TrimSpace(modelName) if modelName == "" { return "", false } - info := registry.GetGlobalRegistry().GetModelInfo(modelName, "") + info := lookupModelInfoForEndpointOverride(modelName) if info == nil || len(info.SupportedEndpoints) == 0 { return "", false } @@ -27,6 +33,36 @@ func resolveEndpointOverride(modelName, requestedEndpoint string) (string, bool) return "", false } +func lookupModelInfoForEndpointOverride(modelName string) *registry.ModelInfo { + if info := registry.GetGlobalRegistry().GetModelInfo(modelName, ""); info != nil { + return info + } + + baseModel := strings.TrimSpace(thinking.ParseSuffix(modelName).ModelName) + if baseModel != "" && baseModel != modelName { + if info := registry.GetGlobalRegistry().GetModelInfo(baseModel, ""); info != nil { + return info + } + } + + providerPinnedModel := modelName + if 
slash := strings.IndexByte(modelName, '/'); slash > 0 && slash+1 < len(modelName) { + providerPinnedModel = strings.TrimSpace(modelName[slash+1:]) + } + if providerPinnedModel != "" && providerPinnedModel != modelName { + if info := registry.GetGlobalRegistry().GetModelInfo(providerPinnedModel, ""); info != nil { + return info + } + if providerPinnedBase := strings.TrimSpace(thinking.ParseSuffix(providerPinnedModel).ModelName); providerPinnedBase != "" && providerPinnedBase != providerPinnedModel { + if info := registry.GetGlobalRegistry().GetModelInfo(providerPinnedBase, ""); info != nil { + return info + } + } + } + + return nil +} + func endpointListContains(items []string, value string) bool { for _, item := range items { if item == value { diff --git a/sdk/api/handlers/openai/endpoint_compat_test.go b/sdk/api/handlers/openai/endpoint_compat_test.go new file mode 100644 index 0000000000..823c9806bf --- /dev/null +++ b/sdk/api/handlers/openai/endpoint_compat_test.go @@ -0,0 +1,45 @@ +package openai + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/pkg/llmproxy/registry" +) + +func TestResolveEndpointOverride_UsesRegisteredResponsesOnlyModel(t *testing.T) { + clientID := "endpoint-compat-test-client-1" + registry.GetGlobalRegistry().RegisterClient(clientID, "codex", []*registry.ModelInfo{{ + ID: "gpt-5.1-codex", + SupportedEndpoints: []string{openAIResponsesEndpoint}, + }}) + t.Cleanup(func() { + registry.GetGlobalRegistry().UnregisterClient(clientID) + }) + + override, ok := resolveEndpointOverride("gpt-5.1-codex", openAIChatEndpoint) + if !ok { + t.Fatal("expected endpoint override") + } + if override != openAIResponsesEndpoint { + t.Fatalf("override = %q, want %q", override, openAIResponsesEndpoint) + } +} + +func TestResolveEndpointOverride_UsesProviderPinnedSuffixedModel(t *testing.T) { + clientID := "endpoint-compat-test-client-2" + registry.GetGlobalRegistry().RegisterClient(clientID, "codex", []*registry.ModelInfo{{ + ID: 
"gpt-5.1-codex", + SupportedEndpoints: []string{openAIResponsesEndpoint}, + }}) + t.Cleanup(func() { + registry.GetGlobalRegistry().UnregisterClient(clientID) + }) + + override, ok := resolveEndpointOverride("codex/gpt-5.1-codex(high)", openAIChatEndpoint) + if !ok { + t.Fatal("expected endpoint override for provider-pinned model with suffix") + } + if override != openAIResponsesEndpoint { + t.Fatalf("override = %q, want %q", override, openAIResponsesEndpoint) + } +} From 05f894bf9981bddb4d031769799cc4ec38b295ab Mon Sep 17 00:00:00 2001 From: Koosha Paridehpour Date: Sun, 22 Feb 2026 21:23:14 -0700 Subject: [PATCH 2/7] fix(registry): enforce copilot context length 128K at registration (#241) --- .../reports/issue-wave-gh-next32-lane-2.md | 10 +++++ pkg/llmproxy/registry/model_registry.go | 16 ++++++++ .../registry/model_registry_hook_test.go | 41 +++++++++++++++++++ 3 files changed, 67 insertions(+) diff --git a/docs/planning/reports/issue-wave-gh-next32-lane-2.md b/docs/planning/reports/issue-wave-gh-next32-lane-2.md index 87ae0840cd..63a97b9e28 100644 --- a/docs/planning/reports/issue-wave-gh-next32-lane-2.md +++ b/docs/planning/reports/issue-wave-gh-next32-lane-2.md @@ -55,3 +55,13 @@ Worktree: `cliproxyapi-plusplus-wave-cpb-2` ## Blockers - #160 blocked on missing deterministic reproduction fixture for duplicate-output stream bug in current repo state. 
+ +## Wave2 Lane 2 Entry - #241 + +- Issue: `#241` copilot context length should always be `128K` +- Status: `implemented` +- Mapping: + - normalization at runtime registration: `pkg/llmproxy/registry/model_registry.go` + - regression coverage: `pkg/llmproxy/registry/model_registry_hook_test.go` +- Tests: + - `go test ./pkg/llmproxy/registry -run 'TestRegisterClient_NormalizesCopilotContextLength|TestGetGitHubCopilotModels' -count=1` diff --git a/pkg/llmproxy/registry/model_registry.go b/pkg/llmproxy/registry/model_registry.go index 2509afd260..dd4b0b335c 100644 --- a/pkg/llmproxy/registry/model_registry.go +++ b/pkg/llmproxy/registry/model_registry.go @@ -211,6 +211,9 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [ defer r.mutex.Unlock() provider := strings.ToLower(clientProvider) + if provider == "github-copilot" { + models = normalizeCopilotContextLength(models) + } uniqueModelIDs := make([]string, 0, len(models)) rawModelIDs := make([]string, 0, len(models)) newModels := make(map[string]*ModelInfo, len(models)) @@ -414,6 +417,19 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [ misc.LogCredentialSeparator() } +func normalizeCopilotContextLength(models []*ModelInfo) []*ModelInfo { + normalized := make([]*ModelInfo, 0, len(models)) + for _, model := range models { + if model == nil { + continue + } + copyModel := cloneModelInfo(model) + copyModel.ContextLength = 128000 + normalized = append(normalized, copyModel) + } + return normalized +} + func (r *ModelRegistry) addModelRegistration(modelID, provider string, model *ModelInfo, now time.Time) { if model == nil || modelID == "" { return diff --git a/pkg/llmproxy/registry/model_registry_hook_test.go b/pkg/llmproxy/registry/model_registry_hook_test.go index 70226b9eaf..3e023d8f87 100644 --- a/pkg/llmproxy/registry/model_registry_hook_test.go +++ b/pkg/llmproxy/registry/model_registry_hook_test.go @@ -202,3 +202,44 @@ func 
TestModelRegistryHook_PanicDoesNotAffectRegistry(t *testing.T) { t.Fatal("timeout waiting for OnModelsUnregistered hook call") } } + +func TestRegisterClient_NormalizesCopilotContextLength(t *testing.T) { + r := newTestModelRegistry() + hook := &capturingHook{ + registeredCh: make(chan registeredCall, 1), + unregisteredCh: make(chan unregisteredCall, 1), + } + r.SetHook(hook) + + r.RegisterClient("client-copilot", "github-copilot", []*ModelInfo{ + {ID: "gpt-5", ContextLength: 200000}, + {ID: "gpt-5-mini", ContextLength: 1048576}, + }) + + select { + case call := <-hook.registeredCh: + for _, model := range call.models { + if model.ContextLength != 128000 { + t.Fatalf("hook model %q context_length=%d, want 128000", model.ID, model.ContextLength) + } + } + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for OnModelsRegistered hook call") + } + + registration, ok := r.models["gpt-5"] + if !ok || registration == nil || registration.Info == nil { + t.Fatal("expected gpt-5 registration info") + } + if registration.Info.ContextLength != 128000 { + t.Fatalf("registry info context_length=%d, want 128000", registration.Info.ContextLength) + } + + clientInfo, ok := r.clientModelInfos["client-copilot"]["gpt-5-mini"] + if !ok || clientInfo == nil { + t.Fatal("expected client model info for gpt-5-mini") + } + if clientInfo.ContextLength != 128000 { + t.Fatalf("client model info context_length=%d, want 128000", clientInfo.ContextLength) + } +} From 947883cb550da9611d2ebcf5ab2b94580207f3b4 Mon Sep 17 00:00:00 2001 From: Koosha Paridehpour Date: Sun, 22 Feb 2026 21:23:03 -0700 Subject: [PATCH 3/7] fix(kiro): handle banned account 403 payloads (#221) --- .../reports/issue-wave-gh-next32-lane-3.md | 12 +++++- .../runtime/executor/kiro_executor.go | 20 ++++++++-- .../executor/kiro_executor_extra_test.go | 40 +++++++++++++++++++ 3 files changed, 67 insertions(+), 5 deletions(-) diff --git a/docs/planning/reports/issue-wave-gh-next32-lane-3.md 
b/docs/planning/reports/issue-wave-gh-next32-lane-3.md index 6292f73b40..180fdb8607 100644 --- a/docs/planning/reports/issue-wave-gh-next32-lane-3.md +++ b/docs/planning/reports/issue-wave-gh-next32-lane-3.md @@ -29,6 +29,17 @@ Worktree: `cliproxyapi-plusplus-wave-cpb-3` - Status: `pending` - Notes: lane-started +### Wave2 #221 - `kiro账号被封` +- Status: `implemented` +- Source mapping: + - Source issue: `router-for-me/CLIProxyAPIPlus#221` (Kiro account banned handling) + - Fix: broaden Kiro 403 suspension detection to case-insensitive suspended/banned signals so banned accounts consistently trigger cooldown + remediation messaging in both non-stream and stream paths. + - Code: `pkg/llmproxy/runtime/executor/kiro_executor.go` + - Tests: `pkg/llmproxy/runtime/executor/kiro_executor_extra_test.go` +- Test commands: + - `go test ./pkg/llmproxy/runtime/executor -run 'Test(IsKiroSuspendedOrBannedResponse|FormatKiroCooldownError|FormatKiroSuspendedStatusMessage)' -count=1` + - Result: blocked by pre-existing package build failures in `pkg/llmproxy/runtime/executor/codex_websockets_executor.go` (`unused imports`, `undefined: authID`, `undefined: wsURL`). + ## Focused Checks - `task quality:fmt:check` @@ -37,4 +48,3 @@ Worktree: `cliproxyapi-plusplus-wave-cpb-3` ## Blockers - None recorded yet; work is in planning state. 
- diff --git a/pkg/llmproxy/runtime/executor/kiro_executor.go b/pkg/llmproxy/runtime/executor/kiro_executor.go index ee62cedf67..c3215a29d0 100644 --- a/pkg/llmproxy/runtime/executor/kiro_executor.go +++ b/pkg/llmproxy/runtime/executor/kiro_executor.go @@ -638,6 +638,18 @@ func formatKiroSuspendedStatusMessage(respBody []byte) string { return "account suspended by upstream Kiro endpoint: " + string(respBody) + "; re-auth this Kiro entry or use another auth index" } +func isKiroSuspendedOrBannedResponse(respBody string) bool { + if strings.TrimSpace(respBody) == "" { + return false + } + lowerBody := strings.ToLower(respBody) + return strings.Contains(lowerBody, "temporarily_suspended") || + strings.Contains(lowerBody, "suspended") || + strings.Contains(lowerBody, "account_banned") || + strings.Contains(lowerBody, "account banned") || + strings.Contains(lowerBody, "banned") +} + // Execute sends the request to Kiro API and returns the response. // Supports automatic token refresh on 401/403 errors. func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { @@ -945,8 +957,8 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth. 
respBodyStr := string(respBody) - // Check for SUSPENDED status - return immediately without retry - if strings.Contains(respBodyStr, "SUSPENDED") || strings.Contains(respBodyStr, "TEMPORARILY_SUSPENDED") { + // Check for suspended/banned status - return immediately without retry + if isKiroSuspendedOrBannedResponse(respBodyStr) { // Set long cooldown for suspended accounts rateLimiter.CheckAndMarkSuspended(tokenKey, respBodyStr) cooldownMgr.SetCooldown(tokenKey, kiroauth.LongCooldown, kiroauth.CooldownReasonSuspended) @@ -1381,8 +1393,8 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox respBodyStr := string(respBody) - // Check for SUSPENDED status - return immediately without retry - if strings.Contains(respBodyStr, "SUSPENDED") || strings.Contains(respBodyStr, "TEMPORARILY_SUSPENDED") { + // Check for suspended/banned status - return immediately without retry + if isKiroSuspendedOrBannedResponse(respBodyStr) { // Set long cooldown for suspended accounts rateLimiter.CheckAndMarkSuspended(tokenKey, respBodyStr) cooldownMgr.SetCooldown(tokenKey, kiroauth.LongCooldown, kiroauth.CooldownReasonSuspended) diff --git a/pkg/llmproxy/runtime/executor/kiro_executor_extra_test.go b/pkg/llmproxy/runtime/executor/kiro_executor_extra_test.go index 60afeaa545..0efae05df4 100644 --- a/pkg/llmproxy/runtime/executor/kiro_executor_extra_test.go +++ b/pkg/llmproxy/runtime/executor/kiro_executor_extra_test.go @@ -105,3 +105,43 @@ func TestFormatKiroSuspendedStatusMessage(t *testing.T) { t.Fatalf("expected remediation text in message, got %q", msg) } } + +func TestIsKiroSuspendedOrBannedResponse(t *testing.T) { + tests := []struct { + name string + body string + want bool + }{ + { + name: "uppercase suspended token", + body: `{"status":"SUSPENDED"}`, + want: true, + }, + { + name: "lowercase banned sentence", + body: `{"message":"account banned due to abuse checks"}`, + want: true, + }, + { + name: "temporary suspended lowercase key", + body: 
`{"status":"temporarily_suspended"}`, + want: true, + }, + { + name: "token expired should not count as banned", + body: `{"error":"token expired"}`, + want: false, + }, + { + name: "empty body", + body: ` `, + want: false, + }, + } + + for _, tt := range tests { + if got := isKiroSuspendedOrBannedResponse(tt.body); got != tt.want { + t.Fatalf("%s: isKiroSuspendedOrBannedResponse(%q) = %v, want %v", tt.name, tt.body, got, tt.want) + } + } +} From 9fa8479d482f1cc32762f214420d868b1b51cbc9 Mon Sep 17 00:00:00 2001 From: Koosha Paridehpour Date: Sun, 22 Feb 2026 21:26:28 -0700 Subject: [PATCH 4/7] fix(kiro): broaden cmd alias handling for command tools (#210) --- .../reports/issue-wave-gh-next32-lane-4.md | 13 ++++++++++++ .../kiro/claude/truncation_detector.go | 4 ++-- .../kiro/claude/truncation_detector_test.go | 21 +++++++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/docs/planning/reports/issue-wave-gh-next32-lane-4.md b/docs/planning/reports/issue-wave-gh-next32-lane-4.md index 14117c9043..5d1dce3175 100644 --- a/docs/planning/reports/issue-wave-gh-next32-lane-4.md +++ b/docs/planning/reports/issue-wave-gh-next32-lane-4.md @@ -34,3 +34,16 @@ Worktree: `cliproxyapi-plusplus-wave-cpb-4` - None recorded yet; work is in planning state. +## Wave2 Updates + +### Wave2 Lane 4 - Issue #210 +- Issue: `#210` Kiro/Ampcode Bash tool parameter incompatibility +- Mapping: + - `pkg/llmproxy/translator/kiro/claude/truncation_detector.go` + - `pkg/llmproxy/translator/kiro/claude/truncation_detector_test.go` +- Change: + - Extended command-parameter alias compatibility so `execute` and `run_command` accept `cmd` in addition to `command`, matching existing Bash alias handling and preventing false truncation loops. 
+- Tests: + - `go test ./pkg/llmproxy/translator/kiro/claude -run 'TestDetectTruncation|TestBuildSoftFailureToolResult'` +- Quality gate: + - `task quality` failed due to pre-existing syntax errors in `pkg/llmproxy/executor/kiro_executor.go` (`expected '(' found kiroModelFingerprint`), unrelated to this issue scope. diff --git a/pkg/llmproxy/translator/kiro/claude/truncation_detector.go b/pkg/llmproxy/translator/kiro/claude/truncation_detector.go index e245e68bea..e0a1c133f9 100644 --- a/pkg/llmproxy/translator/kiro/claude/truncation_detector.go +++ b/pkg/llmproxy/translator/kiro/claude/truncation_detector.go @@ -66,8 +66,8 @@ var RequiredFieldsByTool = map[string][]string{ // Ampcode-compatible Bash tool uses "cmd", while other clients commonly use "command". // Accept either key to avoid false truncation detection loops. "Bash": {"command", "cmd"}, - "execute": {"command"}, - "run_command": {"command"}, + "execute": {"command", "cmd"}, + "run_command": {"command", "cmd"}, } // DetectTruncation checks if the tool use input appears to be truncated. diff --git a/pkg/llmproxy/translator/kiro/claude/truncation_detector_test.go b/pkg/llmproxy/translator/kiro/claude/truncation_detector_test.go index 7e60600612..f4f36275fa 100644 --- a/pkg/llmproxy/translator/kiro/claude/truncation_detector_test.go +++ b/pkg/llmproxy/translator/kiro/claude/truncation_detector_test.go @@ -51,6 +51,27 @@ func TestDetectTruncation(t *testing.T) { if info6.IsTruncated { t.Errorf("expected no truncation for Bash cmd alias, got %v", info6) } + + // 7. execute cmd alias compatibility + parsed7 := map[string]interface{}{"cmd": "ls -la"} + info7 := DetectTruncation("execute", "c3", `{"cmd":"ls -la"}`, parsed7) + if info7.IsTruncated { + t.Errorf("expected no truncation for execute cmd alias, got %v", info7) + } + + // 8. 
run_command cmd alias compatibility + parsed8 := map[string]interface{}{"cmd": "pwd"} + info8 := DetectTruncation("run_command", "c4", `{"cmd":"pwd"}`, parsed8) + if info8.IsTruncated { + t.Errorf("expected no truncation for run_command cmd alias, got %v", info8) + } + + // 9. command tool still truncates when both command aliases are missing + parsed9 := map[string]interface{}{"path": "/tmp"} + info9 := DetectTruncation("execute", "c5", `{"path":"/tmp"}`, parsed9) + if !info9.IsTruncated || info9.TruncationType != TruncationTypeMissingFields { + t.Errorf("expected missing_fields truncation when command aliases are absent, got %v", info9) + } } func TestBuildSoftFailureToolResult(t *testing.T) { From d921c09bfd6cd56bb31d779d8586183359890ac8 Mon Sep 17 00:00:00 2001 From: Koosha Paridehpour Date: Sun, 22 Feb 2026 21:25:56 -0700 Subject: [PATCH 5/7] fix(#200): honor Gemini quota reset durations for cooldown --- .../reports/issue-wave-gh-next32-lane-5.md | 14 ++++- pkg/llmproxy/executor/gemini_cli_executor.go | 9 ++-- .../gemini_cli_executor_retry_delay_test.go | 54 +++++++++++++++++++ .../runtime/executor/gemini_cli_executor.go | 9 ++-- .../gemini_cli_executor_retry_delay_test.go | 54 +++++++++++++++++++ 5 files changed, 129 insertions(+), 11 deletions(-) create mode 100644 pkg/llmproxy/executor/gemini_cli_executor_retry_delay_test.go create mode 100644 pkg/llmproxy/runtime/executor/gemini_cli_executor_retry_delay_test.go diff --git a/docs/planning/reports/issue-wave-gh-next32-lane-5.md b/docs/planning/reports/issue-wave-gh-next32-lane-5.md index 397906e2f2..cfa575fc90 100644 --- a/docs/planning/reports/issue-wave-gh-next32-lane-5.md +++ b/docs/planning/reports/issue-wave-gh-next32-lane-5.md @@ -30,7 +30,19 @@ Worktree: `cliproxyapi-plusplus-wave-cpb-5` - `task quality:fmt:check` - `QUALITY_PACKAGES='./pkg/llmproxy/api ./sdk/api/handlers/openai' task quality:quick` +## Wave2 Execution Entry + +### #200 +- Status: `done` +- Mapping: `router-for-me/CLIProxyAPIPlus 
issue#200` -> `CP2K-0020` -> Gemini quota auto disable/enable timing now honors fractional/unit retry hints from upstream quota messages. +- Code: + - `pkg/llmproxy/executor/gemini_cli_executor.go` + - `pkg/llmproxy/runtime/executor/gemini_cli_executor.go` +- Tests: + - `pkg/llmproxy/executor/gemini_cli_executor_retry_delay_test.go` + - `pkg/llmproxy/runtime/executor/gemini_cli_executor_retry_delay_test.go` + - `go test ./pkg/llmproxy/executor ./pkg/llmproxy/runtime/executor -run 'TestParseRetryDelay_(MessageDuration|MessageMilliseconds|PrefersRetryInfo)$'` + ## Blockers - None recorded yet; work is in planning state. - diff --git a/pkg/llmproxy/executor/gemini_cli_executor.go b/pkg/llmproxy/executor/gemini_cli_executor.go index 214373bbfc..c8421412ab 100644 --- a/pkg/llmproxy/executor/gemini_cli_executor.go +++ b/pkg/llmproxy/executor/gemini_cli_executor.go @@ -13,7 +13,6 @@ import ( "math/rand" "net/http" "regexp" - "strconv" "strings" "time" @@ -937,14 +936,14 @@ func parseRetryDelay(errorBody []byte) (*time.Duration, error) { } } - // Fallback: parse from error.message "Your quota will reset after Xs." 
+ // Fallback: parse from error.message (supports units like ms/s/m/h with optional decimals) message := gjson.GetBytes(errorBody, "error.message").String() if message != "" { - re := regexp.MustCompile(`after\s+(\d+)s\.?`) + re := regexp.MustCompile(`after\s+([0-9]+(?:\.[0-9]+)?(?:ms|s|m|h))\.?`) if matches := re.FindStringSubmatch(message); len(matches) > 1 { - seconds, err := strconv.Atoi(matches[1]) + duration, err := time.ParseDuration(matches[1]) if err == nil { - return new(time.Duration(seconds) * time.Second), nil + return &duration, nil } } } diff --git a/pkg/llmproxy/executor/gemini_cli_executor_retry_delay_test.go b/pkg/llmproxy/executor/gemini_cli_executor_retry_delay_test.go new file mode 100644 index 0000000000..f26c5a95e1 --- /dev/null +++ b/pkg/llmproxy/executor/gemini_cli_executor_retry_delay_test.go @@ -0,0 +1,54 @@ +package executor + +import ( + "testing" + "time" +) + +func TestParseRetryDelay_MessageDuration(t *testing.T) { + t.Parallel() + + body := []byte(`{"error":{"message":"Quota exceeded. 
Your quota will reset after 1.5s."}}`) + got, err := parseRetryDelay(body) + if err != nil { + t.Fatalf("parseRetryDelay returned error: %v", err) + } + if got == nil { + t.Fatal("parseRetryDelay returned nil duration") + } + if *got != 1500*time.Millisecond { + t.Fatalf("parseRetryDelay = %v, want %v", *got, 1500*time.Millisecond) + } +} + +func TestParseRetryDelay_MessageMilliseconds(t *testing.T) { + t.Parallel() + + body := []byte(`{"error":{"message":"Please retry after 250ms."}}`) + got, err := parseRetryDelay(body) + if err != nil { + t.Fatalf("parseRetryDelay returned error: %v", err) + } + if got == nil { + t.Fatal("parseRetryDelay returned nil duration") + } + if *got != 250*time.Millisecond { + t.Fatalf("parseRetryDelay = %v, want %v", *got, 250*time.Millisecond) + } +} + +func TestParseRetryDelay_PrefersRetryInfo(t *testing.T) { + t.Parallel() + + body := []byte(`{"error":{"message":"Your quota will reset after 99s.","details":[{"@type":"type.googleapis.com/google.rpc.RetryInfo","retryDelay":"2s"}]}}`) + got, err := parseRetryDelay(body) + if err != nil { + t.Fatalf("parseRetryDelay returned error: %v", err) + } + if got == nil { + t.Fatal("parseRetryDelay returned nil duration") + } + if *got != 2*time.Second { + t.Fatalf("parseRetryDelay = %v, want %v", *got, 2*time.Second) + } +} diff --git a/pkg/llmproxy/runtime/executor/gemini_cli_executor.go b/pkg/llmproxy/runtime/executor/gemini_cli_executor.go index fec47dddb9..47538569ee 100644 --- a/pkg/llmproxy/runtime/executor/gemini_cli_executor.go +++ b/pkg/llmproxy/runtime/executor/gemini_cli_executor.go @@ -13,7 +13,6 @@ import ( "math/rand" "net/http" "regexp" - "strconv" "strings" "time" @@ -937,14 +936,14 @@ func parseRetryDelay(errorBody []byte) (*time.Duration, error) { } } - // Fallback: parse from error.message "Your quota will reset after Xs." 
+ // Fallback: parse from error.message (supports units like ms/s/m/h with optional decimals) message := gjson.GetBytes(errorBody, "error.message").String() if message != "" { - re := regexp.MustCompile(`after\s+(\d+)s\.?`) + re := regexp.MustCompile(`after\s+([0-9]+(?:\.[0-9]+)?(?:ms|s|m|h))\.?`) if matches := re.FindStringSubmatch(message); len(matches) > 1 { - seconds, err := strconv.Atoi(matches[1]) + duration, err := time.ParseDuration(matches[1]) if err == nil { - return new(time.Duration(seconds) * time.Second), nil + return &duration, nil } } } diff --git a/pkg/llmproxy/runtime/executor/gemini_cli_executor_retry_delay_test.go b/pkg/llmproxy/runtime/executor/gemini_cli_executor_retry_delay_test.go new file mode 100644 index 0000000000..f26c5a95e1 --- /dev/null +++ b/pkg/llmproxy/runtime/executor/gemini_cli_executor_retry_delay_test.go @@ -0,0 +1,54 @@ +package executor + +import ( + "testing" + "time" +) + +func TestParseRetryDelay_MessageDuration(t *testing.T) { + t.Parallel() + + body := []byte(`{"error":{"message":"Quota exceeded. 
Your quota will reset after 1.5s."}}`) + got, err := parseRetryDelay(body) + if err != nil { + t.Fatalf("parseRetryDelay returned error: %v", err) + } + if got == nil { + t.Fatal("parseRetryDelay returned nil duration") + } + if *got != 1500*time.Millisecond { + t.Fatalf("parseRetryDelay = %v, want %v", *got, 1500*time.Millisecond) + } +} + +func TestParseRetryDelay_MessageMilliseconds(t *testing.T) { + t.Parallel() + + body := []byte(`{"error":{"message":"Please retry after 250ms."}}`) + got, err := parseRetryDelay(body) + if err != nil { + t.Fatalf("parseRetryDelay returned error: %v", err) + } + if got == nil { + t.Fatal("parseRetryDelay returned nil duration") + } + if *got != 250*time.Millisecond { + t.Fatalf("parseRetryDelay = %v, want %v", *got, 250*time.Millisecond) + } +} + +func TestParseRetryDelay_PrefersRetryInfo(t *testing.T) { + t.Parallel() + + body := []byte(`{"error":{"message":"Your quota will reset after 99s.","details":[{"@type":"type.googleapis.com/google.rpc.RetryInfo","retryDelay":"2s"}]}}`) + got, err := parseRetryDelay(body) + if err != nil { + t.Fatalf("parseRetryDelay returned error: %v", err) + } + if got == nil { + t.Fatal("parseRetryDelay returned nil duration") + } + if *got != 2*time.Second { + t.Fatalf("parseRetryDelay = %v, want %v", *got, 2*time.Second) + } +} From a2571c90fe001a8d8b1f75c07d4049ad3820d7a1 Mon Sep 17 00:00:00 2001 From: Koosha Paridehpour Date: Sun, 22 Feb 2026 21:26:03 -0700 Subject: [PATCH 6/7] fix(#179): honor openai-compat models-endpoint overrides --- .../reports/issue-wave-gh-next32-lane-6.md | 20 ++++++++++++ .../executor/openai_models_fetcher.go | 31 +++++++++++++++++++ .../executor/openai_models_fetcher_test.go | 16 ++++++++++ .../runtime/executor/openai_models_fetcher.go | 31 +++++++++++++++++++ .../executor/openai_models_fetcher_test.go | 16 ++++++++++ 5 files changed, 114 insertions(+) diff --git a/docs/planning/reports/issue-wave-gh-next32-lane-6.md 
b/docs/planning/reports/issue-wave-gh-next32-lane-6.md index a46a1a9372..7e4d67337a 100644 --- a/docs/planning/reports/issue-wave-gh-next32-lane-6.md +++ b/docs/planning/reports/issue-wave-gh-next32-lane-6.md @@ -34,3 +34,23 @@ Worktree: `cliproxyapi-plusplus-wave-cpb-6` - None recorded yet; work is in planning state. +## Wave2 Entries + +### 2026-02-23 - #179 OpenAI-MLX/vLLM-MLX support +- Status: `done` +- Mapping: + - Source issue: `router-for-me/CLIProxyAPIPlus#179` + - Implemented fix: OpenAI-compatible model discovery now honors `models_endpoint` auth attribute (emitted from `models-endpoint` config), including absolute URL and absolute path overrides. + - Why this is low risk: fallback/default `/v1/models` behavior is unchanged; only explicit override handling is added. +- Files: + - `pkg/llmproxy/executor/openai_models_fetcher.go` + - `pkg/llmproxy/executor/openai_models_fetcher_test.go` + - `pkg/llmproxy/runtime/executor/openai_models_fetcher.go` + - `pkg/llmproxy/runtime/executor/openai_models_fetcher_test.go` +- Tests: + - `go test pkg/llmproxy/executor/openai_models_fetcher.go pkg/llmproxy/executor/proxy_helpers.go pkg/llmproxy/executor/openai_models_fetcher_test.go` + - `go test pkg/llmproxy/runtime/executor/openai_models_fetcher.go pkg/llmproxy/runtime/executor/proxy_helpers.go pkg/llmproxy/runtime/executor/openai_models_fetcher_test.go` +- Verification notes: + - Added regression coverage for `models_endpoint` path override and absolute URL override in both mirrored executor test suites. +- Blockers: + - Package-level `go test ./pkg/llmproxy/executor` and `go test ./pkg/llmproxy/runtime/executor` are currently blocked by unrelated compile errors in existing lane files (`kiro_executor.go`, `codex_websockets_executor.go`). 
diff --git a/pkg/llmproxy/executor/openai_models_fetcher.go b/pkg/llmproxy/executor/openai_models_fetcher.go index 7ddf0d826d..48b62d7a4b 100644 --- a/pkg/llmproxy/executor/openai_models_fetcher.go +++ b/pkg/llmproxy/executor/openai_models_fetcher.go @@ -111,6 +111,9 @@ func resolveOpenAIModelsURL(baseURL string, attrs map[string]string) string { if modelsURL := strings.TrimSpace(attrs["models_url"]); modelsURL != "" { return modelsURL } + if modelsEndpoint := strings.TrimSpace(attrs["models_endpoint"]); modelsEndpoint != "" { + return resolveOpenAIModelsEndpointURL(baseURL, modelsEndpoint) + } } trimmedBaseURL := strings.TrimRight(strings.TrimSpace(baseURL), "/") @@ -134,6 +137,34 @@ func resolveOpenAIModelsURL(baseURL string, attrs map[string]string) string { return trimmedBaseURL + "/v1/models" } +func resolveOpenAIModelsEndpointURL(baseURL, modelsEndpoint string) string { + modelsEndpoint = strings.TrimSpace(modelsEndpoint) + if modelsEndpoint == "" { + return "" + } + if parsed, err := url.Parse(modelsEndpoint); err == nil && parsed.IsAbs() { + return modelsEndpoint + } + + trimmedBaseURL := strings.TrimRight(strings.TrimSpace(baseURL), "/") + if trimmedBaseURL == "" { + return modelsEndpoint + } + + if strings.HasPrefix(modelsEndpoint, "/") { + baseParsed, err := url.Parse(trimmedBaseURL) + if err == nil && baseParsed.Scheme != "" && baseParsed.Host != "" { + baseParsed.Path = modelsEndpoint + baseParsed.RawQuery = "" + baseParsed.Fragment = "" + return baseParsed.String() + } + return trimmedBaseURL + modelsEndpoint + } + + return trimmedBaseURL + "/" + strings.TrimLeft(modelsEndpoint, "/") +} + func isVersionSegment(segment string) bool { if len(segment) < 2 || segment[0] != 'v' { return false diff --git a/pkg/llmproxy/executor/openai_models_fetcher_test.go b/pkg/llmproxy/executor/openai_models_fetcher_test.go index b7957d1244..8b4e2ffb3f 100644 --- a/pkg/llmproxy/executor/openai_models_fetcher_test.go +++ 
b/pkg/llmproxy/executor/openai_models_fetcher_test.go @@ -35,6 +35,22 @@ func TestResolveOpenAIModelsURL(t *testing.T) { }, want: "https://custom.example.com/models", }, + { + name: "ModelsEndpointPathOverrideUsesBaseHost", + baseURL: "https://api.z.ai/api/coding/paas/v4", + attrs: map[string]string{ + "models_endpoint": "/api/coding/paas/v4/models", + }, + want: "https://api.z.ai/api/coding/paas/v4/models", + }, + { + name: "ModelsEndpointAbsoluteURLOverrideWins", + baseURL: "https://api.z.ai/api/coding/paas/v4", + attrs: map[string]string{ + "models_endpoint": "https://custom.example.com/models", + }, + want: "https://custom.example.com/models", + }, } for _, tc := range testCases { diff --git a/pkg/llmproxy/runtime/executor/openai_models_fetcher.go b/pkg/llmproxy/runtime/executor/openai_models_fetcher.go index 7ddf0d826d..48b62d7a4b 100644 --- a/pkg/llmproxy/runtime/executor/openai_models_fetcher.go +++ b/pkg/llmproxy/runtime/executor/openai_models_fetcher.go @@ -111,6 +111,9 @@ func resolveOpenAIModelsURL(baseURL string, attrs map[string]string) string { if modelsURL := strings.TrimSpace(attrs["models_url"]); modelsURL != "" { return modelsURL } + if modelsEndpoint := strings.TrimSpace(attrs["models_endpoint"]); modelsEndpoint != "" { + return resolveOpenAIModelsEndpointURL(baseURL, modelsEndpoint) + } } trimmedBaseURL := strings.TrimRight(strings.TrimSpace(baseURL), "/") @@ -134,6 +137,34 @@ func resolveOpenAIModelsURL(baseURL string, attrs map[string]string) string { return trimmedBaseURL + "/v1/models" } +func resolveOpenAIModelsEndpointURL(baseURL, modelsEndpoint string) string { + modelsEndpoint = strings.TrimSpace(modelsEndpoint) + if modelsEndpoint == "" { + return "" + } + if parsed, err := url.Parse(modelsEndpoint); err == nil && parsed.IsAbs() { + return modelsEndpoint + } + + trimmedBaseURL := strings.TrimRight(strings.TrimSpace(baseURL), "/") + if trimmedBaseURL == "" { + return modelsEndpoint + } + + if strings.HasPrefix(modelsEndpoint, "/") { + 
baseParsed, err := url.Parse(trimmedBaseURL) + if err == nil && baseParsed.Scheme != "" && baseParsed.Host != "" { + baseParsed.Path = modelsEndpoint + baseParsed.RawQuery = "" + baseParsed.Fragment = "" + return baseParsed.String() + } + return trimmedBaseURL + modelsEndpoint + } + + return trimmedBaseURL + "/" + strings.TrimLeft(modelsEndpoint, "/") +} + func isVersionSegment(segment string) bool { if len(segment) < 2 || segment[0] != 'v' { return false diff --git a/pkg/llmproxy/runtime/executor/openai_models_fetcher_test.go b/pkg/llmproxy/runtime/executor/openai_models_fetcher_test.go index b7957d1244..8b4e2ffb3f 100644 --- a/pkg/llmproxy/runtime/executor/openai_models_fetcher_test.go +++ b/pkg/llmproxy/runtime/executor/openai_models_fetcher_test.go @@ -35,6 +35,22 @@ func TestResolveOpenAIModelsURL(t *testing.T) { }, want: "https://custom.example.com/models", }, + { + name: "ModelsEndpointPathOverrideUsesBaseHost", + baseURL: "https://api.z.ai/api/coding/paas/v4", + attrs: map[string]string{ + "models_endpoint": "/api/coding/paas/v4/models", + }, + want: "https://api.z.ai/api/coding/paas/v4/models", + }, + { + name: "ModelsEndpointAbsoluteURLOverrideWins", + baseURL: "https://api.z.ai/api/coding/paas/v4", + attrs: map[string]string{ + "models_endpoint": "https://custom.example.com/models", + }, + want: "https://custom.example.com/models", + }, } for _, tc := range testCases { From 20bbbe479a1428a331eb44bcf1d741d072190590 Mon Sep 17 00:00:00 2001 From: Koosha Paridehpour Date: Sun, 22 Feb 2026 21:32:15 -0700 Subject: [PATCH 7/7] docs: add wave2 next32 merge mapping and validation report --- ...e-wave-gh-next32-merge-wave2-2026-02-23.md | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 docs/planning/reports/issue-wave-gh-next32-merge-wave2-2026-02-23.md diff --git a/docs/planning/reports/issue-wave-gh-next32-merge-wave2-2026-02-23.md b/docs/planning/reports/issue-wave-gh-next32-merge-wave2-2026-02-23.md new file mode 100644 index 
0000000000..2acd243997 --- /dev/null +++ b/docs/planning/reports/issue-wave-gh-next32-merge-wave2-2026-02-23.md @@ -0,0 +1,28 @@ +# Issue Wave GH Next32 Merge Report - Wave 2 (2026-02-23) + +## Scope +- Wave 2, one item per lane (6 lanes total). +- Base: `origin/main` @ `f7e56f05`. + +## Merged Commits +- `f1ab6855` - `fix(#253): support endpoint override for provider-pinned codex models` +- `05f894bf` - `fix(registry): enforce copilot context length 128K at registration (#241)` +- `947883cb` - `fix(kiro): handle banned account 403 payloads (#221)` +- `9fa8479d` - `fix(kiro): broaden cmd alias handling for command tools (#210)` +- `d921c09b` - `fix(#200): honor Gemini quota reset durations for cooldown` +- `a2571c90` - `fix(#179): honor openai-compat models-endpoint overrides` + +## Issue Mapping +- `#253` -> `f1ab6855` +- `#241` -> `05f894bf` +- `#221` -> `947883cb` +- `#210` -> `9fa8479d` +- `#200` -> `d921c09b` +- `#179` -> `a2571c90` + +## Validation +- `go test ./sdk/api/handlers/openai -run 'TestResolveEndpointOverride_' -count=1` +- `go test ./pkg/llmproxy/registry -run 'TestRegisterClient_NormalizesCopilotContextLength|TestGetGitHubCopilotModels' -count=1` +- `go test ./pkg/llmproxy/translator/kiro/claude -run 'TestDetectTruncation|TestBuildSoftFailureToolResult' -count=1` +- `go test pkg/llmproxy/executor/openai_models_fetcher.go pkg/llmproxy/executor/proxy_helpers.go pkg/llmproxy/executor/openai_models_fetcher_test.go -count=1` +- `go test pkg/llmproxy/runtime/executor/openai_models_fetcher.go pkg/llmproxy/runtime/executor/proxy_helpers.go pkg/llmproxy/runtime/executor/openai_models_fetcher_test.go -count=1`