From 46fe3316fb5739f22395eb23563439d4ae0e68da Mon Sep 17 00:00:00 2001 From: Emanuel Ehmki Date: Mon, 30 Mar 2026 18:09:48 +0200 Subject: [PATCH] feat(provider): automatic model fallback on transient errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add ProviderFallback module for configurable model fallback - Integrate fallback middleware into LLM stream pipeline - shouldFallback() handles 429, 500, 502, 503, transient 403 - Copilot-specific: reauth guidance for 403, transient retryability - Config: fallback mapping (providerID/modelID → fallback target) --- packages/opencode/src/config/config.ts | 6 + packages/opencode/src/provider/error.ts | 27 +- packages/opencode/src/provider/fallback.ts | 151 +++++++++++ packages/opencode/src/session/llm.ts | 15 ++ .../opencode/test/provider/fallback.test.ts | 252 ++++++++++++++++++ 5 files changed, 449 insertions(+), 2 deletions(-) create mode 100644 packages/opencode/src/provider/fallback.ts create mode 100644 packages/opencode/test/provider/fallback.test.ts diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index f86d8d32af60..bc71d6a7f1bc 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -1055,6 +1055,12 @@ export namespace Config { .describe("Token buffer for compaction. Leaves enough window to avoid overflow during compaction."), }) .optional(), + fallback: z + .record(z.string(), z.string()) + .optional() + .describe( + 'Provider fallback map. Key is source provider ID, value is target provider ID. E.g. 
{ "github-copilot": "amazon-bedrock" }', + ), experimental: z .object({ disable_paste_summary: z.boolean().optional(), diff --git a/packages/opencode/src/provider/error.ts b/packages/opencode/src/provider/error.ts index 52e525177a5b..2d84cf9f6dd8 100644 --- a/packages/opencode/src/provider/error.ts +++ b/packages/opencode/src/provider/error.ts @@ -35,6 +35,16 @@ export namespace ProviderError { return status === 404 || e.isRetryable } + // Copilot gateway returns bare text 400s for transient issues. + // These are gateway-level rejections, not model errors, and should be retried. + // 403s are also transient — the copilot gateway sometimes returns 403 for + // rate/capacity reasons that resolve on retry or fallback. + function isCopilotErrorRetryable(e: APICallError) { + if (e.statusCode === 403) return true + if (e.statusCode === 400 && e.responseBody && !json(e.responseBody)) return true + return e.isRetryable ?? false + } + // Providers not reliably handled in this function: // - z.ai: can accept overflow silently (needs token-count/context-window checks) function isOverflow(message: string) { @@ -47,8 +57,12 @@ export namespace ProviderError { } function message(providerID: ProviderID, e: APICallError) { + if (providerID.includes("github-copilot") && e.statusCode === 403) { + return "Please reauthenticate with the copilot provider to ensure your credentials work properly with OpenCode." + } + return iife(() => { - const msg = e.message + const msg = e.message ?? "" if (msg === "") { if (e.responseBody) return e.responseBody if (e.statusCode) { @@ -62,6 +76,13 @@ export namespace ProviderError { return msg } + // Avoid tautological "X: X" when response body is just the status text + const text = e.responseBody.trim() + if (e.statusCode && text.toLowerCase() === (STATUS_CODES[e.statusCode] ?? "").toLowerCase()) { + const provider = providerID.split("/")[0] ?? providerID + return `${provider} rejected the request (HTTP ${e.statusCode}). 
This may indicate context overflow, an unsupported request, or a gateway-level rejection.` + } + try { const body = JSON.parse(e.responseBody) // try to extract common error message fields @@ -188,7 +209,9 @@ export namespace ProviderError { statusCode: input.error.statusCode, isRetryable: input.providerID.startsWith("openai") ? isOpenAiErrorRetryable(input.error) - : input.error.isRetryable, + : input.providerID.includes("github-copilot") + ? isCopilotErrorRetryable(input.error) + : input.error.isRetryable, responseHeaders: input.error.responseHeaders, responseBody: input.error.responseBody, metadata, diff --git a/packages/opencode/src/provider/fallback.ts b/packages/opencode/src/provider/fallback.ts new file mode 100644 index 000000000000..7550d2f563d7 --- /dev/null +++ b/packages/opencode/src/provider/fallback.ts @@ -0,0 +1,151 @@ +import { APICallError, type LanguageModelMiddleware } from "ai" +import type { LanguageModelV3 } from "@ai-sdk/provider" +import { Log } from "@/util/log" +import { Bus } from "@/bus" +import { TuiEvent } from "@/cli/cmd/tui/event" + +// Copilot → Bedrock model ID mapping for provider fallback. +// Bedrock inference-profile IDs use the us. cross-region prefix. +// Both us. and global. prefixes are ACTIVE per list-inference-profiles +// and confirmed working via direct invoke-model testing (2026-03-08). +export namespace ProviderFallback { + const log = Log.create({ service: "fallback" }) + + // sourceProvider → sourceModel → targetModel (just the model ID, no provider prefix) + const models: Record<string, Record<string, string>> = { + "github-copilot": { + "claude-sonnet-4.6": "us.anthropic.claude-sonnet-4-6", + "claude-opus-4.6": "us.anthropic.claude-opus-4-6-v1", + "claude-haiku-4.5": "us.anthropic.claude-haiku-4-5-20251001-v1:0", + }, + } + + // Resolves a fallback target for the given provider/model pair. + // Uses the config fallback map to find the target provider, + // then the built-in model mapping table to translate model IDs.
+ // Returns undefined if no fallback is configured or no model mapping exists. + export function resolve(providerID: string, modelID: string, fallback?: Record<string, string>) { + if (!fallback) return undefined + const target = fallback[providerID] + if (!target) return undefined + const mapped = models[providerID]?.[modelID] + if (!mapped) return undefined + return { providerID: target, modelID: mapped } + } + + // Full overflow pattern list — mirrors error.ts OVERFLOW_PATTERNS. + // Context overflow must trigger compaction, never provider fallback. + const OVERFLOW = [ + /prompt is too long/i, + /input is too long for requested model/i, + /exceeds the context window/i, + /input token count.*exceeds the maximum/i, + /maximum prompt length is \d+/i, + /reduce the length of the messages/i, + /maximum context length is \d+ tokens/i, + /exceeds the limit of \d+/i, + /exceeds the available context size/i, + /greater than the context length/i, + /context window exceeds limit/i, + /exceeded model token limit/i, + /context[_ ]length[_ ]exceeded/i, + /request entity too large/i, + /^4(00|13)\s*(status code)?\s*\(no body\)/i, + ] + + // Determines whether an error from the primary provider should trigger + // a fallback attempt on the secondary provider. Called inside the + // wrapStream/wrapGenerate middleware catch block with the raw error. + // + // Fallback-worthy: transient gateway/rate errors where a different + // provider likely succeeds (403, 429, 500, 502, 503, bare-400 from Copilot), + // network failures (ECONNREFUSED, ECONNRESET, timeouts). + // 403 is included because the copilot gateway returns transient 403s + // for rate/capacity reasons; the fallback model table only maps copilot + // providers so this won't affect providers where 403 means real auth failure.
+ // + // NOT fallback-worthy: auth failures (401 — different provider + // has different creds, but the request shape is fine), context overflow + // (413 / overflow patterns — needs compaction, not a provider switch), + // and validation errors (prompt issues stay broken on any provider). + export function shouldFallback(err: unknown): boolean { + if (APICallError.isInstance(err)) { + const status = err.statusCode + // Auth errors — won't fix by switching provider + if (status === 401) return false + // Context overflow — needs compaction + if (status === 413) return false + if (err.message && OVERFLOW.some((p) => p.test(err.message))) return false + // Rate limits, gateway errors, and transient 403 — fallback + if (status === 429 || status === 503 || status === 500 || status === 502 || status === 403) return true + // Copilot bare-400: text/plain body, no JSON — transient rate limit + if (status === 400 && err.responseBody && !isJSON(err.responseBody)) return true + // SDK-wrapped network errors: no statusCode but marked retryable + // (AI SDK wraps ECONNREFUSED/ECONNRESET into APICallError) + if (status === undefined && err.isRetryable) return true + return false + } + // AbortSignal.timeout() fires DOMException with name "TimeoutError" + // (provider.ts applies AbortSignal.timeout on every fetch call) + if (err instanceof DOMException && err.name === "TimeoutError") return true + // Raw network failures not wrapped by AI SDK + if (err instanceof TypeError) return true + if (err instanceof Error && (err as NodeJS.ErrnoException).code === "ECONNREFUSED") return true + if (err instanceof Error && (err as NodeJS.ErrnoException).code === "ECONNRESET") return true + return false + } + + function isJSON(input: string) { + try { + const r = JSON.parse(input) + return r && typeof r === "object" + } catch { + return false + } + } + + // Creates an AI SDK middleware that attempts the primary provider, + // and on shouldFallback-worthy errors retries once on the fallback 
model. + // If the fallback also fails, the error propagates to the session retry loop. + export function middleware(fallback: LanguageModelV3): LanguageModelMiddleware { + return { + specificationVersion: "v3" as const, + wrapGenerate: async ({ doGenerate, params }) => { + try { + return await doGenerate() + } catch (err) { + if (!shouldFallback(err)) throw err + log.info("fallback", { + target: fallback.modelId, + error: err instanceof Error ? err.message : String(err), + status: APICallError.isInstance(err) ? err.statusCode : undefined, + }) + Bus.publish(TuiEvent.ToastShow, { + title: "Provider fallback activated", + message: `Switched to ${fallback.modelId}`, + variant: "warning", + }).catch(() => {}) + return await fallback.doGenerate(params) + } + }, + wrapStream: async ({ doStream, params }) => { + try { + return await doStream() + } catch (err) { + if (!shouldFallback(err)) throw err + log.info("fallback", { + target: fallback.modelId, + error: err instanceof Error ? err.message : String(err), + status: APICallError.isInstance(err) ? 
err.statusCode : undefined, + }) + Bus.publish(TuiEvent.ToastShow, { + title: "Provider fallback activated", + message: `Switched to ${fallback.modelId}`, + variant: "warning", + }).catch(() => {}) + return await fallback.doStream(params) + } + }, + } + } +} diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts index dc89db409e87..93daada15a4a 100644 --- a/packages/opencode/src/session/llm.ts +++ b/packages/opencode/src/session/llm.ts @@ -1,4 +1,5 @@ import { Provider } from "@/provider/provider" +import { ProviderID, ModelID } from "@/provider/schema" import { Log } from "@/util/log" import { Cause, Effect, Layer, Record, ServiceMap } from "effect" import * as Queue from "effect/Queue" @@ -7,6 +8,7 @@ import { streamText, wrapLanguageModel, type ModelMessage, type Tool, tool, json import { mergeDeep, pipe } from "remeda" import { GitLabWorkflowLanguageModel } from "gitlab-ai-provider" import { ProviderTransform } from "@/provider/transform" +import { ProviderFallback } from "@/provider/fallback" import { Config } from "@/config/config" import { Instance } from "@/project/instance" import type { Agent } from "@/agent/agent" @@ -98,6 +100,18 @@ export namespace LLM { // TODO: move this to a proper hook const isOpenaiOauth = provider.id === "openai" && auth?.type === "oauth" + // Resolve fallback provider if configured + const target = ProviderFallback.resolve(input.model.providerID, input.model.id, cfg.fallback) + let fallback: Awaited<ReturnType<typeof Provider.getLanguage>> | undefined + if (target) { + try { + const model = await Provider.getModel(ProviderID.make(target.providerID), ModelID.make(target.modelID)) + fallback = await Provider.getLanguage(model) + } catch { + l.warn("fallback unavailable", { target: `${target.providerID}/${target.modelID}` }) + } + } + const system: string[] = [] system.push( [ @@ -321,6 +335,7 @@ export namespace LLM { return args.params }, }, + ...(fallback ? 
[ProviderFallback.middleware(fallback)] : []), ], }), experimental_telemetry: { diff --git a/packages/opencode/test/provider/fallback.test.ts b/packages/opencode/test/provider/fallback.test.ts new file mode 100644 index 000000000000..ad646903a84d --- /dev/null +++ b/packages/opencode/test/provider/fallback.test.ts @@ -0,0 +1,252 @@ +import { describe, expect, test } from "bun:test" +import { APICallError, wrapLanguageModel } from "ai" +import { ProviderFallback } from "../../src/provider/fallback" +import type { LanguageModelV3 } from "@ai-sdk/provider" + +function api(opts: { status?: number; body?: string; retryable?: boolean; message?: string }) { + return new APICallError({ + message: opts.message ?? "", + url: "https://example.com", + requestBodyValues: {}, + statusCode: opts.status, + responseHeaders: {}, + responseBody: opts.body, + isRetryable: opts.retryable ?? false, + data: undefined, + }) +} + +describe("ProviderFallback.resolve", () => { + const cfg = { "github-copilot": "amazon-bedrock" } + + test("maps copilot sonnet to bedrock", () => { + const r = ProviderFallback.resolve("github-copilot", "claude-sonnet-4.6", cfg) + expect(r).toEqual({ providerID: "amazon-bedrock", modelID: "us.anthropic.claude-sonnet-4-6" }) + }) + + test("maps copilot opus to bedrock", () => { + const r = ProviderFallback.resolve("github-copilot", "claude-opus-4.6", cfg) + expect(r).toEqual({ providerID: "amazon-bedrock", modelID: "us.anthropic.claude-opus-4-6-v1" }) + }) + + test("maps copilot haiku to bedrock", () => { + const r = ProviderFallback.resolve("github-copilot", "claude-haiku-4.5", cfg) + expect(r).toEqual({ providerID: "amazon-bedrock", modelID: "us.anthropic.claude-haiku-4-5-20251001-v1:0" }) + }) + + test("returns undefined for unknown provider", () => { + expect(ProviderFallback.resolve("openai", "gpt-4", cfg)).toBeUndefined() + }) + + test("returns undefined for unknown model", () => { + expect(ProviderFallback.resolve("github-copilot", "gpt-4", 
cfg)).toBeUndefined() + }) + + test("returns undefined when no config", () => { + expect(ProviderFallback.resolve("github-copilot", "claude-sonnet-4.6")).toBeUndefined() + }) + + test("returns undefined when provider not in config", () => { + expect(ProviderFallback.resolve("github-copilot", "claude-sonnet-4.6", { openai: "anthropic" })).toBeUndefined() + }) +}) + +describe("ProviderFallback.shouldFallback", () => { + // Auth errors — no fallback + test("401 returns false", () => { + expect(ProviderFallback.shouldFallback(api({ status: 401 }))).toBe(false) + }) + + test("403 returns true (copilot gateway transient)", () => { + expect(ProviderFallback.shouldFallback(api({ status: 403 }))).toBe(true) + }) + + // Context overflow — no fallback + test("413 returns false", () => { + expect(ProviderFallback.shouldFallback(api({ status: 413 }))).toBe(false) + }) + + test("overflow message returns false", () => { + expect(ProviderFallback.shouldFallback(api({ status: 400, message: "prompt is too long" }))).toBe(false) + }) + + test("copilot overflow pattern returns false", () => { + expect(ProviderFallback.shouldFallback(api({ status: 400, message: "exceeds the limit of 200000" }))).toBe(false) + }) + + test("bedrock overflow pattern returns false", () => { + expect( + ProviderFallback.shouldFallback(api({ status: 400, message: "input is too long for requested model" })), + ).toBe(false) + }) + + test("context_length_exceeded returns false", () => { + expect(ProviderFallback.shouldFallback(api({ status: 400, message: "context length exceeded" }))).toBe(false) + }) + + // Rate limits and gateway errors — fallback + test("429 returns true", () => { + expect(ProviderFallback.shouldFallback(api({ status: 429 }))).toBe(true) + }) + + test("500 returns true", () => { + expect(ProviderFallback.shouldFallback(api({ status: 500 }))).toBe(true) + }) + + test("502 returns true", () => { + expect(ProviderFallback.shouldFallback(api({ status: 502 }))).toBe(true) + }) + + test("503 
returns true", () => { + expect(ProviderFallback.shouldFallback(api({ status: 503 }))).toBe(true) + }) + + // Copilot bare-400 + test("bare-400 text body returns true", () => { + expect(ProviderFallback.shouldFallback(api({ status: 400, body: "Bad Request\n" }))).toBe(true) + }) + + test("400 with JSON body returns false", () => { + expect(ProviderFallback.shouldFallback(api({ status: 400, body: '{"error":"validation"}' }))).toBe(false) + }) + + // SDK-wrapped network errors (APICallError with no statusCode) + test("no statusCode + retryable returns true", () => { + expect(ProviderFallback.shouldFallback(api({ retryable: true }))).toBe(true) + }) + + test("no statusCode + not retryable returns false", () => { + expect(ProviderFallback.shouldFallback(api({ retryable: false }))).toBe(false) + }) + + // DOMException TimeoutError + test("DOMException TimeoutError returns true", () => { + expect(ProviderFallback.shouldFallback(new DOMException("timeout", "TimeoutError"))).toBe(true) + }) + + test("DOMException AbortError returns false", () => { + expect(ProviderFallback.shouldFallback(new DOMException("aborted", "AbortError"))).toBe(false) + }) + + // Raw network errors + test("TypeError returns true", () => { + expect(ProviderFallback.shouldFallback(new TypeError("fetch failed"))).toBe(true) + }) + + test("ECONNREFUSED returns true", () => { + const err = Object.assign(new Error("connect ECONNREFUSED"), { code: "ECONNREFUSED" }) + expect(ProviderFallback.shouldFallback(err)).toBe(true) + }) + + test("ECONNRESET returns true", () => { + const err = Object.assign(new Error("socket hang up"), { code: "ECONNRESET" }) + expect(ProviderFallback.shouldFallback(err)).toBe(true) + }) + + // Unknown errors — no fallback + test("plain Error returns false", () => { + expect(ProviderFallback.shouldFallback(new Error("something"))).toBe(false) + }) + + test("string returns false", () => { + expect(ProviderFallback.shouldFallback("error")).toBe(false) + }) + + test("null returns 
false", () => { + expect(ProviderFallback.shouldFallback(null)).toBe(false) + }) +}) + +// Minimal mock model for middleware tests +function mock(opts?: { fail?: Error }): LanguageModelV3 { + return { + specificationVersion: "v3", + provider: "test", + modelId: "test-model", + supportedUrls: undefined as any, + doGenerate: async () => { + if (opts?.fail) throw opts.fail + return { text: "generated" } as any + }, + doStream: async () => { + if (opts?.fail) throw opts.fail + return { stream: "streamed" } as any + }, + } as LanguageModelV3 +} + +describe("ProviderFallback.middleware", () => { + test("calls primary on success (generate)", async () => { + const primary = mock() + const fallback = mock() + const wrapped = wrapLanguageModel({ + model: primary, + middleware: [ProviderFallback.middleware(fallback)], + }) + const result = await wrapped.doGenerate({} as any) + expect((result as any).text).toBe("generated") + }) + + test("calls primary on success (stream)", async () => { + const primary = mock() + const fallback = mock() + const wrapped = wrapLanguageModel({ + model: primary, + middleware: [ProviderFallback.middleware(fallback)], + }) + const result = await wrapped.doStream({} as any) + expect((result as any).stream).toBe("streamed") + }) + + test("falls back on retryable error (generate)", async () => { + const primary = mock({ fail: api({ status: 429 }) }) + const fallback = mock() + const wrapped = wrapLanguageModel({ + model: primary, + middleware: [ProviderFallback.middleware(fallback)], + }) + const result = await wrapped.doGenerate({} as any) + expect((result as any).text).toBe("generated") + }) + + test("falls back on retryable error (stream)", async () => { + const primary = mock({ fail: api({ status: 503 }) }) + const fallback = mock() + const wrapped = wrapLanguageModel({ + model: primary, + middleware: [ProviderFallback.middleware(fallback)], + }) + const result = await wrapped.doStream({} as any) + expect((result as any).stream).toBe("streamed") + 
}) + + test("rethrows non-fallback error (generate)", async () => { + const primary = mock({ fail: api({ status: 401 }) }) + const fallback = mock() + const wrapped = wrapLanguageModel({ + model: primary, + middleware: [ProviderFallback.middleware(fallback)], + }) + await expect(wrapped.doGenerate({} as any)).rejects.toThrow() + }) + + test("rethrows non-fallback error (stream)", async () => { + const primary = mock({ fail: api({ status: 413 }) }) + const fallback = mock() + const wrapped = wrapLanguageModel({ + model: primary, + middleware: [ProviderFallback.middleware(fallback)], + }) + await expect(wrapped.doStream({} as any)).rejects.toThrow() + }) + + test("propagates fallback error when both fail", async () => { + const primary = mock({ fail: api({ status: 429 }) }) + const fallback = mock({ fail: api({ status: 500 }) }) + const wrapped = wrapLanguageModel({ + model: primary, + middleware: [ProviderFallback.middleware(fallback)], + }) + await expect(wrapped.doGenerate({} as any)).rejects.toThrow() + }) +})