// Patch summary: adds per-agent "fallback models" to opencode.
//
// agent.ts
//   - Agent schema gains optional `fallbackModels` (array of { modelID, providerID })
//     and optional `maxRetriesBeforeFallback` (positive integer).
//   - When merging an agent's config entry, a non-empty `fallback_models` list is mapped
//     through `Provider.parseModel`; `max_retries_before_fallback` overrides the existing
//     value only when it is set (`??`).
// config.ts
//   - Agent config schema gains `fallback_models` (string[]) and
//     `max_retries_before_fallback` (positive integer).
//   - Both names are appended to the string list that precedes the
//     "Extract unknown properties into options" step (presumably the known-keys list, so
//     they are not copied into `options` - confirm against the full file).
// session/processor.ts
//   - `Provider` becomes a value import because `Provider.getModel` is now called at runtime.
//   - New state: `attemptsOnCurrentModel`, `fallbackIndex`, and the constant
//     `DEFAULT_MAX_RETRIES_BEFORE_FALLBACK = 3`.
//   - On a retryable error both `attempt` and `attemptsOnCurrentModel` are incremented.
//     Once `attemptsOnCurrentModel >= maxRetries` and unused fallbacks remain, the loop walks
//     the remaining fallbacks in order. The first one that `Provider.getModel` resolves
//     replaces `streamInput.model` / `input.model`, is written to the assistant message's
//     `modelID` / `providerID`, resets `attemptsOnCurrentModel`, sets a "fallback" status and
//     `continue`s. Fallbacks that fail to resolve are logged and skipped.
//   - If none resolve, control falls through to the normal backoff path.
//   - Backoff delay is now derived from `attemptsOnCurrentModel`; the "retry" status still
//     carries `attempt`.
// session/status.ts
//   - SessionStatus union gains a `{ type: "fallback", from, to }` variant.
//
// NOTE(review): the hunks below appear with their line breaks collapsed; the original
//   newlines likely need restoring before this patch can be applied - confirm.
// NOTE(review): the context line `export type Info = Awaited>` looks like it lost its
//   angle-bracket contents in extraction - verify against the real file.
// NOTE(review): `catch (fallbackErr: any)` reads `.message` without narrowing; consider
//   `unknown` plus an `instanceof Error` check.
// NOTE(review): the `?.length` guard in agent.ts ignores an explicit empty
//   `fallback_models: []`, so an override cannot clear an existing list - confirm intended.
// NOTE(review): a successful fallback `continue`s before `SessionRetry.delay` is computed,
//   so the first request to the fallback model appears to be issued without backoff -
//   confirm intended.
// NOTE(review): the "(default: 3)" text in the config description duplicates
//   `DEFAULT_MAX_RETRIES_BEFORE_FALLBACK`; the two must be kept in sync by hand.
diff --git a/packages/opencode/src/agent/agent.ts b/packages/opencode/src/agent/agent.ts index 72e7f8985da1..ccc7e98243f0 100644 --- a/packages/opencode/src/agent/agent.ts +++ b/packages/opencode/src/agent/agent.ts @@ -41,6 +41,15 @@ export namespace Agent { prompt: z.string().optional(), options: z.record(z.string(), z.any()), steps: z.number().int().positive().optional(), + fallbackModels: z + .array( + z.object({ + modelID: z.string(), + providerID: z.string(), + }), + ) + .optional(), + maxRetriesBeforeFallback: z.number().int().positive().optional(), }) .meta({ ref: "Agent", @@ -226,6 +235,10 @@ export namespace Agent { item.name = value.name ?? item.name item.steps = value.steps ?? item.steps item.options = mergeDeep(item.options, value.options ?? {}) + if (value.fallback_models?.length) { + item.fallbackModels = value.fallback_models.map((m: string) => Provider.parseModel(m)) + } + item.maxRetriesBeforeFallback = value.max_retries_before_fallback ?? item.maxRetriesBeforeFallback item.permission = PermissionNext.merge(item.permission, PermissionNext.fromConfig(value.permission ?? 
{})) } diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index 98970ba392dc..222dac3cb063 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -621,6 +621,18 @@ export namespace Config { .optional() .describe("Maximum number of agentic iterations before forcing text-only response"), maxSteps: z.number().int().positive().optional().describe("@deprecated Use 'steps' field instead."), + fallback_models: z + .array(z.string()) + .optional() + .describe( + "Ordered list of fallback models (provider/model format) to try when the primary model exhausts retries", + ), + max_retries_before_fallback: z + .number() + .int() + .positive() + .optional() + .describe("Number of retry attempts on the current model before falling back to the next one (default: 3)"), permission: Permission.optional(), }) .catchall(z.any()) @@ -642,6 +654,8 @@ export namespace Config { "permission", "disable", "tools", + "fallback_models", + "max_retries_before_fallback", ]) // Extract unknown properties into options diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index 24b4a4f9fbc8..237404695356 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -9,7 +9,7 @@ import { Bus } from "@/bus" import { SessionRetry } from "./retry" import { SessionStatus } from "./status" import { Plugin } from "@/plugin" -import type { Provider } from "@/provider/provider" +import { Provider } from "@/provider/provider" import { LLM } from "./llm" import { Config } from "@/config/config" import { SessionCompaction } from "./compaction" @@ -18,6 +18,7 @@ import { Question } from "@/question" export namespace SessionProcessor { const DOOM_LOOP_THRESHOLD = 3 + const DEFAULT_MAX_RETRIES_BEFORE_FALLBACK = 3 const log = Log.create({ service: "session.processor" }) export type Info = Awaited> @@ -33,6 +34,8 @@ export namespace 
SessionProcessor { let snapshot: string | undefined let blocked = false let attempt = 0 + let attemptsOnCurrentModel = 0 + let fallbackIndex = 0 let needsCompaction = false const result = { @@ -349,11 +352,54 @@ export namespace SessionProcessor { error: e, stack: JSON.stringify(e.stack), }) - const error = MessageV2.fromError(e, { providerID: input.model.providerID }) + const error = MessageV2.fromError(e, { providerID: streamInput.model.providerID }) const retry = SessionRetry.retryable(error) if (retry !== undefined) { attempt++ - const delay = SessionRetry.delay(attempt, error.name === "APIError" ? error : undefined) + attemptsOnCurrentModel++ + + // Check if we should fall back to a different model + const fallbackModels = streamInput.agent.fallbackModels + const maxRetries = streamInput.agent.maxRetriesBeforeFallback ?? DEFAULT_MAX_RETRIES_BEFORE_FALLBACK + if (fallbackModels?.length && attemptsOnCurrentModel >= maxRetries && fallbackIndex < fallbackModels.length) { + const fromModel = `${streamInput.model.providerID}/${streamInput.model.id}` + let fellBack = false + while (fallbackIndex < fallbackModels.length) { + const next = fallbackModels[fallbackIndex] + fallbackIndex++ + const toModel = `${next.providerID}/${next.modelID}` + log.info("falling back to next model", { + from: fromModel, + to: toModel, + attemptsExhausted: attemptsOnCurrentModel, + fallbackIndex: fallbackIndex - 1, + }) + try { + const resolved = await Provider.getModel(next.providerID, next.modelID) + streamInput.model = resolved + input.model = resolved + input.assistantMessage.modelID = resolved.id + input.assistantMessage.providerID = resolved.providerID + attemptsOnCurrentModel = 0 + SessionStatus.set(input.sessionID, { + type: "fallback", + from: fromModel, + to: toModel, + }) + fellBack = true + break + } catch (fallbackErr: any) { + log.error("fallback model resolution failed", { + model: toModel, + error: fallbackErr.message, + }) + } + } + if (fellBack) continue + // All 
fallbacks failed to resolve, fall through to normal backoff + } + + const delay = SessionRetry.delay(attemptsOnCurrentModel, error.name === "APIError" ? error : undefined) SessionStatus.set(input.sessionID, { type: "retry", attempt, diff --git a/packages/opencode/src/session/status.ts b/packages/opencode/src/session/status.ts index 1db03b5db0d8..003353dc0ee3 100644 --- a/packages/opencode/src/session/status.ts +++ b/packages/opencode/src/session/status.ts @@ -18,6 +18,11 @@ export namespace SessionStatus { z.object({ type: z.literal("busy"), }), + z.object({ + type: z.literal("fallback"), + from: z.string(), + to: z.string(), + }), ]) .meta({ ref: "SessionStatus",