From 787b2307281a35ad160d6f4122fa36dccbdeeb72 Mon Sep 17 00:00:00 2001 From: Cole Leavitt Date: Tue, 31 Mar 2026 11:47:43 -0700 Subject: [PATCH 1/4] fix(provider): add sdk.responses fallback and prevent SDK cache corruption - Add fallback to languageModel when sdk.responses is undefined (e.g., @ai-sdk/openai-compatible) - Create local copy of options before adding fetch wrapper to avoid mutating provider.options, which caused Anthropic SDK to fail after switching from Codex --- packages/opencode/src/provider/provider.ts | 26 ++++++++++++---------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/packages/opencode/src/provider/provider.ts b/packages/opencode/src/provider/provider.ts index c6784f450244..9d9c068d5c1f 100644 --- a/packages/opencode/src/provider/provider.ts +++ b/packages/opencode/src/provider/provider.ts @@ -193,6 +193,8 @@ export namespace Provider { return { autoload: false, async getModel(sdk: any, modelID: string, _options?: Record) { + // Fallback to languageModel if responses is not available (e.g., @ai-sdk/openai-compatible) + if (sdk.responses === undefined) return sdk.languageModel(modelID) return sdk.responses(modelID) }, options: {}, @@ -229,10 +231,9 @@ export namespace Provider { async getModel(sdk: any, modelID: string, options?: Record) { if (useLanguageModel(sdk)) return sdk.languageModel(modelID) if (options?.["useCompletionUrls"]) { - return sdk.chat(modelID) - } else { - return sdk.responses(modelID) + return sdk.chat ? sdk.chat(modelID) : sdk.languageModel(modelID) } + return sdk.responses ? sdk.responses(modelID) : sdk.languageModel(modelID) }, options: {}, vars(_options) { @@ -249,10 +250,9 @@ export namespace Provider { async getModel(sdk: any, modelID: string, options?: Record) { if (useLanguageModel(sdk)) return sdk.languageModel(modelID) if (options?.["useCompletionUrls"]) { - return sdk.chat(modelID) - } else { - return sdk.responses(modelID) + return sdk.chat ? sdk.chat(modelID) : sdk.languageModel(modelID) } + return sdk.responses ? sdk.responses(modelID) : sdk.languageModel(modelID) }, options: { baseURL: resourceName ? `https://${resourceName}.cognitiveservices.azure.com/openai` : undefined, @@ -1304,11 +1304,13 @@ export namespace Provider { const existing = s.sdk.get(key) if (existing) return existing - const customFetch = options["fetch"] - const chunkTimeout = options["chunkTimeout"] - delete options["chunkTimeout"] + // Create a copy to avoid mutating provider.options + const sdkOptions = { ...options } + const customFetch = sdkOptions["fetch"] + const chunkTimeout = sdkOptions["chunkTimeout"] + delete sdkOptions["chunkTimeout"] - options["fetch"] = async (input: any, init?: BunFetchRequestInit) => { + sdkOptions["fetch"] = async (input: any, init?: BunFetchRequestInit) => { const fetchFn = customFetch ?? fetch const opts = init ?? {} const chunkAbortCtl = @@ -1356,7 +1358,7 @@ export namespace Provider { }) const loaded = bundledFn({ name: model.providerID, - ...options, + ...sdkOptions, }) s.sdk.set(key, loaded) return loaded as SDK @@ -1375,7 +1377,7 @@ export namespace Provider { const fn = mod[Object.keys(mod).find((key) => key.startsWith("create"))!] const loaded = fn({ name: model.providerID, - ...options, + ...sdkOptions, }) s.sdk.set(key, loaded) return loaded as SDK From 9df734b9885359ed27d57f4fe077a96ed24a5590 Mon Sep 17 00:00:00 2001 From: Cole Leavitt Date: Tue, 31 Mar 2026 11:54:31 -0700 Subject: [PATCH 2/4] fix(config): use package.json name for file:// plugin identity file:// plugins now resolve their identity via the nearest package.json name, allowing local development plugins to properly deduplicate against their npm equivalents. Falls back to the full file URL if no package.json is found. --- packages/opencode/src/config/config.ts | 33 +++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index 9e56c980fbeb..5398602a3bec 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -1,6 +1,6 @@ import { Log } from "../util/log" import path from "path" -import { pathToFileURL } from "url" +import { fileURLToPath, pathToFileURL } from "url" import os from "os" import z from "zod" import { ModelsDev } from "../provider/models" @@ -23,7 +23,7 @@ import { LSPServer } from "../lsp/server" import { BunProc } from "@/bun" import { Installation } from "@/installation" import { ConfigMarkdown } from "./markdown" -import { constants, existsSync } from "fs" +import { constants, existsSync, readFileSync } from "fs" import { Bus } from "@/bus" import { GlobalBus } from "@/bus/global" import { Event } from "../server/event" @@ -393,13 +393,40 @@ export namespace Config { * Since plugins are added in low-to-high priority order, * we reverse, deduplicate (keeping first occurrence), then restore order. */ + function findPackageJsonName(filePath: string): string | undefined { + let dir = path.dirname(filePath) + const root = path.parse(dir).root + + for (let i = 0; i < 5 && dir !== root; i++) { + const pkgPath = path.join(dir, "package.json") + if (existsSync(pkgPath)) { + try { + const pkg = JSON.parse(readFileSync(pkgPath, "utf-8")) + if (pkg.name && typeof pkg.name === "string") { + return pkg.name + } + } catch {} + } + dir = path.dirname(dir) + } + return undefined + } + + function getPluginIdentity(spec: string): string { + if (spec.startsWith("file://")) { + const filePath = fileURLToPath(spec) + return findPackageJsonName(filePath) ?? spec + } + return parsePluginSpecifier(spec).pkg + } + export function deduplicatePlugins(plugins: PluginSpec[]): PluginSpec[] { const seenNames = new Set() const uniqueSpecifiers: PluginSpec[] = [] for (const specifier of plugins.toReversed()) { const spec = pluginSpecifier(specifier) - const name = spec.startsWith("file://") ? spec : parsePluginSpecifier(spec).pkg + const name = getPluginIdentity(spec) if (!seenNames.has(name)) { seenNames.add(name) uniqueSpecifiers.push(specifier) From e0f79be5d8d46fad91f3c9d862c9ec0cf51e122a Mon Sep 17 00:00:00 2001 From: Cole Leavitt Date: Wed, 1 Apr 2026 06:24:25 -0700 Subject: [PATCH 3/4] feat: add resource leak fixes, graceful shutdown, security hardening MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Global cleanup registry with timeout-capped shutdown orchestration - WeakRef-based parent-child AbortController to prevent GC leaks - MCP subprocess signal escalation (SIGTERM → SIGKILL) - Worker try/finally guarantee in TUI thread - OAuth server auto-cleanup on timeout/resolve/reject - Circuit breaker on compaction (max 3 consecutive failures) - SSL error classification and stale connection detection - HTML error page title extraction for proxy/WAF errors - Anti-ptrace security hardening via prctl on Linux --- packages/opencode/src/cli/cmd/tui/thread.ts | 168 +++++++++--------- packages/opencode/src/index.ts | 20 ++- packages/opencode/src/mcp/index.ts | 8 + packages/opencode/src/plugin/codex.ts | 5 +- packages/opencode/src/provider/error.ts | 8 +- packages/opencode/src/session/prompt.ts | 9 +- packages/opencode/src/util/abort.ts | 36 +++- packages/opencode/src/util/cleanup.ts | 18 ++ .../opencode/src/util/connection-error.ts | 73 ++++++++ packages/opencode/src/util/security.ts | 17 ++ 10 files changed, 265 insertions(+), 97 deletions(-) create mode 100644 packages/opencode/src/util/cleanup.ts create mode 100644 packages/opencode/src/util/connection-error.ts create mode 100644 packages/opencode/src/util/security.ts diff --git a/packages/opencode/src/cli/cmd/tui/thread.ts b/packages/opencode/src/cli/cmd/tui/thread.ts index 3bb56937a6cb..62c686dfd3dd 100644 --- a/packages/opencode/src/cli/cmd/tui/thread.ts +++ b/packages/opencode/src/cli/cmd/tui/thread.ts @@ -135,94 +135,98 @@ export const TuiThreadCommand = cmd({ Object.entries(process.env).filter((entry): entry is [string, string] => entry[1] !== undefined), ), }) - worker.onerror = (e) => { - Log.Default.error(e) - } - - const client = Rpc.client(worker) - const error = (e: unknown) => { - Log.Default.error(e) - } - const reload = () => { - client.call("reload", undefined).catch((err) => { - Log.Default.warn("worker reload failed", { - error: errorMessage(err), + try { + worker.onerror = (e) => { + Log.Default.error(e) + } + + const client = Rpc.client(worker) + const error = (e: unknown) => { + Log.Default.error(e) + } + const reload = () => { + client.call("reload", undefined).catch((err) => { + Log.Default.warn("worker reload failed", { + error: errorMessage(err), + }) }) - }) - } - process.on("uncaughtException", error) - process.on("unhandledRejection", error) - process.on("SIGUSR2", reload) - - let stopped = false - const stop = async () => { - if (stopped) return - stopped = true - process.off("uncaughtException", error) - process.off("unhandledRejection", error) - process.off("SIGUSR2", reload) - await withTimeout(client.call("shutdown", undefined), 5000).catch((error) => { - Log.Default.warn("worker shutdown failed", { - error: errorMessage(error), + } + process.on("uncaughtException", error) + process.on("unhandledRejection", error) + process.on("SIGUSR2", reload) + + let stopped = false + const stop = async () => { + if (stopped) return + stopped = true + process.off("uncaughtException", error) + process.off("unhandledRejection", error) + process.off("SIGUSR2", reload) + await withTimeout(client.call("shutdown", undefined), 5000).catch((error) => { + Log.Default.warn("worker shutdown failed", { + error: errorMessage(error), + }) }) - }) - worker.terminate() - } - - const prompt = await input(args.prompt) - const config = await Instance.provide({ - directory: cwd, - fn: () => TuiConfig.get(), - }) - - const network = await resolveNetworkOptions(args) - const external = - process.argv.includes("--port") || - process.argv.includes("--hostname") || - process.argv.includes("--mdns") || - network.mdns || - network.port !== 0 || - network.hostname !== "127.0.0.1" - - const transport = external - ? { - url: (await client.call("server", network)).url, - fetch: undefined, - events: undefined, - } - : { - url: "http://opencode.internal", - fetch: createWorkerFetch(client), - events: createEventSource(client), - } - - setTimeout(() => { - client.call("checkUpgrade", { directory: cwd }).catch(() => {}) - }, 1000).unref?.() + worker.terminate() + } - try { - await tui({ - url: transport.url, - async onSnapshot() { - const tui = writeHeapSnapshot("tui.heapsnapshot") - const server = await client.call("snapshot", undefined) - return [tui, server] - }, - config, + const prompt = await input(args.prompt) + const config = await Instance.provide({ directory: cwd, - fetch: transport.fetch, - events: transport.events, - args: { - continue: args.continue, - sessionID: args.session, - agent: args.agent, - model: args.model, - prompt, - fork: args.fork, - }, + fn: () => TuiConfig.get(), }) + + const network = await resolveNetworkOptions(args) + const external = + process.argv.includes("--port") || + process.argv.includes("--hostname") || + process.argv.includes("--mdns") || + network.mdns || + network.port !== 0 || + network.hostname !== "127.0.0.1" + + const transport = external + ? { + url: (await client.call("server", network)).url, + fetch: undefined, + events: undefined, + } + : { + url: "http://opencode.internal", + fetch: createWorkerFetch(client), + events: createEventSource(client), + } + + setTimeout(() => { + client.call("checkUpgrade", { directory: cwd }).catch(() => {}) + }, 1000).unref?.() + + try { + await tui({ + url: transport.url, + async onSnapshot() { + const tui = writeHeapSnapshot("tui.heapsnapshot") + const server = await client.call("snapshot", undefined) + return [tui, server] + }, + config, + directory: cwd, + fetch: transport.fetch, + events: transport.events, + args: { + continue: args.continue, + sessionID: args.session, + agent: args.agent, + model: args.model, + prompt, + fork: args.fork, + }, + }) + } finally { + await stop() + } } finally { - await stop() + worker.terminate() } } finally { unguard?.() diff --git a/packages/opencode/src/index.ts b/packages/opencode/src/index.ts index 2da35ace1dd8..38484da9b6b3 100644 --- a/packages/opencode/src/index.ts +++ b/packages/opencode/src/index.ts @@ -35,6 +35,11 @@ import { JsonMigration } from "./storage/json-migration" import { Database } from "./storage/db" import { errorMessage } from "./util/error" import { PluginCommand } from "./cli/cmd/plug" +import { runCleanup } from "./util/cleanup" +import { Instance } from "./project/instance" +import { setNonDumpable } from "./util/security" + +setNonDumpable() process.on("unhandledRejection", (e) => { Log.Default.error("rejection", { @@ -209,9 +214,16 @@ try { } process.exitCode = 1 } finally { - // Some subprocesses don't react properly to SIGTERM and similar signals. - // Most notably, some docker-container-based MCP servers don't handle such signals unless - // run using `docker run --init`. - // Explicitly exit to avoid any hanging subprocesses. + // Multi-phase graceful shutdown: + // 1. Run global cleanup registry (2s timeout) + // 2. Dispose all instances (2s timeout) + // 3. Failsafe: force exit after 5s total + const failsafe = setTimeout(() => process.exit(process.exitCode ?? 0), 5000) + failsafe.unref?.() + try { + await runCleanup(2000) + await Promise.race([Instance.disposeAll(), new Promise((r) => setTimeout(r, 2000))]) + } catch {} + clearTimeout(failsafe) process.exit() } diff --git a/packages/opencode/src/mcp/index.ts b/packages/opencode/src/mcp/index.ts index e3bf4cac0688..4cdbd3142870 100644 --- a/packages/opencode/src/mcp/index.ts +++ b/packages/opencode/src/mcp/index.ts @@ -523,11 +523,19 @@ export namespace MCP { const pid = (client.transport as any)?.pid if (typeof pid === "number") { const pids = yield* descendants(pid) + // Signal escalation: SIGTERM → wait 400ms → SIGKILL for (const dpid of pids) { try { process.kill(dpid, "SIGTERM") } catch {} } + yield* Effect.sleep("400 millis") + for (const dpid of pids) { + try { + process.kill(dpid, 0) + process.kill(dpid, "SIGKILL") + } catch {} + } } yield* Effect.tryPromise(() => client.close()).pipe(Effect.ignore) }), diff --git a/packages/opencode/src/plugin/codex.ts b/packages/opencode/src/plugin/codex.ts index ee42b9517198..c340957433f7 100644 --- a/packages/opencode/src/plugin/codex.ts +++ b/packages/opencode/src/plugin/codex.ts @@ -331,19 +331,22 @@ function waitForOAuthCallback(pkce: PkceCodes, state: string): Promise { clearTimeout(timeout) + stopOAuthServer() resolve(tokens) }, reject: (error) => { clearTimeout(timeout) + stopOAuthServer() reject(error) }, } diff --git a/packages/opencode/src/provider/error.ts b/packages/opencode/src/provider/error.ts index 52e525177a5b..7fd0a8969844 100644 --- a/packages/opencode/src/provider/error.ts +++ b/packages/opencode/src/provider/error.ts @@ -2,6 +2,7 @@ import { APICallError } from "ai" import { STATUS_CODES } from "http" import { iife } from "@/util/iife" import type { ProviderID } from "./schema" +import * as ConnectionError from "@/util/connection-error" export namespace ProviderError { // Adapted from overflow detection patterns in: @@ -48,6 +49,9 @@ export namespace ProviderError { function message(providerID: ProviderID, e: APICallError) { return iife(() => { + const conn = ConnectionError.extract(e) + if (conn) return ConnectionError.format(conn) + const msg = e.message if (msg === "") { if (e.responseBody) return e.responseBody @@ -71,8 +75,6 @@ export namespace ProviderError { } } catch {} - // If responseBody is HTML (e.g. from a gateway or proxy error page), - // provide a human-readable message instead of dumping raw markup if (/^\s*` to re-authenticate." @@ -80,6 +82,8 @@ export namespace ProviderError { if (e.statusCode === 403) { return "Forbidden: request was blocked by a gateway or proxy. You may not have permission to access this resource — check your account and provider settings." } + const title = e.responseBody.match(/([^<]+)<\/title>/i)?.[1]?.trim() + if (title) return title return msg } diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts index 78f4fae52111..ae562453008d 100644 --- a/packages/opencode/src/session/prompt.ts +++ b/packages/opencode/src/session/prompt.ts @@ -1354,6 +1354,8 @@ NOTE: At any point in time through this workflow you should feel free to ask the const ctx = yield* InstanceState.context let structured: unknown | undefined let step = 0 + let compactFailures = 0 + const MAX_COMPACT_FAILURES = 3 const session = yield* sessions.get(sessionID) while (true) { @@ -1411,13 +1413,18 @@ NOTE: At any point in time through this workflow you should feel free to ask the auto: task.auto, overflow: task.overflow, }) - if (result === "stop") break + if (result === "stop") { + if (task.auto) compactFailures++ + break + } + if (task.auto) compactFailures = 0 continue } if ( lastFinished && lastFinished.summary !== true && + compactFailures < MAX_COMPACT_FAILURES && (yield* compaction.isOverflow({ tokens: lastFinished.tokens, model })) ) { yield* compaction.create({ sessionID, agent: lastUser.agent, model: lastUser.model, auto: true }) diff --git a/packages/opencode/src/util/abort.ts b/packages/opencode/src/util/abort.ts index 3e7cfd8b28b1..54259191537d 100644 --- a/packages/opencode/src/util/abort.ts +++ b/packages/opencode/src/util/abort.ts @@ -18,13 +18,6 @@ export function abortAfter(ms: number) { } } -/** - * Combines multiple AbortSignals with a timeout. - * - * @param ms Timeout in milliseconds - * @param signals Additional signals to combine - * @returns Combined signal that aborts on timeout or when any input signal aborts - */ export function abortAfterAny(ms: number, ...signals: AbortSignal[]) { const timeout = abortAfter(ms) const signal = AbortSignal.any([timeout.signal, ...signals]) @@ -33,3 +26,32 @@ export function abortAfterAny(ms: number, ...signals: AbortSignal[]) { clearTimeout: timeout.clearTimeout, } } + +function propagate(this: WeakRef<AbortController>, ref: WeakRef<AbortController>) { + const child = ref.deref() + const parent = this.deref() + if (child && !child.signal.aborted) child.abort(parent?.signal.reason) +} + +function detach(this: WeakRef<AbortController>, ref: WeakRef<(...args: any[]) => void>) { + const parent = this.deref() + const handler = ref.deref() + if (parent && handler) parent.signal.removeEventListener("abort", handler) +} + +// Creates a child AbortController linked to a parent via WeakRef. +// The parent does not retain the child — abandoned children are GC'd. +// When the child aborts, it auto-removes its listener from the parent. +export function childAbort(parent: AbortController) { + const child = new AbortController() + if (parent.signal.aborted) { + child.abort(parent.signal.reason) + return child + } + const weak = new WeakRef(child) + const weakParent = new WeakRef(parent) + const handler = propagate.bind(weakParent, weak) + parent.signal.addEventListener("abort", handler, { once: true }) + child.signal.addEventListener("abort", detach.bind(weakParent, new WeakRef(handler)), { once: true }) + return child +} diff --git a/packages/opencode/src/util/cleanup.ts b/packages/opencode/src/util/cleanup.ts new file mode 100644 index 000000000000..c612301c1ca1 --- /dev/null +++ b/packages/opencode/src/util/cleanup.ts @@ -0,0 +1,18 @@ +const registry = new Set<() => Promise<void>>() + +export function registerCleanup(fn: () => Promise<void>) { + registry.add(fn) + return () => { + registry.delete(fn) + } +} + +export async function runCleanup(timeout = 2000) { + if (registry.size === 0) return + const fns = [...registry] + registry.clear() + await Promise.race([ + Promise.allSettled(fns.map((fn) => fn())), + new Promise((resolve) => setTimeout(resolve, timeout)), + ]) +} diff --git a/packages/opencode/src/util/connection-error.ts b/packages/opencode/src/util/connection-error.ts new file mode 100644 index 000000000000..ce1c68dfe8c5 --- /dev/null +++ b/packages/opencode/src/util/connection-error.ts @@ -0,0 +1,73 @@ +const SSL_CODES = new Set([ + "UNABLE_TO_VERIFY_LEAF_SIGNATURE", + "UNABLE_TO_GET_ISSUER_CERT", + "UNABLE_TO_GET_ISSUER_CERT_LOCALLY", + "CERT_SIGNATURE_FAILURE", + "CERT_NOT_YET_VALID", + "CERT_HAS_EXPIRED", + "CERT_REVOKED", + "CERT_REJECTED", + "CERT_UNTRUSTED", + "DEPTH_ZERO_SELF_SIGNED_CERT", + "SELF_SIGNED_CERT_IN_CHAIN", + "CERT_CHAIN_TOO_LONG", + "PATH_LENGTH_EXCEEDED", + "ERR_TLS_CERT_ALTNAME_INVALID", + "HOSTNAME_MISMATCH", + "ERR_TLS_HANDSHAKE_TIMEOUT", + "ERR_SSL_WRONG_VERSION_NUMBER", + "ERR_SSL_DECRYPTION_FAILED_OR_BAD_RECORD_MAC", +]) + +export type ConnectionErrorInfo = { + code: string + message: string + ssl: boolean + stale: boolean +} + +export function extract(error: unknown): ConnectionErrorInfo | undefined { + let current: unknown = error + for (let depth = 0; current && depth < 5; depth++) { + if (current instanceof Error && "code" in current && typeof current.code === "string") { + const code = current.code + return { + code, + message: current.message, + ssl: SSL_CODES.has(code), + stale: code === "ECONNRESET" || code === "EPIPE", + } + } + if (current instanceof Error && "cause" in current && current.cause !== current) { + current = current.cause + } else break + } + return undefined +} + +export function format(info: ConnectionErrorInfo): string { + if (info.code === "ETIMEDOUT") return "Request timed out. Check your internet connection and proxy settings" + if (info.stale) return `Connection reset (${info.code}). Retrying with fresh connection.` + if (!info.ssl) return `Unable to connect to API (${info.code})` + + switch (info.code) { + case "UNABLE_TO_VERIFY_LEAF_SIGNATURE": + case "UNABLE_TO_GET_ISSUER_CERT": + case "UNABLE_TO_GET_ISSUER_CERT_LOCALLY": + return "SSL certificate verification failed. If behind a corporate proxy, set NODE_EXTRA_CA_CERTS to your CA bundle path." + case "CERT_HAS_EXPIRED": + return "SSL certificate has expired" + case "CERT_REVOKED": + return "SSL certificate has been revoked" + case "DEPTH_ZERO_SELF_SIGNED_CERT": + case "SELF_SIGNED_CERT_IN_CHAIN": + return "Self-signed certificate detected. If behind a corporate proxy, set NODE_EXTRA_CA_CERTS to your CA bundle path." + case "ERR_TLS_CERT_ALTNAME_INVALID": + case "HOSTNAME_MISMATCH": + return "SSL certificate hostname mismatch" + case "CERT_NOT_YET_VALID": + return "SSL certificate is not yet valid" + default: + return `SSL error (${info.code}). If behind a corporate proxy, set NODE_EXTRA_CA_CERTS to your CA bundle path.` + } +} diff --git a/packages/opencode/src/util/security.ts b/packages/opencode/src/util/security.ts new file mode 100644 index 000000000000..03f93a9d0f80 --- /dev/null +++ b/packages/opencode/src/util/security.ts @@ -0,0 +1,17 @@ +// Blocks same-UID ptrace of this process on Linux. +// Prevents a prompt-injected `gdb -p $PPID` from scraping +// API keys or tokens from the heap. No-op on non-Linux. +export function setNonDumpable() { + if (process.platform !== "linux" || typeof Bun === "undefined") return + try { + const ffi = require("bun:ffi") as typeof import("bun:ffi") + const lib = ffi.dlopen("libc.so.6", { + prctl: { + args: ["int", "u64", "u64", "u64", "u64"], + returns: "int", + }, + } as const) + const PR_SET_DUMPABLE = 4 + lib.symbols.prctl(PR_SET_DUMPABLE, 0n, 0n, 0n, 0n) + } catch {} +} From 48f5e3429921567ec9778793e1062de47b305bb0 Mon Sep 17 00:00:00 2001 From: Cole Leavitt <cole@unwrap.rs> Date: Wed, 1 Apr 2026 06:55:16 -0700 Subject: [PATCH 4/4] feat: add retry jitter, token gap parser, cache break detection, non-streaming fallback - Exponential backoff with 25% jitter to prevent thundering herd - Stale connection (ECONNRESET/EPIPE) errors now retryable - parseTokenGap() for reactive compaction targeting - DJB2 hash-based cache break detection with bounded LRU - generateText() fallback wrapper for non-streaming recovery --- packages/opencode/src/session/overflow.ts | 13 ++++ packages/opencode/src/session/retry.ts | 11 +++- packages/opencode/src/util/cache-break.ts | 52 ++++++++++++++++ .../src/util/nonstreaming-fallback.ts | 62 +++++++++++++++++++ 4 files changed, 135 insertions(+), 3 deletions(-) create mode 100644 packages/opencode/src/util/cache-break.ts create mode 100644 packages/opencode/src/util/nonstreaming-fallback.ts diff --git a/packages/opencode/src/session/overflow.ts b/packages/opencode/src/session/overflow.ts index f0e52565d81f..e1a837a9178a 100644 --- a/packages/opencode/src/session/overflow.ts +++ b/packages/opencode/src/session/overflow.ts @@ -20,3 +20,16 @@ export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistan : context - ProviderTransform.maxOutputTokens(input.model) return count >= usable } + +// Parses "prompt is too long: 250000 tokens > 200000" style error messages +// to extract the exact token gap for targeted compaction +const GAP_PATTERN = /(\d[\d,]*)\s*tokens?\s*>\s*(\d[\d,]*)/i + +export function parseTokenGap(msg: string): { actual: number; limit: number; gap: number } | undefined { + const match = msg.match(GAP_PATTERN) + if (!match) return undefined + const actual = parseInt(match[1].replace(/,/g, ""), 10) + const limit = parseInt(match[2].replace(/,/g, ""), 10) + if (isNaN(actual) || isNaN(limit) || actual <= limit) return undefined + return { actual, limit, gap: actual - limit } +} diff --git a/packages/opencode/src/session/retry.ts b/packages/opencode/src/session/retry.ts index 8ba48375bcfe..122397aebfb5 100644 --- a/packages/opencode/src/session/retry.ts +++ b/packages/opencode/src/session/retry.ts @@ -2,6 +2,7 @@ import type { NamedError } from "@opencode-ai/util/error" import { Cause, Clock, Duration, Effect, Schedule } from "effect" import { MessageV2 } from "./message-v2" import { iife } from "@/util/iife" +import * as ConnectionError from "@/util/connection-error" export namespace SessionRetry { export type Err = ReturnType<NamedError["toObject"]> @@ -41,15 +42,16 @@ export namespace SessionRetry { } } - return cap(RETRY_INITIAL_DELAY * Math.pow(RETRY_BACKOFF_FACTOR, attempt - 1)) + const base = RETRY_INITIAL_DELAY * Math.pow(RETRY_BACKOFF_FACTOR, attempt - 1) + return cap(base + Math.random() * 0.25 * base) } } - return cap(Math.min(RETRY_INITIAL_DELAY * Math.pow(RETRY_BACKOFF_FACTOR, attempt - 1), RETRY_MAX_DELAY_NO_HEADERS)) + const base = Math.min(RETRY_INITIAL_DELAY * Math.pow(RETRY_BACKOFF_FACTOR, attempt - 1), RETRY_MAX_DELAY_NO_HEADERS) + return cap(base + Math.random() * 0.25 * base) } export function retryable(error: Err) { - // context overflow errors should not be retried if (MessageV2.ContextOverflowError.isInstance(error)) return undefined if (MessageV2.APIError.isInstance(error)) { if (!error.data.isRetryable) return undefined @@ -58,6 +60,9 @@ export namespace SessionRetry { return error.data.message.includes("Overloaded") ? "Provider is overloaded" : error.data.message } + const conn = ConnectionError.extract(error.data) + if (conn?.stale) return `Connection reset (${conn.code})` + const json = iife(() => { try { if (typeof error.data?.message === "string") { diff --git a/packages/opencode/src/util/cache-break.ts b/packages/opencode/src/util/cache-break.ts new file mode 100644 index 000000000000..45431ff1486d --- /dev/null +++ b/packages/opencode/src/util/cache-break.ts @@ -0,0 +1,52 @@ +const MAX_SOURCES = 10 + +type Snapshot = { + system: number + tools: number + model: string + calls: number + prev: number | null +} + +const sources = new Map<string, Snapshot>() + +function hash(str: string): number { + let h = 5381 + for (let i = 0; i < str.length; i++) { + h = ((h << 5) + h + str.charCodeAt(i)) | 0 + } + return h >>> 0 +} + +export function record(id: string, system: string, tools: string, model: string, cache: number) { + if (sources.size >= MAX_SOURCES && !sources.has(id)) { + const oldest = sources.keys().next().value + if (oldest) sources.delete(oldest) + } + const prev = sources.get(id) + sources.set(id, { + system: hash(system), + tools: hash(tools), + model, + calls: (prev?.calls ?? 0) + 1, + prev: cache, + }) +} + +export type BreakReason = "system" | "tools" | "model" | "unknown" + +export function detect(id: string, system: string, tools: string, model: string, cache: number): BreakReason | null { + const snap = sources.get(id) + if (!snap) return null + if (snap.prev !== null && cache === 0 && snap.prev > 0) { + if (hash(system) !== snap.system) return "system" + if (hash(tools) !== snap.tools) return "tools" + if (model !== snap.model) return "model" + return "unknown" + } + return null +} + +export function clear(id: string) { + sources.delete(id) +} diff --git a/packages/opencode/src/util/nonstreaming-fallback.ts b/packages/opencode/src/util/nonstreaming-fallback.ts new file mode 100644 index 000000000000..2db33dad36ce --- /dev/null +++ b/packages/opencode/src/util/nonstreaming-fallback.ts @@ -0,0 +1,62 @@ +import { generateText, type ModelMessage, type Tool, type LanguageModel } from "ai" +import { Log } from "@/util/log" + +const log = Log.create({ service: "nonstreaming-fallback" }) + +export type FallbackInput = { + model: LanguageModel + messages: ModelMessage[] + tools: Record<string, Tool> + temperature?: number + topP?: number + topK?: number + maxOutputTokens?: number + headers?: Record<string, string> + abort?: AbortSignal +} + +export type FallbackResult = { + text: string + usage: { inputTokens: number; outputTokens: number; totalTokens: number } + finishReason: string + toolCalls: Array<{ id: string; name: string; input: unknown }> + toolResults: Array<{ id: string; result: unknown }> +} + +export async function fallback(input: FallbackInput): Promise<FallbackResult> { + log.info("attempting non-streaming fallback") + const result = await generateText({ + model: input.model, + messages: input.messages, + tools: input.tools, + temperature: input.temperature, + topP: input.topP, + topK: input.topK, + maxOutputTokens: input.maxOutputTokens, + headers: input.headers, + abortSignal: input.abort, + maxRetries: 0, + }) + + const calls = (result.toolCalls ?? []) as Array<{ toolCallId: string; toolName: string; input: unknown }> + const results = (result.toolResults ?? []) as Array<{ toolCallId: string; output: unknown }> + + return { + text: result.text, + usage: { + inputTokens: result.usage?.inputTokens ?? 0, + outputTokens: result.usage?.outputTokens ?? 0, + totalTokens: result.usage?.totalTokens ?? 0, + }, + finishReason: result.finishReason, + toolCalls: calls.map((tc) => ({ + id: tc.toolCallId, + name: tc.toolName, + input: tc.input, + })), + toolResults: results.map((tr) => ({ + id: tr.toolCallId, + result: tr.output, + })), + } +}