// Wall-clock budget for a single `sendPrompt` call. Must be longer than
// any single model turn the user can realistically wait for, but bounded
// so a wedged socket can't hang the companion forever. Overridable via
// OPENCODE_COMPANION_PROMPT_TIMEOUT_MS. The tracked-jobs layer has its
// own 30 min hard timer (OPENCODE_COMPANION_JOB_TIMEOUT_MS) on top of
// this — this constant is the per-HTTP-call fallback, not the
// authoritative cap.
const DEFAULT_PROMPT_TIMEOUT_MS = 30 * 60 * 1000;

/**
 * Resolve the per-call prompt timeout in milliseconds.
 *
 * Prefers a positive, finite OPENCODE_COMPANION_PROMPT_TIMEOUT_MS from
 * the environment; anything else (unset, NaN, zero, negative, Infinity)
 * falls back to DEFAULT_PROMPT_TIMEOUT_MS.
 *
 * @returns {number} Timeout in milliseconds.
 */
function resolvePromptTimeoutMs() {
  const fromEnv = Number(process.env.OPENCODE_COMPANION_PROMPT_TIMEOUT_MS);
  if (Number.isFinite(fromEnv) && fromEnv > 0) return fromEnv;
  return DEFAULT_PROMPT_TIMEOUT_MS;
}

/**
 * POST a JSON body via `node:http` and return the raw response.
 *
 * We deliberately avoid `fetch()` here because Node's bundled undici
 * imposes a 300_000 ms default `bodyTimeout` that surfaces as
 * `TypeError: terminated` when the OpenCode server holds the connection
 * open mid-body for longer than 5 minutes — which is the normal case
 * for long adversarial reviews against slow/free models. `node:http`
 * has no such default, so this helper only enforces the explicit
 * wall-clock timer we pass in.
 *
 * See issue: "OpenCode adversarial review failed: terminated" (fetch
 * failed ~4.5 min into a run). The outer `AbortSignal.timeout()` we
 * used before was a wall-clock abort, not a dispatcher-level body
 * timeout, so it did not prevent undici from killing the socket first.
 *
 * Only `http:` URLs are supported: `node:http` cannot negotiate TLS, so
 * a non-http URL is rejected up front with a clear error rather than
 * surfacing as ERR_INVALID_PROTOCOL from deep inside `http.request`.
 *
 * @param {string} urlString Absolute `http:` URL to POST to.
 * @param {Record<string, string>} headers Extra request headers; these
 *   override the defaults (`Content-Type: application/json`) set here.
 * @param {unknown} bodyObj JSON-serializable body.
 * @param {{ timeoutMs?: number }} [opts] Optional positive wall-clock
 *   timeout override in milliseconds.
 * @returns {Promise<{ status: number, body: string }>} HTTP status and
 *   the full (unparsed) response body.
 */
function httpPostJson(urlString, headers, bodyObj, opts = {}) {
  const timeoutMs = Number.isFinite(opts.timeoutMs) && opts.timeoutMs > 0
    ? opts.timeoutMs
    : resolvePromptTimeoutMs();
  const url = new URL(urlString);
  const payload = Buffer.from(JSON.stringify(bodyObj), "utf8");

  return new Promise((resolve, reject) => {
    // Fail fast: node:http cannot speak TLS, so pretending to support
    // https (the old port-443 branch) only produced a confusing
    // ERR_INVALID_PROTOCOL later.
    if (url.protocol !== "http:") {
      reject(
        new Error(`httpPostJson only supports http: URLs, got ${url.protocol}`)
      );
      return;
    }

    let settled = false;
    // Declared before any callback can run so `finish` never touches it
    // in a temporal dead zone.
    let timer = null;
    const finish = (fn, val) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      fn(val);
    };

    timer = setTimeout(() => {
      finish(
        reject,
        new Error(
          `OpenCode prompt exceeded ${Math.round(timeoutMs / 1000)}s wall-clock timeout ` +
            `(set OPENCODE_COMPANION_PROMPT_TIMEOUT_MS to raise)`
        )
      );
      req.destroy();
    }, timeoutMs);

    const req = http.request(
      {
        host: url.hostname,
        port: url.port || 80,
        method: "POST",
        path: `${url.pathname}${url.search}`,
        headers: {
          // Sensible default for a JSON POST helper; caller-supplied
          // headers take precedence.
          "Content-Type": "application/json",
          ...headers,
          "Content-Length": payload.length,
        },
      },
      (res) => {
        let data = "";
        res.setEncoding("utf8");
        res.on("data", (chunk) => {
          data += chunk;
        });
        res.on("end", () => {
          finish(resolve, { status: res.statusCode ?? 0, body: data });
        });
        res.on("error", (err) => finish(reject, err));
      }
    );

    req.on("error", (err) => finish(reject, err));

    req.write(payload);
    req.end();
  });
}
if (opts.tools) body.tools = opts.tools; - const res = await fetch(`${baseUrl}/session/${sessionId}/message`, { - method: "POST", + const { status, body: responseText } = await httpPostJson( + `${baseUrl}/session/${sessionId}/message`, headers, - body: JSON.stringify(body), - signal: AbortSignal.timeout(600_000), // 10 min for long tasks - }); + body + ); - if (!res.ok) { - const text = await res.text().catch(() => ""); - throw new Error(`OpenCode prompt failed ${res.status}: ${text}`); + if (status < 200 || status >= 300) { + throw new Error(`OpenCode prompt failed ${status}: ${responseText}`); } - return res.json(); + try { + return JSON.parse(responseText); + } catch (err) { + throw new Error( + `OpenCode prompt returned non-JSON response (${status}): ${err.message}` + ); + } }, /** diff --git a/tests/send-prompt-body-timeout.test.mjs b/tests/send-prompt-body-timeout.test.mjs new file mode 100644 index 0000000..4cebc74 --- /dev/null +++ b/tests/send-prompt-body-timeout.test.mjs @@ -0,0 +1,76 @@ +// Regression test for the undici `bodyTimeout` bug that made long +// adversarial reviews die with `TypeError: terminated` around the +// 4.5–5 minute mark. +// +// Node's bundled undici has a 300_000 ms default `bodyTimeout` that +// fires when the server holds the connection open mid-body longer than +// 5 minutes. The OpenCode `/session/{id}/message` endpoint legitimately +// does exactly that while the model thinks on a long review. Our +// `sendPrompt` used to be built on `fetch()` and was subject to this +// hidden timer; it now uses `node:http` directly so no dispatcher-level +// body timer can pull the rug out from under us. +// +// We don't actually wait 5 minutes in the test — we stall the server +// for 7 seconds, which is far longer than any `fetch()`-based +// implementation could be configured to tolerate without importing +// `undici` explicitly (which we refuse to do because it's not an +// installed dependency). 
7 s is enough to prove the request is not +// being cut off by a short internal timer while still keeping `npm +// test` fast. + +import { describe, it, before, after } from "node:test"; +import assert from "node:assert/strict"; +import http from "node:http"; +import { createClient } from "../plugins/opencode/scripts/lib/opencode-server.mjs"; + +describe("sendPrompt body-timeout resilience", () => { + /** @type {http.Server} */ + let server; + let baseUrl; + + before(async () => { + server = http.createServer((req, res) => { + if (req.url !== "/session/stall-test/message") { + res.writeHead(404).end(); + return; + } + // Drain the request body, then begin a chunked response and stall + // for several seconds before completing it. This simulates + // OpenCode holding the connection open while the model thinks. + req.on("data", () => {}); + req.on("end", () => { + res.writeHead(200, { + "Content-Type": "application/json", + "Transfer-Encoding": "chunked", + }); + res.write("{"); + setTimeout(() => { + res.end('"ok":true}'); + }, 7_000); + }); + }); + await new Promise((resolve) => server.listen(0, "127.0.0.1", resolve)); + const addr = server.address(); + baseUrl = `http://127.0.0.1:${addr.port}`; + }); + + after(async () => { + await new Promise((resolve) => server.close(resolve)); + }); + + it( + "tolerates a server that holds the response body for 7+ seconds", + { timeout: 20_000 }, + async () => { + const client = createClient(baseUrl); + const start = Date.now(); + const result = await client.sendPrompt("stall-test", "hello"); + const elapsedMs = Date.now() - start; + assert.deepEqual(result, { ok: true }); + assert.ok( + elapsedMs >= 6_500, + `expected request to take ~7s, actually took ${elapsedMs}ms` + ); + } + ); +});