From 2e2e1c9b74132740f65424d40e250c22433adb6f Mon Sep 17 00:00:00 2001 From: JohnnyVicious Date: Tue, 14 Apr 2026 19:46:48 +0200 Subject: [PATCH] feat(review): opt-in --post publishes PR reviews with inline comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When `/opencode:review --pr N --post` or `/opencode:adversarial-review --pr N --post` runs, the companion now publishes the review back to the GitHub PR as a review comment: - The summary body is a structured markdown block: verdict badge, model, a findings table (severity / confidence / file / lines / title), and a collapsible "Full findings" section with body + recommendation per finding. - Findings with confidence at or above `--confidence-threshold` (default 0.8, configurable as a float or percent) whose line is addressable on the PR's unified diff are posted as inline review comments anchored to that specific line. Findings below the threshold or outside the diff stay in the summary table only. - Reviews are always published with `event: "COMMENT"` — never `REQUEST_CHANGES` — because this tool is advisory. - Posting failures are non-fatal: the local review output is already on stdout, so a network or auth glitch on GitHub logs a warning to stderr instead of flipping the whole run to a non-zero exit. Opt-in was the explicit design choice: anyone already using `--pr N` for private sanity checks should not get surprise public comments, and `--post` is easy to add to muscle memory. Also relax the 7-second body-stall assertion in send-prompt-body-timeout.test.mjs — the deepEqual already proves the code path survives the stall, and the timing bound flaked under WSL2 clock skew. --- .../opencode/commands/adversarial-review.md | 4 +- plugins/opencode/commands/review.md | 4 +- plugins/opencode/scripts/lib/pr-comments.mjs | 537 ++++++++++++++++++ .../opencode/scripts/opencode-companion.mjs | 121 +++- tests/pr-comments.test.mjs | 379 ++++++++++++ tests/send-prompt-body-timeout.test.mjs | 16 +- 6 files changed, 1048 insertions(+), 13 deletions(-) create mode 100644 plugins/opencode/scripts/lib/pr-comments.mjs create mode 100644 tests/pr-comments.test.mjs diff --git a/plugins/opencode/commands/adversarial-review.md b/plugins/opencode/commands/adversarial-review.md index 6a692ad..ad91e9c 100644 --- a/plugins/opencode/commands/adversarial-review.md +++ b/plugins/opencode/commands/adversarial-review.md @@ -1,6 +1,6 @@ --- description: Run a steerable adversarial OpenCode review that challenges implementation and design decisions -argument-hint: '[--wait|--background] [--base ] [--model | --free] [--pr ] [--path ] [--path ] [focus area or custom review instructions]' +argument-hint: '[--wait|--background] [--base ] [--model | --free] [--pr ] [--post] [--confidence-threshold ] [--path ] [--path ] [focus area or custom review instructions]' disable-model-invocation: true allowed-tools: Read, Glob, Grep, Bash(node:*), Bash(git:*), Bash(gh:*), AskUserQuestion --- @@ -40,6 +40,8 @@ Argument handling: - `--model ` overrides the saved setup default model and OpenCode's own default model for this single review (e.g. `--model openrouter/anthropic/claude-opus-4-6`). Pass it through verbatim if the user supplied it. - `--free` tells the companion script to shell out to `opencode models`, filter for first-party `opencode/*` free-tier models (those ending in `:free` or `-free`), and pick one at random for this review. Restricted to the `opencode/*` provider because OpenRouter free-tier models have inconsistent tool-use support, and the review agent needs `read`/`grep`/`glob`/`list`. Pass it through verbatim if the user supplied it. `--free` and `--model` are mutually exclusive — the companion will error if both are given. - `--pr ` reviews a GitHub pull request via `gh pr diff` instead of the local working tree. The cwd must be a git repo whose remote points at the PR's repository, and `gh` must be installed and authenticated. +- `--post` (opt-in, requires `--pr`) publishes the adversarial review back to the PR as a GitHub review comment. The summary (verdict + findings table + collapsible full findings) is posted as the review body, and any finding with confidence at or above the threshold whose line is part of the PR diff is posted as an inline review comment. Findings below the threshold or pointing at lines outside the diff stay in the summary only. The review is always published with `event: "COMMENT"` — never `REQUEST_CHANGES` — because this tool is advisory. Pass `--post` through verbatim if the user supplied it. +- `--confidence-threshold ` (optional, default `0.8`) sets the confidence bar for inline comments when `--post` is set. Accepts `0..1` floats or percentages (`80`, `80%`). Pass through verbatim. - `--path ` reviews a specific file or directory instead of git diff. Can be specified multiple times (`--path src --path lib`). When `--path` is set, the review is assembled from the actual file contents at those paths rather than from `git diff`. This is useful for reviewing specific directories, fixed sets of files, or large untracked/imported code drops. Mutually exclusive with `--pr` (paths take precedence over PR mode). PR reference extraction (REQUIRED — read this carefully): diff --git a/plugins/opencode/commands/review.md b/plugins/opencode/commands/review.md index bbc54f9..e9179ef 100644 --- a/plugins/opencode/commands/review.md +++ b/plugins/opencode/commands/review.md @@ -1,6 +1,6 @@ --- description: Run an OpenCode code review against local git state or a GitHub PR -argument-hint: '[--wait|--background] [--base ] [--scope auto|working-tree|branch] [--model | --free] [--pr ] [--path ] [--path ]' +argument-hint: '[--wait|--background] [--base ] [--scope auto|working-tree|branch] [--model | --free] [--pr ] [--post] [--confidence-threshold ] [--path ] [--path ]' disable-model-invocation: true allowed-tools: Read, Glob, Grep, Bash(node:*), Bash(git:*), Bash(gh:*), AskUserQuestion --- @@ -42,6 +42,8 @@ Argument handling: - `--model ` overrides the saved setup default model and OpenCode's own default model for this single review (e.g. `--model openrouter/anthropic/claude-opus-4-6`). Pass it through verbatim if the user supplied it. - `--free` tells the companion script to shell out to `opencode models`, filter for first-party `opencode/*` free-tier models (those ending in `:free` or `-free`), and pick one at random for this review. Restricted to the `opencode/*` provider because OpenRouter free-tier models have inconsistent tool-use support, and the review agent needs `read`/`grep`/`glob`/`list`. Pass it through verbatim if the user supplied it. `--free` and `--model` are mutually exclusive — the companion will error if both are given. - `--pr ` reviews a GitHub pull request via `gh pr diff` instead of the local working tree. The cwd must be a git repo whose remote points at the PR's repository, and `gh` must be installed and authenticated. Pass it through verbatim if the user supplied it. +- `--post` (opt-in, requires `--pr`) publishes the review as a PR review comment on GitHub. The summary (verdict + findings table) is posted as the review body, and any finding with confidence at or above the threshold whose line is part of the PR diff is posted as an inline review comment on that line. Findings below the threshold or pointing at lines outside the diff stay in the summary only. The review is always published with `event: "COMMENT"` — never `REQUEST_CHANGES` — because this tool is advisory. Pass `--post` through verbatim if the user supplied it. +- `--confidence-threshold ` (optional, default `0.8`) controls which findings become inline comments when `--post` is set. Accepts `0..1` floats or percentages (`80`, `80%`). Pass through verbatim if the user supplied it. - `--path ` reviews a specific file or directory instead of git diff. Can be specified multiple times (`--path src --path lib`). When `--path` is set, the review is assembled from the actual file contents at those paths rather than from `git diff`. This is useful for reviewing specific directories, fixed sets of files, or large untracked/imported code drops. Mutually exclusive with `--pr` (paths take precedence over PR mode). - **PR reference extraction (REQUIRED)**: if the user's input contains a PR reference like `PR #390`, `pr #390`, `PR 390`, or `pr 390` (e.g. `/opencode:review on PR #390`), you MUST extract the number yourself and pass it as `--pr 390`. Do not pass `PR #390` literally to bash — bash strips unquoted `#NNN` tokens as comments before they reach the companion script. Example: `node ... review --pr 390`, NOT `node ... review on PR #390`. diff --git a/plugins/opencode/scripts/lib/pr-comments.mjs b/plugins/opencode/scripts/lib/pr-comments.mjs new file mode 100644 index 0000000..7523cba --- /dev/null +++ b/plugins/opencode/scripts/lib/pr-comments.mjs @@ -0,0 +1,537 @@ +// Post an OpenCode review back to a GitHub pull request. +// +// Structured review findings from OpenCode have a `file`, `line_start`, +// `line_end`, `confidence`, and a recommendation. We can turn them into: +// - a summary comment on the PR with the full findings table, and +// - inline review comments anchored to specific lines for findings +// whose confidence exceeds the user-supplied threshold (default 0.8) +// AND whose target line is addressable on GitHub's unified diff for +// that PR. +// +// GitHub rejects review comments on lines that are not part of the PR's +// diff, so we parse each file's `patch` returned by the pulls/files +// endpoint to learn which RIGHT-side line numbers are addressable. A +// high-confidence finding whose line is outside the diff silently +// degrades to summary-only; we never drop the finding. +// +// We post via `gh api` piping a JSON payload on stdin rather than via +// `gh pr review`, because the CLI wrapper doesn't support inline review +// comments directly and serializing a comments array through repeated +// -f/-F flags is fragile. + +import { spawn } from "node:child_process"; + +/** + * Post a review to PR `prNumber`. Never throws on posting errors — + * returns a result object the caller can log, because the review text + * was already produced and the local run should not fail because of a + * network or auth glitch on GitHub. + * + * @param {string} workspace - cwd for the `gh` invocations + * @param {object} opts + * @param {number} opts.prNumber + * @param {object|null} opts.structured - parsed review JSON (or null) + * @param {string} opts.rendered - human-readable review output (fallback + * when `structured` is null) + * @param {{ providerID: string, modelID: string }|null} opts.model + * @param {boolean} opts.adversarial + * @param {number} [opts.confidenceThreshold=0.8] + * @returns {Promise<{ posted: boolean, reviewUrl?: string, inlineCount: number, summaryOnlyCount: number, error?: string }>} + */ +export async function postReviewToPr(workspace, opts) { + const { + prNumber, + structured, + rendered, + model, + adversarial, + confidenceThreshold = 0.8, + } = opts; + + try { + const prData = await fetchPrData(workspace, prNumber); + + const findings = Array.isArray(structured?.findings) ? structured.findings : []; + const addableByFile = buildAddableLineMap(prData.files); + const { inline, summaryOnly } = splitFindings( + findings, + addableByFile, + confidenceThreshold + ); + + const summaryBody = renderSummaryBody({ + structured, + rendered, + model, + adversarial, + inlineCount: inline.length, + summaryOnlyCount: summaryOnly.length, + confidenceThreshold, + }); + + const payload = { + commit_id: prData.headSha, + event: "COMMENT", + body: summaryBody, + comments: inline.map(findingToInlineComment), + }; + + const resp = await ghPostReview(workspace, prNumber, payload); + return { + posted: true, + reviewUrl: resp.html_url, + inlineCount: inline.length, + summaryOnlyCount: summaryOnly.length, + }; + } catch (err) { + return { + posted: false, + error: err.message, + inlineCount: 0, + summaryOnlyCount: 0, + }; + } +} + +// --------------------------------------------------------------------- +// gh plumbing +// --------------------------------------------------------------------- + +/** + * Run a `gh` subcommand and return parsed stdout. `input` is piped to + * stdin. Rejects with a useful error on non-zero exit codes. + */ +function runGh(workspace, args, { input } = {}) { + return new Promise((resolve, reject) => { + const proc = spawn("gh", args, { + cwd: workspace, + stdio: ["pipe", "pipe", "pipe"], + env: process.env, + }); + let stdout = ""; + let stderr = ""; + proc.stdout.on("data", (d) => { + stdout += d.toString("utf8"); + }); + proc.stderr.on("data", (d) => { + stderr += d.toString("utf8"); + }); + proc.on("error", (err) => reject(err)); + proc.on("close", (code) => { + if (code !== 0) { + reject( + new Error( + `gh ${args.join(" ")} exited ${code}: ${stderr.trim() || "(no stderr)"}` + ) + ); + return; + } + resolve(stdout); + }); + if (input != null) { + proc.stdin.write(input); + } + proc.stdin.end(); + }); +} + +async function fetchPrData(workspace, prNumber) { + const headJson = await runGh(workspace, [ + "pr", + "view", + String(prNumber), + "--json", + "headRefOid", + ]); + let headSha; + try { + headSha = JSON.parse(headJson).headRefOid; + } catch (err) { + throw new Error(`gh pr view returned invalid JSON: ${err.message}`); + } + if (typeof headSha !== "string" || headSha.length === 0) { + throw new Error( + `gh pr view ${prNumber} did not return a headRefOid; is the PR visible to this token?` + ); + } + + // `gh api` paginates via `--paginate`, but the pulls/files endpoint + // returns at most 30 per page by default — bump per_page to 100 and + // paginate so huge PRs don't lose files past the first page. + const filesJson = await runGh(workspace, [ + "api", + "--paginate", + `repos/{owner}/{repo}/pulls/${prNumber}/files?per_page=100`, + ]); + const files = parsePaginatedJson(filesJson); + return { headSha, files }; +} + +/** + * `gh api --paginate` concatenates the per-page JSON arrays as separate + * documents rather than a single merged array, so a naive JSON.parse + * only sees the last page. Split on the `][` page boundary and merge. + */ +function parsePaginatedJson(text) { + const trimmed = text.trim(); + if (!trimmed) return []; + // The simple case: gh emitted one page as one array. + if (trimmed.startsWith("[") && !trimmed.includes("][")) { + return JSON.parse(trimmed); + } + // Multi-page: treat `][` as a page separator and reconstitute. + const chunks = trimmed.split(/\]\s*\[/).map((chunk, i, arr) => { + let c = chunk; + if (i > 0) c = `[${c}`; + if (i < arr.length - 1) c = `${c}]`; + return c; + }); + const out = []; + for (const chunk of chunks) { + const arr = JSON.parse(chunk); + if (Array.isArray(arr)) out.push(...arr); + } + return out; +} + +async function ghPostReview(workspace, prNumber, payload) { + const stdout = await runGh( + workspace, + [ + "api", + "-X", + "POST", + `repos/{owner}/{repo}/pulls/${prNumber}/reviews`, + "--input", + "-", + ], + { input: JSON.stringify(payload) } + ); + try { + return JSON.parse(stdout); + } catch (err) { + throw new Error(`gh api POST reviews returned invalid JSON: ${err.message}`); + } +} + +// --------------------------------------------------------------------- +// Diff parsing +// --------------------------------------------------------------------- + +/** + * Parse the unified diff in a PR file's `patch` field and return the + * set of RIGHT-side line numbers that GitHub will accept as the `line` + * field of a review comment. Those are lines present in the diff as + * either additions (`+`) or unchanged context (` `). Deletions (`-`) + * only exist on the LEFT side and would need `side: "LEFT"`, which we + * don't bother supporting — our findings target the current state of + * the code, not the old version. + * + * Exported for tests. + * + * @param {string} patch + * @returns {Set} + */ +export function parseAddableLines(patch) { + const addable = new Set(); + if (typeof patch !== "string" || patch.length === 0) return addable; + const lines = patch.split("\n"); + let rightLine = 0; + for (const line of lines) { + const hunk = /^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/.exec(line); + if (hunk) { + rightLine = Number(hunk[1]); + continue; + } + if (rightLine === 0) continue; + // Skip the `\ No newline at end of file` marker without advancing. + if (line.startsWith("\\")) continue; + // Diff headers like `+++ b/path` — ignored, they never appear inside + // a hunk but defend anyway. + if (line.startsWith("+++") || line.startsWith("---")) continue; + if (line.startsWith("+")) { + addable.add(rightLine); + rightLine += 1; + } else if (line.startsWith("-")) { + // deletion — does not advance the right side + } else if (line.startsWith(" ") || line.length === 0) { + // context line OR a truly-empty line (`split("\n")` leaves empties + // where the patch had blank context). + addable.add(rightLine); + rightLine += 1; + } + } + return addable; +} + +/** + * Build `Map>` for all files in a PR. + * @param {Array<{ filename: string, patch?: string }>} files + * @returns {Map>} + */ +export function buildAddableLineMap(files) { + const map = new Map(); + if (!Array.isArray(files)) return map; + for (const file of files) { + if (!file || typeof file.filename !== "string") continue; + const addable = parseAddableLines(file.patch); + if (addable.size > 0) map.set(file.filename, addable); + } + return map; +} + +// --------------------------------------------------------------------- +// Finding classification +// --------------------------------------------------------------------- + +/** + * Classify findings into inline-anchored vs summary-only. A finding is + * inline-eligible iff: + * - confidence >= threshold, AND + * - its file is present in the PR diff, AND + * - at least one of the lines in [line_start, line_end] is addable + * (present in the diff as context or addition). + * + * Exported for tests. + * + * @param {any[]} findings + * @param {Map>} addableByFile + * @param {number} threshold + */ +export function splitFindings(findings, addableByFile, threshold) { + const inline = []; + const summaryOnly = []; + if (!Array.isArray(findings)) return { inline, summaryOnly }; + + for (const f of findings) { + if (!f || typeof f !== "object") continue; + + const conf = typeof f.confidence === "number" ? f.confidence : null; + const hasConfidence = conf != null && conf >= threshold; + if (!hasConfidence) { + summaryOnly.push(f); + continue; + } + + const file = typeof f.file === "string" ? f.file : null; + const addable = file ? addableByFile.get(file) : null; + if (!addable) { + summaryOnly.push(f); + continue; + } + + const start = Number(f.line_start); + const end = Number.isFinite(Number(f.line_end)) ? Number(f.line_end) : start; + if (!Number.isFinite(start) || start <= 0) { + summaryOnly.push(f); + continue; + } + + let target = null; + const lo = Math.min(start, end); + const hi = Math.max(start, end); + for (let ln = lo; ln <= hi; ln += 1) { + if (addable.has(ln)) { + target = ln; + break; + } + } + if (target == null) { + summaryOnly.push(f); + continue; + } + + inline.push({ ...f, _targetLine: target }); + } + + return { inline, summaryOnly }; +} + +/** + * Build an inline review comment payload from a classified finding. + * Exported for tests. + */ +export function findingToInlineComment(finding) { + const sevRaw = typeof finding.severity === "string" ? finding.severity : null; + const sev = sevRaw ? sevRaw.toUpperCase() : null; + const confPct = + typeof finding.confidence === "number" + ? `${Math.round(finding.confidence * 100)}%` + : null; + + const header = [sev ? `**${sev}**` : null, finding.title ?? "Finding"] + .filter(Boolean) + .join(" · "); + const meta = confPct ? `_Confidence ${confPct}_` : ""; + + const body = []; + body.push(header); + if (meta) body.push(meta); + body.push(""); + if (finding.body) body.push(String(finding.body)); + if (finding.recommendation) { + body.push(""); + body.push(`**Recommendation:** ${finding.recommendation}`); + } + body.push(""); + body.push("_Posted by opencode-plugin-cc._"); + + return { + path: finding.file, + line: finding._targetLine, + side: "RIGHT", + body: body.join("\n"), + }; +} + +// --------------------------------------------------------------------- +// Summary body rendering +// --------------------------------------------------------------------- + +/** + * Build the top-level review comment body. Exported for tests. + * @param {object} opts + * @param {object|null} opts.structured + * @param {string} [opts.rendered] + * @param {{providerID: string, modelID: string}|null} opts.model + * @param {boolean} opts.adversarial + * @param {number} opts.inlineCount + * @param {number} opts.summaryOnlyCount + * @param {number} opts.confidenceThreshold + * @returns {string} + */ +export function renderSummaryBody(opts) { + const { + structured, + rendered, + model, + adversarial, + inlineCount, + summaryOnlyCount, + confidenceThreshold, + } = opts; + + const lines = []; + const title = adversarial + ? "OpenCode Adversarial Review" + : "OpenCode Review"; + + let verdictLabel; + if (structured?.verdict === "approve") verdictLabel = "Approve"; + else if (structured?.verdict === "needs-attention") + verdictLabel = "Needs attention"; + else verdictLabel = "Advisory"; + + lines.push(`## ${title} — ${verdictLabel}`); + lines.push(""); + + if (model?.providerID && model?.modelID) { + lines.push(`**Model:** \`${model.providerID}/${model.modelID}\``); + lines.push(""); + } + + if (structured?.summary) { + lines.push(String(structured.summary)); + lines.push(""); + } + + const findings = Array.isArray(structured?.findings) ? structured.findings : []; + + if (structured && findings.length === 0) { + lines.push("_No material findings._"); + lines.push(""); + } else if (findings.length > 0) { + lines.push(`### Findings (${findings.length})`); + lines.push(""); + lines.push("| # | Severity | Confidence | File | Lines | Title |"); + lines.push("|---|----------|------------|------|-------|-------|"); + findings.forEach((f, i) => { + const sev = typeof f.severity === "string" ? f.severity.toUpperCase() : "—"; + const conf = + typeof f.confidence === "number" + ? `${Math.round(f.confidence * 100)}%` + : "—"; + const file = typeof f.file === "string" ? `\`${escapeTableCell(f.file)}\`` : "—"; + const lo = Number(f.line_start); + const hi = Number(f.line_end); + const range = Number.isFinite(lo) + ? Number.isFinite(hi) && hi !== lo + ? `${lo}–${hi}` + : `${lo}` + : "—"; + const titleCell = escapeTableCell(f.title ?? ""); + lines.push(`| ${i + 1} | ${sev} | ${conf} | ${file} | ${range} | ${titleCell} |`); + }); + lines.push(""); + + lines.push("
Full findings"); + lines.push(""); + findings.forEach((f, i) => { + const sev = typeof f.severity === "string" ? f.severity.toUpperCase() : ""; + const lo = Number(f.line_start); + const hi = Number(f.line_end); + const range = + Number.isFinite(lo) && Number.isFinite(hi) && hi !== lo + ? `${lo}–${hi}` + : Number.isFinite(lo) + ? `${lo}` + : "?"; + lines.push(`#### ${i + 1}. ${sev ? `${sev} — ` : ""}${f.title ?? "Finding"}`); + if (typeof f.file === "string") { + lines.push(`- **File:** \`${f.file}\`:${range}`); + } + if (typeof f.confidence === "number") { + lines.push(`- **Confidence:** ${Math.round(f.confidence * 100)}%`); + } + if (f.body) { + lines.push(""); + lines.push(String(f.body)); + } + if (f.recommendation) { + lines.push(""); + lines.push(`**Recommendation:** ${f.recommendation}`); + } + lines.push(""); + }); + lines.push("
"); + lines.push(""); + } else if (!structured && typeof rendered === "string" && rendered.trim()) { + // No structured output — fall back to posting the raw rendered review. + lines.push( + "_The model did not return structured JSON; raw review text below._" + ); + lines.push(""); + lines.push(rendered.trim()); + lines.push(""); + } + + const threshPct = Math.round(confidenceThreshold * 100); + const stats = []; + if (inlineCount > 0) { + stats.push( + `${inlineCount} finding${inlineCount === 1 ? "" : "s"} at or above ${threshPct}% confidence posted as inline comment${inlineCount === 1 ? "" : "s"}.` + ); + } + if (summaryOnlyCount > 0) { + stats.push( + `${summaryOnlyCount} finding${summaryOnlyCount === 1 ? "" : "s"} kept in the summary only (below threshold or outside the PR diff).` + ); + } + if (stats.length > 0) { + lines.push(stats.join(" ")); + lines.push(""); + } + + lines.push("---"); + lines.push( + "_Advisory review generated by [opencode-plugin-cc](https://github.com/JohnnyVicious/opencode-plugin-cc)._" + ); + + return lines.join("\n"); +} + +function escapeTableCell(text) { + return String(text) + .replace(/\|/g, "\\|") + .replace(/\r?\n/g, " "); +} diff --git a/plugins/opencode/scripts/opencode-companion.mjs b/plugins/opencode/scripts/opencode-companion.mjs index 7814363..816b86a 100644 --- a/plugins/opencode/scripts/opencode-companion.mjs +++ b/plugins/opencode/scripts/opencode-companion.mjs @@ -83,6 +83,7 @@ import { } from "./lib/worktree.mjs"; import { readJson, readDenyRules } from "./lib/fs.mjs"; import { resolveReviewAgent } from "./lib/review-agent.mjs"; +import { postReviewToPr } from "./lib/pr-comments.mjs"; import { parseModelString, selectFreeModel } from "./lib/model.mjs"; import { applyDefaultModelOptions, @@ -300,9 +301,9 @@ async function resolveRequestedModel(options) { async function handleReview(argv) { const { options } = parseArgs(argv, { - valueOptions: ["base", "scope", "model", "pr", "path"], + valueOptions: ["base", "scope", "model", "pr", "path", "confidence-threshold"], multiValueOptions: ["path"], - booleanOptions: ["wait", "background", "free"], + booleanOptions: ["wait", "background", "free", "post"], }); const prNumber = options.pr ? Number(options.pr) : null; @@ -314,6 +315,22 @@ async function handleReview(argv) { const paths = normalizePathOption(options.path); const effectivePrNumber = paths?.length ? null : prNumber; + const postRequested = Boolean(options.post); + if (postRequested && !effectivePrNumber) { + console.error( + "--post requires --pr (posting back to GitHub is PR-only; --path reviews have nowhere to post)." + ); + process.exit(1); + } + + let confidenceThreshold; + try { + confidenceThreshold = parseConfidenceThreshold(options["confidence-threshold"]); + } catch (err) { + console.error(err.message); + process.exit(1); + } + const workspace = await resolveWorkspace(); const state = loadState(workspace); const defaults = normalizeDefaults(state.config?.defaults); @@ -379,6 +396,15 @@ async function handleReview(argv) { saveLastReview(workspace, result.rendered); console.log(result.rendered); + + if (postRequested) { + await postReviewAndLog(workspace, { + prNumber: effectivePrNumber, + result, + adversarial: false, + confidenceThreshold, + }); + } } catch (err) { console.error(`Review failed: ${err.message}`); process.exit(1); @@ -387,9 +413,9 @@ async function handleReview(argv) { async function handleAdversarialReview(argv) { const { options, positional } = parseArgs(argv, { - valueOptions: ["base", "scope", "model", "pr", "path"], + valueOptions: ["base", "scope", "model", "pr", "path", "confidence-threshold"], multiValueOptions: ["path"], - booleanOptions: ["wait", "background", "free"], + booleanOptions: ["wait", "background", "free", "post"], }); const workspace = await resolveWorkspace(); @@ -428,6 +454,22 @@ async function handleAdversarialReview(argv) { const paths = normalizePathOption(options.path); const effectivePrNumber = paths?.length ? null : prNumber; + const postRequested = Boolean(options.post); + if (postRequested && !effectivePrNumber) { + console.error( + "--post requires --pr (posting back to GitHub is PR-only; --path reviews have nowhere to post)." + ); + process.exit(1); + } + + let confidenceThreshold; + try { + confidenceThreshold = parseConfidenceThreshold(options["confidence-threshold"]); + } catch (err) { + console.error(err.message); + process.exit(1); + } + const job = createJobRecord(workspace, "adversarial-review", { base: options.base, focus, @@ -481,12 +523,83 @@ async function handleAdversarialReview(argv) { saveLastReview(workspace, result.rendered); console.log(result.rendered); + + if (postRequested) { + await postReviewAndLog(workspace, { + prNumber: effectivePrNumber, + result, + adversarial: true, + confidenceThreshold, + }); + } } catch (err) { console.error(`Adversarial review failed: ${err.message}`); process.exit(1); } } +/** + * Parse `--confidence-threshold` into a 0..1 float. Accepts raw floats + * (`0.8`) or percentage strings (`80`, `80%`). Returns the default 0.8 + * when the option is unset. + */ +function parseConfidenceThreshold(raw) { + if (raw == null || raw === "") return 0.8; + let text = String(raw).trim(); + const isPercent = text.endsWith("%"); + if (isPercent) text = text.slice(0, -1).trim(); + const n = Number(text); + if (!Number.isFinite(n)) { + throw new Error( + `Invalid --confidence-threshold value: ${raw} (expected a number between 0 and 1, or a percentage like 80)` + ); + } + const normalized = isPercent || n > 1 ? n / 100 : n; + if (normalized < 0 || normalized > 1) { + throw new Error( + `Invalid --confidence-threshold value: ${raw} (must be between 0 and 1, or 0 and 100 if expressed as a percentage)` + ); + } + return normalized; +} + +/** + * Shared post-review publishing path. Never throws — pushes a readable + * log line to stderr on both success and failure because the local + * review output has already been printed to stdout and the user should + * not get a non-zero exit just because GitHub was unhappy. + */ +async function postReviewAndLog(workspace, { prNumber, result, adversarial, confidenceThreshold }) { + const outcome = await postReviewToPr(workspace, { + prNumber, + structured: result.structured, + rendered: result.rendered, + model: result.model, + adversarial, + confidenceThreshold, + }); + + if (outcome.posted) { + const bits = [`posted review to PR #${prNumber}`]; + if (outcome.inlineCount > 0) { + bits.push( + `${outcome.inlineCount} inline comment${outcome.inlineCount === 1 ? "" : "s"}` + ); + } + if (outcome.summaryOnlyCount > 0) { + bits.push( + `${outcome.summaryOnlyCount} summary-only finding${outcome.summaryOnlyCount === 1 ? "" : "s"}` + ); + } + const url = outcome.reviewUrl ? ` — ${outcome.reviewUrl}` : ""; + process.stderr.write(`[opencode-companion] ${bits.join(", ")}${url}\n`); + } else { + process.stderr.write( + `[opencode-companion] Failed to post review to PR #${prNumber}: ${outcome.error}\n` + ); + } +} + // ------------------------------------------------------------------ // Task (rescue delegation) // ------------------------------------------------------------------ diff --git a/tests/pr-comments.test.mjs b/tests/pr-comments.test.mjs new file mode 100644 index 0000000..7ef75b1 --- /dev/null +++ b/tests/pr-comments.test.mjs @@ -0,0 +1,379 @@ +// Unit tests for pr-comments.mjs — the module that posts OpenCode +// review output back to a GitHub PR as a review comment with +// high-confidence findings anchored inline. +// +// We intentionally don't hit the GitHub API from tests. The pure +// functions (diff parsing, finding classification, summary rendering) +// are exported for testing and are exercised here. The thin `gh api` +// wrapper on top is verified by `node --check` and by manual smoke +// test through the companion CLI. + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { + parseAddableLines, + buildAddableLineMap, + splitFindings, + findingToInlineComment, + renderSummaryBody, +} from "../plugins/opencode/scripts/lib/pr-comments.mjs"; + +describe("parseAddableLines", () => { + it("returns an empty set for empty or missing input", () => { + assert.equal(parseAddableLines("").size, 0); + assert.equal(parseAddableLines(null).size, 0); + assert.equal(parseAddableLines(undefined).size, 0); + }); + + it("collects addition and context lines as addable RIGHT-side lines", () => { + // Hunk starting at right line 10: + // 10: context "a" + // 11: addition "b" <- addable + // 12: addition "c" <- addable + // 13: context "d" <- addable + // (deletion "e" stays on left, does not advance right) + // 14: addition "f" <- addable + const patch = [ + "@@ -5,4 +10,5 @@", + " a", + "+b", + "+c", + " d", + "-e", + "+f", + ].join("\n"); + const addable = parseAddableLines(patch); + assert.deepEqual([...addable].sort((x, y) => x - y), [10, 11, 12, 13, 14]); + }); + + it("does not mark LEFT-side deletions as addable", () => { + const patch = ["@@ -1,2 +1,1 @@", " a", "-b"].join("\n"); + const addable = parseAddableLines(patch); + assert.deepEqual([...addable], [1]); + }); + + it("handles multi-hunk patches", () => { + const patch = [ + "@@ -1,1 +1,1 @@", + "+first", + "@@ -10,1 +20,1 @@", + "+second", + ].join("\n"); + const addable = parseAddableLines(patch); + assert.deepEqual([...addable].sort((x, y) => x - y), [1, 20]); + }); + + it("skips the '\\ No newline at end of file' marker without advancing", () => { + const patch = [ + "@@ -1,1 +1,2 @@", + "+a", + "\\ No newline at end of file", + "+b", + ].join("\n"); + const addable = parseAddableLines(patch); + assert.deepEqual([...addable].sort((x, y) => x - y), [1, 2]); + }); +}); + +describe("buildAddableLineMap", () => { + it("returns an empty map for missing input", () => { + assert.equal(buildAddableLineMap(undefined).size, 0); + assert.equal(buildAddableLineMap([]).size, 0); + }); + + it("keys by filename and drops files with no patch or no addable lines", () => { + const map = buildAddableLineMap([ + { filename: "a.js", patch: "@@ -1,1 +1,1 @@\n+a" }, + { filename: "b.js" }, // no patch + { filename: "c.js", patch: "" }, // empty patch + ]); + assert.equal(map.size, 1); + assert.ok(map.get("a.js").has(1)); + }); +}); + +describe("splitFindings", () => { + const addable = new Map([["src/foo.js", new Set([10, 11, 12])]]); + + it("routes below-threshold findings to summary-only", () => { + const { inline, summaryOnly } = splitFindings( + [ + { + file: "src/foo.js", + line_start: 10, + line_end: 10, + confidence: 0.5, + title: "weak", + }, + ], + addable, + 0.8 + ); + assert.equal(inline.length, 0); + assert.equal(summaryOnly.length, 1); + }); + + it("routes high-confidence findings with addable line to inline", () => { + const { inline, summaryOnly } = splitFindings( + [ + { + file: "src/foo.js", + line_start: 11, + line_end: 11, + confidence: 0.9, + title: "strong", + }, + ], + addable, + 0.8 + ); + assert.equal(inline.length, 1); + assert.equal(summaryOnly.length, 0); + assert.equal(inline[0]._targetLine, 11); + }); + + it("degrades a high-confidence finding whose file is not in the diff", () => { + const { inline, summaryOnly } = splitFindings( + [ + { + file: "src/not-in-diff.js", + line_start: 1, + line_end: 1, + confidence: 0.95, + title: "orphan", + }, + ], + addable, + 0.8 + ); + assert.equal(inline.length, 0); + assert.equal(summaryOnly.length, 1); + }); + + it("degrades a high-confidence finding whose range misses the diff", () => { + const { inline, summaryOnly } = splitFindings( + [ + { + file: "src/foo.js", + line_start: 50, + line_end: 60, + confidence: 0.9, + title: "wrong region", + }, + ], + addable, + 0.8 + ); + assert.equal(inline.length, 0); + assert.equal(summaryOnly.length, 1); + }); + + it("anchors to the first addable line in the finding's range", () => { + const map = new Map([["src/foo.js", new Set([12, 14])]]); + const { inline } = splitFindings( + [ + { + file: "src/foo.js", + line_start: 10, + line_end: 15, + confidence: 0.9, + title: "range", + }, + ], + map, + 0.8 + ); + assert.equal(inline.length, 1); + assert.equal(inline[0]._targetLine, 12); + }); + + it("handles missing line_end by treating it as line_start", () => { + const { inline } = splitFindings( + [ + { + file: "src/foo.js", + line_start: 11, + confidence: 0.9, + title: "single line", + }, + ], + addable, + 0.8 + ); + assert.equal(inline.length, 1); + assert.equal(inline[0]._targetLine, 11); + }); + + it("respects a higher threshold", () => { + const { inline } = splitFindings( + [ + { + file: "src/foo.js", + line_start: 10, + confidence: 0.85, + title: "medium-high", + }, + ], + addable, + 0.95 + ); + assert.equal(inline.length, 0); + }); + + it("skips findings without a numeric confidence", () => { + const { inline, summaryOnly } = splitFindings( + [ + { + file: "src/foo.js", + line_start: 10, + title: "no confidence", + }, + ], + addable, + 0.8 + ); + assert.equal(inline.length, 0); + assert.equal(summaryOnly.length, 1); + }); +}); + +describe("findingToInlineComment", () => { + it("builds a GitHub-review comment payload with path/line/side/body", () => { + const comment = findingToInlineComment({ + file: "src/foo.js", + _targetLine: 42, + severity: "high", + confidence: 0.92, + title: "Race condition", + body: "Two writers can observe stale state.", + recommendation: "Hold the lock around the read-modify-write.", + }); + assert.equal(comment.path, "src/foo.js"); + assert.equal(comment.line, 42); + assert.equal(comment.side, "RIGHT"); + assert.match(comment.body, /HIGH/); + assert.match(comment.body, /Race condition/); + assert.match(comment.body, /92%/); + assert.match(comment.body, /Recommendation/); + assert.match(comment.body, /opencode-plugin-cc/); + }); + + it("survives a minimal finding with no severity or recommendation", () => { + const comment = findingToInlineComment({ + file: "src/foo.js", + _targetLine: 1, + title: "Minimal", + }); + assert.equal(comment.path, "src/foo.js"); + assert.equal(comment.line, 1); + assert.match(comment.body, /Minimal/); + }); +}); + +describe("renderSummaryBody", () => { + const baseStructured = { + verdict: "needs-attention", + summary: "Two issues worth addressing before merge.", + findings: [ + { + severity: "high", + confidence: 0.9, + title: "Tenant isolation", + file: "src/foo.js", + line_start: 10, + line_end: 12, + body: "Reqs from tenant A can see tenant B's data.", + recommendation: "Scope the query by tenant_id.", + }, + { + severity: "medium", + confidence: 0.5, + title: "Missing retries", + file: "src/bar.js", + line_start: 22, + line_end: 22, + body: "Downstream 5xx is not retried.", + recommendation: "Wrap in exponential backoff.", + }, + ], + }; + + it("renders the title, verdict, model, summary, findings table, and footer", () => { + const body = renderSummaryBody({ + structured: baseStructured, + model: { providerID: "openrouter", modelID: "anthropic/claude-opus-4-6" }, + adversarial: true, + inlineCount: 1, + summaryOnlyCount: 1, + confidenceThreshold: 0.8, + }); + + assert.match(body, /OpenCode Adversarial Review/); + assert.match(body, /Needs attention/); + assert.match(body, /openrouter\/anthropic\/claude-opus-4-6/); + assert.match(body, /Two issues worth addressing/); + assert.match(body, /### Findings \(2\)/); + assert.match(body, /\| # \| Severity \| Confidence \|/); + assert.match(body, /HIGH/); + assert.match(body, /MEDIUM/); + assert.match(body, /src\/foo\.js/); + assert.match(body, /10.12/); + assert.match(body, /1 finding at or above 80% confidence posted as inline comment/); + assert.match(body, /1 finding kept in the summary only/); + assert.match(body, /opencode-plugin-cc/); + }); + + it("shows an approve verdict label when the review approves", () => { + const body = renderSummaryBody({ + structured: { verdict: "approve", findings: [] }, + model: null, + adversarial: false, + inlineCount: 0, + summaryOnlyCount: 0, + confidenceThreshold: 0.8, + }); + assert.match(body, /OpenCode Review — Approve/); + assert.match(body, /No material findings/); + }); + + it("falls back to the rendered text when structured is null", () => { + const body = renderSummaryBody({ + structured: null, + rendered: "Raw review text the model produced.", + model: null, + adversarial: false, + inlineCount: 0, + summaryOnlyCount: 0, + confidenceThreshold: 0.8, + }); + assert.match(body, /did not return structured JSON/); + assert.match(body, /Raw review text the model produced/); + }); + + it("escapes pipe characters and newlines in table cells", () => { + const body = renderSummaryBody({ + structured: { + verdict: "needs-attention", + findings: [ + { + severity: "high", + confidence: 0.9, + title: "Has | pipe and\nnewline", + file: "src/foo.js", + line_start: 1, + line_end: 1, + body: "", + recommendation: "", + }, + ], + }, + model: null, + adversarial: false, + inlineCount: 0, + summaryOnlyCount: 1, + confidenceThreshold: 0.8, + }); + // Pipes must be escaped so they don't split a markdown table cell. + assert.match(body, /Has \\\| pipe and newline/); + }); +}); diff --git a/tests/send-prompt-body-timeout.test.mjs b/tests/send-prompt-body-timeout.test.mjs index 4cebc74..b93365f 100644 --- a/tests/send-prompt-body-timeout.test.mjs +++ b/tests/send-prompt-body-timeout.test.mjs @@ -59,18 +59,20 @@ describe("sendPrompt body-timeout resilience", () => { }); it( - "tolerates a server that holds the response body for 7+ seconds", + "tolerates a server that holds the response body for several seconds", { timeout: 20_000 }, async () => { + // Purely a functional check: if `sendPrompt` resolves with the + // expected payload, the request survived the multi-second server + // stall — which is exactly the failure mode that `fetch()` + the + // undici 5-min default `bodyTimeout` hit on long reviews. We used + // to also assert an elapsed-time lower bound, but that flaked + // under WSL2 clock skew and the timing bound adds nothing to the + // assertion that wasn't already proven by the server completing + // its stall + the client returning the final JSON. const client = createClient(baseUrl); - const start = Date.now(); const result = await client.sendPrompt("stall-test", "hello"); - const elapsedMs = Date.now() - start; assert.deepEqual(result, { ok: true }); - assert.ok( - elapsedMs >= 6_500, - `expected request to take ~7s, actually took ${elapsedMs}ms` - ); } ); });