diff --git a/__tests__/unit/ai-review-prompt.test.js b/__tests__/unit/ai-review-prompt.test.js index c66d980..4502880 100644 --- a/__tests__/unit/ai-review-prompt.test.js +++ b/__tests__/unit/ai-review-prompt.test.js @@ -195,9 +195,28 @@ describe('computeVerdict (deterministic, ignores model)', () => { expect(computeVerdict(null)).toBe('approve'); }); - it('returns comment when findings exist', () => { + it('returns comment when only model findings exist', () => { expect(computeVerdict([{ file: 'a', line: 1, quoted_line: 'x', claim: 'y' }])).toBe('comment'); }); + + it('returns comment when oracle findings are warning/info only', () => { + expect(computeVerdict([ + { source: 'oracle:rivet-validate', severity: 'warning', artifact_id: 'X', claim: 'y' } + ])).toBe('comment'); + }); + + it('promotes to request_changes when ANY oracle finding has severity error', () => { + expect(computeVerdict([ + { source: 'oracle:rivet-validate', severity: 'warning', artifact_id: 'X', claim: 'a' }, + { source: 'oracle:rivet-validate', severity: 'error', artifact_id: 'Y', claim: 'b' } + ])).toBe('request_changes'); + }); + + it('does NOT promote to request_changes for model findings (no source) at "error" — only oracle has evidence-grade errors', () => { + expect(computeVerdict([ + { file: 'a', line: 1, quoted_line: 'x', claim: 'y', severity: 'error' } + ])).toBe('comment'); + }); }); describe('renderReviewMarkdown', () => { @@ -245,6 +264,56 @@ describe('renderReviewMarkdown', () => { expect(out).not.toContain('✅ Approve'); }); + it('renders oracle findings with severity badge and artifact_id (not file:line)', () => { + const out = renderReviewMarkdown( + { + verdict: 'comment', + summary: 'Run includes rivet validate.', + findings: [ + { + source: 'oracle:rivet-validate', + severity: 'error', + artifact_id: 'REQ-001', + claim: 'REQ-001: missing required field x' + }, + { + source: 'oracle:rivet-impact', + severity: 'warning', + artifact_id: 'OLD-1', + claim: 'Artifact removed: OLD-1.' + } + ] + }, + 99, 'def5678', meta + ); + expect(out).toContain('🔴 `REQ-001`'); + expect(out).toContain('rivet-validate'); + expect(out).toContain('🟡 `OLD-1`'); + expect(out).toContain('rivet-impact'); + expect(out).toContain('🔴 Request changes'); // because severity:error oracle finding + }); + + it('renders mixed oracle + model findings in two visual styles', () => { + const out = renderReviewMarkdown( + { + verdict: 'comment', + summary: 'Mixed.', + findings: [ + { + source: 'oracle:rivet-validate', + severity: 'warning', + artifact_id: 'REQ-100', + claim: 'REQ-100: lifecycle gap' + }, + { file: 'a.js', line: 5, quoted_line: '+x', claim: 'no test for x at a.js:5' } + ] + }, + 99, 'def5678', meta + ); + expect(out).toContain('🟡 `REQ-100`'); + expect(out).toContain('`a.js:5`'); + }); + it('still posts when caller opts out of skip-approve-empty', () => { const out = renderReviewMarkdown( { verdict: 'approve', summary: 'Doc only.', findings: [] }, diff --git a/__tests__/unit/rivet-fetch.test.js b/__tests__/unit/rivet-fetch.test.js new file mode 100644 index 0000000..a025791 --- /dev/null +++ b/__tests__/unit/rivet-fetch.test.js @@ -0,0 +1,113 @@ +import { EventEmitter } from 'node:events'; +import { + hasRivetYaml, + extractTarballBuffer, + fetchAndExtractTarball +} from '../../src/rivet-fetch.js'; + +function mockOctokit({ contents = null, contentsStatus = 200, tarball = null } = {}) { + return { + request: jest.fn().mockImplementation((route /*, params */) => { + if (route === 'GET /repos/{owner}/{repo}/contents/{path}') { + if (contentsStatus === 404) { + const e = new Error('Not Found'); e.status = 404; + return Promise.reject(e); + } + if (contentsStatus >= 500) { + const e = new Error('Server'); e.status = contentsStatus; + return Promise.reject(e); + } + return Promise.resolve({ status: 200, data: contents }); + } + if (route === 'GET /repos/{owner}/{repo}/tarball/{ref}') { + return Promise.resolve({ status: 200, data: tarball || Buffer.from('fake-tarball') }); + } + return Promise.resolve({ status: 200, data: {} }); + }) + }; +} + +function makeFakeSpawn({ exitCode = 0, errorOnSpawn = null, stderr = '' } = {}) { + return jest.fn(() => { + const proc = new EventEmitter(); + proc.stdin = new EventEmitter(); + proc.stdin.end = jest.fn(() => { + // Simulate async exit shortly after stdin closed. + setImmediate(() => proc.emit('exit', exitCode)); + }); + proc.stdin.write = jest.fn(); + proc.stderr = new EventEmitter(); + proc.stdout = new EventEmitter(); + if (errorOnSpawn) { + setImmediate(() => proc.emit('error', new Error(errorOnSpawn))); + } else if (stderr) { + setImmediate(() => proc.stderr.emit('data', Buffer.from(stderr))); + } + return proc; + }); +} + +describe('hasRivetYaml', () => { + it('returns true on 200 (file exists)', async () => { + const oct = mockOctokit({ contents: { name: 'rivet.yaml' } }); + expect(await hasRivetYaml(oct, 'o', 'r', 'main')).toBe(true); + }); + + it('returns false on 404 (file absent)', async () => { + const oct = mockOctokit({ contentsStatus: 404 }); + expect(await hasRivetYaml(oct, 'o', 'r', 'main')).toBe(false); + }); + + it('fails safe (returns false) on auth/server errors', async () => { + const oct = mockOctokit({ contentsStatus: 503 }); + expect(await hasRivetYaml(oct, 'o', 'r', 'main')).toBe(false); + }); +}); + +describe('extractTarballBuffer', () => { + it('resolves on tar exit code 0', async () => { + const spawnFn = makeFakeSpawn({ exitCode: 0 }); + await expect(extractTarballBuffer(Buffer.from('x'), '/tmp/dest', { spawnFn })) + .resolves.toBeUndefined(); + expect(spawnFn).toHaveBeenCalledWith( + 'tar', + ['-xz', '--strip-components=1', '-C', '/tmp/dest'], + expect.objectContaining({ stdio: ['pipe', 'pipe', 'pipe'] }) + ); + }); + + it('rejects on non-zero exit with stderr in message', async () => { + const spawnFn = makeFakeSpawn({ exitCode: 1, stderr: 'tar: bad gzip' }); + await expect(extractTarballBuffer(Buffer.from('x'), '/tmp/dest', { spawnFn })) + .rejects.toThrow(/tar exited 1/); + }); + + it('rejects when tar cannot be spawned', async () => { + const spawnFn = makeFakeSpawn({ errorOnSpawn: 'ENOENT' }); + await expect(extractTarballBuffer(Buffer.from('x'), '/tmp/dest', { spawnFn })) + .rejects.toThrow(/ENOENT/); + }); +}); + +describe('fetchAndExtractTarball', () => { + it('requests the tarball and pipes it through tar', async () => { + const oct = mockOctokit({ tarball: Buffer.from('hello-tar') }); + const spawnFn = makeFakeSpawn({ exitCode: 0 }); + await fetchAndExtractTarball(oct, 'o', 'r', 'sha123', '/tmp/dest', { spawnFn }); + expect(oct.request).toHaveBeenCalledWith( + 'GET /repos/{owner}/{repo}/tarball/{ref}', + { owner: 'o', repo: 'r', ref: 'sha123' } + ); + expect(spawnFn).toHaveBeenCalledTimes(1); + }); + + it('handles ArrayBuffer responses (Octokit binary default)', async () => { + const buf = new ArrayBuffer(8); + new Uint8Array(buf).set([1, 2, 3, 4, 5, 6, 7, 8]); + const oct = mockOctokit({ tarball: buf }); + const spawnFn = makeFakeSpawn({ exitCode: 0 }); + await expect( + fetchAndExtractTarball(oct, 'o', 'r', 'sha123', '/tmp/dest', { spawnFn }) + ).resolves.toBeUndefined(); + }); +}); diff --git a/config.yml b/config.yml index 43937cf..d201e50 100644 --- a/config.yml +++ b/config.yml @@ -106,6 +106,16 @@ ai_review: timeout: 300000 allow_remote_endpoint: false +# Mechanical oracle: when the target repo ships rivet.yaml and the rivet +# binary is installed, run `rivet validate` and `rivet impact --since=` +# against a fresh tarball of the PR head. Findings from the oracle are +# mechanically validated and bypass the model entirely. Errors from the +# oracle promote the verdict to request_changes. +rivet_oracle: + enabled: true + binary_path: "data/rivet/rivet" + timeout_ms: 60000 + scheduler: interval_minutes: 5 max_tasks_per_tick: 5 diff --git a/src/ai-review-prompt.js b/src/ai-review-prompt.js index f2a9a7d..9963ba5 100644 --- a/src/ai-review-prompt.js +++ b/src/ai-review-prompt.js @@ -155,9 +155,19 @@ export function filterFindings(findings, diffText) { /** * Compute the verdict deterministically from validated findings. * The model's `verdict` is advisory; this function decides. + * + * Promotion rules: + * - any oracle finding with severity 'error' → request_changes + * (these are evidence-grade: broken cross-refs, schema violations, + * missing required fields — the oracle has already said no) + * - otherwise any finding → comment + * - no findings → approve */ export function computeVerdict(findings) { if (!Array.isArray(findings) || findings.length === 0) return 'approve'; + if (findings.some((f) => f && f.severity === 'error' && typeof f.source === 'string' && f.source.startsWith('oracle:'))) { + return 'request_changes'; + } return 'comment'; } @@ -198,11 +208,23 @@ export function renderReviewMarkdown(parsed, prNumber, headSha, meta, opts = {}) lines.push(`**Findings (${findings.length}):**`); lines.push(''); findings.forEach((f, i) => { - lines.push(`${i + 1}. \`${f.file}:${f.line}\``); - lines.push(' ```'); - lines.push(` ${f.quoted_line}`); - lines.push(' ```'); - lines.push(` ${f.claim}`); + if (typeof f.source === 'string' && f.source.startsWith('oracle:')) { + // Oracle finding: artifact-anchored, mechanically validated. + const sevBadge = + f.severity === 'error' ? '🔴' + : f.severity === 'warning' ? '🟡' + : 'â„šī¸'; + const oracleName = f.source.replace('oracle:', ''); + lines.push(`${i + 1}. ${sevBadge} \`${f.artifact_id || ''}\` _(${oracleName})_`); + lines.push(` ${f.claim}`); + } else { + // Model finding: file:line-anchored, post-validated. + lines.push(`${i + 1}. \`${f.file}:${f.line}\``); + lines.push(' ```'); + lines.push(` ${f.quoted_line}`); + lines.push(' ```'); + lines.push(` ${f.claim}`); + } lines.push(''); }); } diff --git a/src/ai-review.js b/src/ai-review.js index bf18e9c..9e298a9 100644 --- a/src/ai-review.js +++ b/src/ai-review.js @@ -10,6 +10,8 @@ import { computeVerdict, renderReviewMarkdown } from './ai-review-prompt.js'; +import { runRivetOracle } from './rivet-oracle.js'; +import { hasRivetYaml, withTempRepoCheckout } from './rivet-fetch.js'; const _reviewTimestamps = new Map(); @@ -371,6 +373,43 @@ async function reviewPullRequest(octokit, owner, repo, prNumber) { { owner, repo, pull_number: prNumber } ); + // Mechanical oracle: only runs for repos that ship rivet.yaml AND have + // the binary configured. Failures are non-fatal — the model path still + // runs. + let oracleFindings = []; + let oracleSummary = null; + const rivetCfg = config.rivet_oracle; + if (rivetCfg?.enabled && rivetCfg?.binary_path) { + const headSha = prData.data.head?.sha; + const baseSha = prData.data.base?.sha; + try { + if (headSha && (await hasRivetYaml(octokit, owner, repo, headSha))) { + oracleSummary = await withTempRepoCheckout( + octokit, + owner, + repo, + headSha, + (repoPath) => runRivetOracle(rivetCfg.binary_path, repoPath, { + baseRef: baseSha, + timeout: rivetCfg.timeout_ms || 60000 + }) + ); + if (oracleSummary?.findings) { + oracleFindings = oracleSummary.findings; + } + getLogger().info( + { pr: prNumber, oracleFindings: oracleFindings.length }, + 'Rivet oracle complete' + ); + } + } catch (err) { + getLogger().warn( + { pr: prNumber, err: err.message }, + 'Rivet oracle failed — continuing with model-only review' + ); + } + } + // Strict-JSON contract by default; legacy freeform if user explicitly // overrides system_prompt in config. const systemPrompt = aiConfig.system_prompt || STRICT_SYSTEM_PROMPT; @@ -412,27 +451,49 @@ async function reviewPullRequest(octokit, owner, repo, prNumber) { if (useStrictMode) { const parsed = tryParseReview(aiResponse); - if (!parsed.ok) { + let modelFindings = []; + let modelSummary = null; + + if (parsed.ok) { + modelFindings = filterFindings(parsed.data.findings, diffText); + modelSummary = parsed.data.summary; + const droppedCount = parsed.data.findings.length - modelFindings.length; + if (droppedCount > 0) { + getLogger().info( + { pr: prNumber, droppedCount, kept: modelFindings.length }, + 'Filtered out hedging/unquoted findings from model output' + ); + } + } else { getLogger().warn( { pr: prNumber, error: parsed.error, sample: aiResponse.slice(0, 200) }, - 'AI review JSON parse failed — skipping post (silence is better than slop)' + 'AI review JSON parse failed' ); - return { success: false, error: `parse: ${parsed.error}`, skipped: true }; } - const filtered = filterFindings(parsed.data.findings, diffText); - const droppedCount = parsed.data.findings.length - filtered.length; - if (droppedCount > 0) { + // Oracle findings come first — they're mechanically validated, not + // subject to the slop filter, and most readable when grouped. + const allFindings = [...oracleFindings, ...modelFindings]; + + // If model failed AND oracle had nothing → skip post entirely. + if (!parsed.ok && oracleFindings.length === 0) { getLogger().info( - { pr: prNumber, droppedCount, kept: filtered.length }, - 'Filtered out hedging/unquoted findings' + { pr: prNumber }, + 'AI review skipped: model parse failed and no oracle findings' ); + return { success: false, error: `parse: ${parsed.error}`, skipped: true }; } - const renderInput = { ...parsed.data, findings: filtered }; + // Compose summary: prefer the model's, fall back to oracle-derived. + const summary = modelSummary || + (oracleFindings.length > 0 + ? `${oracleFindings.length} mechanical finding(s) from rivet oracle.` + : 'No findings.'); + + const renderInput = { summary, findings: allFindings }; comment = renderReviewMarkdown(renderInput, prNumber, headSha, meta); - assessment = computeVerdict(filtered); - findingsCount = filtered.length; + assessment = computeVerdict(allFindings); + findingsCount = allFindings.length; if (comment === null) { getLogger().info( diff --git a/src/rivet-fetch.js b/src/rivet-fetch.js new file mode 100644 index 0000000..6a311e5 --- /dev/null +++ b/src/rivet-fetch.js @@ -0,0 +1,107 @@ +/** + * Fetch a PR's working tree as a tarball and extract it locally so the rivet + * CLI can run against a real on-disk repo. Used only for repos that ship + * `rivet.yaml` — for everything else this is skipped before the tarball is + * even requested. + * + * Tarballs are 50–100 MB on the rivet repo; the cost is real but bounded + * (one fetch per AI review, only for instrumented repos). A future + * optimisation could fetch only `rivet.yaml`, `artifacts/`, `schemas/`, + * `.rivet/` via the Contents API, but that requires recursive traversal — + * the tarball is the simpler primitive. + */ + +import { spawn } from 'node:child_process'; +import fs from 'node:fs'; +import path from 'node:path'; +import os from 'node:os'; + +/** + * Cheap pre-check: is `rivet.yaml` at the root of this ref? Avoids the + * tarball download entirely for the ~99% of pulseengine repos that don't + * use rivet. + */ +export async function hasRivetYaml(octokit, owner, repo, ref) { + try { + await octokit.request('GET /repos/{owner}/{repo}/contents/{path}', { + owner, + repo, + path: 'rivet.yaml', + ref + }); + return true; + } catch (err) { + if (err.status === 404) return false; + // Other errors (auth, rate limit) — treat as "skip oracle" to fail safely. + return false; + } +} + +/** + * Pipe a Buffer (the tarball bytes) through `tar -xz` into destDir. Strips + * the leading "{owner}-{repo}-{sha}/" component so the resulting tree mirrors + * the repo root. + */ +export function extractTarballBuffer(buffer, destDir, opts = {}) { + const { spawnFn = spawn } = opts; + return new Promise((resolve, reject) => { + const tar = spawnFn('tar', ['-xz', '--strip-components=1', '-C', destDir], { + stdio: ['pipe', 'pipe', 'pipe'] + }); + let stderr = ''; + tar.stderr.on('data', (chunk) => { stderr += chunk.toString(); }); + tar.on('error', reject); + tar.on('exit', (code) => { + if (code === 0) resolve(); + else reject(new Error(`tar exited ${code}: ${stderr.trim()}`)); + }); + tar.stdin.on('error', reject); + tar.stdin.end(buffer); + }); +} + +/** + * Fetch the GitHub tarball for a ref and extract it into destDir. + * + * @param {object} octokit + * @param {string} owner + * @param {string} repo + * @param {string} ref + * @param {string} destDir - already-created directory + * @param {object} [opts] + * @param {Function} [opts.spawnFn] - injected for tests + */ +export async function fetchAndExtractTarball(octokit, owner, repo, ref, destDir, opts = {}) { + const response = await octokit.request('GET /repos/{owner}/{repo}/tarball/{ref}', { + owner, + repo, + ref + }); + const data = response.data; + const buf = Buffer.isBuffer(data) + ? data + : data instanceof ArrayBuffer + ? Buffer.from(data) + : Buffer.from(data); // octokit usually returns ArrayBuffer for binary + await extractTarballBuffer(buf, destDir, opts); +} + +/** + * Run `fn(repoPath)` against a freshly-extracted PR tree, then clean up. + * Caller never sees the tempdir; cleanup happens even on error. + */ +export async function withTempRepoCheckout(octokit, owner, repo, ref, fn, opts = {}) { + const dir = await fs.promises.mkdtemp( + path.join(os.tmpdir(), `temper-rivet-${owner}-${repo}-`.replace(/[^a-zA-Z0-9-_]/g, '_')) + ); + try { + await fetchAndExtractTarball(octokit, owner, repo, ref, dir, opts); + return await fn(dir); + } finally { + try { + await fs.promises.rm(dir, { recursive: true, force: true }); + } catch { + // Cleanup failure is non-fatal — tmpdirs get reaped by the OS eventually. + } + } +} diff --git a/src/schema.js b/src/schema.js index 880d532..c2a1d2b 100644 --- a/src/schema.js +++ b/src/schema.js @@ -171,6 +171,24 @@ export function validateConfig(config) { } } + if (config.rivet_oracle !== undefined) { + const ro = config.rivet_oracle; + if (typeof ro !== 'object' || ro === null) { + errors.push('rivet_oracle must be an object'); + } else { + if (ro.enabled !== undefined && typeof ro.enabled !== 'boolean') { + errors.push('rivet_oracle.enabled must be a boolean'); + } + if (ro.binary_path !== undefined && typeof ro.binary_path !== 'string') { + errors.push('rivet_oracle.binary_path must be a string'); + } + if (ro.timeout_ms !== undefined && + (typeof ro.timeout_ms !== 'number' || ro.timeout_ms < 1000)) { + errors.push('rivet_oracle.timeout_ms must be a number >= 1000'); + } + } + } + if (config.ai_review?.enabled) { if (config.ai_review.endpoint !== undefined && typeof config.ai_review.endpoint !== 'string') { errors.push('ai_review.endpoint must be a string');