diff --git a/__tests__/integration/ai-review.test.js b/__tests__/integration/ai-review.test.js
index 47608a7..c792031 100644
--- a/__tests__/integration/ai-review.test.js
+++ b/__tests__/integration/ai-review.test.js
@@ -629,6 +629,25 @@ describe('ai-review', () => {
       const callArgs = global.fetch.mock.calls[0];
       expect(callArgs[1].signal).toBeInstanceOf(AbortSignal);
     });
+
+    it('passes responseFormat through to the wire payload as response_format', async () => {
+      mockFetchSuccess('{"verdict":"approve","summary":"x","findings":[]}');
+      const fmt = { type: 'json_schema', json_schema: { name: 'X', strict: true, schema: { type: 'object' } } };
+
+      await callLocalAI(mockEndpoint, mockModel, mockSystemPrompt, mockUserPrompt, {
+        responseFormat: fmt
+      });
+
+      const body = JSON.parse(global.fetch.mock.calls[0][1].body);
+      expect(body.response_format).toEqual(fmt);
+    });
+
+    it('omits response_format when not requested (legacy compat)', async () => {
+      mockFetchSuccess('plain prose');
+      await callLocalAI(mockEndpoint, mockModel, mockSystemPrompt, mockUserPrompt);
+      const body = JSON.parse(global.fetch.mock.calls[0][1].body);
+      expect(body.response_format).toBeUndefined();
+    });
   });
 
   // =========================================================================
diff --git a/__tests__/unit/ai-review-prompt.test.js b/__tests__/unit/ai-review-prompt.test.js
index 4502880..727c338 100644
--- a/__tests__/unit/ai-review-prompt.test.js
+++ b/__tests__/unit/ai-review-prompt.test.js
@@ -1,5 +1,7 @@
 import {
   STRICT_SYSTEM_PROMPT,
+  REVIEW_JSON_SCHEMA,
+  buildResponseFormat,
   isSlop,
   tryParseReview,
   filterFindings,
@@ -16,6 +18,40 @@ describe('STRICT_SYSTEM_PROMPT', () => {
   });
 });
 
+describe('REVIEW_JSON_SCHEMA', () => {
+  it('declares the verdict enum exactly matching the parser', () => {
+    expect(REVIEW_JSON_SCHEMA.properties.verdict.enum).toEqual([
+      'approve', 'comment', 'request_changes'
+    ]);
+  });
+
+  it('marks verdict, summary, findings as required', () => {
+    expect([...REVIEW_JSON_SCHEMA.required].sort()).toEqual(
+      ['findings', 'summary', 'verdict']
+    );
+  });
+
+  it('rejects extra properties (additionalProperties: false)', () => {
+    expect(REVIEW_JSON_SCHEMA.additionalProperties).toBe(false);
+    expect(REVIEW_JSON_SCHEMA.properties.findings.items.additionalProperties).toBe(false);
+  });
+
+  it('every finding field used by the parser is declared in the schema', () => {
+    const fields = Object.keys(REVIEW_JSON_SCHEMA.properties.findings.items.properties).sort();
+    expect(fields).toEqual(['claim', 'file', 'line', 'quoted_line']);
+  });
+});
+
+describe('buildResponseFormat', () => {
+  it('returns OpenAI-compat json_schema payload', () => {
+    const rf = buildResponseFormat();
+    expect(rf.type).toBe('json_schema');
+    expect(rf.json_schema.name).toBe('PullRequestReview');
+    expect(rf.json_schema.strict).toBe(true);
+    expect(rf.json_schema.schema).toBe(REVIEW_JSON_SCHEMA);
+  });
+});
+
 describe('isSlop', () => {
   it.each([
     'consider edge cases here',
diff --git a/src/ai-review-prompt.js b/src/ai-review-prompt.js
index 9963ba5..b5d5807 100644
--- a/src/ai-review-prompt.js
+++ b/src/ai-review-prompt.js
@@ -50,6 +50,65 @@ Example with a finding:
 Example with no findings:
 {"verdict":"approve","summary":"Pure docs change in README.","findings":[]}`;
+
+/**
+ * JSON Schema describing the strict review payload. Used as a wire-format
+ * contract: passed to Ollama via OpenAI-compat `response_format` so the
+ * decoder is grammar-constrained at generation time. The model literally
+ * cannot emit a token sequence that violates the schema.
+ *
+ * Belt-and-suspenders relative to the prompt rules above: the prompt
+ * teaches the *intent* (banned hedging, quote-or-die etc.) while the schema
+ * enforces the *shape*. Today's wrong-shape failures (model emitting
+ * `{"commit_message":"..."}` instead of the review object) become
+ * impossible.
+ */
+export const REVIEW_JSON_SCHEMA = {
+  type: 'object',
+  additionalProperties: false,
+  properties: {
+    verdict: {
+      type: 'string',
+      enum: ['approve', 'comment', 'request_changes']
+    },
+    summary: {
+      type: 'string',
+      maxLength: 400
+    },
+    findings: {
+      type: 'array',
+      items: {
+        type: 'object',
+        additionalProperties: false,
+        properties: {
+          file: { type: 'string', minLength: 1 },
+          line: { type: 'integer', minimum: 1 },
+          quoted_line: { type: 'string', minLength: 1 },
+          claim: { type: 'string', minLength: 5, maxLength: 400 }
+        },
+        required: ['file', 'line', 'quoted_line', 'claim']
+      }
+    }
+  },
+  required: ['verdict', 'summary', 'findings']
+};
+
+/**
+ * Build the OpenAI-compat `response_format` payload for grammar-constrained
+ * JSON output. Ollama (≥0.5) honours this on /v1/chat/completions; older
+ * builds may fall back to plain JSON mode or ignore it entirely (in which
+ * case the parser still catches non-conforming output).
+ */
+export function buildResponseFormat() {
+  return {
+    type: 'json_schema',
+    json_schema: {
+      name: 'PullRequestReview',
+      strict: true,
+      schema: REVIEW_JSON_SCHEMA
+    }
+  };
+}
 
 const HEDGE_OR_SLOP_PATTERNS = [
   /\bmight\b/i,
   /\bcould\b/i,
diff --git a/src/ai-review.js b/src/ai-review.js
index f4ccd0f..9f2d193 100644
--- a/src/ai-review.js
+++ b/src/ai-review.js
@@ -8,7 +8,8 @@ import {
   tryParseReview,
   filterFindings,
   computeVerdict,
-  renderReviewMarkdown
+  renderReviewMarkdown,
+  buildResponseFormat
 } from './ai-review-prompt.js';
 import { runRivetOracle } from './rivet-oracle.js';
 import { hasRivetYaml, withTempRepoCheckout } from './rivet-fetch.js';
@@ -223,25 +224,38 @@ function buildReviewPrompt(prData, diff, files, maxDiffSize) {
 }
 
 async function callLocalAI(endpoint, model, systemPrompt, userPrompt, options = {}) {
-  const { maxTokens = 2000, temperature = 0.3, timeout = 300000 } = options;
+  const {
+    maxTokens = 2000,
+    temperature = 0.3,
+    timeout = 300000,
+    responseFormat = undefined
+  } = options;
   const controller = new AbortController();
   const timer = setTimeout(() => controller.abort(), timeout);
 
+  const payload = {
+    model,
+    messages: [
+      { role: 'system', content: systemPrompt },
+      { role: 'user', content: userPrompt }
+    ],
+    max_tokens: maxTokens,
+    temperature,
+    stream: false
+  };
+  if (responseFormat) {
+    // OpenAI-compat: forces grammar-constrained output. Ollama honours
+    // this on /v1/chat/completions in recent builds. Older endpoints that
+    // ignore it fall through to the strict-prompt + parser path.
+    payload.response_format = responseFormat;
+  }
+
   try {
     const response = await fetch(endpoint, {
       method: 'POST',
       headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify({
-        model,
-        messages: [
-          { role: 'system', content: systemPrompt },
-          { role: 'user', content: userPrompt }
-        ],
-        max_tokens: maxTokens,
-        temperature,
-        stream: false
-      }),
+      body: JSON.stringify(payload),
       signal: controller.signal
     });
 
    if (!response.ok) {
@@ -438,7 +452,10 @@ async function reviewPullRequest(octokit, owner, repo, prNumber) {
       {
         maxTokens: aiConfig.max_tokens || 2000,
         temperature: aiConfig.temperature || 0.3,
-        timeout: aiConfig.timeout || 120000
+        timeout: aiConfig.timeout || 120000,
+        // Grammar-constrained output in strict mode only — legacy freeform
+        // path expects prose, would get rejected by JSON-schema decoding.
+        responseFormat: useStrictMode ? buildResponseFormat() : undefined
       }
     );