Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions __tests__/integration/ai-review.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,25 @@ describe('ai-review', () => {
const callArgs = global.fetch.mock.calls[0];
expect(callArgs[1].signal).toBeInstanceOf(AbortSignal);
});

it('passes responseFormat through to the wire payload as response_format', async () => {
  mockFetchSuccess('{"verdict":"approve","summary":"x","findings":[]}');
  // OpenAI-compat json_schema wrapper, as produced by buildResponseFormat().
  const wantedFormat = {
    type: 'json_schema',
    json_schema: { name: 'X', strict: true, schema: { type: 'object' } }
  };

  await callLocalAI(mockEndpoint, mockModel, mockSystemPrompt, mockUserPrompt, {
    responseFormat: wantedFormat
  });

  // The snake_case key is the wire contract; the camelCase option must map onto it.
  const [, requestInit] = global.fetch.mock.calls[0];
  const wirePayload = JSON.parse(requestInit.body);
  expect(wirePayload.response_format).toEqual(wantedFormat);
});

it('omits response_format when not requested (legacy compat)', async () => {
  mockFetchSuccess('plain prose');

  // No options bag at all — the legacy freeform path must not gain a
  // response_format key, or older endpoints would reject the request.
  await callLocalAI(mockEndpoint, mockModel, mockSystemPrompt, mockUserPrompt);

  const wirePayload = JSON.parse(global.fetch.mock.calls[0][1].body);
  expect(wirePayload.response_format).toBeUndefined();
});
});

// =========================================================================
Expand Down
36 changes: 36 additions & 0 deletions __tests__/unit/ai-review-prompt.test.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import {
STRICT_SYSTEM_PROMPT,
REVIEW_JSON_SCHEMA,
buildResponseFormat,
isSlop,
tryParseReview,
filterFindings,
Expand All @@ -16,6 +18,40 @@ describe('STRICT_SYSTEM_PROMPT', () => {
});
});

describe('REVIEW_JSON_SCHEMA', () => {
  it('declares the verdict enum exactly matching the parser', () => {
    expect(REVIEW_JSON_SCHEMA.properties.verdict.enum).toEqual([
      'approve', 'comment', 'request_changes'
    ]);
  });

  it('marks verdict, summary, findings as required', () => {
    // Sort a COPY: Array#sort mutates in place, and sorting the exported
    // schema's `required` array directly would mutate the shared constant
    // for every later test (buildResponseFormat embeds it by reference).
    expect([...REVIEW_JSON_SCHEMA.required].sort()).toEqual(
      ['findings', 'summary', 'verdict']
    );
  });

  it('rejects extra properties (additionalProperties: false)', () => {
    expect(REVIEW_JSON_SCHEMA.additionalProperties).toBe(false);
    expect(REVIEW_JSON_SCHEMA.properties.findings.items.additionalProperties).toBe(false);
  });

  it('every finding field used by the parser is declared in the schema', () => {
    // Object.keys already returns a fresh array, so sorting it is safe.
    const fields = Object.keys(REVIEW_JSON_SCHEMA.properties.findings.items.properties).sort();
    expect(fields).toEqual(['claim', 'file', 'line', 'quoted_line']);
  });
});

describe('buildResponseFormat', () => {
  it('returns OpenAI-compat json_schema payload', () => {
    const responseFormat = buildResponseFormat();

    expect(responseFormat.type).toBe('json_schema');

    const { json_schema: wrapper } = responseFormat;
    expect(wrapper.name).toBe('PullRequestReview');
    expect(wrapper.strict).toBe(true);
    // Identity, not deep equality: the wrapper must carry the schema
    // constant itself, not a diverging copy.
    expect(wrapper.schema).toBe(REVIEW_JSON_SCHEMA);
  });
});

describe('isSlop', () => {
it.each([
'consider edge cases here',
Expand Down
59 changes: 59 additions & 0 deletions src/ai-review-prompt.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,65 @@ Example with a finding:
Example with no findings:
{"verdict":"approve","summary":"Pure docs change in README.","findings":[]}`;

/**
 * JSON Schema describing the strict review payload. Used as a wire-format
 * contract: passed to Ollama via OpenAI-compat `response_format` so the
 * decoder is grammar-constrained at generation time. The model literally
 * cannot emit a token sequence that violates the schema.
 *
 * Belt-and-suspenders relative to the prompt rules above: the prompt
 * teaches the *intent* (banned hedging, quote-or-die etc.) while the schema
 * enforces the *shape*. Today's wrong-shape failures (model emitting
 * `{"commit_message":"..."}` instead of the review object) become
 * impossible.
 *
 * Frozen (shallowly) because buildResponseFormat() embeds this object by
 * reference into every outgoing payload — an accidental top-level write
 * anywhere would silently change the wire contract for all later requests.
 * NOTE(review): the freeze is shallow; nested objects/arrays remain mutable.
 */
export const REVIEW_JSON_SCHEMA = Object.freeze({
  type: 'object',
  additionalProperties: false,
  properties: {
    verdict: {
      // Must stay in lockstep with the verdict values the parser accepts.
      type: 'string',
      enum: ['approve', 'comment', 'request_changes']
    },
    summary: {
      type: 'string',
      maxLength: 400
    },
    findings: {
      type: 'array',
      items: {
        type: 'object',
        additionalProperties: false,
        properties: {
          file: { type: 'string', minLength: 1 },
          line: { type: 'integer', minimum: 1 },
          quoted_line: { type: 'string', minLength: 1 },
          claim: { type: 'string', minLength: 5, maxLength: 400 }
        },
        required: ['file', 'line', 'quoted_line', 'claim']
      }
    }
  },
  required: ['verdict', 'summary', 'findings']
});

/**
 * Build the OpenAI-compat `response_format` payload for grammar-constrained
 * JSON output. Ollama (≥0.5) honours this on /v1/chat/completions; older
 * builds may fall back to plain JSON mode or ignore it entirely (in which
 * case the parser still catches non-conforming output).
 *
 * @returns {{type: string, json_schema: object}} OpenAI-compat wrapper
 *   carrying REVIEW_JSON_SCHEMA by reference.
 */
export function buildResponseFormat() {
  const wrapper = {
    name: 'PullRequestReview',
    strict: true,
    schema: REVIEW_JSON_SCHEMA
  };
  return { type: 'json_schema', json_schema: wrapper };
}

const HEDGE_OR_SLOP_PATTERNS = [
/\bmight\b/i,
/\bcould\b/i,
Expand Down
43 changes: 30 additions & 13 deletions src/ai-review.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ import {
tryParseReview,
filterFindings,
computeVerdict,
renderReviewMarkdown
renderReviewMarkdown,
buildResponseFormat
} from './ai-review-prompt.js';
import { runRivetOracle } from './rivet-oracle.js';
import { hasRivetYaml, withTempRepoCheckout } from './rivet-fetch.js';
Expand Down Expand Up @@ -223,25 +224,38 @@ function buildReviewPrompt(prData, diff, files, maxDiffSize) {
}

async function callLocalAI(endpoint, model, systemPrompt, userPrompt, options = {}) {
const { maxTokens = 2000, temperature = 0.3, timeout = 300000 } = options;
const {
maxTokens = 2000,
temperature = 0.3,
timeout = 300000,
responseFormat = undefined
} = options;

const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), timeout);

const payload = {
model,
messages: [
{ role: 'system', content: systemPrompt },
{ role: 'user', content: userPrompt }
],
max_tokens: maxTokens,
temperature,
stream: false
};
if (responseFormat) {
// OpenAI-compat: forces grammar-constrained output. Ollama honours
// this on /v1/chat/completions in recent builds. Older endpoints that
// ignore it fall through to the strict-prompt + parser path.
payload.response_format = responseFormat;
}

try {
const response = await fetch(endpoint, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model,
messages: [
{ role: 'system', content: systemPrompt },
{ role: 'user', content: userPrompt }
],
max_tokens: maxTokens,
temperature,
stream: false
}),
body: JSON.stringify(payload),
signal: controller.signal
});

Expand Down Expand Up @@ -438,7 +452,10 @@ async function reviewPullRequest(octokit, owner, repo, prNumber) {
{
maxTokens: aiConfig.max_tokens || 2000,
temperature: aiConfig.temperature || 0.3,
timeout: aiConfig.timeout || 120000
timeout: aiConfig.timeout || 120000,
// Grammar-constrained output in strict mode only — legacy freeform
// path expects prose, would get rejected by JSON-schema decoding.
responseFormat: useStrictMode ? buildResponseFormat() : undefined
}
);

Expand Down
Loading