Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions __tests__/integration/ai-review.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,25 @@ describe('ai-review', () => {
const callArgs = global.fetch.mock.calls[0];
expect(callArgs[1].signal).toBeInstanceOf(AbortSignal);
});

it('passes responseFormat through to the wire payload as response_format', async () => {
  mockFetchSuccess('{"verdict":"approve","summary":"x","findings":[]}');
  // OpenAI-compat json_schema wrapper, as produced by buildResponseFormat().
  const wantedFormat = {
    type: 'json_schema',
    json_schema: { name: 'X', strict: true, schema: { type: 'object' } }
  };

  await callLocalAI(mockEndpoint, mockModel, mockSystemPrompt, mockUserPrompt, {
    responseFormat: wantedFormat
  });

  // The snake_case key is the wire contract; the camelCase option must map onto it.
  const [, requestInit] = global.fetch.mock.calls[0];
  const wirePayload = JSON.parse(requestInit.body);
  expect(wirePayload.response_format).toEqual(wantedFormat);
});

it('omits response_format when not requested (legacy compat)', async () => {
  mockFetchSuccess('plain prose');

  // No options bag at all — the legacy freeform path must not gain a
  // response_format key, or older endpoints would reject the request.
  await callLocalAI(mockEndpoint, mockModel, mockSystemPrompt, mockUserPrompt);

  const wirePayload = JSON.parse(global.fetch.mock.calls[0][1].body);
  expect(wirePayload.response_format).toBeUndefined();
});
});

// =========================================================================
Expand Down
36 changes: 36 additions & 0 deletions __tests__/unit/ai-review-prompt.test.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import {
STRICT_SYSTEM_PROMPT,
REVIEW_JSON_SCHEMA,
buildResponseFormat,
isSlop,
tryParseReview,
filterFindings,
Expand All @@ -16,6 +18,40 @@ describe('STRICT_SYSTEM_PROMPT', () => {
});
});

describe('REVIEW_JSON_SCHEMA', () => {
  it('declares the verdict enum exactly matching the parser', () => {
    expect(REVIEW_JSON_SCHEMA.properties.verdict.enum).toEqual([
      'approve', 'comment', 'request_changes'
    ]);
  });

  it('marks verdict, summary, findings as required', () => {
    // Sort a COPY: Array#sort mutates in place, and sorting the exported
    // schema's `required` array directly would mutate the shared constant
    // for every later test (buildResponseFormat embeds it by reference).
    expect([...REVIEW_JSON_SCHEMA.required].sort()).toEqual(
      ['findings', 'summary', 'verdict']
    );
  });

  it('rejects extra properties (additionalProperties: false)', () => {
    expect(REVIEW_JSON_SCHEMA.additionalProperties).toBe(false);
    expect(REVIEW_JSON_SCHEMA.properties.findings.items.additionalProperties).toBe(false);
  });

  it('every finding field used by the parser is declared in the schema', () => {
    // Object.keys already returns a fresh array, so sorting it is safe.
    const fields = Object.keys(REVIEW_JSON_SCHEMA.properties.findings.items.properties).sort();
    expect(fields).toEqual(['claim', 'file', 'line', 'quoted_line']);
  });
});

describe('buildResponseFormat', () => {
  it('returns OpenAI-compat json_schema payload', () => {
    const responseFormat = buildResponseFormat();

    expect(responseFormat.type).toBe('json_schema');

    const { json_schema: wrapper } = responseFormat;
    expect(wrapper.name).toBe('PullRequestReview');
    expect(wrapper.strict).toBe(true);
    // Identity, not deep equality: the wrapper must carry the schema
    // constant itself, not a diverging copy.
    expect(wrapper.schema).toBe(REVIEW_JSON_SCHEMA);
  });
});

describe('isSlop', () => {
it.each([
'consider edge cases here',
Expand Down
59 changes: 59 additions & 0 deletions src/ai-review-prompt.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,65 @@ Example with a finding:
Example with no findings:
{"verdict":"approve","summary":"Pure docs change in README.","findings":[]}`;

/**
 * JSON Schema describing the strict review payload. Used as a wire-format
 * contract: passed to Ollama via OpenAI-compat `response_format` so the
 * decoder is grammar-constrained at generation time. The model literally
 * cannot emit a token sequence that violates the schema.
 *
 * Belt-and-suspenders relative to the prompt rules above: the prompt
 * teaches the *intent* (banned hedging, quote-or-die etc.) while the schema
 * enforces the *shape*. Today's wrong-shape failures (model emitting
 * `{"commit_message":"..."}` instead of the review object) become
 * impossible.
 *
 * Frozen (shallowly) because buildResponseFormat() embeds this object by
 * reference into every outgoing payload — an accidental top-level write
 * anywhere would silently change the wire contract for all later requests.
 * NOTE(review): the freeze is shallow; nested objects/arrays remain mutable.
 */
export const REVIEW_JSON_SCHEMA = Object.freeze({
  type: 'object',
  additionalProperties: false,
  properties: {
    verdict: {
      // Must stay in lockstep with the verdict values the parser accepts.
      type: 'string',
      enum: ['approve', 'comment', 'request_changes']
    },
    summary: {
      type: 'string',
      maxLength: 400
    },
    findings: {
      type: 'array',
      items: {
        type: 'object',
        additionalProperties: false,
        properties: {
          file: { type: 'string', minLength: 1 },
          line: { type: 'integer', minimum: 1 },
          quoted_line: { type: 'string', minLength: 1 },
          claim: { type: 'string', minLength: 5, maxLength: 400 }
        },
        required: ['file', 'line', 'quoted_line', 'claim']
      }
    }
  },
  required: ['verdict', 'summary', 'findings']
});

/**
 * Build the OpenAI-compat `response_format` payload for grammar-constrained
 * JSON output. Ollama (≥0.5) honours this on /v1/chat/completions; older
 * builds may fall back to plain JSON mode or ignore it entirely (in which
 * case the parser still catches non-conforming output).
 *
 * @returns {{type: string, json_schema: object}} OpenAI-compat wrapper
 *   carrying REVIEW_JSON_SCHEMA by reference.
 */
export function buildResponseFormat() {
  const wrapper = {
    name: 'PullRequestReview',
    strict: true,
    schema: REVIEW_JSON_SCHEMA
  };
  return { type: 'json_schema', json_schema: wrapper };
}

const HEDGE_OR_SLOP_PATTERNS = [
/\bmight\b/i,
/\bcould\b/i,
Expand Down
43 changes: 30 additions & 13 deletions src/ai-review.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ import {
tryParseReview,
filterFindings,
computeVerdict,
renderReviewMarkdown
renderReviewMarkdown,
buildResponseFormat
} from './ai-review-prompt.js';
import { runRivetOracle } from './rivet-oracle.js';
import { hasRivetYaml, withTempRepoCheckout } from './rivet-fetch.js';
Expand Down Expand Up @@ -223,25 +224,38 @@ function buildReviewPrompt(prData, diff, files, maxDiffSize) {
}

async function callLocalAI(endpoint, model, systemPrompt, userPrompt, options = {}) {
const { maxTokens = 2000, temperature = 0.3, timeout = 300000 } = options;
const {
maxTokens = 2000,
temperature = 0.3,
timeout = 300000,
responseFormat = undefined
} = options;

const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), timeout);

const payload = {
model,
messages: [
{ role: 'system', content: systemPrompt },
{ role: 'user', content: userPrompt }
],
max_tokens: maxTokens,
temperature,
stream: false
};
if (responseFormat) {
// OpenAI-compat: forces grammar-constrained output. Ollama honours
// this on /v1/chat/completions in recent builds. Older endpoints that
// ignore it fall through to the strict-prompt + parser path.
payload.response_format = responseFormat;
}

try {
const response = await fetch(endpoint, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model,
messages: [
{ role: 'system', content: systemPrompt },
{ role: 'user', content: userPrompt }
],
max_tokens: maxTokens,
temperature,
stream: false
}),
body: JSON.stringify(payload),
signal: controller.signal
});

Expand Down Expand Up @@ -438,7 +452,10 @@ async function reviewPullRequest(octokit, owner, repo, prNumber) {
{
maxTokens: aiConfig.max_tokens || 2000,
temperature: aiConfig.temperature || 0.3,
timeout: aiConfig.timeout || 120000
timeout: aiConfig.timeout || 120000,
// Grammar-constrained output in strict mode only — legacy freeform
// path expects prose, would get rejected by JSON-schema decoding.
responseFormat: useStrictMode ? buildResponseFormat() : undefined
}
);

Expand Down
Loading