Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added — AI review hardening (PR-A)
- **`src/ai-review-prompt.js`**: strict-JSON contract + parser + slop filter +
quote-or-die + verdict-from-findings. New default for AI review on PRs.
- System prompt forbids hedging words (`might`, `could`, `should`, etc.)
and slop phrases ("ensure proper error handling", "consider edge cases",
etc.) in finding claims.
- `tryParseReview()` enforces a strict JSON shape — unknown verdict, null
findings, missing fields all reject. No retries: silence is preferred to
slop.
- `filterFindings()` drops claims whose `quoted_line` doesn't appear
verbatim in the diff (kills paraphrase/hallucination) or whose `claim`
contains hedging.
- `computeVerdict()` is deterministic: empty findings → `approve`; any
finding → `comment`. The model's verdict field is advisory only.
- When verdict is `approve` and findings are empty, the bot does NOT post
— silence is better than a noisy "looks good" comment.
- Existing `ai_review.system_prompt` config still works as a back-door to the
legacy freeform prompt for users who explicitly override it.

### Added
- **Repository Rulesets** support (`src/rulesets.js`). Branch protection now uses
the modern Rulesets API (`POST /repos/{owner}/{repo}/rulesets`) which targets
Expand Down
14 changes: 11 additions & 3 deletions __tests__/integration/ai-review.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ function makeAiConfig(overrides = {}) {
max_tokens: 2000,
temperature: 0.3,
timeout: 120000,
// Legacy freeform mode — these tests pre-date the strict-JSON contract
// and use plain-text mock responses. The new strict-mode path has its
// own dedicated tests in __tests__/unit/ai-review-prompt.test.js.
system_prompt: 'You are a thorough code reviewer. Analyze the PR diff.',
...overrides
};
}
Expand Down Expand Up @@ -802,7 +806,7 @@ describe('ai-review', () => {
expect(timestamp).toBeLessThanOrEqual(afterTime);
});

it('uses default system prompt when none configured', async () => {
it('uses the strict-JSON system prompt when none configured', async () => {
_setConfigForTesting({
ai_review: makeAiConfig({ system_prompt: undefined })
});
Expand All @@ -812,11 +816,15 @@ describe('ai-review', () => {
.mockResolvedValueOnce({ data: 'some diff' })
.mockResolvedValueOnce({ data: [] });

mockFetchSuccess('review');
// Strict mode parses JSON; freeform 'review' would fail to parse, so we
// give it a valid empty review.
mockFetchSuccess('{"verdict":"approve","summary":"x","findings":[]}');
await reviewPullRequest(octokit, 'owner', 'repo', 1);

const fetchBody = JSON.parse(global.fetch.mock.calls[0][1].body);
expect(fetchBody.messages[0].content).toContain('thorough code reviewer');
// Default prompt is the strict-JSON contract, not the legacy freeform one.
expect(fetchBody.messages[0].content).toContain('STRICT JSON');
expect(fetchBody.messages[0].content).toContain('NEVER refuse');
});

it('uses custom system prompt when configured', async () => {
Expand Down
257 changes: 257 additions & 0 deletions __tests__/unit/ai-review-prompt.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
import {
STRICT_SYSTEM_PROMPT,
isSlop,
tryParseReview,
filterFindings,
computeVerdict,
renderReviewMarkdown
} from '../../src/ai-review-prompt.js';

describe('STRICT_SYSTEM_PROMPT', () => {
  it('includes the banned-word rule and example outputs', () => {
    // Each pattern is a clause the strict prompt must carry; checked in order.
    const requiredPatterns = [
      /FORBIDDEN words/,
      /"verdict"\s*:\s*"approve"/,
      /quoted_line/,
      /NEVER refuse/
    ];
    for (const pattern of requiredPatterns) {
      expect(STRICT_SYSTEM_PROMPT).toMatch(pattern);
    }
  });
});

describe('isSlop', () => {
  // Hedging / boilerplate claims that the filter must reject.
  const sloppyClaims = [
    'consider edge cases here',
    'developer should validate input',
    'this might lead to issues',
    'could possibly fail in some cases',
    'ensure proper error handling',
    'in general, this is fine',
    'more context is needed',
    'unable to determine without seeing other files',
    'you should consider refactoring',
    'enhance validation around the boundary',
  ];

  // Specific, falsifiable claims that must survive the filter.
  const concreteClaims = [
    'No new test asserts that a 4xx status survives wrap_full_page.',
    'config.yml:165 sets pr_labels including "dependencies" but the PR adds no actual dep update.',
    'src/foo.js:42 dereferences userId before validating req.body is an object.',
    'The mutex is acquired but never released on the early-return path at line 88.',
  ];

  it.each(sloppyClaims)('flags hedging/slop: %s', (text) => {
    expect(isSlop(text)).toBe(true);
  });

  it.each(concreteClaims)('passes a concrete claim: %s', (text) => {
    expect(isSlop(text)).toBe(false);
  });

  it('treats non-string as slop', () => {
    for (const value of [undefined, null, 42]) {
      expect(isSlop(value)).toBe(true);
    }
  });
});

describe('tryParseReview', () => {
  // Canonical valid response; the happy-path cases all derive from it.
  const validReview = {
    verdict: 'comment',
    summary: 'Adds a thing.',
    findings: [
      { file: 'src/a.js', line: 10, quoted_line: '+const x = 1;', claim: 'no test for x' }
    ]
  };
  const happy = JSON.stringify(validReview);

  // Shorthand for the many shapes that must be rejected outright.
  const expectRejected = (raw) => {
    expect(tryParseReview(raw).ok).toBe(false);
  };

  it('parses a clean JSON response', () => {
    const result = tryParseReview(happy);
    expect(result.ok).toBe(true);
    expect(result.data.verdict).toBe('comment');
    expect(result.data.findings).toHaveLength(1);
  });

  it('strips markdown code fences', () => {
    const fenced = ['```json', happy, '```'].join('\n');
    const result = tryParseReview(fenced);
    expect(result.ok).toBe(true);
    expect(result.data.verdict).toBe('comment');
  });

  it('extracts a JSON object embedded in prose', () => {
    const wrapped = `Here you go:\n\n${happy}\n\nThanks!`;
    expect(tryParseReview(wrapped).ok).toBe(true);
  });

  it('rejects empty responses', () => {
    expectRejected('');
    expectRejected(' ');
  });

  it('rejects invalid JSON with no recoverable object', () => {
    expectRejected('not json at all');
  });

  it('rejects unknown verdict values', () => {
    const bad = JSON.stringify({ verdict: 'maybe', summary: 'x', findings: [] });
    const result = tryParseReview(bad);
    expect(result.ok).toBe(false);
    expect(result.error).toMatch(/invalid verdict/);
  });

  it('rejects null findings (would-be escape hatch)', () => {
    const bad = JSON.stringify({ verdict: 'approve', summary: 'x', findings: null });
    const result = tryParseReview(bad);
    expect(result.ok).toBe(false);
    expect(result.error).toMatch(/findings must be an array/);
  });

  it('rejects findings with missing fields', () => {
    // Finding lacks both `line` and `quoted_line`.
    expectRejected(JSON.stringify({
      verdict: 'comment',
      summary: 'x',
      findings: [{ file: 'a', claim: 'b' }]
    }));
  });

  it('rejects findings with wrong-type fields', () => {
    // `line` must be a number, not the string 'ten'.
    expectRejected(JSON.stringify({
      verdict: 'comment',
      summary: 'x',
      findings: [{ file: 'a', line: 'ten', quoted_line: 'x', claim: 'y' }]
    }));
  });

  it('rejects when verdict is missing', () => {
    expectRejected(JSON.stringify({ summary: 'x', findings: [] }));
  });

  it('rejects an array (not object) as top-level', () => {
    expectRejected('[]');
  });
});

describe('filterFindings (quote-or-die + slop drop)', () => {
  const diff = `
diff --git a/src/foo.js b/src/foo.js
+++ b/src/foo.js
@@ -1,3 +1,5 @@
+const id = req.body.userId;
+console.log("hi");
`;

  // Builds a finding that passes both checks, with optional field overrides.
  const makeFinding = (overrides = {}) => ({
    file: 'src/foo.js',
    line: 1,
    quoted_line: '+const id = req.body.userId;',
    claim: 'src/foo.js:1 trusts req.body.userId without validating it is a string.',
    ...overrides
  });

  it('keeps findings whose quoted_line appears in diff and claim is concrete', () => {
    expect(filterFindings([makeFinding()], diff)).toHaveLength(1);
  });

  it('drops findings whose quoted_line is not in the diff (paraphrase / hallucination)', () => {
    // quoted_line is close to the real added line, but not verbatim.
    const paraphrased = makeFinding({
      line: 99,
      quoted_line: '+const userId = request.body.user_id;',
      claim: 'unvalidated input'
    });
    expect(filterFindings([paraphrased], diff)).toHaveLength(0);
  });

  it('drops findings with hedging language', () => {
    const hedged = makeFinding({
      claim: 'this might be an issue, consider validating it'
    });
    expect(filterFindings([hedged], diff)).toHaveLength(0);
  });

  it('drops findings with classic slop phrases', () => {
    const sloppy = makeFinding({
      claim: 'developer should ensure proper error handling around this line'
    });
    expect(filterFindings([sloppy], diff)).toHaveLength(0);
  });

  it('returns [] for non-array input', () => {
    expect(filterFindings(null, diff)).toEqual([]);
    expect(filterFindings(undefined, diff)).toEqual([]);
  });

  it('skips quote check when diff is empty (test-time helper)', () => {
    const findings = [{
      file: 'a', line: 1, quoted_line: 'whatever', claim: 'no slop here at line 1'
    }];
    expect(filterFindings(findings, '')).toHaveLength(1);
  });
});

describe('computeVerdict (deterministic, ignores model)', () => {
  it('returns approve for empty findings', () => {
    // [], undefined, and null all count as "no findings".
    for (const emptyish of [[], undefined, null]) {
      expect(computeVerdict(emptyish)).toBe('approve');
    }
  });

  it('returns comment when findings exist', () => {
    const single = [{ file: 'a', line: 1, quoted_line: 'x', claim: 'y' }];
    expect(computeVerdict(single)).toBe('comment');
  });
});

describe('renderReviewMarkdown', () => {
  const meta = {
    baseRepo: 'org/repo',
    baseBranch: 'main',
    headRepo: 'org/repo',
    headBranch: 'fix/x'
  };

  // Shared fixtures: an empty approve review and a single concrete finding.
  const emptyApprove = { verdict: 'approve', summary: 'Doc only.', findings: [] };
  const oneFinding = [
    { file: 'a.js', line: 5, quoted_line: '+x', claim: 'no test for x at a.js:5' }
  ];

  it('returns null when verdict is approve and findings empty (silence preferred)', () => {
    const out = renderReviewMarkdown(emptyApprove, 42, 'abc1234', meta);
    expect(out).toBeNull();
  });

  it('renders verdict + summary + findings when findings exist', () => {
    const out = renderReviewMarkdown(
      { verdict: 'comment', summary: 'Adds X.', findings: oneFinding },
      42, 'abc1234', meta
    );
    const expectedFragments = [
      'AI Code Review for PR #42',
      '💬 Comment',
      'Adds X.',
      '`a.js:5`',
      'no test for x',
      'abc1234'
    ];
    for (const fragment of expectedFragments) {
      expect(out).toContain(fragment);
    }
  });

  it('forces verdict from findings even if model claimed otherwise', () => {
    // Model says approve but provides findings — verdict must be comment.
    const out = renderReviewMarkdown(
      { verdict: 'approve', summary: 'Adds X.', findings: oneFinding },
      42, 'abc1234', meta
    );
    expect(out).toContain('💬 Comment');
    expect(out).not.toContain('✅ Approve');
  });

  it('still posts when caller opts out of skip-approve-empty', () => {
    const out = renderReviewMarkdown(
      emptyApprove,
      42, 'abc1234', meta,
      { skipApproveEmpty: false }
    );
    expect(out).toContain('✅ Approve');
    expect(out).toContain('**Findings:** None.');
  });
});
Loading
Loading