From fed9d48b3af3397eb061eadd42674ec6a6f34814 Mon Sep 17 00:00:00 2001 From: Dina Berry Date: Wed, 8 Apr 2026 09:51:18 -0700 Subject: [PATCH 1/2] fix(watch): detect rate-limit errors --- .changeset/watch-rate-limit-detection.md | 5 ++ .../squad-cli/src/cli/commands/watch/index.ts | 51 +++++++++++-- test/watch-rate-limit.test.ts | 75 +++++++++++++++++++ 3 files changed, 126 insertions(+), 5 deletions(-) create mode 100644 .changeset/watch-rate-limit-detection.md create mode 100644 test/watch-rate-limit.test.ts diff --git a/.changeset/watch-rate-limit-detection.md b/.changeset/watch-rate-limit-detection.md new file mode 100644 index 000000000..5d83469ac --- /dev/null +++ b/.changeset/watch-rate-limit-detection.md @@ -0,0 +1,5 @@ +--- +'@bradygaster/squad-cli': patch +--- + +Fix watch reporting "Board is clear" when GitHub API rate limit is hit. Detects rate-limit and other scan errors, shows appropriate warning instead of the misleading idle message, skips subsequent phases on failed scans, and prevents rate-limited rounds from counting as circuit-breaker successes. diff --git a/packages/squad-cli/src/cli/commands/watch/index.ts b/packages/squad-cli/src/cli/commands/watch/index.ts index 2f38b478f..d1b370a55 100644 --- a/packages/squad-cli/src/cli/commands/watch/index.ts +++ b/packages/squad-cli/src/cli/commands/watch/index.ts @@ -168,15 +168,36 @@ export interface BoardState { executed: number; } +/** Outcome of a runCheck call — wraps BoardState with scan status. */ +export type RunCheckStatus = 'ok' | 'rate-limited' | 'error'; + +export interface RunCheckResult { + state: BoardState; + status: RunCheckStatus; +} + export interface ReportBoardOptions { notifyLevel?: 'all' | 'important' | 'none'; machineName?: string; repoName?: string; + /** When set, overrides the "Board is clear" message for failed scans. */ + scanStatus?: RunCheckStatus; } export function reportBoard(state: BoardState, round: number, options?: ReportBoardOptions): void { const level = options?.notifyLevel ?? 'all'; const total = Object.values(state).reduce((a, b) => a + b, 0); + const scanStatus = options?.scanStatus ?? 'ok'; + + // Rate-limit / error warnings always print (bypass notifyLevel suppression) + if (total === 0 && scanStatus === 'rate-limited') { + console.log(`${YELLOW}⚠ API rate limited — skipping this round (retry in next interval)${RESET}`); + return; + } + if (total === 0 && scanStatus === 'error') { + console.log(`${YELLOW}⚠ Board scan failed — skipping this round (retry in next interval)${RESET}`); + return; + } if (level === 'none') return; if (level === 'important' && total === 0) return; @@ -296,7 +317,7 @@ async function runCheck( capabilities: MachineCapabilities | null, adapter: PlatformAdapter, vlog?: VerboseLogger, -): Promise { +): Promise { const timestamp = new Date().toLocaleTimeString(); try { const issues = await listWatchWorkItems(adapter, { label: 'squad', state: 'open', limit: 20 }); @@ -361,10 +382,16 @@ async function runCheck( } const prState = await checkPRs(roster, adapter, vlog); - return { untriaged: untriaged.length, assigned: assignedIssues.length, executed: 0, ...prState }; + return { state: { untriaged: untriaged.length, assigned: assignedIssues.length, executed: 0, ...prState }, status: 'ok' }; } catch (e) { - console.error(`${RED}✗${RESET} [${timestamp}] Check failed: ${(e as Error).message}`); - return emptyBoardState(); + const err = e as Error; + const limited = isRateLimitError(err); + if (limited) { + console.log(`${YELLOW}⚠${RESET} [${timestamp}] API rate limited — board scan skipped`); + } else { + console.error(`${RED}✗${RESET} [${timestamp}] Check failed: ${err.message}`); + } + return { state: emptyBoardState(), status: limited ? 'rate-limited' : 'error' }; } } @@ -868,7 +895,21 @@ export async function runWatch(dest: string, options: WatchOptions | WatchConfig } // Core: triage (always runs — not a capability) - const roundState = await runCheck(rules, modules, roster, hasCopilot, autoAssign, capabilities, adapter, vlog); + const checkResult = await runCheck(rules, modules, roster, hasCopilot, autoAssign, capabilities, adapter, vlog); + const roundState = checkResult.state; + + // Short-circuit remaining phases when the scan failed or was rate-limited + if (checkResult.status !== 'ok') { + reportBoard(roundState, round, { scanStatus: checkResult.status }); + const nextPollTime = new Date(Date.now() + interval * 60 * 1000); + console.log(`${DIM}Next poll at ${nextPollTime.toLocaleTimeString()}${RESET}`); + // Do NOT count a failed scan as a circuit-breaker success + if (cbState.status === 'half-open') { + cbState.consecutiveSuccesses = 0; + } + saveCBState(squadDirInfo.path, cbState); + return; + } // Phase 2: post-triage (two-pass hydration) await runPhase('post-triage', enabledCapabilities, roundContext, config); diff --git a/test/watch-rate-limit.test.ts b/test/watch-rate-limit.test.ts new file mode 100644 index 000000000..1c03f6ede --- /dev/null +++ b/test/watch-rate-limit.test.ts @@ -0,0 +1,75 @@ +import { describe, it, expect, afterEach, vi } from 'vitest'; +import { reportBoard } from '../packages/squad-cli/src/cli/commands/watch/index.js'; +import type { BoardState } from '../packages/squad-cli/src/cli/commands/watch/index.js'; + +function emptyState(): BoardState { + return { untriaged: 0, assigned: 0, drafts: 0, needsReview: 0, changesRequested: 0, ciFailures: 0, readyToMerge: 0, executed: 0 }; +} + +describe('reportBoard rate-limit / error handling (#806)', () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('shows rate-limit warning instead of "Board is clear" when scanStatus is rate-limited', () => { + const spy = vi.spyOn(console, 'log').mockImplementation(() => {}); + reportBoard(emptyState(), 1, { scanStatus: 'rate-limited' }); + const output = spy.mock.calls.map(c => c.join(' ')).join('\n'); + expect(output).toContain('API rate limited'); + expect(output).not.toContain('Board is clear'); + }); + + it('shows error warning instead of "Board is clear" when scanStatus is error', () => { + const spy = vi.spyOn(console, 'log').mockImplementation(() => {}); + reportBoard(emptyState(), 1, { scanStatus: 'error' }); + const output = spy.mock.calls.map(c => c.join(' ')).join('\n'); + expect(output).toContain('Board scan failed'); + expect(output).not.toContain('Board is clear'); + }); + + it('shows normal "Board is clear" when scanStatus is ok', () => { + const spy = vi.spyOn(console, 'log').mockImplementation(() => {}); + reportBoard(emptyState(), 1, { scanStatus: 'ok' }); + const output = spy.mock.calls.map(c => c.join(' ')).join('\n'); + expect(output).toContain('Board is clear'); + }); + + it('shows normal "Board is clear" when scanStatus is omitted', () => { + const spy = vi.spyOn(console, 'log').mockImplementation(() => {}); + reportBoard(emptyState(), 1); + const output = spy.mock.calls.map(c => c.join(' ')).join('\n'); + expect(output).toContain('Board is clear'); + }); + + it('rate-limit warning bypasses notifyLevel "important" suppression', () => { + const spy = vi.spyOn(console, 'log').mockImplementation(() => {}); + reportBoard(emptyState(), 1, { notifyLevel: 'important', scanStatus: 'rate-limited' }); + const output = spy.mock.calls.map(c => c.join(' ')).join('\n'); + expect(output).toContain('API rate limited'); + }); + + it('error warning bypasses notifyLevel "important" suppression', () => { + const spy = vi.spyOn(console, 'log').mockImplementation(() => {}); + reportBoard(emptyState(), 1, { notifyLevel: 'important', scanStatus: 'error' }); + const output = spy.mock.calls.map(c => c.join(' ')).join('\n'); + expect(output).toContain('Board scan failed'); + }); + + it('rate-limit warning bypasses notifyLevel "none" suppression', () => { + const spy = vi.spyOn(console, 'log').mockImplementation(() => {}); + reportBoard(emptyState(), 1, { notifyLevel: 'none', scanStatus: 'rate-limited' }); + const output = spy.mock.calls.map(c => c.join(' ')).join('\n'); + expect(output).toContain('API rate limited'); + }); + + it('shows normal board when scanStatus is rate-limited but board is busy', () => { + const spy = vi.spyOn(console, 'log').mockImplementation(() => {}); + const busy = { ...emptyState(), untriaged: 3 }; + reportBoard(busy, 1, { scanStatus: 'rate-limited' }); + const output = spy.mock.calls.map(c => c.join(' ')).join('\n'); + // When there IS data, show the normal board (partial data is still useful) + expect(output).toContain('Round 1'); + expect(output).toContain('Untriaged'); + expect(output).not.toContain('API rate limited'); + }); +}); From e0cdb83f478240fd5281b68b93e5625844980ba4 Mon Sep 17 00:00:00 2001 From: "Dina Berry (She/her)" Date: Thu, 9 Apr 2026 21:05:21 -0700 Subject: [PATCH 2/2] fix(watch): narrow rate-limit detection to 429 and explicit rate-limit strings Remove broad 403 match that misclassified auth errors as rate limits. A bare 403 Forbidden (e.g. insufficient token scopes) is a permanent auth error, not a transient rate limit. Now only matches 429 status codes and explicit rate-limit message strings. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- packages/squad-cli/src/cli/core/gh-cli.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/squad-cli/src/cli/core/gh-cli.ts b/packages/squad-cli/src/cli/core/gh-cli.ts index 3ada9e65b..6dfc8c434 100644 --- a/packages/squad-cli/src/cli/core/gh-cli.ts +++ b/packages/squad-cli/src/cli/core/gh-cli.ts @@ -153,12 +153,13 @@ export async function ghRateLimitCheck(): Promise { } /** - * Detect if an error is a GitHub 429 rate limit error. + * Detect if an error is a GitHub rate-limit error (429 or explicit rate-limit messages). + * Does NOT match bare 403 — that indicates an auth/permissions error, not a transient rate limit. */ export function isRateLimitError(err: unknown): boolean { if (err instanceof Error) { const msg = err.message.toLowerCase(); - return msg.includes('rate limit') || msg.includes('secondary rate') || msg.includes('403'); + return msg.includes('rate limit') || msg.includes('secondary rate') || msg.includes('429'); } return false; }