From c2ce4198280d20d74709e212ed738792cb46e062 Mon Sep 17 00:00:00 2001 From: aaight Date: Sun, 26 Apr 2026 18:59:29 +0200 Subject: [PATCH 1/8] fix(triggers): audit & fix PM feedback inconsistencies across respond-to-* agents (#1201) * fix(triggers): audit & fix PM feedback inconsistencies across respond-to-* agents * fix(triggers): use case-insensitive JIRA status comparison in isInPlanningStatus Match the established pattern from status-changed.ts and label-added.ts which both use .toLowerCase() for JIRA status comparisons, since status names are user-configurable and the API does not guarantee consistent casing. Co-Authored-By: Claude Sonnet 4.6 --------- Co-authored-by: Cascade Bot Co-authored-by: Claude Sonnet 4.6 --- package-lock.json | 12 --- src/agents/prompts/index.ts | 12 +-- src/router/acknowledgments.ts | 3 + src/router/bot-identity-resolvers.ts | 52 ++++++++++--- src/triggers/github/check-suite-failure.ts | 5 +- src/triggers/github/check-suite-success.ts | 10 ++- src/triggers/github/pr-comment-mention.ts | 5 +- src/triggers/github/pr-review-submitted.ts | 5 +- src/triggers/github/utils.ts | 31 ++++++++ src/triggers/jira/comment-mention.ts | 77 ++++++++++++------- src/triggers/linear/comment-mention.ts | 30 +++++++- src/triggers/shared/agent-pm-poster.ts | 1 + src/triggers/shared/pm-ack.ts | 12 ++- src/triggers/trello/comment-mention.ts | 30 +++----- src/types/index.ts | 15 ++-- .../triggers/jira-comment-mention.test.ts | 55 +++++++++---- .../triggers/linear-comment-mention.test.ts | 37 +++++++++ .../triggers/trello-comment-mention.test.ts | 22 +++--- 18 files changed, 298 insertions(+), 116 deletions(-) diff --git a/package-lock.json b/package-lock.json index d696a6d1..7fa4b7e0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -156,9 +156,6 @@ "cpu": [ "arm64" ], - "libc": [ - "glibc" - ], "license": "SEE LICENSE IN LICENSE.md", "optional": true, "os": [ @@ -172,9 +169,6 @@ "cpu": [ "arm64" ], - "libc": [ - "musl" - ], "license": "SEE LICENSE IN 
LICENSE.md", "optional": true, "os": [ @@ -188,9 +182,6 @@ "cpu": [ "x64" ], - "libc": [ - "glibc" - ], "license": "SEE LICENSE IN LICENSE.md", "optional": true, "os": [ @@ -204,9 +195,6 @@ "cpu": [ "x64" ], - "libc": [ - "musl" - ], "license": "SEE LICENSE IN LICENSE.md", "optional": true, "os": [ diff --git a/src/agents/prompts/index.ts b/src/agents/prompts/index.ts index 90ff853d..511d42b7 100644 --- a/src/agents/prompts/index.ts +++ b/src/agents/prompts/index.ts @@ -189,12 +189,12 @@ export interface TaskPromptInput { workItemId?: string; prNumber?: number; prBranch?: string; - // PM comment trigger fields - triggerCommentText?: string; - triggerCommentAuthor?: string; - // PR comment trigger fields + // Comment trigger fields (unified for PM and SCM comment-mention triggers) triggerCommentBody?: string; triggerCommentPath?: string; + triggerCommentAuthor?: string; + /** @deprecated Use triggerCommentBody. Kept as backward-compatible alias. */ + triggerCommentText?: string; // Allow extra fields for future extensibility [key: string]: unknown; } @@ -210,9 +210,9 @@ export function buildTaskPromptContext(input: TaskPromptInput): TaskPromptContex workItemId: input.workItemId, prNumber: input.prNumber, prBranch: input.prBranch, - commentText: input.triggerCommentText, + commentText: input.triggerCommentBody ?? input.triggerCommentText, commentAuthor: input.triggerCommentAuthor, - commentBody: input.triggerCommentBody, + commentBody: input.triggerCommentBody ?? input.triggerCommentText, commentPath: input.triggerCommentPath, }; } diff --git a/src/router/acknowledgments.ts b/src/router/acknowledgments.ts index 42a81df3..1e35719b 100644 --- a/src/router/acknowledgments.ts +++ b/src/router/acknowledgments.ts @@ -119,12 +119,15 @@ export async function deleteLinearAck( // for backward compatibility with pm/ integrations and router/trello.ts. 
// --------------------------------------------------------------------------- +export type { JiraBotIdentity, TrelloBotIdentity } from './bot-identity-resolvers.js'; export { _resetJiraBotCache, _resetLinearBotCache, _resetTrelloBotCache, resolveJiraBotAccountId, + resolveJiraBotIdentity, resolveLinearBotUserId, + resolveTrelloBotIdentity, resolveTrelloBotMemberId, } from './bot-identity-resolvers.js'; diff --git a/src/router/bot-identity-resolvers.ts b/src/router/bot-identity-resolvers.ts index b1461c34..89f5e310 100644 --- a/src/router/bot-identity-resolvers.ts +++ b/src/router/bot-identity-resolvers.ts @@ -19,13 +19,18 @@ import { // JIRA bot identity // --------------------------------------------------------------------------- -const jiraBotIdentityCache = new BotIdentityCache('accountId'); +export interface JiraBotIdentity { + accountId: string; + displayName: string; +} + +const jiraBotIdentityCache = new BotIdentityCache('identity'); /** - * Resolve the JIRA account ID for the bot credentials linked to a project. + * Resolve the JIRA bot identity (accountId + displayName) for a project. * Cached per-project with 60s TTL. Returns null on any failure. */ -export async function resolveJiraBotAccountId(projectId: string): Promise { +export async function resolveJiraBotIdentity(projectId: string): Promise { return jiraBotIdentityCache.resolve(projectId, async () => { const creds = await resolveJiraCredentials(projectId); if (!creds) return null; @@ -35,11 +40,21 @@ export async function resolveJiraBotAccountId(projectId: string): Promise { + const identity = await resolveJiraBotIdentity(projectId); + return identity?.accountId ?? 
null; +} + /** @internal Visible for testing only */ export function _resetJiraBotCache(): void { jiraBotIdentityCache._reset(); @@ -49,13 +64,20 @@ export function _resetJiraBotCache(): void { // Trello bot identity // --------------------------------------------------------------------------- -const trelloBotIdentityCache = new BotIdentityCache('memberId'); +export interface TrelloBotIdentity { + id: string; + username: string; +} + +const trelloBotIdentityCache = new BotIdentityCache('identity'); /** - * Resolve the Trello member ID for the bot credentials linked to a project. + * Resolve the Trello bot identity (id + username) for a project. * Cached per-project with 60s TTL. Returns null on any failure. */ -export async function resolveTrelloBotMemberId(projectId: string): Promise { +export async function resolveTrelloBotIdentity( + projectId: string, +): Promise { return trelloBotIdentityCache.resolve(projectId, async () => { const creds = await resolveTrelloCredentials(projectId); if (!creds) return null; @@ -66,11 +88,21 @@ export async function resolveTrelloBotMemberId(projectId: string): Promise { + const identity = await resolveTrelloBotIdentity(projectId); + return identity?.id ?? 
null; +} + /** @internal Visible for testing only */ export function _resetTrelloBotCache(): void { trelloBotIdentityCache._reset(); diff --git a/src/triggers/github/check-suite-failure.ts b/src/triggers/github/check-suite-failure.ts index ec5c62ca..5d073e82 100644 --- a/src/triggers/github/check-suite-failure.ts +++ b/src/triggers/github/check-suite-failure.ts @@ -4,7 +4,7 @@ import { logger } from '../../utils/logging.js'; import { parseRepoFullName } from '../../utils/repo.js'; import { checkTriggerEnabled } from '../shared/trigger-check.js'; import { type GitHubCheckSuitePayload, isGitHubCheckSuitePayload } from './types.js'; -import { parsePrNumberFromRef, resolveWorkItemId } from './utils.js'; +import { parsePrNumberFromRef, resolveWorkItemDisplayData, resolveWorkItemId } from './utils.js'; /** * Resolve a PR number from a check_suite payload. @@ -121,6 +121,7 @@ export class CheckSuiteFailureTrigger implements TriggerHandler { // Resolve work item from DB const workItemId = await resolveWorkItemId(ctx.project.id, prNumber); + const { workItemUrl, workItemTitle } = await resolveWorkItemDisplayData(workItemId); // Get ALL check runs for this commit to verify they're all complete const checkStatus = await githubClient.getCheckSuiteStatus(owner, repo, headSha); @@ -205,6 +206,8 @@ export class CheckSuiteFailureTrigger implements TriggerHandler { prUrl: prDetails.htmlUrl, prTitle: prDetails.title, workItemId, + workItemUrl, + workItemTitle, }; } } diff --git a/src/triggers/github/check-suite-success.ts b/src/triggers/github/check-suite-success.ts index a77e8ffb..b35c734c 100644 --- a/src/triggers/github/check-suite-success.ts +++ b/src/triggers/github/check-suite-success.ts @@ -9,7 +9,12 @@ import { releaseReviewDispatch, } from './review-dispatch-dedup.js'; import { type GitHubCheckSuitePayload, isGitHubCheckSuitePayload } from './types.js'; -import { evaluateAuthorMode, parsePrNumberFromRef, resolveWorkItemId } from './utils.js'; +import { + evaluateAuthorMode, 
+ parsePrNumberFromRef, + resolveWorkItemDisplayData, + resolveWorkItemId, +} from './utils.js'; const MAX_RETRIES = 12; const RETRY_DELAY_MS = 10_000; @@ -156,6 +161,7 @@ export class CheckSuiteSuccessTrigger implements TriggerHandler { // Resolve work item from DB const workItemId = await resolveWorkItemId(ctx.project.id, prNumber); + const { workItemUrl, workItemTitle } = await resolveWorkItemDisplayData(workItemId); // Skip if the reviewer persona's latest review already covers the current HEAD SHA const reviews = await githubClient.getPRReviews(owner, repo, prNumber); @@ -224,6 +230,8 @@ export class CheckSuiteSuccessTrigger implements TriggerHandler { prUrl: prDetails.htmlUrl, prTitle: prDetails.title, workItemId, + workItemUrl, + workItemTitle, waitForChecks: true, onBlocked: () => releaseReviewDispatch(dedupKey), }; diff --git a/src/triggers/github/pr-comment-mention.ts b/src/triggers/github/pr-comment-mention.ts index a97c6103..14642a43 100644 --- a/src/triggers/github/pr-comment-mention.ts +++ b/src/triggers/github/pr-comment-mention.ts @@ -5,7 +5,7 @@ import { logger } from '../../utils/logging.js'; import { parseRepoFullName } from '../../utils/repo.js'; import { checkTriggerEnabled } from '../shared/trigger-check.js'; import { isGitHubIssueCommentPayload, isGitHubPRReviewCommentPayload } from './types.js'; -import { resolveWorkItemId } from './utils.js'; +import { resolveWorkItemDisplayData, resolveWorkItemId } from './utils.js'; /** * Trigger that fires when someone @mentions the reviewer bot in a PR comment. 
@@ -118,6 +118,7 @@ export class PRCommentMentionTrigger implements TriggerHandler { // Resolve work item from DB const workItemId = await resolveWorkItemId(ctx.project.id, prNumber); + const { workItemUrl, workItemTitle } = await resolveWorkItemDisplayData(workItemId); logger.info('PR comment @mention detected, triggering respond-to-pr-comment agent', { prNumber, @@ -144,6 +145,8 @@ export class PRCommentMentionTrigger implements TriggerHandler { prUrl, prTitle, workItemId, + workItemUrl, + workItemTitle, }; } } diff --git a/src/triggers/github/pr-review-submitted.ts b/src/triggers/github/pr-review-submitted.ts index 89ce36c3..cf8e5fe8 100644 --- a/src/triggers/github/pr-review-submitted.ts +++ b/src/triggers/github/pr-review-submitted.ts @@ -3,7 +3,7 @@ import type { TriggerContext, TriggerHandler, TriggerResult } from '../../types/ import { logger } from '../../utils/logging.js'; import { checkTriggerEnabled } from '../shared/trigger-check.js'; import { type GitHubPullRequestReviewPayload, isGitHubPullRequestReviewPayload } from './types.js'; -import { resolveWorkItemId } from './utils.js'; +import { resolveWorkItemDisplayData, resolveWorkItemId } from './utils.js'; export class PRReviewSubmittedTrigger implements TriggerHandler { name = 'pr-review-submitted'; @@ -59,6 +59,7 @@ export class PRReviewSubmittedTrigger implements TriggerHandler { // Resolve work item from DB const workItemId = await resolveWorkItemId(ctx.project.id, prNumber); + const { workItemUrl, workItemTitle } = await resolveWorkItemDisplayData(workItemId); logger.info('PR review submitted, triggering review agent', { prNumber, @@ -83,6 +84,8 @@ export class PRReviewSubmittedTrigger implements TriggerHandler { prUrl: reviewPayload.pull_request.html_url, prTitle: reviewPayload.pull_request.title, workItemId, + workItemUrl, + workItemTitle, }; } } diff --git a/src/triggers/github/utils.ts b/src/triggers/github/utils.ts index f5492bef..17235dff 100644 --- a/src/triggers/github/utils.ts +++ 
b/src/triggers/github/utils.ts @@ -1,5 +1,6 @@ import { lookupWorkItemForPR } from '../../db/repositories/prWorkItemsRepository.js'; import type { PersonaIdentities } from '../../github/personas.js'; +import { getPMProviderOrNull } from '../../pm/context.js'; import type { ProjectConfig } from '../../types/index.js'; import { logger } from '../../utils/logging.js'; @@ -129,3 +130,33 @@ export async function resolveWorkItemId( return undefined; } + +/** + * Fetch work item display data (URL and title) from the active PM provider. + * + * Best-effort: returns an empty object on any error so callers can safely + * spread the result without checking for failure. Requires a PM provider + * to be in scope (set up by `withPMScope`). + * + * @param workItemId - The work item ID to look up (Trello card ID, JIRA issue key, etc.) + */ +export async function resolveWorkItemDisplayData( + workItemId: string | undefined, +): Promise<{ workItemUrl?: string; workItemTitle?: string }> { + if (!workItemId) return {}; + try { + const provider = getPMProviderOrNull(); + if (!provider) return {}; + const workItem = await provider.getWorkItem(workItemId); + return { + workItemUrl: workItem.url ?? undefined, + workItemTitle: workItem.title ?? undefined, + }; + } catch (err) { + logger.debug('Could not resolve work item display data (best-effort)', { + workItemId, + error: String(err), + }); + return {}; + } +} diff --git a/src/triggers/jira/comment-mention.ts b/src/triggers/jira/comment-mention.ts index dfc3dfbe..88983f83 100644 --- a/src/triggers/jira/comment-mention.ts +++ b/src/triggers/jira/comment-mention.ts @@ -1,36 +1,17 @@ /** * JIRA comment @mention trigger. * - * Fires when someone @mentions the CASCADE bot user in a JIRA issue comment. - * Runs the respond-to-planning-comment agent. + * Fires when someone @mentions the CASCADE bot user in a JIRA issue comment + * on an issue in the PLANNING status. Runs the respond-to-planning-comment agent. 
*/ -import { jiraClient } from '../../jira/client.js'; import { getJiraConfig } from '../../pm/config.js'; +import { resolveJiraBotIdentity } from '../../router/bot-identity-resolvers.js'; import type { TriggerContext, TriggerHandler, TriggerResult } from '../../types/index.js'; import { logger } from '../../utils/logging.js'; import { checkTriggerEnabled } from '../shared/trigger-check.js'; import type { JiraWebhookPayload } from './types.js'; -// Cache authenticated user info to avoid repeated API calls -let cachedUserInfo: { accountId: string; displayName: string } | null = null; - -async function getAuthenticatedUserInfo(): Promise<{ accountId: string; displayName: string }> { - if (cachedUserInfo) { - return cachedUserInfo; - } - const me = await jiraClient.getMyself(); - cachedUserInfo = { - accountId: me.accountId ?? '', - displayName: me.displayName ?? '', - }; - logger.info('Cached authenticated JIRA user info', { - accountId: cachedUserInfo.accountId, - displayName: cachedUserInfo.displayName, - }); - return cachedUserInfo; -} - /** * Extract plain text from a comment body. * Handles both ADF objects (recursive extraction) and wiki markup strings. @@ -87,6 +68,35 @@ function hasMention(body: unknown, accountId: string, depth = 0): boolean { return false; } +/** + * Check if the issue is in the configured PLANNING status. + * Returns false (and logs) when the project has no planning status configured + * or the issue's current status doesn't match. 
+ */ +function isInPlanningStatus( + project: TriggerContext['project'], + issueKey: string, + currentStatusName: string | undefined, +): boolean { + const planningStatusName = getJiraConfig(project)?.statuses.planning; + if (!planningStatusName) { + logger.debug( + 'Planning status not configured for JIRA project, skipping comment mention trigger', + { projectId: project.id }, + ); + return false; + } + if (currentStatusName?.toLowerCase() !== planningStatusName.toLowerCase()) { + logger.debug('JIRA issue not in planning status, skipping comment mention trigger', { + issueKey, + currentStatus: currentStatusName, + planningStatus: planningStatusName, + }); + return false; + } + return true; +} + export class JiraCommentMentionTrigger implements TriggerHandler { name = 'jira-comment-mention'; description = @@ -132,8 +142,14 @@ export class JiraCommentMentionTrigger implements TriggerHandler { return null; } - // Resolve our JIRA identity - const userInfo = await getAuthenticatedUserInfo(); + // Resolve our JIRA identity using the shared per-project cached resolver + const userInfo = await resolveJiraBotIdentity(ctx.project.id); + if (!userInfo) { + logger.warn('JIRA comment trigger: could not resolve bot user identity, skipping', { + projectId: ctx.project.id, + }); + return null; + } logger.info('JIRA bot identity resolved', { botAccountId: userInfo.accountId, botDisplayName: userInfo.displayName, @@ -161,17 +177,23 @@ export class JiraCommentMentionTrigger implements TriggerHandler { return null; } + // Gate on PLANNING status — only respond to comments on PLANNING issues + const currentStatusName = payload.issue?.fields?.status?.name; + if (!isInPlanningStatus(ctx.project, issueKey, currentStatusName)) { + return null; + } + const jiraConfig = getJiraConfig(ctx.project); + const commentText = extractText(commentBody); const authorName = commentAuthor?.displayName || 'unknown'; // Capture work item display data from the issue payload and Jira config - const 
jiraConfig = getJiraConfig(ctx.project); const workItemUrl = jiraConfig?.baseUrl ? `${jiraConfig.baseUrl}/browse/${issueKey}` : undefined; const workItemTitle = payload.issue?.fields?.summary ?? undefined; - logger.info('JIRA comment @mention detected, triggering agent', { + logger.info('JIRA comment @mention detected on PLANNING issue, triggering agent', { issueKey, commentAuthor: authorName, botAccountId: userInfo.accountId, @@ -181,7 +203,8 @@ export class JiraCommentMentionTrigger implements TriggerHandler { agentType: 'respond-to-planning-comment', agentInput: { workItemId: issueKey, - triggerCommentText: commentText, + triggerCommentBody: commentText, + triggerCommentText: commentText, // @deprecated — use triggerCommentBody triggerCommentAuthor: authorName, workItemUrl, workItemTitle, diff --git a/src/triggers/linear/comment-mention.ts b/src/triggers/linear/comment-mention.ts index 91779d82..49f498c7 100644 --- a/src/triggers/linear/comment-mention.ts +++ b/src/triggers/linear/comment-mention.ts @@ -1,8 +1,8 @@ /** * Linear comment @mention trigger. * - * Fires when someone @mentions the CASCADE bot user in a Linear issue comment. - * Runs the respond-to-planning-comment agent. + * Fires when someone @mentions the CASCADE bot user in a Linear issue comment + * on an issue in the PLANNING state. Runs the respond-to-planning-comment agent. * * Linear webhook structure for comment creation: * action: 'create', type: 'Comment' @@ -12,6 +12,7 @@ * data.issue.identifier: the issue identifier (e.g. 
TEAM-123) */ +import { getLinearConfig } from '../../pm/config.js'; import { resolveLinearBotIdentity } from '../../router/bot-identity-resolvers.js'; import type { TriggerContext, TriggerHandler, TriggerResult } from '../../types/index.js'; import { logger } from '../../utils/logging.js'; @@ -130,9 +131,29 @@ export class LinearCommentMentionTrigger implements TriggerHandler { return null; } + // Gate on PLANNING state — only respond to comments on PLANNING issues + const linearConfig = getLinearConfig(ctx.project); + const planningStateId = linearConfig?.statuses.planning; + if (!planningStateId) { + logger.debug( + 'Planning state not configured for Linear project, skipping comment mention trigger', + { projectId: ctx.project.id }, + ); + return null; + } + const currentStateId = issue?.stateId; + if (currentStateId !== planningStateId) { + logger.debug('Linear issue not in planning state, skipping comment mention trigger', { + issueIdentifier, + currentStateId, + planningStateId, + }); + return null; + } + const issueUrl = issue?.url; - logger.info('Linear comment @mention detected, triggering agent', { + logger.info('Linear comment @mention detected on PLANNING issue, triggering agent', { issueIdentifier, commentAuthorId, botUserId, @@ -142,7 +163,8 @@ export class LinearCommentMentionTrigger implements TriggerHandler { agentType: 'respond-to-planning-comment', agentInput: { workItemId: issueIdentifier, - triggerCommentText: commentBody, + triggerCommentBody: commentBody, + triggerCommentText: commentBody, // @deprecated — use triggerCommentBody triggerCommentAuthor: commentAuthorId, workItemUrl: issueUrl, workItemTitle: undefined, diff --git a/src/triggers/shared/agent-pm-poster.ts b/src/triggers/shared/agent-pm-poster.ts index cdecca9f..78fd2cab 100644 --- a/src/triggers/shared/agent-pm-poster.ts +++ b/src/triggers/shared/agent-pm-poster.ts @@ -27,6 +27,7 @@ const TRUNCATION_NOTICE = '\n\n_[Review body truncated — view full review on G const 
AGENT_OUTPUT_CONFIG: Record = { 'respond-to-ci': { emoji: '🔧', header: 'CI Fix Summary' }, 'respond-to-review': { emoji: '💬', header: 'Review Response Summary' }, + 'respond-to-pr-comment': { emoji: '📝', header: 'PR Comment Response' }, 'resolve-conflicts': { emoji: '🔀', header: 'Conflict Resolution Summary' }, }; diff --git a/src/triggers/shared/pm-ack.ts b/src/triggers/shared/pm-ack.ts index 72b8c3ad..34db80e1 100644 --- a/src/triggers/shared/pm-ack.ts +++ b/src/triggers/shared/pm-ack.ts @@ -2,7 +2,7 @@ * Shared PM acknowledgment posting utility for webhook handlers. * * Centralises the logic for posting acknowledgment comments to PM tools - * (Trello/JIRA) for PM-focused agents triggered from GitHub or other + * (Trello/JIRA/Linear) for PM-focused agents triggered from GitHub or other * non-PM sources. * * Used by: @@ -12,18 +12,18 @@ * and does not use this shared utility. */ -import { postJiraAck, postTrelloAck } from '../../router/acknowledgments.js'; +import { postJiraAck, postLinearAck, postTrelloAck } from '../../router/acknowledgments.js'; import { logger } from '../../utils/logging.js'; /** - * Post a PM acknowledgment comment to Trello or JIRA. + * Post a PM acknowledgment comment to Trello, JIRA, or Linear. * * Returns the comment ID if successfully posted, or null if the PM type * is not supported or posting failed. * * @param projectId The project ID for credential resolution. * @param workItemId The work item ID to post the comment on (card ID / issue key). - * @param pmType The PM provider type ('trello' or 'jira'). + * @param pmType The PM provider type ('trello', 'jira', or 'linear'). * @param message The acknowledgment message to post. * @param agentType Used only for warning log context when pmType is unknown. 
*/ @@ -42,6 +42,10 @@ export async function postPMAckComment( return postJiraAck(projectId, workItemId, message); } + if (pmType === 'linear') { + return postLinearAck(projectId, workItemId, message); + } + logger.warn('Unknown PM type for PM-focused agent ack, skipping', { agentType, pmType, diff --git a/src/triggers/trello/comment-mention.ts b/src/triggers/trello/comment-mention.ts index 91698c44..45995ba1 100644 --- a/src/triggers/trello/comment-mention.ts +++ b/src/triggers/trello/comment-mention.ts @@ -1,4 +1,5 @@ import { getTrelloConfig } from '../../pm/config.js'; +import { resolveTrelloBotIdentity } from '../../router/bot-identity-resolvers.js'; import { trelloClient } from '../../trello/client.js'; import type { TriggerContext, TriggerHandler, TriggerResult } from '../../types/index.js'; import { logger } from '../../utils/logging.js'; @@ -6,22 +7,6 @@ import { checkTriggerEnabled } from '../shared/trigger-check.js'; import type { TrelloWebhookPayload } from '../types.js'; import { isTrelloWebhookPayload } from '../types.js'; -// Cache authenticated member info to avoid repeated API calls -let cachedMemberInfo: { id: string; username: string } | null = null; - -async function getAuthenticatedMemberInfo(): Promise<{ id: string; username: string }> { - if (cachedMemberInfo) { - return cachedMemberInfo; - } - const me = await trelloClient.getMe(); - cachedMemberInfo = { id: me.id, username: me.username }; - logger.info('Cached authenticated member info', { - memberId: cachedMemberInfo.id, - username: cachedMemberInfo.username, - }); - return cachedMemberInfo; -} - /** * Trigger that fires when someone @mentions the CASCADE bot in a Trello card comment * on a card in the PLANNING list. Runs the respond-to-planning-comment agent. 
@@ -61,8 +46,14 @@ export class TrelloCommentMentionTrigger implements TriggerHandler { return null; } - // Resolve our Trello identity - const memberInfo = await getAuthenticatedMemberInfo(); + // Resolve our Trello identity using the shared per-project cached resolver + const memberInfo = await resolveTrelloBotIdentity(ctx.project.id); + if (!memberInfo) { + logger.warn('Trello comment trigger: could not resolve bot member identity, skipping', { + projectId: ctx.project.id, + }); + return null; + } // Check for @mention (case-insensitive) const mentionPattern = new RegExp(`@${memberInfo.username}\\b`, 'i'); @@ -116,7 +107,8 @@ export class TrelloCommentMentionTrigger implements TriggerHandler { agentType: 'respond-to-planning-comment', agentInput: { workItemId: cardId, - triggerCommentText: commentText, + triggerCommentBody: commentText, + triggerCommentText: commentText, // @deprecated — use triggerCommentBody triggerCommentAuthor: commentAuthor, workItemUrl, workItemTitle, diff --git a/src/types/index.ts b/src/types/index.ts index aa255774..2f4333d5 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -38,13 +38,18 @@ export interface AgentInput { originalWorkItemUrl?: string; detectedAgentType?: string; - // Trello comment trigger fields - triggerCommentText?: string; - triggerCommentAuthor?: string; - - // PR comment trigger fields (for respond-to-pr-comment and similar agents) + // Unified comment trigger fields — both PM (Trello/JIRA/Linear) and SCM (GitHub) triggers use these + /** The body text of the triggering comment. Canonical field for all comment-mention triggers. */ triggerCommentBody?: string; triggerCommentPath?: string; + triggerCommentAuthor?: string; + + /** + * @deprecated Use `triggerCommentBody` instead. + * Retained for one release as a backward-compatible alias. PM comment-mention + * triggers populate both fields with the same value. 
+ */ + triggerCommentText?: string; // Interactive mode (local development) interactive?: boolean; diff --git a/tests/unit/triggers/jira-comment-mention.test.ts b/tests/unit/triggers/jira-comment-mention.test.ts index 5b3afa59..e5b8c492 100644 --- a/tests/unit/triggers/jira-comment-mention.test.ts +++ b/tests/unit/triggers/jira-comment-mention.test.ts @@ -1,8 +1,8 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; // Hoist mocks before any imports -const { mockJiraClientGetMyself, mockCheckTriggerEnabled, mockLogger } = vi.hoisted(() => ({ - mockJiraClientGetMyself: vi.fn(), +const { mockResolveJiraBotIdentity, mockCheckTriggerEnabled, mockLogger } = vi.hoisted(() => ({ + mockResolveJiraBotIdentity: vi.fn(), mockCheckTriggerEnabled: vi.fn().mockResolvedValue(true), mockLogger: { info: vi.fn(), @@ -12,10 +12,8 @@ const { mockJiraClientGetMyself, mockCheckTriggerEnabled, mockLogger } = vi.hois }, })); -vi.mock('../../../src/jira/client.js', () => ({ - jiraClient: { - getMyself: mockJiraClientGetMyself, - }, +vi.mock('../../../src/router/bot-identity-resolvers.js', () => ({ + resolveJiraBotIdentity: (...args: unknown[]) => mockResolveJiraBotIdentity(...args), })); vi.mock('../../../src/triggers/shared/trigger-check.js', () => ({ @@ -33,6 +31,7 @@ const BOT_ACCOUNT_ID = 'bot-account-001'; const BOT_DISPLAY_NAME = 'CascadeBot'; const OTHER_ACCOUNT_ID = 'user-account-456'; const ISSUE_KEY = 'PROJ-123'; +const PLANNING_STATUS = 'Planning'; function makeProject() { return { @@ -40,7 +39,10 @@ function makeProject() { name: 'Test Project', repo: 'owner/repo', baseBranch: 'main', - jira: { projectKey: 'PROJ' }, + jira: { + projectKey: 'PROJ', + statuses: { planning: PLANNING_STATUS }, + }, } as TriggerContext['project']; } @@ -49,6 +51,7 @@ function makeCtx( source?: TriggerContext['source']; webhookEvent?: string; issueKey?: string; + issueStatusName?: string; commentBody?: unknown; commentAuthorAccountId?: string; commentAuthorDisplayName?: 
string; @@ -56,7 +59,13 @@ function makeCtx( ): TriggerContext { const payload = { webhookEvent: overrides.webhookEvent ?? 'comment_created', - issue: { key: overrides.issueKey ?? ISSUE_KEY }, + issue: { + key: overrides.issueKey ?? ISSUE_KEY, + fields: { + status: { name: overrides.issueStatusName ?? PLANNING_STATUS }, + summary: 'Test Issue Summary', + }, + }, comment: { body: overrides.commentBody ?? `[~accountid:${BOT_ACCOUNT_ID}] please help`, author: { @@ -97,7 +106,7 @@ describe('JiraCommentMentionTrigger', () => { beforeEach(() => { vi.resetAllMocks(); vi.mocked(mockCheckTriggerEnabled).mockResolvedValue(true); - mockJiraClientGetMyself.mockResolvedValue({ + mockResolveJiraBotIdentity.mockResolvedValue({ accountId: BOT_ACCOUNT_ID, displayName: BOT_DISPLAY_NAME, }); @@ -205,12 +214,32 @@ describe('JiraCommentMentionTrigger', () => { expect(result).toBeNull(); }); - it('includes triggerCommentText in agentInput (wiki markup)', async () => { + it('returns null when issue is not in PLANNING status', async () => { + const result = await trigger.handle(makeCtx({ issueStatusName: 'In Progress' })); + + expect(result).toBeNull(); + }); + + it('returns null when planning status is not configured in project', async () => { + const ctx = makeCtx(); + // Override project to remove planning status config + (ctx as Record).project = { + ...makeProject(), + jira: { projectKey: 'PROJ', statuses: {} }, + }; + + const result = await trigger.handle(ctx); + + expect(result).toBeNull(); + }); + + it('includes triggerCommentText and triggerCommentBody in agentInput (wiki markup)', async () => { const result = await trigger.handle( makeCtx({ commentBody: `[~accountid:${BOT_ACCOUNT_ID}] please do this thing` }), ); expect(result?.agentInput.triggerCommentText).toContain('please do this thing'); + expect(result?.agentInput.triggerCommentBody).toContain('please do this thing'); }); it('includes comment author display name in agentInput', async () => { @@ -229,7 +258,7 @@ 
describe('JiraCommentMentionTrigger', () => { expect(result?.agentInput.triggerCommentAuthor).toBe('unknown'); }); - it('handles multiple calls correctly (caches user info)', async () => { + it('handles multiple calls correctly (calls resolveJiraBotIdentity each time)', async () => { // First call const result1 = await trigger.handle(makeCtx()); // Second call @@ -237,8 +266,8 @@ describe('JiraCommentMentionTrigger', () => { expect(result1).not.toBeNull(); expect(result2).not.toBeNull(); - // getMyself should be called at most once per trigger instance - expect(mockJiraClientGetMyself.mock.calls.length).toBeLessThanOrEqual(2); + // resolveJiraBotIdentity is called per handle() invocation + expect(mockResolveJiraBotIdentity.mock.calls.length).toBe(2); }); }); }); diff --git a/tests/unit/triggers/linear-comment-mention.test.ts b/tests/unit/triggers/linear-comment-mention.test.ts index a822982f..40a5efdf 100644 --- a/tests/unit/triggers/linear-comment-mention.test.ts +++ b/tests/unit/triggers/linear-comment-mention.test.ts @@ -37,6 +37,10 @@ const mockProject = { baseBranch: 'main', branchPrefix: 'feature/', pm: { type: 'linear' as const }, + linear: { + teamId: 'team-abc', + statuses: { planning: 'state-todo' }, + }, } as TriggerContext['project']; function buildCtx( @@ -238,6 +242,7 @@ describe('LinearCommentMentionTrigger', () => { id: 'fallback-issue-id', // no identifier url: 'https://linear.app/org/issue/fallback', + stateId: 'state-todo', // must be in planning state }; const result = await trigger.handle(ctx); expect(result?.workItemId).toBe('fallback-issue-id'); @@ -259,5 +264,37 @@ describe('LinearCommentMentionTrigger', () => { const result = await trigger.handle(ctx); expect(result?.agentInput.linearIssueId).toBe('issue-uuid-99'); }); + + it('returns null when issue is not in PLANNING state', async () => { + const ctx = buildCtx(); + const data = ctx.payload as Record; + (data.data as Record).issue = { + id: ISSUE_ID, + identifier: ISSUE_IDENTIFIER, + 
title: 'Test issue', + teamId: 'team-abc', + url: 'https://linear.app/org/issue/TEAM-99', + stateId: 'state-in-progress', // not planning + }; + const result = await trigger.handle(ctx); + expect(result).toBeNull(); + }); + + it('returns null when planning state is not configured in project', async () => { + const ctx = buildCtx(); + (ctx as Record).project = { + ...mockProject, + linear: { teamId: 'team-abc', statuses: {} }, // no planning state + }; + const result = await trigger.handle(ctx); + expect(result).toBeNull(); + }); + + it('includes triggerCommentBody (canonical) in agentInput', async () => { + const body = `@[Bot](${BOT_USER_ID}) please implement feature X`; + const result = await trigger.handle(buildCtx({ commentBody: body })); + + expect(result?.agentInput.triggerCommentBody).toBe(body); + }); }); }); diff --git a/tests/unit/triggers/trello-comment-mention.test.ts b/tests/unit/triggers/trello-comment-mention.test.ts index 985fe708..dbd366b7 100644 --- a/tests/unit/triggers/trello-comment-mention.test.ts +++ b/tests/unit/triggers/trello-comment-mention.test.ts @@ -1,18 +1,21 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; // Hoist mocks before imports -const { mockGetMe, mockGetCard } = vi.hoisted(() => ({ - mockGetMe: vi.fn(), +const { mockResolveTrelloBotIdentity, mockGetCard } = vi.hoisted(() => ({ + mockResolveTrelloBotIdentity: vi.fn(), mockGetCard: vi.fn(), })); vi.mock('../../../src/trello/client.js', () => ({ trelloClient: { - getMe: mockGetMe, getCard: mockGetCard, }, })); +vi.mock('../../../src/router/bot-identity-resolvers.js', () => ({ + resolveTrelloBotIdentity: (...args: unknown[]) => mockResolveTrelloBotIdentity(...args), +})); + import { mockConfigResolverModule, mockLogger, @@ -24,10 +27,6 @@ vi.mock('../../../src/utils/logging.js', () => ({ logger: mockLogger })); vi.mock('../../../src/triggers/config-resolver.js', () => mockConfigResolverModule); vi.mock('../../../src/triggers/shared/trigger-check.js', 
() => mockTriggerCheckModule); -// We need to reset the module-level cache between tests. -// The module uses a module-level variable `cachedMemberInfo`. -// We can reset it by re-importing with vi.resetModules() or by calling the exported functions. - import { checkTriggerEnabled } from '../../../src/triggers/shared/trigger-check.js'; import { TrelloCommentMentionTrigger } from '../../../src/triggers/trello/comment-mention.js'; import type { TriggerContext } from '../../../src/triggers/types.js'; @@ -104,9 +103,8 @@ describe('TrelloCommentMentionTrigger', () => { vi.resetAllMocks(); vi.mocked(checkTriggerEnabled).mockResolvedValue(true); trigger = new TrelloCommentMentionTrigger(); - // Reset the module-level member info cache by re-importing. - // The cache is a module-level variable, so we set up getMe to always respond. - mockGetMe.mockResolvedValue({ id: BOT_MEMBER_ID, username: BOT_USERNAME }); + // Set up the bot identity resolver to return a valid identity for each test. + mockResolveTrelloBotIdentity.mockResolvedValue({ id: BOT_MEMBER_ID, username: BOT_USERNAME }); mockGetCard.mockResolvedValue({ id: 'card-1', idList: PLANNING_LIST_ID, @@ -235,8 +233,8 @@ describe('TrelloCommentMentionTrigger', () => { expect(result1?.agentType).toBe('respond-to-planning-comment'); expect(result2?.agentType).toBe('respond-to-planning-comment'); - // getMe should have been called AT MOST once (cached after first call or cached from prior test) - expect(mockGetMe.mock.calls.length).toBeLessThanOrEqual(1); + // resolveTrelloBotIdentity should be called for each handle call (no module-level caching) + expect(mockResolveTrelloBotIdentity.mock.calls.length).toBe(2); }); }); }); From cfabdb567afaa7d9ac84daca548b0d6ed9ce7a00 Mon Sep 17 00:00:00 2001 From: aaight Date: Sun, 26 Apr 2026 19:16:19 +0200 Subject: [PATCH 2/8] fix(linear): populate inlineMedia from descriptions/comments and add downloadAttachment (#1202) Co-authored-by: Cascade Bot --- 
src/agents/definitions/contextSteps.ts | 3 + src/linear/client.ts | 17 ++++ src/pm/linear/adapter.ts | 27 ++++--- .../agents/definitions/contextSteps.test.ts | 41 ++++++++++ tests/unit/linear/client.test.ts | 54 +++++++++++++ tests/unit/pm/linear/adapter.test.ts | 80 +++++++++++++++++++ 6 files changed, 212 insertions(+), 10 deletions(-) diff --git a/src/agents/definitions/contextSteps.ts b/src/agents/definitions/contextSteps.ts index a5d283db..d47b0a43 100644 --- a/src/agents/definitions/contextSteps.ts +++ b/src/agents/definitions/contextSteps.ts @@ -106,6 +106,7 @@ export async function fetchWorkItemStep(params: FetchContextParams): Promise { @@ -113,6 +114,8 @@ export async function fetchWorkItemStep(params: FetchContextParams): Promise { + const { apiKey } = getLinearCredentials(); + const { downloadMedia } = await import('../pm/media.js'); + return downloadMedia(url, { Authorization: apiKey }); + }, + // ===== Reactions ===== async createReaction(commentId: string, emoji: string): Promise { diff --git a/src/pm/linear/adapter.ts b/src/pm/linear/adapter.ts index 977b2e36..6f8a422a 100644 --- a/src/pm/linear/adapter.ts +++ b/src/pm/linear/adapter.ts @@ -23,6 +23,7 @@ import { } from '../_shared/inline-checklist.js'; import type { LinearConfig } from '../config.js'; import type { ContainerId, LabelId } from '../ids.js'; +import { extractMarkdownImages } from '../media.js'; import type { Attachment, Checklist, @@ -57,6 +58,7 @@ export class LinearPMProvider implements PMProvider { async getWorkItem(id: string): Promise { const issue = await linearClient.getIssue(id); + const inlineMedia = extractMarkdownImages(issue.description ?? '', 'description'); return { id: issue.identifier || issue.id, title: issue.title, @@ -70,21 +72,26 @@ export class LinearPMProvider implements PMProvider { color: l.color, }), ), + inlineMedia: inlineMedia.length > 0 ? 
inlineMedia : undefined, }; } async getWorkItemComments(id: string): Promise { const comments = await linearClient.getIssueComments(id); - return comments.map((c) => ({ - id: c.id, - date: c.createdAt, - text: c.body, - author: { - id: c.user?.id ?? '', - name: c.user?.displayName ?? c.user?.name ?? '', - username: c.user?.email ?? '', - }, - })); + return comments.map((c) => { + const inlineMedia = extractMarkdownImages(c.body, 'comment'); + return { + id: c.id, + date: c.createdAt, + text: c.body, + author: { + id: c.user?.id ?? '', + name: c.user?.displayName ?? c.user?.name ?? '', + username: c.user?.email ?? '', + }, + inlineMedia: inlineMedia.length > 0 ? inlineMedia : undefined, + }; + }); } async updateWorkItem( diff --git a/tests/unit/agents/definitions/contextSteps.test.ts b/tests/unit/agents/definitions/contextSteps.test.ts index 3d0fd5cd..78ab8217 100644 --- a/tests/unit/agents/definitions/contextSteps.test.ts +++ b/tests/unit/agents/definitions/contextSteps.test.ts @@ -14,6 +14,7 @@ vi.mock('../../../../src/gadgets/todo/storage.js', () => ({ const mockTrelloDownload = vi.fn(); const mockJiraDownload = vi.fn(); +const mockLinearDownload = vi.fn(); vi.mock('../../../../src/trello/client.js', () => ({ trelloClient: { @@ -27,6 +28,12 @@ vi.mock('../../../../src/jira/client.js', () => ({ }, })); +vi.mock('../../../../src/linear/client.js', () => ({ + linearClient: { + downloadAttachment: mockLinearDownload, + }, +})); + vi.mock('../../../../src/gadgets/pm/core/readWorkItem.js', () => ({ readWorkItem: vi.fn(), readWorkItemWithMedia: vi.fn(), @@ -206,6 +213,7 @@ describe('fetchWorkItemStep', () => { beforeEach(() => { mockTrelloDownload.mockReset(); mockJiraDownload.mockReset(); + mockLinearDownload.mockReset(); }); it('returns empty array when no workItemId', async () => { @@ -288,6 +296,39 @@ describe('fetchWorkItemStep', () => { expect(mockTrelloDownload).not.toHaveBeenCalled(); }); + it('uses linearClient.downloadAttachment for linear provider', async () 
=> { + mockReadWorkItemWithMedia.mockResolvedValue({ + text: '# Linear issue with screenshot', + media: [ + { + url: 'https://uploads.linear.app/abc/screenshot.png', + mimeType: 'image/png', + altText: 'screenshot', + source: 'description', + }, + ], + }); + mockGetPMProviderOrNull.mockReturnValue({ type: 'linear' } as never); + mockLinearDownload.mockResolvedValue({ + buffer: Buffer.from('linear-image-data'), + mimeType: 'image/png', + }); + + const result = await fetchWorkItemStep(makeParams({ workItemId: 'LINEAR-1' })); + + expect(result[0].images).toHaveLength(1); + expect(result[0].images?.[0]).toEqual({ + base64Data: Buffer.from('linear-image-data').toString('base64'), + mimeType: 'image/png', + altText: 'screenshot', + }); + expect(mockLinearDownload).toHaveBeenCalledWith( + 'https://uploads.linear.app/abc/screenshot.png', + ); + expect(mockTrelloDownload).not.toHaveBeenCalled(); + expect(mockJiraDownload).not.toHaveBeenCalled(); + }); + it('logs WARN and skips when download returns null, stripping query params from URL', async () => { mockReadWorkItemWithMedia.mockResolvedValue({ text: '# Card', diff --git a/tests/unit/linear/client.test.ts b/tests/unit/linear/client.test.ts index 621df750..b5278d4d 100644 --- a/tests/unit/linear/client.test.ts +++ b/tests/unit/linear/client.test.ts @@ -286,3 +286,57 @@ describe('linearClient.createLabel — duplicate idempotency', () => { ).rejects.toThrow('team not found'); }); }); + +// ===== downloadAttachment ===== + +describe('linearClient.downloadAttachment', () => { + const originalFetch = globalThis.fetch; + + afterEach(() => { + globalThis.fetch = originalFetch; + vi.restoreAllMocks(); + }); + + it('sends bare Authorization header (no Bearer prefix) and returns buffer + mimeType', async () => { + const imageBytes = Buffer.from('linear-image-data'); + const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValue( + new Response(imageBytes, { + status: 200, + headers: { 'Content-Type': 'image/png' }, + }), + ); + + 
const result = await withLinearCredentials({ apiKey: 'lin_api_testkey' }, () => + linearClient.downloadAttachment('https://uploads.linear.app/abc/screenshot.png'), + ); + + expect(result).not.toBeNull(); + // biome-ignore lint/style/noNonNullAssertion: guarded by expect above + expect(result!.mimeType).toBe('image/png'); + // biome-ignore lint/style/noNonNullAssertion: guarded by expect above + expect(result!.buffer).toBeInstanceOf(Buffer); + + const [url, options] = fetchSpy.mock.calls[0]; + expect(url).toBe('https://uploads.linear.app/abc/screenshot.png'); + // Linear personal keys are bare — no "Bearer" prefix + expect(options?.headers).toEqual({ Authorization: 'lin_api_testkey' }); + // Content-Type is NOT included (this is a GET download, not a GraphQL mutation) + expect((options?.headers as Record)?.['Content-Type']).toBeUndefined(); + }); + + it('returns null on non-OK response', async () => { + vi.spyOn(globalThis, 'fetch').mockResolvedValue(new Response('Forbidden', { status: 403 })); + + const result = await withLinearCredentials({ apiKey: 'lin_api_testkey' }, () => + linearClient.downloadAttachment('https://uploads.linear.app/abc/screenshot.png'), + ); + + expect(result).toBeNull(); + }); + + it('throws when called outside withLinearCredentials scope', async () => { + await expect( + linearClient.downloadAttachment('https://uploads.linear.app/abc/screenshot.png'), + ).rejects.toThrow('No Linear credentials in scope'); + }); +}); diff --git a/tests/unit/pm/linear/adapter.test.ts b/tests/unit/pm/linear/adapter.test.ts index bb4460a4..daee419c 100644 --- a/tests/unit/pm/linear/adapter.test.ts +++ b/tests/unit/pm/linear/adapter.test.ts @@ -126,6 +126,39 @@ describe('LinearPMProvider', () => { const result = await provider.getWorkItem('issue-uuid'); expect(result.description).toBe(''); }); + + it('populates inlineMedia when description contains markdown images', async () => { + mockGetIssue.mockResolvedValue( + makeIssue({ + description: + 'Here is a 
screenshot:\n\n![screenshot](https://uploads.linear.app/abc/def.png)', + }), + ); + + const result = await provider.getWorkItem('issue-uuid'); + + expect(result.inlineMedia).toHaveLength(1); + expect(result.inlineMedia?.[0]).toMatchObject({ + url: 'https://uploads.linear.app/abc/def.png', + mimeType: 'image/png', + altText: 'screenshot', + source: 'description', + }); + }); + + it('returns undefined inlineMedia when description has no images', async () => { + mockGetIssue.mockResolvedValue(makeIssue({ description: 'Plain text, no images here.' })); + + const result = await provider.getWorkItem('issue-uuid'); + + expect(result.inlineMedia).toBeUndefined(); + }); + + it('returns undefined inlineMedia when description is null', async () => { + mockGetIssue.mockResolvedValue(makeIssue({ description: null })); + const result = await provider.getWorkItem('issue-uuid'); + expect(result.inlineMedia).toBeUndefined(); + }); }); // ========================================================================= @@ -179,6 +212,53 @@ describe('LinearPMProvider', () => { expect(result[0].author.name).toBe(''); expect(result[0].author.username).toBe(''); }); + + it('populates inlineMedia when comment body contains markdown images', async () => { + mockGetIssueComments.mockResolvedValue([ + { + id: 'c3', + body: 'See this image: ![diagram](https://uploads.linear.app/xyz/diagram.png)', + createdAt: '2024-01-03T00:00:00Z', + updatedAt: '2024-01-03T00:00:00Z', + issueId: 'issue-uuid', + user: { + id: 'u1', + name: 'Bob', + email: 'bob@example.com', + displayName: 'Bob', + avatarUrl: null, + active: true, + }, + }, + ]); + + const result = await provider.getWorkItemComments('issue-uuid'); + + expect(result[0].inlineMedia).toHaveLength(1); + expect(result[0].inlineMedia?.[0]).toMatchObject({ + url: 'https://uploads.linear.app/xyz/diagram.png', + mimeType: 'image/png', + altText: 'diagram', + source: 'comment', + }); + }); + + it('returns undefined inlineMedia when comment body has no images', 
async () => { + mockGetIssueComments.mockResolvedValue([ + { + id: 'c4', + body: 'Just text, no images.', + createdAt: '2024-01-04T00:00:00Z', + updatedAt: '2024-01-04T00:00:00Z', + issueId: 'issue-uuid', + user: null, + }, + ]); + + const result = await provider.getWorkItemComments('issue-uuid'); + + expect(result[0].inlineMedia).toBeUndefined(); + }); }); // ========================================================================= From 83f75b361da0d3d2dfb95aaba04df65e2059388a Mon Sep 17 00:00:00 2001 From: Zbigniew Sobiecki Date: Sun, 26 Apr 2026 20:23:08 +0200 Subject: [PATCH 3/8] fix(claude-code): pin pathToClaudeCodeExecutable so SDK skips broken native-binary probe (#1206) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The agent-harness SDK bump in #1197 (claude-agent-sdk 0.2.91 → 0.2.119) broke every review run on cascade-prod with: ReferenceError: Claude Code native binary not found at /app/node_modules/@anthropic-ai/claude-agent-sdk-linux-x64-musl/claude The new SDK probes its own platform-specific optional-dependency subpackages for a bundled `claude` binary. Two failure modes hit at once: 1. Cascade installs `@anthropic-ai/claude-code@2.1.119` globally at /usr/local/bin/claude — the SDK never looks there. 2. The SDK probes the `-musl` variant first regardless of host libc and errors on ENOENT instead of falling through to the glibc variant. Pass an explicit `pathToClaudeCodeExecutable` to short-circuit the probe. The resolver checks (in order): - $CLAUDE_CODE_EXECUTABLE_PATH env override (local-dev escape hatch) - `which claude` in $PATH - /usr/local/bin/claude (Docker default from Dockerfile.worker) Two TDD tests pin the option onto query() and prove the env override wins. No Dockerfile change needed; the existing global install at /usr/local/bin/claude becomes the resolver's runtime target. Confirmed broken on ucho PR #72 (cascade-prod review agent crash). 
Co-authored-by: Claude Opus 4.7 (1M context) --- src/backends/claude-code/index.ts | 20 ++++++ tests/unit/backends/claude-code.test.ts | 88 +++++++++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/src/backends/claude-code/index.ts b/src/backends/claude-code/index.ts index 64bf1600..83c3b128 100644 --- a/src/backends/claude-code/index.ts +++ b/src/backends/claude-code/index.ts @@ -1,3 +1,4 @@ +import { execFileSync } from 'node:child_process'; import { accessSync, constants, existsSync, readdirSync, statSync, writeFileSync } from 'node:fs'; import { rm } from 'node:fs/promises'; import { homedir } from 'node:os'; @@ -47,6 +48,24 @@ export function resolveClaudeModel(cascadeModel: string): string { ); } +/** + * Resolve the absolute path to the `claude` CLI for `pathToClaudeCodeExecutable`. + * Skips the SDK's platform-subpackage probe, which is broken on glibc Linux + * because it tries the `-musl` variant first and errors on ENOENT. + */ +export function resolveClaudeCodeExecutablePath(): string { + const fromEnv = process.env.CLAUDE_CODE_EXECUTABLE_PATH?.trim(); + if (fromEnv) return fromEnv; + try { + return execFileSync('which', ['claude'], { + encoding: 'utf8', + stdio: ['ignore', 'pipe', 'ignore'], + }).trim(); + } catch { + return '/usr/local/bin/claude'; + } +} + /** * Ensure $HOME/.claude.json exists with the onboarding flag. 
* Claude Code CLI requires this file to skip interactive onboarding @@ -295,6 +314,7 @@ export class ClaudeCodeEngine extends NativeToolEngine { tools: sdkTools, allowedTools: sdkTools, persistSession: true, + pathToClaudeCodeExecutable: resolveClaudeCodeExecutablePath(), hooks, env, debug: true, diff --git a/tests/unit/backends/claude-code.test.ts b/tests/unit/backends/claude-code.test.ts index 2b117790..703c833f 100644 --- a/tests/unit/backends/claude-code.test.ts +++ b/tests/unit/backends/claude-code.test.ts @@ -26,6 +26,7 @@ import { buildToolGuidance, ClaudeCodeEngine, ensureOnboardingFlag, + resolveClaudeCodeExecutablePath, resolveClaudeModel, } from '../../../src/backends/claude-code/index.js'; import { @@ -1434,6 +1435,93 @@ describe('ensureOnboardingFlag', () => { }); }); +describe('resolveClaudeCodeExecutablePath', () => { + const ENV_KEY = 'CLAUDE_CODE_EXECUTABLE_PATH'; + let original: string | undefined; + + beforeEach(() => { + original = process.env[ENV_KEY]; + unsetEnv(ENV_KEY); + }); + + afterEach(() => { + if (original === undefined) unsetEnv(ENV_KEY); + else process.env[ENV_KEY] = original; + }); + + it('honors CLAUDE_CODE_EXECUTABLE_PATH override', () => { + process.env[ENV_KEY] = '/opt/custom/claude'; + expect(resolveClaudeCodeExecutablePath()).toBe('/opt/custom/claude'); + }); + + it('trims whitespace from the env override', () => { + process.env[ENV_KEY] = ' /opt/custom/claude '; + expect(resolveClaudeCodeExecutablePath()).toBe('/opt/custom/claude'); + }); + + it('returns a non-empty string from `which claude` or the docker fallback', () => { + const resolved = resolveClaudeCodeExecutablePath(); + expect(typeof resolved).toBe('string'); + expect(resolved.length).toBeGreaterThan(0); + }); +}); + +describe('execute — pathToClaudeCodeExecutable', () => { + const ENV_KEY = 'CLAUDE_CODE_EXECUTABLE_PATH'; + let original: string | undefined; + + function mockStream(messages: Array<{ type: string; [key: string]: unknown }>) { + const iterator = 
messages[Symbol.iterator](); + mockQuery.mockReturnValue({ + [Symbol.asyncIterator]() { + return { + next() { + return Promise.resolve(iterator.next()); + }, + }; + }, + } as ReturnType); + } + + beforeEach(() => { + original = process.env[ENV_KEY]; + mockQuery.mockReset(); + }); + + afterEach(() => { + if (original === undefined) unsetEnv(ENV_KEY); + else process.env[ENV_KEY] = original; + }); + + it('passes pathToClaudeCodeExecutable to query() so the SDK skips its native-binary probe', async () => { + mockStream([ + { type: 'result', subtype: 'success', result: 'Done', total_cost_usd: 0, num_turns: 1 }, + ]); + + await new ClaudeCodeEngine().execute(makeInput()); + + const opts = mockQuery.mock.calls[0]?.[0]?.options as + | { pathToClaudeCodeExecutable?: unknown } + | undefined; + expect(typeof opts?.pathToClaudeCodeExecutable).toBe('string'); + expect((opts?.pathToClaudeCodeExecutable as string).length).toBeGreaterThan(0); + }); + + it('forwards CLAUDE_CODE_EXECUTABLE_PATH override into query() options', async () => { + process.env[ENV_KEY] = '/opt/custom/claude'; + mockStream([ + { type: 'result', subtype: 'success', result: 'Done', total_cost_usd: 0, num_turns: 1 }, + ]); + + await new ClaudeCodeEngine().execute(makeInput()); + + const opts = mockQuery.mock.calls[0]?.[0]?.options as + | { pathToClaudeCodeExecutable?: unknown } + | undefined; + expect(opts?.pathToClaudeCodeExecutable).toBe('/opt/custom/claude'); + }); +}); + describe('buildEnv', () => { it('sets CLAUDE_AGENT_SDK_CLIENT_APP', () => { const { env } = buildEnv(); From 6994effb68de604e342945bca179b2ae01071684 Mon Sep 17 00:00:00 2001 From: Zbigniew Sobiecki Date: Sun, 26 Apr 2026 20:42:39 +0200 Subject: [PATCH 4/8] spec 015: router job dispatch failure recovery (#1203) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs(spec/plans): add spec 015 + plans for router dispatch failure recovery Spec captures the silent black-hole bug class verified live on 
2026-04-26 (ucho/MNG-350): a transient capacity miss or Docker error during worker spawn turns a webhook-driven job into a permanently failed BullMQ entry while stranding the work-item / agent-type locks for up to 30 minutes, silently rejecting subsequent webhooks for the same work item. Decomposed into two plans with safety-net-first sequencing: plan 1 hooks the BullMQ failed event to release locks on every dispatch failure path; plan 2 replaces the throw-on-capacity with a wait-for-slot semaphore, adds bounded retry with exponential backoff, and a transient/terminal error classifier. Co-Authored-By: Claude Opus 4.7 (1M context) * chore(plan): lock 015/1 failed-event-lock-compensation Co-Authored-By: Claude Opus 4.7 (1M context) * feat(router): plan 015/1 done — release locks on dispatch failure Closes the stranded-lock half of spec 015's bug class verified live in prod on 2026-04-26 (ucho/MNG-350). When a webhook-driven job's dispatch fails — capacity throw, Docker spawn error, or any future throw site — the work-item lock, agent-type concurrency counter, and recently-dispatched dedup mark established by the webhook → enqueue path are now released by a compensator hooked to BullMQ's `worker.on('failed')` event. What landed: - `src/router/dispatch-compensator.ts` (new) — `releaseLocksForFailedJob` wraps `extractProjectIdFromJob` / `extractWorkItemId` / `extractAgentType` and calls into `clearWorkItemEnqueued` / `clearAgentTypeEnqueued` / `clearRecentlyDispatched`. Never propagates errors; captures to Sentry with `tags: { source: 'dispatch_compensator' }`. - `src/router/agent-type-lock.ts` — exports new `clearRecentlyDispatched` for the compensator. The existing `markRecentlyDispatched` semantics are unchanged (60s TTL, NOT cleared on completion); this helper exists solely so a permanently-failed dispatch doesn't keep deduping a fresh webhook for ~60s while the user retries. 
- `src/router/bullmq-workers.ts` — extends the existing `worker.on('failed')` handler to invoke `releaseLocksForFailedJob` alongside the existing logger + Sentry calls. Wraps the call in a defensive `.catch` so a future regression in the compensator can't poison the worker. - `src/router/lock-state-classifier.ts` (new) — `classifyLockState` returns `'awaiting-slot'` when an active worker or queued/waiting job matches the trio, `'wedged'` when neither correlation matches. Defaults to `'awaiting-slot'` on classifier error so a Redis blip doesn't mis-emit the wedged canary. - `src/router/active-workers.ts` — `getActiveWorkers()` now exposes `(projectId, workItemId, agentType)` so the classifier can correlate. Backwards-compatible (existing callers work unchanged; new fields are additive optional). - `src/router/webhook-processor.ts` — Step 8 (work-item lock check) now splits the decision-reason vocabulary into three states: * `Job queued: ...` (success path) * `Awaiting worker slot: ...` (lock held + dispatch in flight; healthy) * `Work item locked (no active dispatch): ...` (wedged-lock canary) The wedged branch additionally fires `captureException` with `tags: { source: 'wedged_lock_canary' }` so any regression in compensation is loud in production. What this does NOT change (intentional, all in plan 015/2): - `guardedSpawn` still throws on capacity (BullMQ marks the job failed, the compensator now releases the locks, but the job itself is still lost). Plan 2 replaces the throw with a wait-for-slot semaphore. - Both queues still default to `attempts: 1`. Plan 2 raises this with exponential backoff and adds a transient/terminal error classifier. - CLAUDE.md is intentionally not updated by this plan — the unified passage describing both halves of the new contract lands in plan 015/2. 
Tests: - 5 new unit tests in `dispatch-compensator.test.ts` - 3 new unit tests in `agent-type-lock.test.ts` for `clearRecentlyDispatched` - 4 new unit tests in `bullmq-workers.test.ts` for the failed-event seam - 5 new unit tests in `lock-state-classifier.test.ts` - 2 new unit tests in `active-workers.test.ts` for the extended shape - 4 new unit tests in `webhook-processor.test.ts` for the three-way taxonomy - 3 new module-integration tests in `tests/integration/router/dispatch-failure-compensation.test.ts` exercise the real lock modules + real bullmq-workers.ts failed-event handler + real compensator end-to-end (only BullMQ's Worker constructor + the worker-env extractors are mocked). Full suite: 8515 passed / 23 skipped / 0 failed. Lint + typecheck clean. Co-Authored-By: Claude Opus 4.7 (1M context) * chore(plan): lock 015/2 wait-for-slot-and-retry-classifier Co-Authored-By: Claude Opus 4.7 (1M context) * chore(plan): mark 015/2 status: wip * feat(router): plan 015/2 done — wait-for-slot + retry budget + classifier Closes the lost-job half of spec 015's bug class. Combined with plan 015/1, the silent black-hole failure mode verified live in prod on 2026-04-26 (ucho/MNG-350) is now fully closed. What landed: - `src/router/slot-waiter.ts` (new) — semaphore-style primitive: `acquireSlot({ timeoutMs })` resolves immediately when capacity is below `routerConfig.maxWorkers`, otherwise queues a FIFO waiter with a bounded timeout that rejects with `code: 'SLOT_WAIT_TIMEOUT'`. `slotReleased()` pops the head waiter; `clearAllWaiters()` rejects every pending waiter with `code: 'SHUTDOWN'` on router stop. - `src/router/dispatch-error-classifier.ts` (new) — classifies thrown errors into `'transient'` (Docker socket Node codes, HTTP 429/409, SLOT_WAIT_TIMEOUT, anything unknown — default-to-retry) vs `'terminal'` (TypeError, ZodError, image-not-found-after-fallback). 
- `src/router/worker-manager.ts` — `guardedSpawn` rewritten: `await acquireSlot(...)` replaces the synchronous capacity throw; on spawn error, terminal errors are wrapped in BullMQ's `UnrecoverableError` so retries skip; transient errors propagate unchanged so BullMQ retries via attempts/backoff. - `src/router/active-workers.ts` — `cleanupWorker` now calls `slotReleased()` exactly once per cleanup, including on the crash path. The existing `if (worker)` guard ensures idempotence. - `src/router/config.ts` — new `slotWaitTimeoutMs` field (default 5min, configurable via `SLOT_WAIT_TIMEOUT_MS`). - `src/router/queue.ts` and `src/queue/client.ts` — both queues now default to `attempts: 4` with `backoff: { type: 'exponential', delay: 5000 }` (~75s total before exhaustion). Terminal errors bypass via `UnrecoverableError`. - `src/router/container-manager.ts` — exports the existing `isImageNotFoundError` predicate so the classifier can reuse it. Test contract change (spec AC #9): The previous `tests/unit/router/worker-manager.test.ts:179` assertion `'processFn throws when at capacity'` is REPLACED (not deleted) with `'processFn awaits a slot when at capacity, then dispatches when one frees'`. The throw-on-capacity contract is gone forever. Tests: - 7 new unit tests in `slot-waiter.test.ts` (FIFO, timeout, no-op, shutdown rejection) - 11 new unit tests in `dispatch-error-classifier.test.ts` covering every transient/terminal class - 4 new unit tests in `worker-manager.test.ts` (replaced original capacity-throw test + 3 for retry classification) - 3 new unit tests in `active-workers.test.ts` for slotReleased integration - 5 new module-integration tests in `dispatch-retry.test.ts` exercise REAL guardedSpawn + REAL slot-waiter + REAL dispatch-error-classifier against both queues, mocking only spawnWorker + BullMQ Worker constructor. Plan 1's 3 module-integration tests continue to pass alongside plan 2's 5. Full unit suite: 8539 passed / 23 skipped / 0 failed. 
CLAUDE.md updated with a new "Dispatch failure semantics" section documenting the unified contract (capacity wait, retry budget, classifier, three-way decision-reason taxonomy from plan 1, wedged-lock canary). File now 182 lines, under the 200-line cap. Co-Authored-By: Claude Opus 4.7 (1M context) * chore(spec): 015 done — router job dispatch failure recovery, all plans complete Closes the silent black-hole bug class verified live on 2026-04-26 (ucho/MNG-350). Plan 1 added failed-event lock compensation + three-way decision-reason taxonomy; plan 2 replaced the throw-on-capacity with wait-for-slot, added bounded retry with exponential backoff, and introduced a transient/terminal error classifier. Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 2 + CLAUDE.md | 13 + .../1-failed-event-lock-compensation.md.done | 219 ++++++++++++++++ ...wait-for-slot-and-retry-classifier.md.done | 246 ++++++++++++++++++ .../_coverage.md | 39 +++ ...uter-job-dispatch-failure-recovery.md.done | 130 +++++++++ src/queue/client.ts | 7 +- src/router/active-workers.ts | 22 +- src/router/agent-type-lock.ts | 14 + src/router/bullmq-workers.ts | 20 ++ src/router/config.ts | 7 + src/router/container-manager.ts | 5 +- src/router/dispatch-compensator.ts | 49 ++++ src/router/dispatch-error-classifier.ts | 66 +++++ src/router/lock-state-classifier.ts | 70 +++++ src/router/queue.ts | 9 +- src/router/slot-waiter.ts | 88 +++++++ src/router/webhook-processor.ts | 38 ++- src/router/worker-manager.ts | 44 +++- .../dispatch-failure-compensation.test.ts | 193 ++++++++++++++ .../integration/router/dispatch-retry.test.ts | 211 +++++++++++++++ tests/unit/router/active-workers.test.ts | 62 ++++- tests/unit/router/agent-type-lock.test.ts | 23 ++ tests/unit/router/bullmq-workers.test.ts | 75 ++++++ .../unit/router/dispatch-compensator.test.ts | 148 +++++++++++ .../router/dispatch-error-classifier.test.ts | 66 +++++ 
.../unit/router/lock-state-classifier.test.ts | 122 +++++++++ tests/unit/router/slot-waiter.test.ts | 133 ++++++++++ tests/unit/router/webhook-processor.test.ts | 97 ++++++- tests/unit/router/worker-manager.test.ts | 114 +++++++- 30 files changed, 2307 insertions(+), 25 deletions(-) create mode 100644 docs/plans/015-router-job-dispatch-failure-recovery/1-failed-event-lock-compensation.md.done create mode 100644 docs/plans/015-router-job-dispatch-failure-recovery/2-wait-for-slot-and-retry-classifier.md.done create mode 100644 docs/plans/015-router-job-dispatch-failure-recovery/_coverage.md create mode 100644 docs/specs/015-router-job-dispatch-failure-recovery.md.done create mode 100644 src/router/dispatch-compensator.ts create mode 100644 src/router/dispatch-error-classifier.ts create mode 100644 src/router/lock-state-classifier.ts create mode 100644 src/router/slot-waiter.ts create mode 100644 tests/integration/router/dispatch-failure-compensation.test.ts create mode 100644 tests/integration/router/dispatch-retry.test.ts create mode 100644 tests/unit/router/dispatch-compensator.test.ts create mode 100644 tests/unit/router/dispatch-error-classifier.test.ts create mode 100644 tests/unit/router/lock-state-classifier.test.ts create mode 100644 tests/unit/router/slot-waiter.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index d8bdc5de..cc05c2f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ All notable user-visible changes to CASCADE are documented here. The format is l ### Changed +- **Router dispatch capacity now waits for a slot; transient Docker errors retry; terminal errors fail fast** (spec 015, plan 2 of 2). Replaces `guardedSpawn`'s synchronous "No worker slots available" throw with an in-process slot-waiter (default 5min timeout, configurable via `SLOT_WAIT_TIMEOUT_MS`). 
Adds a dispatch-error classifier that splits transient (`ECONNREFUSED` / `ECONNRESET` / `ENOTFOUND` / HTTP 429 / container-name 409 / `SLOT_WAIT_TIMEOUT`) from terminal (`TypeError` / `ZodError` / image-not-found-after-fallback). Both `cascade-jobs` and `cascade-dashboard-jobs` queue defaults now specify `attempts: 4` with `backoff: { type: 'exponential', delay: 5000 }` (~75s total before exhaustion). Terminal errors are wrapped in BullMQ's `UnrecoverableError` so retries skip. Combined with plan 015/1, the original silent black-hole failure mode (verified live on 2026-04-26 via ucho/MNG-350) is fully closed: no more lost jobs on transient capacity misses or Docker hiccups, no more wedged locks. CLAUDE.md updated with the new "Dispatch failure semantics" passage. See [spec 015](docs/specs/015-router-job-dispatch-failure-recovery.md). +- **Router dispatch failures now release in-memory locks via the BullMQ failed event** (spec 015, plan 1 of 2). Hooks `worker.on('failed')` on both `cascade-jobs` and `cascade-dashboard-jobs` queues to call a new `releaseLocksForFailedJob` compensator that releases the work-item lock, agent-type concurrency counter, and recently-dispatched dedup mark for any job whose dispatch fails. Closes the stranded-lock half of the prod incident verified on 2026-04-26 (ucho/MNG-350): a transient capacity miss was leaving the in-memory work-item lock wedged for 30 minutes, silently rejecting subsequent webhooks for the same trio. Also splits the webhook decision-reason vocabulary into three states — `Job queued` (success), `Awaiting worker slot: …` (in-flight, healthy), `Work item locked (no active dispatch): …` (wedged-lock canary, fires a Sentry capture tagged `wedged_lock_canary` so any regression in compensation is loud). Plan 2 closes the lost-job half (wait-for-slot, retry budget, error classifier). See [spec 015](docs/specs/015-router-job-dispatch-failure-recovery.md). 
- **`cascade-tools scm create-pr-review`: `--comment` alias + `--comments-file` escape hatch** (spec 014, plan 2 of 2). The command now accepts `--comment` (singular) as an alias for `--comments` — the exact muscle-memory mistake from prod run 5d993b04 now resolves correctly. Added `--comments-file <path>` (and `-` for stdin) as a JSON-parsed file alternative for long payloads that don't survive shell quoting. Zero edits to shared infrastructure (cliCommandFactory, manifestGenerator, nativeToolPrompts, errorEnvelope) — the two declarative fields on `createPRReviewDef.parameters.comments.cliAliases` + `createPRReviewDef.cli.fileInputAlternatives` are everything. Proves spec 014's single-entrypoint invariant: a new or evolved gadget should never need to touch shared machinery. See [spec 014](docs/specs/014-cascade-tools-agent-ergonomics.md). - **`cascade-tools` agent ergonomics: truthful system prompt, runnable `--help`, structured error envelope** (spec 014, plan 1 of 2). The system-prompt renderer that describes every cascade-tools command to agents now tells the truth about array-shaped parameters — no more silent `s`-stripping of names, no more ` (repeatable)` claim for array-of-object flags (they correctly render as `--<name> '<json>'` now, with aliases appended via `|` and a one-line runnable JSON example inlined from the tool definition's `examples` block). Every CLI failure — flag-parse, JSON-parse, missing-required, enum-mismatch, unknown-flag, auth, runtime — emits a single structured envelope on stdout (`{"success":false,"error":{type,flag?,message,got?,expected?,hint?,example?}}`) plus a short prose summary on stderr for humans, replacing the ad-hoc mix of `this.error()` prose and `{success:false,error:"<string>"}` flat shapes. Mistyped flags get a "did you mean" suggestion via Levenshtein match against declared canonical names + aliases. `--help` now renders `def.examples` as copy-pasteable shell invocations under an `EXAMPLES` section.
Root-caused by prod run 5d993b04-6e05-4ae1-b7de-8c274cf3496b where a review agent wasted ~2½ min fighting the prior pre-014 surface and ultimately dropped an inline PR comment. See [spec 014](docs/specs/014-cascade-tools-agent-ergonomics.md) + authoring guide at [`src/gadgets/README.md`](src/gadgets/README.md). - **`cascade-tools` now streams subprocess output live** (spec 013). The shared subprocess helper (on top of `execa` + `tree-kill`) forwards child stdout/stderr to the parent's stderr line-by-line as it arrives, emits a heartbeat line on stderr every 30 seconds of child silence (configurable), enforces both an idle-silence timeout (default 120s) and a wall-clock timeout (default 600s) with SIGTERM→SIGKILL escalation, and kills the full process tree on timeout. `git push` and `git commit` invoked by `scm create-pr` pass tighter per-caller timeouts and now return captured hook output in the result on success (previously discarded). Result shape is backward-compatible — `{ stdout, stderr, exitCode }` preserved; new optional `reason: 'idle-timeout' | 'wall-timeout'` surfaces when the helper killed the child. Motivation: LLM-driven CASCADE agents watching an output file could not distinguish a slow pre-push hook (~60s of silence) from a hung process, leading to retry loops that burned 5–10+ minutes of run budget. See [spec 013](docs/specs/013-subprocess-output-streaming.md). diff --git a/CLAUDE.md b/CLAUDE.md index a39e5e33..40ec0bd0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -123,6 +123,19 @@ Some triggers take params (e.g. `review` + `scm:check-suite-success` accepts `{" **Worker exit diagnostics** — when a worker container exits non-zero, the router calls `container.inspect()` *before* AutoRemove reaps it and stamps the run record's `error` field with a structured, grep-stable string: `Worker crashed with exit code N · OOMKilled= · reason=""`. 
The `OOMKilled=true` marker is the definitive cgroup-OOM signal (per Docker's own `State.OOMKilled`); a 137 exit *without* `OOMKilled=true` means the kill came from inside the container or from a non-cgroup signal — *not* memory. The `[WorkerManager] Resolved spawn settings` log emitted at every spawn includes both `projectWatchdogTimeoutMs` and `globalWorkerTimeoutMs` so post-mortems can confirm whether the per-project override actually won. See `src/router/active-workers.ts:formatCrashReason` for the format and `tests/unit/router/container-manager-diagnostics.test.ts` for regression pins. +**Dispatch failure semantics** — spec 015 (verified live in prod via the ucho/MNG-350 incident on 2026-04-26): + +- **Capacity miss waits, never throws.** When the dispatcher pulls a job and the worker pool is at `maxWorkers`, it `await`s a slot via the in-process slot-waiter (default `slotWaitTimeoutMs` = 5min). The slot is conceptually held by the running container — `slotReleased()` is called once per cleanup from `cleanupWorker`, never from the dispatcher. +- **Transient Docker errors retry.** `ECONNREFUSED` / `ECONNRESET` / `ENOTFOUND` on the Docker socket, registry HTTP 429, container-name 409 collisions, and the `SLOT_WAIT_TIMEOUT` itself all classify as transient and propagate unchanged so BullMQ retries via `attempts: 4` + `backoff: { type: 'exponential', delay: 5000 }` (~75s total before exhaustion). Both `cascade-jobs` and `cascade-dashboard-jobs` use the same retry config. +- **Terminal errors fail fast.** `TypeError` / `ZodError` (validation) and image-not-found *after* fallback exhaustion are wrapped in BullMQ's `UnrecoverableError`, which skips the retry budget entirely. 
+- **Failed-event compensation releases locks.** Every dispatch failure (transient retry exhaustion, terminal error, slot-wait timeout exhaustion) flows through `worker.on('failed')`, which calls `releaseLocksForFailedJob` to release the work-item lock, agent-type counter, and recently-dispatched dedup mark. Without this, the locks leak for ~30min and silently reject every follow-up webhook for the same trio. +- **Webhook decision reasons are three-way.** When the work-item lock check rejects a webhook, the message distinguishes: + - `Job queued: ...` (success — not a lock rejection) + - `Awaiting worker slot: ...` (lock held + dispatch in flight — healthy) + - `Work item locked (no active dispatch): ...` (wedged-lock canary — the lock-state classifier could correlate the lock count with neither an active worker nor a queued/waiting BullMQ job; this fires a Sentry capture tagged `wedged_lock_canary` so any regression in compensation is loud) + +The wedged-lock canary should never fire under normal operation. Its presence in webhook logs or Sentry is itself a regression invariant: a code path acquired a lock without registering its compensation. + ## Review agent — context shape (debugging) Review agent receives a **compact per-file diff context**, not full file contents. Each changed file is a `### (, +N -M)` section with a unified diff hunk. Budget: `REVIEW_DIFF_CONTEXT_TOKEN_LIMIT` = 200k tokens, per-file cap 10%. 
diff --git a/docs/plans/015-router-job-dispatch-failure-recovery/1-failed-event-lock-compensation.md.done b/docs/plans/015-router-job-dispatch-failure-recovery/1-failed-event-lock-compensation.md.done new file mode 100644 index 00000000..02a42e30 --- /dev/null +++ b/docs/plans/015-router-job-dispatch-failure-recovery/1-failed-event-lock-compensation.md.done @@ -0,0 +1,219 @@ +--- +id: 015 +slug: router-job-dispatch-failure-recovery +plan: 1 +plan_slug: failed-event-lock-compensation +level: plan +parent_spec: docs/specs/015-router-job-dispatch-failure-recovery.md +depends_on: [] +status: done +--- + +# 015/1: Failed-event lock compensation + decision-reason taxonomy + +> Part 1 of 2 in the 015-router-job-dispatch-failure-recovery plan. See [parent spec](../../specs/015-router-job-dispatch-failure-recovery.md). + +## Summary + +This plan closes the **stranded-lock** half of the spec's bug class. It hooks BullMQ's existing `worker.on('failed')` event on both queues to release every in-memory lock entry that the webhook → enqueue path acquires (work-item lock, agent-type lock, recently-dispatched dedup mark) when dispatch ultimately fails. After this plan ships, no dispatch failure — capacity throw, Docker spawn error, or any future throw site — can leave a wedged lock that mis-rejects subsequent webhooks for the same work item. + +It also splits the webhook decision-reason vocabulary that Step 8's lock check emits into three distinguishable states: **Job queued** (success path, unchanged wording aside from existing `Job queued: ...`), **Awaiting worker slot** (lock held *and* a corresponding active worker / queued job is reachable — the healthy in-flight case), and **Work item locked (no active dispatch)** (lock held but no active worker and no waiting job — the wedged-lock canary). 
The third reason is a regression invariant: after this plan ships, its presence in webhook logs means a code path acquired a lock without registering its compensation — which the plan's tests prevent. + +This plan does NOT change the dispatch contract — `guardedSpawn` still throws on capacity, `attempts: 1` is still set, jobs still die in the failed set after one bad dispatch. The user-visible improvement is "no more wedged locks" — half the original incident is gone. The other half (lost jobs) is delivered by Plan 2. + +**Components delivered:** +- New compensator that maps `(jobData) → (projectId, workItemId, agentType)` and releases all three locks. Lives in a new module under `src/router/`. +- `worker.on('failed')` handler in `src/router/bullmq-workers.ts` extended to invoke the compensator for both `cascade-jobs` and `cascade-dashboard-jobs` queues. +- `src/router/webhook-processor.ts` Step 8 (work-item lock check) emits one of three decision reasons by correlating the lock count with the active-worker registry + the BullMQ queue's waiting/active counts. +- New tests: failed-event compensator releases locks for each lock kind across both queues; decision-reason taxonomy covers all three states. + +**Deferred to later plans in this spec:** +- Wait-for-slot replacement of the capacity throw — Plan 2. +- `attempts > 1` + retry classifier — Plan 2. +- CLAUDE.md update covering both halves of the new contract — Plan 2. +- Replacement of the existing `'processFn throws when at capacity'` test — Plan 2 (this plan adds new tests around the failed-event hook; the throw-on-capacity test stays green here because the throw still happens). 
+ +--- + +## Spec ACs satisfied by this plan + +- Spec AC #4 (no stranded lock after any dispatch failure path) — **full** +- Spec AC #5 (decision reason distinguishes in-flight from wedged-lock) — **full** +- Spec AC #6 (wedged-lock canary never fires under normal operation) — **full** +- Spec AC #7 (manual-run path continues to bypass the lock and dispatch successfully) — **full** (regression test pinning the existing bypass) +- Spec AC #8 (clean-exit path identical to today) — **full** (regression test pinning the existing exit path) +- Spec AC #9 (test coverage updated) — **partial** (adds new failed-event tests; old throw-on-capacity test is replaced by Plan 2) + +--- + +## Depends On + +None. This plan is a strict additive layer on existing dispatch paths. It can ship independently of Plan 2. + +--- + +## Detailed Task List (TDD) + +### 1. Lock compensator module + +**Tests first** (`tests/unit/router/dispatch-compensator.test.ts`): + +- `releaseLocksForFailedJob — releases work-item, agent-type, and recently-dispatched marks for a CascadeJob with all three identifiers` — unit — call compensator with `{ type: 'linear', projectId: 'p1', workItemId: 'w1', /* …enough payload that extractAgentType returns 'implementation' */ }`; assert all three lock-module spies were called with `('p1', 'w1', 'implementation')` (or `('p1', 'implementation')` for agent-type / recently-dispatched). Expected red: `Error: Cannot find module './dispatch-compensator'`. +- `releaseLocksForFailedJob — no-ops cleanly when projectId is null` — unit — pass a job whose `extractProjectIdFromJob` resolves to `null` (e.g. a foreign-provider payload); assert no lock-module spies were called and the function resolved without throw. Expected red: same module-not-found error first; after creating the module with a stub that always calls clearWorkItemEnqueued, this test fails with `expect(mockClearWorkItemEnqueued).not.toHaveBeenCalled()`. 
+- `releaseLocksForFailedJob — releases agent-type-lock + recently-dispatched even when workItemId is undefined` — unit — pass a manual-run job that has `projectId` and `agentType` but no `workItemId`; assert work-item-lock spy was NOT called, agent-type-lock and recently-dispatched spies WERE called. Expected red: as above; or `expect(mockClearWorkItemEnqueued).not.toHaveBeenCalled()` if the implementation calls it unconditionally. +- `releaseLocksForFailedJob — handles a DashboardJob (manual-run) without throwing` — unit — pass `{ type: 'manual-run', projectId, workItemId, agentType }`; assert spies as appropriate. Expected red: type narrowing failure or `extractAgentType` returning undefined. +- `releaseLocksForFailedJob — captureException when an extractor throws` — unit — mock `extractProjectIdFromJob` to throw; assert `captureException` is called and the function still resolves (does not propagate). Expected red: unhandled rejection from the mocked extractor. + +**Implementation** (`src/router/dispatch-compensator.ts`): +- Export `async function releaseLocksForFailedJob(jobData: CascadeJob | DashboardJobData): Promise<void>`. +- Resolve the trio via existing extractors imported from `src/router/worker-env.ts`: `extractProjectIdFromJob`, `extractWorkItemId`, `extractAgentType`. +- Call `clearWorkItemEnqueued(projectId, workItemId, agentType)` only when all three are defined. +- Call `clearAgentTypeEnqueued(projectId, agentType)` whenever `projectId && agentType`. +- Call `clearRecentlyDispatched(projectId, agentType, workItemId)` whenever `projectId && agentType` (we'll add this exported helper to `agent-type-lock.ts` — see section 2). +- Wrap each call in a try/catch that funnels to `captureException` with `tags: { source: 'dispatch_compensator' }` and a structured log; never propagate so a compensator failure doesn't crash the BullMQ worker. + +### 2. 
New `clearRecentlyDispatched` exported helper + +**Tests first** (`tests/unit/router/agent-type-lock.test.ts` — extend existing file): + +- `clearRecentlyDispatched — removes the dedup entry for a (projectId, agentType, dedupScope) key set by markRecentlyDispatched` — unit — call `markRecentlyDispatched('p1', 'implementation', 'w1')`; assert `wasRecentlyDispatched('p1', 'implementation', 'w1')` returns `true`; call new `clearRecentlyDispatched('p1', 'implementation', 'w1')`; assert `wasRecentlyDispatched(...)` returns `false`. Expected red: `clearRecentlyDispatched is not a function`. +- `clearRecentlyDispatched — no-op when key was not previously marked` — unit — call clear without prior mark; assert no throw. Expected red: same as above. +- `clearRecentlyDispatched — leaves entries for other (agentType, scope) keys untouched` — unit — set marks for two distinct keys, clear one, assert the other still present. Expected red: same as above. + +**Implementation** (`src/router/agent-type-lock.ts`): +- Export a new function `clearRecentlyDispatched(projectId, agentType, dedupScope?)` that deletes the corresponding key from `dedupMap`. +- Do NOT change the existing `markRecentlyDispatched` semantics or the `DEDUP_TTL_MS` value — this helper is purely additive, used only by the failed-event compensator. + +### 3. Failed-event hook in BullMQ worker factory + +**Tests first** (`tests/unit/router/bullmq-workers.test.ts` — extend existing file): + +- `worker.on('failed') invokes releaseLocksForFailedJob with job.data` — unit — construct a Worker via `createQueueWorker` with a `processFn` that throws; emit `'failed'` synthetically (or invoke the registered handler directly); assert the compensator spy was called with the job data. Expected red: spy not called (existing `worker.on('failed')` only logs + Sentries today). 
+- `worker.on('failed') still logs and Sentries on top of compensating` — unit — assert both the existing log + `captureException` calls happen AND the compensator spy is invoked. Expected red: compensator spy not called. +- `worker.on('failed') swallows compensator throws` — unit — mock the compensator to reject; emit `'failed'`; assert the BullMQ worker factory does not propagate the rejection (no unhandled rejection in the test). Expected red: unhandled rejection, or factory under test crashes. +- `worker.on('failed') is wired for both cascade-jobs and cascade-dashboard-jobs queues` — unit — call `startWorkerProcessor()` with two distinct mock `createQueueWorker` returns; assert both registered a `failed` handler that calls the compensator. Expected red: only one handler wired (or none). + +**Implementation** (`src/router/bullmq-workers.ts`): +- Inside `createQueueWorker`'s existing `worker.on('failed', ...)` handler, after the existing logger + `captureException` calls, invoke `releaseLocksForFailedJob(job.data)` if `job` is defined. Wrap in try/catch with `captureException` so a compensator throw does not poison the worker. + +### 4. Three-way decision-reason taxonomy in webhook-processor + +**Tests first** (`tests/unit/router/webhook-processor.test.ts` — extend existing file): + +- `Step 8 — emits 'Awaiting worker slot' when lock count >= 1 AND at least one active worker is registered for (projectId, workItemId, agentType)` — unit — set up `enqueuedMap` to register one entry; mock `getActiveWorkers()` to include a matching worker; expect the returned `decisionReason` to start with `Awaiting worker slot:`. Expected red: today's reason starts with `Work item locked:`. +- `Step 8 — emits 'Awaiting worker slot' when lock count >= 1 AND a matching job is in BullMQ waiting/active state` — integration — push a real BullMQ job (mocked Redis or test connection) into the waiting state; set the in-memory lock; assert the decision reason. 
Expected red: today's reason as above. +- `Step 8 — emits 'Work item locked (no active dispatch)' when lock count >= 1 AND neither active worker nor queued job exists for the trio` — unit — set the lock entry but ensure both `getActiveWorkers()` returns an empty list and the queue's `getJobs(['waiting','active'])` returns an empty list. Expected red: today's reason wording. +- `Step 8 — preserves existing log fields (source, projectId, workItemId, blockedAgentType, reason)` — unit — capture the log call; assert all fields present. Expected red: log structure changed unintentionally. +- `Step 8 — does not call queue.getJobs when lock count is 0` — unit — assert no queue lookup happens on the happy path (no perf regression). Expected red: an unexpected queue call (which is a sign the implementation does the lookup unconditionally). + +**Implementation** (`src/router/webhook-processor.ts` and a new helper): +- Add a helper, e.g. `classifyLockState({ projectId, workItemId, agentType }): Promise<'awaiting-slot' | 'wedged' | 'unknown'>`, in a new module `src/router/lock-state-classifier.ts` (or co-located helper file). The helper: + - Returns `'awaiting-slot'` if `getActiveWorkers()` includes a worker whose `(projectId, workItemId, agentType)` matches OR `jobQueue.getJobs(['waiting','active'])` returns at least one matching job (matched by the same trio extracted via the existing extractors). + - Returns `'wedged'` if the lock count is non-zero per the existing in-memory map and neither correlation matches. + - Returns `'unknown'` only on classifier error (queue lookup throws); falls back to `'awaiting-slot'` for behavioral safety (do NOT mis-emit the wedged-lock canary on an error path). +- In `webhook-processor.ts:182-198`, when `lockStatus.locked` is true, call the classifier and pick one of: + - `Awaiting worker slot: ${lockStatus.reason}` for `awaiting-slot` and `unknown`. + - `Work item locked (no active dispatch): ${lockStatus.reason}` for `wedged`. 
**Also call `captureException` with a synthetic Error tagged `source: 'wedged_lock_canary'` and structured `extra` so the regression invariant is loud.** +- Keep the existing `result.onBlocked?.()` call path untouched. +- Augment `getActiveWorkers()` in `src/router/active-workers.ts` to return `projectId / workItemId / agentType` for each entry (currently returns only `{jobId, startedAt}`). Keep callers unchanged (extend the shape; don't break existing readers). + +### 5. Active-workers shape extension + +**Tests first** (`tests/unit/router/active-workers.test.ts` — extend existing file): + +- `getActiveWorkers — returns projectId, workItemId, agentType for each tracked worker` — unit — register a worker via `activeWorkers.set` with all three identifiers; assert `getActiveWorkers()` includes them. Expected red: returned shape currently lacks the trio (only `jobId` + `startedAt` today). +- `getActiveWorkers — backwards-compatible callers (existing tests pinning jobId+startedAt) still pass` — unit — existing assertions on `jobId` and `startedAt` continue to pass. Expected red: only fails if the migration removes the old fields. + +**Implementation** (`src/router/active-workers.ts`): +- Extend `getActiveWorkers()`'s return type to `Array<{ jobId: string; startedAt: Date; projectId?: string; workItemId?: string; agentType?: string }>`. Map from the existing `ActiveWorker` shape (the trio is already stored there per the spec context). 
+ +--- + +## Test Plan + +### Unit tests +- [ ] `tests/unit/router/dispatch-compensator.test.ts`: 5 tests covering compensator behavior across job kinds + extractor-throw path +- [ ] `tests/unit/router/agent-type-lock.test.ts`: +3 tests for `clearRecentlyDispatched` +- [ ] `tests/unit/router/bullmq-workers.test.ts`: +4 tests for failed-event hook (logging + compensation + swallowing + both queues) +- [ ] `tests/unit/router/webhook-processor.test.ts`: +5 tests for the three-way decision-reason taxonomy +- [ ] `tests/unit/router/active-workers.test.ts`: +2 tests for the extended `getActiveWorkers` shape +- [ ] `tests/unit/router/lock-state-classifier.test.ts` (new): 4 tests covering `awaiting-slot` / `wedged` / `unknown` branches plus the fallback safety on classifier error + +### Integration tests +- [ ] `tests/integration/router/dispatch-failure-compensation.test.ts` (new): exercises the real BullMQ in-memory + the real lock modules. Scenarios: + - Enqueue a job whose processFn throws → assert all three locks are released by `worker.on('failed')` before the test resolves; assert a follow-up webhook for the same trio is NOT blocked. + - Enqueue a job, let it succeed → assert locks are released by the existing exit path (regression test for AC #8). + - Enqueue a manual-run job whose processFn throws → assert the manual-run lock-bypass (no work-item lock acquired in the first place) and that agent-type-lock + recently-dispatched are released. 
+ +### Acceptance tests +- [ ] AC #4: integration test "subsequent webhook not rejected after failed dispatch" +- [ ] AC #5: unit tests for the three decision-reason branches +- [ ] AC #6: wedged-lock branch emits the `wedged_lock_canary` Sentry tag — captured via spy +- [ ] AC #7: manual-run integration test pins the lock-bypass behavior +- [ ] AC #8: clean-exit integration test pins identical behavior +- [ ] AC #9 (partial): new tests added; existing throw-on-capacity test stays green (Plan 2 replaces it) + +--- + +## Manual Verification (for `[manual]`-tagged ACs only) + +n/a — all ACs auto-tested. + +--- + +## Acceptance Criteria (per-plan, testable) + +1. The failed-event compensator releases work-item-lock, agent-type-lock, and recently-dispatched marks for any job whose dispatch fails — for both `cascade-jobs` and `cascade-dashboard-jobs` queues. +2. A webhook for `(projectId, workItemId, agentType)` arriving immediately after a dispatch failure for the same trio is NOT rejected with a `Work item locked` decision reason caused by stranded in-memory state. +3. Webhook decision logs distinguish three states for a held lock: `Job queued` (success), `Awaiting worker slot: …` (in-flight, healthy), `Work item locked (no active dispatch): …` (wedged-lock canary). Each is emitted from the appropriate branch of the lock-state classifier. +4. The wedged-lock branch additionally fires a Sentry capture tagged `source: 'wedged_lock_canary'` so the invariant is observable in production. +5. The dashboard manual-run path continues to bypass the work-item lock and dispatch normally even when the lock count for the trio is non-zero. +6. A clean-exit successful run continues to release locks via the existing `cleanupWorker` path; no double-release race appears. +7. All new/modified code has corresponding tests written before the implementation. +8. `npm run build` passes. +9. `npm test` passes (unit projects). +10. 
`npm run test:integration` passes for the new integration test. +11. `npm run lint` passes. +12. `npm run typecheck` passes. +13. All documentation listed in this plan's Documentation Impact has been updated. + +--- + +## Documentation Impact (this plan only) + +| File | Change | +|---|---| +| `CHANGELOG.md` | Entry under the next release: "Router: dispatch failures now release in-memory work-item / agent-type / dedup locks via BullMQ failed-event compensation. Webhook decision reasons split into three states (Job queued / Awaiting worker slot / Work item locked — no active dispatch). The third reason is a regression canary." | + +CLAUDE.md is intentionally NOT updated by this plan; Plan 2 ships the unified passage covering both halves of the new contract. Updating it twice would risk in-flight inconsistency. + +--- + +## Out of Scope (this plan) + +- Replacing `guardedSpawn`'s capacity throw with a wait-for-slot semaphore — Plan 2. +- Changing `attempts: 1` defaults on either queue or adding an exponential-backoff config — Plan 2. +- The dispatch-error classifier (`UnrecoverableError` vs transient retry-worthy) — Plan 2. +- Replacing the `'processFn throws when at capacity'` test in `tests/unit/router/worker-manager.test.ts` — Plan 2 (this plan leaves it green; it remains a true assertion of current behavior until Plan 2 changes the contract). +- CLAUDE.md update — Plan 2. +- A failed-set inspection UI — out of scope per spec. +- Cleanup of the dead `linear-1777217350854-2qvhjo` job in prod Redis — operational, out of scope per spec. 
+ +--- + +## Progress + + +- [x] AC #1 (compensator releases all 3 locks on both queues) +- [x] AC #2 (subsequent webhook not blocked by stranded state) +- [x] AC #3 (three decision-reason branches) +- [x] AC #4 (wedged-lock Sentry canary) +- [x] AC #5 (manual-run bypass regression) +- [x] AC #6 (clean-exit regression) +- [x] AC #7 (TDD discipline) +- [x] AC #8 (build) +- [x] AC #9 (unit tests) +- [x] AC #10 (integration test) +- [x] AC #11 (lint) +- [x] AC #12 (typecheck) +- [x] AC #13 (docs) diff --git a/docs/plans/015-router-job-dispatch-failure-recovery/2-wait-for-slot-and-retry-classifier.md.done b/docs/plans/015-router-job-dispatch-failure-recovery/2-wait-for-slot-and-retry-classifier.md.done new file mode 100644 index 00000000..a199091a --- /dev/null +++ b/docs/plans/015-router-job-dispatch-failure-recovery/2-wait-for-slot-and-retry-classifier.md.done @@ -0,0 +1,246 @@ +--- +id: 015 +slug: router-job-dispatch-failure-recovery +plan: 2 +plan_slug: wait-for-slot-and-retry-classifier +level: plan +parent_spec: docs/specs/015-router-job-dispatch-failure-recovery.md +depends_on: [1-failed-event-lock-compensation.md] +status: done +--- + +# 015/2: Wait-for-slot capacity, retry budget, error classifier + +> Part 2 of 2 in the 015-router-job-dispatch-failure-recovery plan. See [parent spec](../../specs/015-router-job-dispatch-failure-recovery.md). + +## Summary + +This plan closes the **lost-job** half of the spec's bug class. It replaces the synchronous "throw on capacity" pattern in the dispatch path with an in-process slot-waiter that suspends until a worker slot frees up (bounded by a timeout that surfaces a real Sentry-visible error). 
It also adds bounded retry-with-backoff semantics to both queues, gated by an error classifier that decides whether a thrown error is transient (Docker daemon hiccup, image-pull rate-limit, container-name race — retry up to a small budget) or terminal (validation failure, missing credentials, fallback image not found — fail fast with no retry). + +After this plan ships, a card moved to a triggering state while the worker pool is at capacity reliably runs the agent as soon as capacity frees, with no further user action and no permanently failed BullMQ entry. A briefly unreachable Docker daemon resolves on retry. A sustained Docker outage exhausts the retry budget within minutes and surfaces a clear failed-run record + Sentry capture — never silent loss. + +This plan also delivers the AC #9 contract change: the existing `'processFn throws when at capacity'` test (`tests/unit/router/worker-manager.test.ts:179`) is **replaced**, not deleted — its assertion flips from "throws" to "awaits a slot, then dispatches when one frees." Plan 1's failed-event compensator stays as the safety net underneath. + +The CLAUDE.md update covering both halves of the new contract (decision-reason taxonomy from Plan 1 + dispatch retry contract from Plan 2) lands here so the documented invariant is consistent. + +**Components delivered:** +- A small in-house slot-waiter (semaphore-style counter + queued resolvers, ~30 lines, zero new deps) that integrates with `getActiveWorkerCount()` and `routerConfig.maxWorkers`. +- `guardedSpawn` rewritten: instead of throwing on capacity, awaits the slot (bounded timeout); on timeout, throws an explicitly-tagged error that the retry path can act on. +- A dispatch-error classifier `classifyDispatchError(err)` that returns `'transient' | 'terminal'`; terminal errors are wrapped in BullMQ's `UnrecoverableError` so they bypass the retry budget. 
+- `attempts` raised on both queue defaults with exponential backoff configured (concrete numbers below); per-job overrides stay possible for future use. +- Replacement of the existing capacity-throw test; new tests for slot-waiter + classifier + retry-on-transient + fail-fast-on-terminal + sustained-outage-surfaces-error. +- CLAUDE.md passage updated to document the new dispatch-retry + decision-reason contract. + +**Deferred (out of spec):** +- Worker-pool autoscaling, BullMQ migration, lock-semantics changes, snapshot-fallback redesign, manual-run bypass changes, failed-set UI, startup re-enqueue sweep — all explicit non-goals in the spec. + +--- + +## Spec ACs satisfied by this plan + +- Spec AC #1 (capacity wait → run starts when capacity frees, no permanently failed job) — **full** +- Spec AC #2 (transient Docker error → retry succeeds, no permanently failed job) — **full** +- Spec AC #3 (sustained outage → run marked failed, Sentry captured, no silent loss) — **full** +- Spec AC #9 (test contract change — old throw-on-capacity replaced) — **full** (Plan 1 added new tests; Plan 2 replaces the existing one) + +Regression-tested but not newly satisfied: +- Spec AC #4, #5, #6 (lock-leak / decision reason — already delivered by Plan 1; Plan 2 adds tests confirming the new failure modes don't reintroduce leaks) +- Spec AC #7, #8 (manual-run bypass + clean-exit — Plan 1 pinned these; Plan 2's tests must keep them green) + +--- + +## Depends On + +- Plan 1 (`failed-event-lock-compensation`) — provides the failed-event compensation hook on both queues. Plan 2 introduces new failure modes (slot-wait timeout, retry exhaustion, terminal errors via `UnrecoverableError`); each of those failure paths flows through BullMQ's `'failed'` event and relies on Plan 1's compensator releasing locks. Without Plan 1, Plan 2's new exhaustion path would re-create the wedged-lock symptom. + +--- + +## Detailed Task List (TDD) + +### 1. 
Slot-waiter primitive + +**Tests first** (`tests/unit/router/slot-waiter.test.ts` — new file): + +- `acquire — resolves immediately when capacity is below max` — unit — set `maxWorkers = 3`; mock `getActiveWorkerCount` to return `1`; call `acquireSlot()`; assert it resolves within the same microtask. Expected red: `Error: Cannot find module './slot-waiter'`. +- `acquire — suspends when at capacity, resolves when a slot frees` — unit — `maxWorkers = 1`, count starts at `1`; call `acquireSlot()`; assert promise is pending; emit a `slotReleased()` notification; assert promise resolves. Expected red: module-not-found, then "promise never resolves" timeout. +- `acquire — bounded timeout rejects with a tagged error if no slot frees in time` — unit — at capacity, call `acquireSlot({ timeoutMs: 50 })`; assert it rejects with `error.code === 'SLOT_WAIT_TIMEOUT'` after ~50ms. Expected red: never rejects (no timeout implemented). +- `acquire — multiple waiters resolve FIFO as slots free` — unit — at capacity, queue 3 waiters; emit `slotReleased()` once; assert exactly one waiter resolves and the other two stay pending; emit again; assert the second resolves. Expected red: all three resolve simultaneously, or none. +- `acquire — slotReleased called with no waiters is a no-op` — unit — call `slotReleased()` when nothing is waiting; assert no throw. Expected red: same as first test. +- `slotReleased — does not double-release waiters when called multiple times` — unit — queue 1 waiter; call `slotReleased()` twice; assert the waiter resolves exactly once (no `unhandledRejection` from a double-resolution path). Expected red: waiter rejected, or never resolves. + +**Implementation** (`src/router/slot-waiter.ts`): +- Internal queue `pendingResolvers: Array<{ resolve: () => void; reject: (e: Error) => void; timeoutHandle: NodeJS.Timeout }>`. 
+- Export `acquireSlot(opts: { timeoutMs: number }): Promise` — checks `getActiveWorkerCount() < routerConfig.maxWorkers`; if true, resolves immediately; otherwise pushes a resolver onto the queue with a `setTimeout` that rejects with a tagged `Error & { code: 'SLOT_WAIT_TIMEOUT' }`. +- Export `slotReleased(): void` — pops + resolves the head of the queue, clearing its timeout. No-op if empty. +- Export `clearAllWaiters()` for test cleanup + router shutdown (rejects pending waiters with a tagged `code: 'SHUTDOWN'`). +- Use a `code` field instead of error-message string-matching so the retry classifier doesn't depend on free-form text. + +### 2. `guardedSpawn` rewrite — wait-for-slot + +**Tests first** (`tests/unit/router/worker-manager.test.ts` — REPLACE the existing line-179 test): + +- (replace) `processFn awaits a slot when at capacity, then dispatches when one frees` — unit — `maxWorkers = 1`, mock `getActiveWorkerCount` to return `1` initially, `0` after a tick; start `processFn(fakeJob)`; assert `mockSpawnWorker` not called yet; advance fake timers + emit `slotReleased`; assert `mockSpawnWorker` is now called. Expected red: today's contract throws synchronously — assertion `mockSpawnWorker.toHaveBeenCalled()` fails. (Removes the existing `'processFn throws when at capacity'` assertion.) +- `processFn rejects with code: 'SLOT_WAIT_TIMEOUT' when the wait exceeds the configured ceiling` — unit — at-capacity for the entire wait window; assert promise rejects with the tagged error. Expected red: today rejects with the untagged `Error('No worker slots available')`. +- `processFn calls slotReleased after spawnWorker resolves` — unit — happy path; assert `mockSlotReleased` is invoked once after a successful spawn. (Note: today, slot accounting is implicit via container-exit; we add an explicit `slotReleased` so the waiter queue advances even when many spawns happen back-to-back without container exits in between.) Expected red: spy not called. 
+- `processFn — when spawnWorker throws, the slot is still released so other waiters proceed` — unit — `mockSpawnWorker` rejects; assert `mockSlotReleased` is still invoked (try/finally semantics) and the rejection propagates. Expected red: `slotReleased` not called → next waiter never proceeds. + +**Implementation** (`src/router/worker-manager.ts`): +- Replace `guardedSpawn`'s body with: `await acquireSlot({ timeoutMs: routerConfig.slotWaitTimeoutMs })`; then `try { await spawnWorker(job); } finally { /* note: existing exit path handles slotReleased on container exit; the dispatcher does NOT call it here, see decision below */ }`. +- **Decision**: do NOT call `slotReleased()` from `guardedSpawn` itself. The slot is conceptually held by the running container, not by the dispatcher. `slotReleased()` is called once from `cleanupWorker` (in `active-workers.ts`) at container-exit time. This keeps a single source of truth for "slot freed = container exited." The tests "calls slotReleased after spawnWorker resolves" and "when spawnWorker throws, the slot is still released" above are therefore reframed: they assert slot accounting is wired through container-exit, not through dispatcher exit. Update both tests accordingly. NOTE(review): a spawn that throws before any container registers never reaches the container-exit path — confirm that this failure path still advances the waiter queue (e.g. via Plan 1's failed-event compensator or an explicit `slotReleased()` in the catch), otherwise queued waiters hang until their timeout. (Stated explicitly so /implement does not get confused by the initial draft.) +- New config field `slotWaitTimeoutMs` in `routerConfig` (default e.g. `5 * 60 * 1000` = 5min — enough for typical worker runs to complete, short enough that a stuck pool surfaces within a webhook receiver's life). Document the default in the config module. + +### 3. Hook `slotReleased` into `cleanupWorker` + +**Tests first** (`tests/unit/router/active-workers.test.ts` — extend): + +- `cleanupWorker — calls slotReleased exactly once per cleanup` — unit — register a worker; call `cleanupWorker(jobId)`; assert spy called once. Expected red: spy not called. +- `cleanupWorker — calls slotReleased even on the crash path (exitCode != 0)` — unit — call with `cleanupWorker(jobId, 137, { oomKilled: true })`; assert spy still called once. 
Expected red: spy not called. +- `cleanupWorker — does NOT double-call slotReleased on duplicate cleanup invocations` — unit — call `cleanupWorker(jobId)` twice (e.g. timeout-then-exit race); assert spy called exactly once. Expected red: spy called twice (the existing `if (worker)` guard handles this if we put `slotReleased` inside the same block). + +**Implementation** (`src/router/active-workers.ts`): +- Inside the existing `if (worker) { ... }` block in `cleanupWorker`, after the existing lock-release calls and before the `activeWorkers.delete(jobId)` line, call `slotReleased()`. The existing `if (worker)` guard already ensures idempotence. + +### 4. Dispatch-error classifier + +**Tests first** (`tests/unit/router/dispatch-error-classifier.test.ts` — new file): + +- `classifyDispatchError — Docker daemon unreachable error → 'transient'` — unit — synthesize an error with `err.code === 'ECONNREFUSED'` and a Dockerode-shaped message; assert classifier returns `'transient'`. Expected red: module-not-found. +- `classifyDispatchError — image-pull rate-limit (HTTP 429 from registry) → 'transient'` — unit — synthesize an error matching the dockerode rate-limit shape; assert `'transient'`. Expected red: as above. +- `classifyDispatchError — container name collision race ("name already in use") → 'transient'` — unit — synthesize the dockerode 409 conflict error; assert `'transient'`. Expected red: as above. +- `classifyDispatchError — image not found after fallback → 'terminal'` (the `isImageNotFoundError` path that already throws today) — unit — pass an error matching `isImageNotFoundError()`; assert `'terminal'`. Expected red: as above. +- `classifyDispatchError — validation error (e.g. missing credentials, malformed config) → 'terminal'` — unit — pass a non-Docker `TypeError`-shaped error; assert `'terminal'`. Expected red: as above. 
+- `classifyDispatchError — slot-wait timeout (code: 'SLOT_WAIT_TIMEOUT') → 'transient'` — unit — pass the slot-waiter timeout error; assert `'transient'`. The slot-wait timeout itself is treated as transient because the next retry will likely find a slot; only repeated timeouts within the retry budget surface as a real outage. Expected red: as above. +- `classifyDispatchError — unknown error (no recognizable shape) → 'transient'` — unit — pass `new Error('something weird')`; assert `'transient'` (default-to-retry is the safer choice; a true bug surfaces via attempt exhaustion). Expected red: as above. + +**Implementation** (`src/router/dispatch-error-classifier.ts`): +- Export `classifyDispatchError(err: unknown): 'transient' | 'terminal'`. +- Recognize: `ECONNREFUSED`, `ECONNRESET`, `ENOTFOUND` on the Docker socket → transient; HTTP 429 from registry → transient; HTTP 409 "name already in use" → transient; the existing `isImageNotFoundError(err)` shape (after fallback already exhausted) → terminal; explicit `TypeError` / `ZodError` shapes → terminal; `code: 'SLOT_WAIT_TIMEOUT'` → transient; default → transient. +- Re-use the existing `isImageNotFoundError` predicate from `container-manager.ts` (export it if not already exported). + +### 5. Wire classifier into `guardedSpawn` + retry config on both queues + +**Tests first** (`tests/unit/router/worker-manager.test.ts` — extend): + +- `processFn — when spawnWorker rejects with a transient error, propagates the rejection unchanged so BullMQ retries via attempts/backoff` — unit — mock spawnWorker to reject with an `ECONNREFUSED`-shaped error; assert processFn rejects with the same error (not wrapped in `UnrecoverableError`). Expected red: today's processFn does not differentiate, so this test should pass as a baseline; it pins behavior so Plan 2 doesn't accidentally swallow transient errors. 
+- `processFn — when spawnWorker rejects with a terminal error, wraps in BullMQ's UnrecoverableError so retries are skipped` — unit — mock spawnWorker to reject with a `ZodError`-shaped error (or a wrapped image-not-found); assert processFn rejects with `instanceof UnrecoverableError`. Expected red: rejection is the original error, not `UnrecoverableError`. + +**Tests first** (`tests/integration/router/dispatch-retry.test.ts` — new file): + +- `cascade-jobs queue — retries a transient dispatch failure with backoff and eventually succeeds` — integration — push a real job; mock spawnWorker to reject with an `ECONNREFUSED`-shaped error on attempt 1, succeed on attempt 2; assert the job ultimately moves to `completed` state, not `failed`. Expected red: job moves to `failed` after attempt 1 (current `attempts: 1` behavior). +- `cascade-jobs queue — terminal dispatch failure does NOT retry` — integration — mock spawnWorker to reject with a `ZodError`; assert exactly one attempt, job moves to `failed`. Expected red: with `attempts > 1` but no UnrecoverableError wrap, BullMQ would retry — this test catches that mistake. +- `cascade-jobs queue — sustained transient outage exhausts retries within the configured window and surfaces failure` — integration — mock spawnWorker to always reject with `ECONNREFUSED`; assert exactly N attempts (matching the configured budget), final state `failed`, and that the failed-event compensator from Plan 1 runs once at exhaustion (verifies the compose with Plan 1 works). Expected red: missing retry, or compensator running multiple times during retries. +- `cascade-dashboard-jobs queue — same retry behavior on a manual-run job` — integration — mirror of the first test against the dashboard queue. Expected red: dashboard queue still has `attempts: 1`, no retries. 
+ +**Implementation**: +- In `src/router/worker-manager.ts`'s `guardedSpawn`, wrap the `spawnWorker` call in `try { await spawnWorker(job); } catch (err) { if (classifyDispatchError(err) === 'terminal') throw new UnrecoverableError(String(err)); throw err; }`. (The slot is released by the container-exit path, not here.) +- In `src/router/queue.ts` and `src/queue/client.ts`, change `defaultJobOptions.attempts` from `1` to **4** and add `backoff: { type: 'exponential', delay: 5_000 }`. Concrete budget: attempt 1 (immediate) + 3 retries after ~5s + ~10s + ~20s exponential backoff = ~35s total backoff across 4 attempts. Sustained-outage detection within ~90s (upper bound, allowing per-attempt dispatch time). Tunable via env or config in a future patch; keep them as plain literals for now. +- The retry attempts apply to the entire processFn invocation, including the slot-wait. A retry attempt that re-acquires the slot is normal behavior; a retry attempt whose slot-wait also times out classifies as transient and proceeds to the next retry. + +### 6. CLAUDE.md update + CHANGELOG entry + +**Tests first**: n/a — documentation change (no tests, but `/implement`'s Phase 6 verifies the file changed). + +**Implementation**: +- Edit the existing CLAUDE.md "Worker exit diagnostics" / "Work-item concurrency lock" section. Add a new short subsection titled "Dispatch failure semantics" capturing: + - Capacity miss now waits for a slot (bounded timeout, default 5min). + - Transient Docker errors retry with exponential backoff up to 4 attempts (~35s total backoff). + - Terminal errors (validation, image-not-found-after-fallback) skip retries via `UnrecoverableError`. + - Failed-event compensation releases work-item / agent-type / dedup locks on every dispatch failure path. + - Webhook decision reasons split: `Job queued` / `Awaiting worker slot: …` / `Work item locked (no active dispatch): …`. The third is a regression canary — its presence in webhook logs means a code path acquired a lock without registering its compensation. 
+- `CHANGELOG.md`: entry under the next release: "Router: dispatch capacity miss now waits for a worker slot instead of throwing; transient Docker errors retry with exponential backoff; terminal errors skip retries. Combined with plan 015/1, the original silent black-hole failure mode is closed." + +--- + +## Test Plan + +### Unit tests +- [ ] `tests/unit/router/slot-waiter.test.ts` (new): 6 tests +- [ ] `tests/unit/router/worker-manager.test.ts` (replace + extend): 1 replacement + 5 new tests (3 from task 2, 2 from task 5) +- [ ] `tests/unit/router/active-workers.test.ts` (extend): 3 new tests for `slotReleased` integration with `cleanupWorker` +- [ ] `tests/unit/router/dispatch-error-classifier.test.ts` (new): 7 tests covering the transient/terminal taxonomy + +### Integration tests +- [ ] `tests/integration/router/dispatch-retry.test.ts` (new): 4 tests covering `cascade-jobs` + `cascade-dashboard-jobs` × transient retry succeeds + terminal skips + sustained exhausts. + +### Acceptance tests +- [ ] AC #1: integration test "transient capacity → wait then dispatch when slot frees" (in slot-waiter integration scenarios) +- [ ] AC #2: integration test "transient Docker → retry → success" +- [ ] AC #3: integration test "sustained outage → retry exhaustion → marked failed + Sentry" +- [ ] AC #9: the existing capacity-throw test is replaced; CI fails if both old and new assertions are present. + +### Regression coverage (Plan 1 ACs) +- [ ] All Plan 1 integration tests in `tests/integration/router/dispatch-failure-compensation.test.ts` continue to pass against the new dispatch contract. The existing tests already cover the lock-release path; Plan 2's new failure modes (slot-wait timeout, retry exhaustion, UnrecoverableError) all flow through the same `'failed'` event hook, so the existing tests should pass without modification. If they need adjustment, that's a sign the compensator needs broader extractor coverage — handle it then. 
+ +--- + +## Manual Verification (for `[manual]`-tagged ACs only) + +n/a — all ACs auto-tested. + +--- + +## Acceptance Criteria (per-plan, testable) + +1. The slot-waiter primitive resolves immediately when capacity is below max, suspends when at capacity, resolves on `slotReleased`, and times out cleanly with a tagged error. +2. `guardedSpawn` no longer throws on capacity; it waits for a slot up to `slotWaitTimeoutMs`. +3. `cleanupWorker` calls `slotReleased` exactly once per cleanup, including on the crash path. +4. `classifyDispatchError` correctly partitions transient vs terminal errors per the taxonomy above; the slot-wait timeout itself classifies as transient. +5. Both `cascade-jobs` and `cascade-dashboard-jobs` queue defaults specify `attempts: 4` with exponential backoff `delay: 5000`. +6. Terminal errors are wrapped in `UnrecoverableError` so BullMQ skips retries. +7. A transient dispatch failure on either queue retries and eventually succeeds; a terminal failure does not retry; a sustained outage exhausts the budget within ~90s and surfaces a failed run + Sentry capture. +8. The previously-existing `'processFn throws when at capacity'` test is replaced (not deleted) with the wait-for-slot contract. CI must not have both assertions present. +9. CLAUDE.md is updated with the new dispatch-failure-semantics passage covering both halves of the contract (Plan 1 + Plan 2). +10. CHANGELOG.md has the Plan 2 entry. +11. Plan 1's integration tests continue to pass against the new dispatch contract (regression). +12. All new/modified code has corresponding tests written before the implementation. +13. `npm run build` passes. +14. `npm test` passes. +15. `npm run test:integration` passes. +16. `npm run lint` passes. +17. `npm run typecheck` passes. 
+ +--- + +## Documentation Impact (this plan only) + +| File | Change | +|---|---| +| `CLAUDE.md` | New subsection (under the existing "Worker exit diagnostics" / lock section) titled "Dispatch failure semantics" — captures wait-for-slot contract, retry budget + classifier, failed-event compensation, three-way decision-reason taxonomy. | +| `CHANGELOG.md` | Entry under the next release describing wait-for-slot + retry-with-backoff + classifier. References the spec ID 015. | + +--- + +## Out of Scope (this plan) + +- Worker-pool autoscaling / `maxWorkers` adjustments — explicit non-goal in spec. +- BullMQ → another queue migration — explicit non-goal in spec. +- Work-item-lock semantics — explicit non-goal in spec (one-per-type stays). +- Snapshot reuse / fallback-to-base-image logic — explicit non-goal in spec. +- Manual-run lock-bypass behavior changes — explicit non-goal in spec. +- Failed-set inspection UI — explicit non-goal in spec. +- Startup re-enqueue sweep that picks up jobs already in the failed set — explicit non-goal in spec. +- Cleanup of the existing dead `linear-1777217350854-2qvhjo` job — operational, out of scope per spec. +- Cross-router-instance lock coordination — out of scope per spec; existing DB-fallback continues to handle restarts. 
+ +--- + +## Progress + + +- [x] AC #1 (slot-waiter primitive) +- [x] AC #2 (guardedSpawn awaits) +- [x] AC #3 (cleanupWorker calls slotReleased) +- [x] AC #4 (classifier taxonomy) +- [x] AC #5 (queue retry config) +- [x] AC #6 (UnrecoverableError wrap) +- [x] AC #7 (end-to-end retry behavior) +- [x] AC #8 (capacity-throw test replaced) +- [x] AC #9 (CLAUDE.md updated) +- [x] AC #10 (CHANGELOG) +- [x] AC #11 (Plan 1 regression) +- [x] AC #12 (TDD discipline) +- [x] AC #13 (build) +- [x] AC #14 (unit tests) +- [x] AC #15 (integration) +- [x] AC #16 (lint) +- [x] AC #17 (typecheck) diff --git a/docs/plans/015-router-job-dispatch-failure-recovery/_coverage.md b/docs/plans/015-router-job-dispatch-failure-recovery/_coverage.md new file mode 100644 index 00000000..18ffb128 --- /dev/null +++ b/docs/plans/015-router-job-dispatch-failure-recovery/_coverage.md @@ -0,0 +1,39 @@ +# Coverage map for spec 015-router-job-dispatch-failure-recovery + +Auto-generated by /plan. Tracks which plans satisfy which spec ACs. 
+ +## Spec ACs + +| # | Spec AC (short) | Satisfied by | Status | +|---|---|---|---| +| 1 | Capacity-busy → run starts when capacity frees, no permanently failed job | plan 2 (wait-for-slot-and-retry-classifier) | full | +| 2 | Transient Docker error → retry succeeds, no permanently failed job | plan 2 | full | +| 3 | Sustained outage → run marked failed, Sentry capture, no silent loss | plan 2 | full | +| 4 | No stranded lock after any dispatch failure | plan 1 (failed-event-lock-compensation) | full | +| 5 | Decision reason distinguishes in-flight from wedged-lock | plan 1 | full | +| 6 | Wedged-lock canary never fires under normal operation | plan 1 | full | +| 7 | Manual-run path bypass + dispatch unchanged | plan 1 (full) + plan 2 (regression) | full | +| 8 | Clean-exit path identical to today | plan 1 (full) + plan 2 (regression) | full | +| 9 | Existing throw-on-capacity test replaced (not deleted) with wait-for-slot contract | plan 1 (adds new failed-event tests) + plan 2 (replaces the existing test) | partial chain | + +## Coverage summary + +- **9 spec ACs** mapped to **2 plans** +- **8 plans-x-AC pairs full coverage** (each AC fully satisfied by its assigned plan(s)) +- **1 spec AC** with partial-chain coverage (AC #9 — Plan 1 grows test coverage; Plan 2 replaces the contract-bearing assertion) +- All non-goals from the spec's Out of Scope are mirrored in each plan's Out of Scope section. + +## Plan dependency graph + +``` +1-failed-event-lock-compensation ──→ 2-wait-for-slot-and-retry-classifier +``` + +Plan 1 ships the safety-net (compensation + decision-reason taxonomy) independently. Plan 2 introduces the dispatch-contract change (wait-for-slot + retry/classifier) and depends on Plan 1's compensator being live so any new failure modes don't re-create the wedged-lock symptom. After Plan 1 lands, prod's user-visible damage is half-fixed (no more wedged locks); after Plan 2 lands, the spec's bug class is closed (no more lost jobs). 
+ +## Documentation impact distribution + +| Spec doc | Plan | +|---|---| +| `CLAUDE.md` | Plan 2 (single passage covering both halves of the new contract — avoids in-flight inconsistency from a Plan-1-only update) | +| `CHANGELOG.md` | Plan 1 + Plan 2 (one entry per shipping plan; each describes its half of the fix and references spec ID 015) | diff --git a/docs/specs/015-router-job-dispatch-failure-recovery.md.done b/docs/specs/015-router-job-dispatch-failure-recovery.md.done new file mode 100644 index 00000000..e28996ce --- /dev/null +++ b/docs/specs/015-router-job-dispatch-failure-recovery.md.done @@ -0,0 +1,130 @@ +--- +id: 015 +slug: router-job-dispatch-failure-recovery +level: spec +title: Router job dispatch failure recovery +created: 2026-04-26 +status: done +--- + +# 015: Router job dispatch failure recovery + +## Problem & Motivation + +CASCADE's router has a silent black-hole failure mode. When a webhook-driven job is pulled off the queue and the dispatcher cannot immediately spawn a worker — because every worker slot is already occupied by a prior run, or because the Docker daemon hiccups, or because an image-pull rate-limit fires — the dispatcher throws synchronously. With the queue's current "attempts: 1, no retry" defaults, that single throw moves the job straight to the failed set in Redis. It is never picked up again, even after capacity frees up seconds later. The user gets no error in the UI; the agent never runs; the work item appears to have been silently ignored. + +The damage doesn't stop at one lost job. The webhook handler establishes in-memory bookkeeping — a same-type-per-work-item concurrency lock, an agent-type concurrency counter, and a recently-dispatched dedup mark — *between* the moment it puts the job on the queue and the moment a worker container actually starts. The lock-release logic only fires from the worker-exit path, which itself only runs when a worker container was actually started. 
A spawn-time throw never reaches that path, so all three lock entries leak. The work-item lock in particular has a 30-minute TTL and only auto-clears on router restart. For 30 minutes after a transient capacity miss, every subsequent webhook for that work item + agent type is silently rejected with the misleading reason `"Work item locked: 1 enqueued (max 1 per type)"`. The user, seeing no agent run after dragging a card to Todo, drags it again — and again — with no feedback that the lock is wedged. + +This was hit in production on 2026-04-26 against the ucho project (Linear card MNG-350). A user moved the card to Todo at 15:29:07 UTC. The job was enqueued and pulled by BullMQ; the only worker slot was held by an unrelated MNG-354 implementation that had spawned ~2 seconds earlier. The MNG-350 dispatcher threw "No worker slots available" and the job was failed. The user re-moved the card to Backlog→Todo three more times across the next 18 minutes — every webhook returned the misleading "locked" decision. The card sat dead until a manual CLI trigger force-dispatched through a separate path that bypasses the lock. This is a credibility-class incident for an automation product whose entire value proposition is "PM card moves automatically translate into agent runs." Unattended workflows must not silently break. + +The fix needs three contract changes that work in concert: (1) transient dispatch failures must retry, with bounded attempts and backoff so a real outage isn't masked, (2) any in-memory lock state acquired at enqueue time must be released the moment dispatch fails, before any subsequent webhook can be misled by it, and (3) the webhook decision-reason taxonomy must let users (and operators reading webhook logs) distinguish a healthy in-flight run from a wedged lock that needs intervention. + +--- + +## Goals + +1. A transient over-capacity condition during dispatch never causes a job to be permanently lost. 
The job either backpressures naturally until a slot is available, or is retried with bounded attempts and is only declared dead after the bound is exhausted. +2. A transient Docker-side spawn error (daemon unreachable, image-pull rate-limit, container-name race) is retried within a bounded attempt budget. A terminal error (e.g. validation failure, missing credentials, a fallback image that genuinely doesn't exist) fails fast without burning the retry budget. +3. Every in-memory lock entry acquired during the webhook → enqueue path is released when the dispatch ultimately fails, before the next webhook for the same work item arrives. No 30-minute wedge is possible regardless of which exception path the dispatcher took. +4. Webhook decision logs distinguish healthy in-flight state ("queued, awaiting slot") from wedged-lock state ("lock count says enqueued but no active dispatch can be found"). Users moving a card see decision reasons that describe what is actually happening. +5. Both the webhook-driven job queue and the dashboard manual-run queue benefit from the same retry symmetry. The work-item lock bypass on manual-run remains untouched (intentional escape hatch), but neither queue should permanently lose a job to a single transient dispatch failure. +6. Existing healthy paths (successful spawn, successful exit, manual-run, snapshot fallback, agent-type concurrency limits) continue to behave identically. The fix is failure-path symmetry, not a redesign of dispatch. + +--- + +## Non-goals + +- Redesigning worker-pool sizing or how `maxWorkers` is computed. +- Overhauling the BullMQ concurrency model or moving off BullMQ. +- Changing the work-item-lock semantics (one same-type agent per work item per project remains the rule, per spec 007). +- Modifying the snapshot reuse / fallback-to-base-image logic. +- Changing the dashboard manual-run lock-bypass behavior — manual runs are the user's escape hatch and stay that way. 
+- Building a UI surface for failed-set inspection. Webhook log decision reasons are the user-facing surface for now. +- Backfilling a re-dispatch sweep at router startup that picks up jobs already dead in the failed set (operational cleanup; out of scope for the contract change). + +--- + +## Constraints + +- The fix must not introduce new race windows. The point at which lock state is acquired and the point at which it is released must remain consistent across all dispatch outcomes (success, transient failure with eventual retry success, transient failure exhausting retries, terminal failure). +- Retry budgets must be small enough that a true Docker outage is surfaced within minutes, not hours. A transient blip should be invisible; a sustained outage should still page someone through Sentry within a few minutes. +- The dispatcher must not block forever waiting for a worker slot if the system is genuinely stuck. Any wait-for-slot mechanism needs a timeout that surfaces a Sentry-visible error, not a silent hang. +- Job-data carried into the retry path must remain identical across attempts. A retry must dispatch the same work item, same agent type, same ack info — not a re-derived version that could disagree with what the first attempt registered locks for. +- The change is restricted to the router's dispatch surface. Trigger-handler logic, agent input building, and the worker container's runtime contract are unchanged. + +--- + +## User stories / Requirements + +1. **As a CASCADE user**, when I move a Linear/Trello/JIRA card to a state that triggers an agent, the agent run starts even if the worker pool is briefly at capacity. I never have to drag the card a second time to "wake the system up." +2. **As a CASCADE user**, when something is genuinely wrong with the worker fleet (Docker down, host out of memory), I get a clear failure signal — either an ack comment that the run failed or an error visible in the dashboard — rather than a silently dropped job. +3. 
**As an operator reading webhook logs during an incident**, I can tell the difference between "this work item has 1 active run in progress, the new webhook was correctly deduped" and "this work item's lock count is non-zero but nothing is actually running — the lock is wedged." +4. **As a router process**, when I throw during dispatch, I leave no in-memory lock entry behind that could mislead a future webhook for the same work item + agent type. +5. **As a router process**, when a transient dispatch failure resolves on retry, the job runs to completion exactly once. The lock state at run-start matches the lock state at run-end. +6. **As a router process restarting after a crash**, I do not strand work-item locks across the restart. (Already true today via in-memory locks living only in process memory; this spec must not regress that.) + +--- + +## Research Notes + +- BullMQ's worker model treats `processFn` rejection as "job failed" and consults the job's `attempts` and `backoff` options to decide whether to retry. With `attempts: 1` and no backoff, any rejection moves the job to the failed set permanently. ([BullMQ docs — Retrying failing jobs](https://docs.bullmq.io/guide/retrying-failing-jobs)) +- BullMQ exposes a `worker.on('failed', ...)` event that fires after a job's final retry exhaustion. This is the natural seam for compensating in-memory state established outside BullMQ's transactional view (locks, dedup marks). The event receives the job and the error, sufficient to identify what to release. +- Semaphore-style backpressure (P-Queue, Throat, Bottleneck) is the standard pattern for "wait-for-slot" semantics that integrate cleanly with async functions. The trade-off vs throw-and-retry is fewer failed-set entries at the cost of holding more open async tasks. For our use case (≤10 concurrent slots, sub-second wait granularity, retry attempts cost a Redis round-trip each), wait-for-slot is the cleaner fit. 
+- BullMQ's `concurrency` setting controls how many `processFn` invocations are in flight, not how many "work items" are running. Because our `processFn` resolves on container *start*, not container *exit*, BullMQ's concurrency does not function as a worker-pool cap. This is the root cause of the throw-on-capacity pattern existing in the first place: someone's safety net for a mismatch the queue isn't aware of. The spec restores correctness by closing that mismatch on the dispatch side. +- "Compensating actions" (release locks on dispatch failure) is a long-standing pattern in transaction processing — see Garcia-Molina & Salem's "Sagas" (1987). The applicable rule: every action that acquires durable state outside the local transaction must register a compensator that runs on rollback. In our case, the rollback trigger is the BullMQ failed event. + +--- + +## Open Source Decisions + +| Tool | Solves | Decision | Reason | +|------|--------|----------|--------| +| [BullMQ retry/backoff](https://docs.bullmq.io/guide/retrying-failing-jobs) | Bounded retry of transient dispatch failures | **Use** | Already in use; configuring `attempts` + `backoff` on the existing queue is the smallest possible change for the retry contract. | +| [BullMQ Worker `failed` event](https://docs.bullmq.io/guide/events) | Cleanup hook for in-memory lock state on dispatch failure | **Use** | The only reliable place to compensate state for *any* dispatch-path exception, including ones that don't exist yet. Already wired for logging — we extend the existing handler. | +| Semaphore library (e.g. [async-sema](https://github.com/vercel/async-sema), [p-limit](https://github.com/sindresorhus/p-limit)) | Wait-for-slot backpressure inside the dispatch processFn | **Skip (default), revisit during /plan** | A small in-house counter with a queue of resolvers is sufficient and has zero new deps. Plan can reverse this if it finds a real reason to adopt a library. | + +--- + +## Strategic decisions + +1. 
**Capacity miss is handled by wait-for-slot, not throw-and-retry.** The dispatcher awaits a slot up to a bounded timeout instead of throwing. Reason: eliminates the failed-set churn for capacity entirely, leaving retries reserved for genuine Docker errors. If the wait-for-slot timeout itself trips, it surfaces as a real error (Sentry-visible) and a single retry attempt — not silent loss. +2. **Retries are reserved for transient Docker-side errors.** A small bounded retry budget with exponential backoff applies to Docker daemon unreachable, image-pull rate-limit, and container-name collision races. Terminal errors (validation, missing credentials, fallback image not found) fail fast on the first attempt. The classifier lives at the boundary of the dispatcher; the spec defines the dichotomy, the plan picks the implementation. +3. **Lock compensation runs from the queue's failed event.** Cleanup hooks attached to the BullMQ failed event read the original job's `(projectId, workItemId, agentType)` payload and release every lock entry that the enqueue path established. This catches every dispatch-path exception, including paths that don't exist today. We do not rely on per-throw try/finally inside individual dispatch functions — that approach is fragile and depends on each new code path remembering to compensate. +4. **Webhook decision reasons are split into three states.** "Job queued" (success), "Awaiting worker slot" (queued behind N active runs, healthy), and "Work item locked (no active dispatch)" (wedged-lock state, requires correlating in-memory lock count with active worker registry + BullMQ queue state). The third reason exists specifically as a diagnostic for operators and as a safety net to confirm the compensation path is working — if it ever fires after this spec ships, that's a regression. +5. 
**Both queues get the retry treatment; only one keeps the lock-release path.** The dashboard manual-run queue and the webhook job queue both get the retry-on-transient-failure contract. The manual-run path's existing lock-bypass behavior is preserved unchanged — manual runs are the user's escape hatch when something else is wedged. +6. **The currently-stuck job in production is operational cleanup, not in scope.** The dead `linear-1777217350854-2qvhjo` job in the failed set is removed by hand. This spec ensures *future* failures don't strand jobs; an automatic startup sweep that re-enqueues failed-set entries is a deliberate non-goal — surfacing those errors loudly is more valuable than silently re-attempting them. + +--- + +## Acceptance Criteria (outcome-level) + +1. A user moves a PM card to a triggering state while the worker pool is at capacity. The agent run starts when capacity frees, with no further user action. The same flow does not produce a permanently failed job in BullMQ. +2. A user moves a PM card to a triggering state while the Docker daemon is briefly unreachable (e.g. socket reconnects within a few seconds). The agent run starts on retry. The same flow does not produce a permanently failed job in BullMQ. +3. A user moves a PM card to a triggering state while Docker is genuinely down (sustained outage exceeding the retry budget). The webhook is acknowledged, the run is marked failed in the dashboard, and a Sentry error is captured. No silent loss. +4. After any dispatch failure path — capacity-wait timeout, transient retry exhaustion, terminal error — a webhook for the same work item + agent type that arrives immediately afterward is **not** rejected with a "locked" decision reason caused by a stranded in-memory lock. The lock state matches the actual worker registry state. +5. 
While a worker is actively dispatching or running for a work item, a duplicate webhook for the same work item + agent type is rejected with a decision reason that distinguishes "in-flight, healthy" from "wedged lock." An operator inspecting webhook logs can tell which is which without reading source. +6. The wedged-lock decision reason never fires under normal operation. Its presence in webhook logs is itself a signal that compensation has missed a path — i.e. it is intended as a diagnostic invariant, not a routine state. +7. The dashboard manual-run path continues to bypass the work-item lock and continues to dispatch successfully even when the lock count for that work item + agent type is non-zero. +8. A worker container that successfully dispatches and exits cleanly behaves identically to today: locks released on exit, snapshot committed (where applicable), run record in the dashboard. +9. Existing test coverage for the worker-manager dispatch path is updated to reflect the new contract (specifically, the assertion that "processFn throws when at capacity" is replaced with the new wait-for-slot contract). No test silently passes that previously failed. + +--- + +## Documentation Impact (high-level) + +- `CLAUDE.md` — the project root entry already documents the work-item concurrency lock and the worker exit diagnostics; needs an updated paragraph capturing the new dispatch retry contract and the three-way webhook decision-reason taxonomy. This is load-bearing cross-cutting behavior with no other natural home and is exactly the kind of invariant that decays in CLAUDE.md if not updated alongside the change. +- `CHANGELOG.md` — entry under the next release noting the silent black-hole fix and the new decision-reason vocabulary. + +--- + +## Out of Scope + +- Worker-pool sizing or `maxWorkers` autoscaling. +- BullMQ → another queue migration. +- Work-item-lock semantics (one same-type agent per work item, per spec 007). 
+- Snapshot reuse and snapshot-image fallback logic. +- Manual-run lock-bypass behavior (intentional, stays). +- Failed-set inspection UI in the dashboard. +- Startup sweep that re-enqueues already-failed jobs from the failed set. +- Cleanup of the existing dead `linear-1777217350854-2qvhjo` entry — operational, handled out-of-band. +- Cross-router-instance lock coordination (locks remain in-process; spec 007 lock semantics already handle this via DB fallback). diff --git a/src/queue/client.ts b/src/queue/client.ts index 77268d35..be28d2ca 100644 --- a/src/queue/client.ts +++ b/src/queue/client.ts @@ -55,7 +55,12 @@ function getQueue(): Queue { queue = new Queue(QUEUE_NAME, { connection: parseRedisUrl(redisUrl), defaultJobOptions: { - attempts: 1, + // Spec 015/2: bounded retries on dispatch failures, parity with + // the cascade-jobs queue. Manual-run / retry-run / debug-analysis + // jobs hit the same dispatch path — should benefit from the same + // transient-failure absorption. + attempts: 4, + backoff: { type: 'exponential', delay: 5_000 }, removeOnComplete: { age: 24 * 60 * 60, count: 100 }, removeOnFail: { age: 7 * 24 * 60 * 60 }, }, diff --git a/src/router/active-workers.ts b/src/router/active-workers.ts index 4836e6f9..5d13d242 100644 --- a/src/router/active-workers.ts +++ b/src/router/active-workers.ts @@ -9,6 +9,7 @@ import { failOrphanedRun, failOrphanedRunFallback } from '../db/repositories/run import { logger } from '../utils/logging.js'; import { clearAgentTypeEnqueued } from './agent-type-lock.js'; import type { CascadeJob } from './queue.js'; +import { slotReleased } from './slot-waiter.js'; import { clearWorkItemEnqueued } from './work-item-lock.js'; export interface ActiveWorker { @@ -62,11 +63,26 @@ export function getActiveWorkerCount(): number { /** * Get summary info for currently active workers. 
+ * + * Includes the resolved `(projectId, workItemId, agentType)` trio so callers + * (specifically the lock-state classifier added in spec 015/1) can correlate + * an in-memory lock count against actual dispatch state. The fields are + * `undefined` for workers whose job data didn't carry the corresponding + * identifier — never synthesized. */ -export function getActiveWorkers(): Array<{ jobId: string; startedAt: Date }> { +export function getActiveWorkers(): Array<{ + jobId: string; + startedAt: Date; + projectId?: string; + workItemId?: string; + agentType?: string; +}> { return Array.from(activeWorkers.values()).map((w) => ({ jobId: w.jobId, startedAt: w.startedAt, + projectId: w.projectId, + workItemId: w.workItemId, + agentType: w.agentType, })); } @@ -86,6 +102,10 @@ export function cleanupWorker(jobId: string, exitCode?: number, details?: ExitDe if (worker.projectId && worker.workItemId && worker.agentType) { clearWorkItemEnqueued(worker.projectId, worker.workItemId, worker.agentType); } + // Spec 015/2: free a worker slot so any dispatcher waiting in + // `acquireSlot()` can proceed. Idempotent — the surrounding + // `if (worker)` guard ensures we call this exactly once per cleanup. + slotReleased(); if (exitCode !== undefined && exitCode !== 0 && worker.projectId) { const durationMs = Date.now() - worker.startedAt.getTime(); const reason = formatCrashReason(exitCode, details); diff --git a/src/router/agent-type-lock.ts b/src/router/agent-type-lock.ts index 2320fff5..177056cd 100644 --- a/src/router/agent-type-lock.ts +++ b/src/router/agent-type-lock.ts @@ -149,6 +149,20 @@ export function wasRecentlyDispatched( return true; } +/** + * Compensating action for `markRecentlyDispatched` — used by the BullMQ + * `failed`-event handler so a permanently-failed dispatch doesn't keep + * deduping a fresh webhook for ~60s while the user re-tries. 
+ */ +export function clearRecentlyDispatched( + projectId: string, + agentType: string, + dedupScope?: string, +): void { + const key = makeDedupKey(projectId, agentType, dedupScope); + dedupMap.delete(key); +} + /** * Mark an agent type as recently dispatched for a project/scope. * The mark expires after DEDUP_TTL_MS and is NOT cleared on completion. diff --git a/src/router/bullmq-workers.ts b/src/router/bullmq-workers.ts index 603b3cba..c02a4ce2 100644 --- a/src/router/bullmq-workers.ts +++ b/src/router/bullmq-workers.ts @@ -10,6 +10,7 @@ import { type ConnectionOptions, type Job, Worker } from 'bullmq'; import { captureException } from '../sentry.js'; import { logger } from '../utils/logging.js'; import { parseRedisUrl } from '../utils/redis.js'; +import { releaseLocksForFailedJob } from './dispatch-compensator.js'; // Re-export so existing callers (worker-manager.ts) don't need to change imports. export { parseRedisUrl }; @@ -53,6 +54,25 @@ export function createQueueWorker(config: QueueWorkerConfig): Wo tags: { source: 'bullmq_dispatch', queue: queueName }, extra: { jobId: job?.id }, }); + // Compensate in-memory state (work-item lock, agent-type lock, + // recently-dispatched dedup mark) acquired by the webhook → enqueue + // path. Without this, dispatch failures wedge the locks until their + // TTLs expire — see spec 015. Compensator never throws, but we still + // guard so a future regression in it can't poison the worker. + if (job) { + void releaseLocksForFailedJob(job.data).catch((compErr) => { + logger.error( + '[WorkerManager] compensator threw — already swallowed by it; logging defensively', + { + jobId: job.id, + error: String(compErr), + }, + ); + captureException(compErr instanceof Error ? 
compErr : new Error(String(compErr)), { + tags: { source: 'dispatch_compensator_uncaught', queue: queueName }, + }); + }); + } }); worker.on('error', (err) => { diff --git a/src/router/config.ts b/src/router/config.ts index 2fc5dd6b..d8588723 100644 --- a/src/router/config.ts +++ b/src/router/config.ts @@ -31,6 +31,12 @@ export interface RouterConfig { workerImage: string; workerMemoryMb: number; workerTimeoutMs: number; + /** + * How long a dispatcher will wait for a worker slot to free up before + * giving up and surfacing a transient SLOT_WAIT_TIMEOUT error (which + * BullMQ then retries). Spec 015/2. + */ + slotWaitTimeoutMs: number; // Network settings dockerNetwork: string; @@ -134,6 +140,7 @@ export const routerConfig: RouterConfig = { workerImage: process.env.WORKER_IMAGE || 'ghcr.io/mongrel-intelligence/cascade-worker:latest', workerMemoryMb: Number(process.env.WORKER_MEMORY_MB) || 4096, workerTimeoutMs: Number(process.env.WORKER_TIMEOUT_MS) || 30 * 60 * 1000, // 30 minutes + slotWaitTimeoutMs: Number(process.env.SLOT_WAIT_TIMEOUT_MS) || 5 * 60 * 1000, // 5 minutes dockerNetwork: process.env.DOCKER_NETWORK || 'services_default', emailScheduleIntervalMs: Number(process.env.EMAIL_SCHEDULE_INTERVAL_MS) || 5 * 60 * 1000, webhookCallbackBaseUrl: process.env.WEBHOOK_CALLBACK_BASE_URL, diff --git a/src/router/container-manager.ts b/src/router/container-manager.ts index c8be047b..ee75e168 100644 --- a/src/router/container-manager.ts +++ b/src/router/container-manager.ts @@ -338,8 +338,11 @@ export async function resolveSpawnSettings( * Returns true when a Docker error indicates the requested image does not exist. * Uses the HTTP statusCode from dockerode's error objects as the primary signal, * with a substring check on the message as a secondary guard. + * + * Exported for the dispatch-error classifier (spec 015/2) so it can + * recognise this terminal class and skip BullMQ retries for it. 
*/ -function isImageNotFoundError(err: unknown): boolean { +export function isImageNotFoundError(err: unknown): boolean { return ( err != null && typeof err === 'object' && diff --git a/src/router/dispatch-compensator.ts b/src/router/dispatch-compensator.ts new file mode 100644 index 00000000..826c9f5c --- /dev/null +++ b/src/router/dispatch-compensator.ts @@ -0,0 +1,49 @@ +/** + * Compensating action for dispatch failures. + * + * Released by BullMQ's `worker.on('failed')` handler so that any in-memory + * lock state acquired during the webhook → enqueue path (work-item lock, + * agent-type lock, recently-dispatched dedup mark) is freed the moment a + * dispatch attempt is declared dead. Without this, the lock entries leak + * for up to their TTL (work-item: 30 min) and silently reject every + * follow-up webhook for the same `(projectId, workItemId, agentType)`. + * + * The compensator NEVER propagates errors. A failure here would poison the + * BullMQ worker; instead we capture to Sentry and log, then resolve. + */ + +import { captureException } from '../sentry.js'; +import { logger } from '../utils/logging.js'; +import { clearAgentTypeEnqueued, clearRecentlyDispatched } from './agent-type-lock.js'; +import type { CascadeJob } from './queue.js'; +import { clearWorkItemEnqueued } from './work-item-lock.js'; +import { extractAgentType, extractProjectIdFromJob, extractWorkItemId } from './worker-env.js'; + +// Compensator accepts `unknown` because it runs from BullMQ's `failed` event +// where the job payload type is the queue's generic and not directly +// assignable to CascadeJob (manual-run / debug-analysis jobs come through +// `cascade-dashboard-jobs`). The extractors handle type-narrowing. 
+export async function releaseLocksForFailedJob(data: unknown): Promise { + try { + const projectId = await extractProjectIdFromJob(data as CascadeJob); + if (!projectId) return; + + const workItemId = extractWorkItemId(data as CascadeJob); + const agentType = extractAgentType(data as CascadeJob); + + if (workItemId && agentType) { + clearWorkItemEnqueued(projectId, workItemId, agentType); + } + if (agentType) { + clearAgentTypeEnqueued(projectId, agentType); + clearRecentlyDispatched(projectId, agentType, workItemId); + } + } catch (err) { + logger.error('[dispatch-compensator] failed to release locks for failed job', { + error: String(err), + }); + captureException(err instanceof Error ? err : new Error(String(err)), { + tags: { source: 'dispatch_compensator' }, + }); + } +} diff --git a/src/router/dispatch-error-classifier.ts b/src/router/dispatch-error-classifier.ts new file mode 100644 index 00000000..ceff8aaa --- /dev/null +++ b/src/router/dispatch-error-classifier.ts @@ -0,0 +1,66 @@ +/** + * Dispatch-error classifier (spec 015/2). + * + * Decide whether a thrown error from `spawnWorker` / `acquireSlot` + * should burn a BullMQ retry attempt (`'transient'`) or skip retries + * by being wrapped in `UnrecoverableError` (`'terminal'`). + * + * Default is `'transient'` — when in doubt, retry. The retry budget + * itself is bounded (4 attempts with exponential backoff), so a true + * bug can't loop forever; it will surface via attempt exhaustion + + * Sentry capture in the failed-event hook. 
+ * + * Recognized terminal classes: + * - validation errors (TypeError, ZodError) + * - image-not-found AFTER the fallback retry has already exhausted + * + * Recognized transient classes: + * - ECONNREFUSED / ECONNRESET / ENOTFOUND on the Docker socket + * - HTTP 429 from the registry (rate limit) + * - HTTP 409 "name already in use" (container-name collision race) + * - SLOT_WAIT_TIMEOUT from the slot-waiter primitive + */ + +import { isImageNotFoundError } from './container-manager.js'; + +export type DispatchErrorKind = 'transient' | 'terminal'; + +interface ErrorWithCode { + code?: unknown; + statusCode?: unknown; + name?: unknown; + message?: unknown; +} + +const TRANSIENT_NODE_CODES = new Set(['ECONNREFUSED', 'ECONNRESET', 'ENOTFOUND', 'ETIMEDOUT']); +const TRANSIENT_SLOT_CODES = new Set(['SLOT_WAIT_TIMEOUT']); +const TRANSIENT_HTTP_STATUS = new Set([429, 409]); + +export function classifyDispatchError(err: unknown): DispatchErrorKind { + if (err == null || typeof err !== 'object') return 'transient'; + + const e = err as ErrorWithCode; + + // Terminal: validation + if (e.name === 'ZodError') return 'terminal'; + if (err instanceof TypeError) return 'terminal'; + + // Terminal: image-not-found AFTER fallback (the spawnWorker path's last + // resort already retried with the base image; if we still got here, the + // base image is genuinely missing). + if (isImageNotFoundError(err)) return 'terminal'; + + // Transient: tagged slot-wait timeout + if (typeof e.code === 'string' && TRANSIENT_SLOT_CODES.has(e.code)) return 'transient'; + + // Transient: socket-level Node errors + if (typeof e.code === 'string' && TRANSIENT_NODE_CODES.has(e.code)) return 'transient'; + + // Transient: known transient HTTP statuses + if (typeof e.statusCode === 'number' && TRANSIENT_HTTP_STATUS.has(e.statusCode)) + return 'transient'; + + // Default-to-retry: unknown shape. Better to burn a retry than to + // silently bury a real bug as terminal — the retry budget caps risk. 
+ return 'transient'; +} diff --git a/src/router/lock-state-classifier.ts b/src/router/lock-state-classifier.ts new file mode 100644 index 00000000..65975001 --- /dev/null +++ b/src/router/lock-state-classifier.ts @@ -0,0 +1,70 @@ +/** + * Lock-state classifier. + * + * Given a held in-memory work-item lock for `(projectId, workItemId, agentType)`, + * decide whether the lock corresponds to actual dispatch state or whether it + * has been stranded by a dispatch failure that didn't compensate. + * + * Returns one of: + * - 'awaiting-slot' — an active worker or a queued/waiting job matches the + * trio; the lock is healthy and the user-visible message should reflect + * "queued behind another run." + * - 'wedged' — neither correlation matches; the lock is stranded. After + * spec 015/1's compensator landed, this should never happen under normal + * operation. Its presence is a regression invariant: the caller is + * expected to capture it to Sentry as a canary. + * + * On classifier error (e.g. Redis hiccup during queue lookup), the result + * defaults to 'awaiting-slot' to avoid mis-emitting the wedged canary on a + * transient infrastructure blip. + */ + +import { logger } from '../utils/logging.js'; +import { getActiveWorkers } from './active-workers.js'; +import type { CascadeJob } from './queue.js'; +import { jobQueue } from './queue.js'; +import { extractAgentType, extractProjectIdFromJob, extractWorkItemId } from './worker-env.js'; + +export type LockStateClassification = 'awaiting-slot' | 'wedged'; + +export interface LockStateInput { + projectId: string; + workItemId: string; + agentType: string; +} + +export async function classifyLockState(input: LockStateInput): Promise { + const { projectId, workItemId, agentType } = input; + + // 1. Active worker correlation — fast in-memory map lookup. 
+ const activeMatch = getActiveWorkers().some( + (w) => w.projectId === projectId && w.workItemId === workItemId && w.agentType === agentType, + ); + if (activeMatch) return 'awaiting-slot'; + + // 2. BullMQ queue correlation — only if the lock is held without a + // matching active worker. Limited to waiting/active states (jobs that + // BullMQ might still pick up). + try { + const jobs = await jobQueue.getJobs(['waiting', 'active']); + for (const job of jobs) { + // `getJobs` returns `Job` per the queue's generic. + const data = job.data as CascadeJob; + const jobProjectId = await extractProjectIdFromJob(data); + if (jobProjectId !== projectId) continue; + if (extractWorkItemId(data) !== workItemId) continue; + if (extractAgentType(data) !== agentType) continue; + return 'awaiting-slot'; + } + } catch (err) { + logger.warn('[lock-state-classifier] queue lookup failed; defaulting to awaiting-slot', { + error: String(err), + projectId, + workItemId, + agentType, + }); + return 'awaiting-slot'; + } + + return 'wedged'; +} diff --git a/src/router/queue.ts b/src/router/queue.ts index b69067e3..3d1ea39a 100644 --- a/src/router/queue.ts +++ b/src/router/queue.ts @@ -76,7 +76,14 @@ export type CascadeJob = TrelloJob | GitHubJob | JiraJob | SentryJob | LinearJob export const jobQueue = new Queue('cascade-jobs', { connection, defaultJobOptions: { - attempts: 1, // No retries - agents handle their own errors + // Spec 015/2: bounded retries on dispatch failures only. Terminal + // errors (validation, image-not-found-after-fallback) bypass via + // `UnrecoverableError`. Agents themselves still handle their own + // internal errors — these attempts apply only to the dispatch path + // (the time between BullMQ pulling the job and the worker + // container *starting*, before the agent is even running). 
+ attempts: 4, + backoff: { type: 'exponential', delay: 5_000 }, removeOnComplete: { age: 24 * 60 * 60, // Keep completed jobs for 24 hours count: 100, // Keep last 100 completed jobs diff --git a/src/router/slot-waiter.ts b/src/router/slot-waiter.ts new file mode 100644 index 00000000..5946a9c9 --- /dev/null +++ b/src/router/slot-waiter.ts @@ -0,0 +1,88 @@ +/** + * In-process slot waiter — semaphore-style backpressure for the dispatcher. + * + * Replaces the old "throw on capacity" pattern from spec 015/2. When the + * dispatcher pulls a job and the worker pool is already at `maxWorkers`, + * the dispatcher awaits a slot up to a bounded timeout. If a slot frees + * (because a running container exits and `cleanupWorker` calls + * `slotReleased`), the waiter resolves and the job dispatches normally. + * If the timeout fires, the waiter rejects with a tagged + * `code: 'SLOT_WAIT_TIMEOUT'` error — the dispatch-error classifier + * recognises this code as transient, so BullMQ's retry budget kicks in. + * + * The slot is conceptually held by the running container, NOT by the + * dispatcher. `slotReleased()` is called once per cleanup from + * `cleanupWorker` (see spec 015/2 plan). The dispatcher does NOT call it. + */ + +import { logger } from '../utils/logging.js'; +import { getActiveWorkerCount } from './active-workers.js'; +import { routerConfig } from './config.js'; + +interface PendingWaiter { + resolve: () => void; + reject: (err: Error) => void; + timeoutHandle: NodeJS.Timeout; +} + +const pending: PendingWaiter[] = []; + +/** + * Wait until the worker pool has capacity, or the timeout fires. + * + * If `getActiveWorkerCount() < routerConfig.maxWorkers`, resolves + * immediately. Otherwise queues a waiter that the next `slotReleased()` + * call will pop. If the waiter sits longer than `timeoutMs`, it rejects + * with `code: 'SLOT_WAIT_TIMEOUT'`. 
+ */ +export function acquireSlot(opts: { timeoutMs: number }): Promise { + if (getActiveWorkerCount() < routerConfig.maxWorkers) { + return Promise.resolve(); + } + + return new Promise((resolve, reject) => { + const entry: PendingWaiter = { + resolve, + reject, + timeoutHandle: setTimeout(() => { + const idx = pending.indexOf(entry); + if (idx >= 0) pending.splice(idx, 1); + const err = new Error( + `Slot wait timed out after ${opts.timeoutMs}ms — worker pool stuck`, + ) as Error & { code: string }; + err.code = 'SLOT_WAIT_TIMEOUT'; + logger.warn('[slot-waiter] timed out', { timeoutMs: opts.timeoutMs }); + reject(err); + }, opts.timeoutMs), + }; + pending.push(entry); + }); +} + +/** + * Pop the head waiter and resolve it. No-op if the queue is empty — + * called every time a worker container exits, regardless of whether + * any dispatcher is currently waiting. + */ +export function slotReleased(): void { + const next = pending.shift(); + if (!next) return; + clearTimeout(next.timeoutHandle); + next.resolve(); +} + +/** + * Reject every pending waiter with `code: 'SHUTDOWN'`. Called on + * router shutdown / detachAll to avoid leaking timers and to surface + * a clear error to in-flight dispatchers. 
+ */ +export function clearAllWaiters(): void { + while (pending.length > 0) { + const entry = pending.shift(); + if (!entry) break; + clearTimeout(entry.timeoutHandle); + const err = new Error('slot-waiter: shutdown') as Error & { code: string }; + err.code = 'SHUTDOWN'; + entry.reject(err); + } +} diff --git a/src/router/webhook-processor.ts b/src/router/webhook-processor.ts index aff014ff..d2c9d486 100644 --- a/src/router/webhook-processor.ts +++ b/src/router/webhook-processor.ts @@ -15,6 +15,7 @@ import { getCoalesceWindowMs, registerPendingCreate, } from '../pm/create-coalesce-window.js'; +import { captureException } from '../sentry.js'; import type { TriggerRegistry } from '../triggers/registry.js'; import { logger } from '../utils/logging.js'; import { isDuplicateAction, markActionProcessed } from './action-dedup.js'; @@ -23,6 +24,7 @@ import { markAgentTypeEnqueued, markRecentlyDispatched, } from './agent-type-lock.js'; +import { classifyLockState } from './lock-state-classifier.js'; import type { RouterPlatformAdapter } from './platform-adapter.js'; import { addJob } from './queue.js'; import { isWorkItemLocked, markWorkItemEnqueued } from './work-item-lock.js'; @@ -190,10 +192,44 @@ export async function processRouterWebhook( blockedAgentType: result.agentType, reason: lockStatus.reason, }); + // Spec 015/1: distinguish "queued behind a real active dispatch" from + // "lock leaked by a prior dispatch failure". Defaults to awaiting-slot + // on classifier error so a transient infra blip doesn't mis-fire the + // canary. + const classification = await classifyLockState({ + projectId: project.id, + workItemId: result.workItemId, + agentType: result.agentType, + }); + const reasonSuffix = lockStatus.reason ?? 'active run exists'; + if (classification === 'wedged') { + // Regression invariant: after spec 015/1 ships, this should never + // fire under normal operation. Capture loudly so any leak is + // observable in production. 
+ captureException( + new Error( + `wedged work-item lock: projectId=${project.id} workItemId=${result.workItemId} agentType=${result.agentType}`, + ), + { + tags: { source: 'wedged_lock_canary' }, + extra: { + projectId: project.id, + workItemId: result.workItemId, + agentType: result.agentType, + reason: lockStatus.reason, + }, + }, + ); + return { + shouldProcess: true, + projectId: project.id, + decisionReason: `Work item locked (no active dispatch): ${reasonSuffix}`, + }; + } return { shouldProcess: true, projectId: project.id, - decisionReason: `Work item locked: ${lockStatus.reason ?? 'active run exists'}`, + decisionReason: `Awaiting worker slot: ${reasonSuffix}`, }; } } diff --git a/src/router/worker-manager.ts b/src/router/worker-manager.ts index 11709767..f031c76f 100644 --- a/src/router/worker-manager.ts +++ b/src/router/worker-manager.ts @@ -7,7 +7,7 @@ * Public API is unchanged — all consumers continue importing from this module. */ -import type { Job, Worker } from 'bullmq'; +import { type Job, UnrecoverableError, type Worker } from 'bullmq'; import { logger } from '../utils/logging.js'; import { createQueueWorker, parseRedisUrl } from './bullmq-workers.js'; import { routerConfig } from './config.js'; @@ -19,7 +19,9 @@ import { startOrphanCleanup, stopOrphanCleanup, } from './container-manager.js'; +import { classifyDispatchError } from './dispatch-error-classifier.js'; import type { CascadeJob } from './queue.js'; +import { acquireSlot, clearAllWaiters } from './slot-waiter.js'; import { startSnapshotCleanup, stopSnapshotCleanup } from './snapshot-cleanup.js'; import { syncSnapshotsFromDocker } from './snapshot-startup-sync.js'; @@ -35,18 +37,34 @@ let dashboardWorker: Worker | null = null; // Using a fixed 8-hour value prevents lock expiry for long-running containers. const BULLMQ_LOCK_DURATION_MS = 8 * 60 * 60 * 1000; -/** Guard that enforces the per-router concurrency cap before spawning. 
*/ +/** + * Guard that backpressures the dispatcher to the per-router concurrency cap + * and classifies spawn errors for BullMQ retry policy (spec 015/2). + * + * Capacity miss: `acquireSlot` waits up to `slotWaitTimeoutMs` for a slot + * to free; on timeout it rejects with `code: 'SLOT_WAIT_TIMEOUT'`, which + * the classifier treats as transient so BullMQ retries via attempts/backoff. + * + * Spawn error: a transient error (Docker daemon unreachable, name collision + * race, registry rate-limit) propagates unchanged — BullMQ retries. A + * terminal error (validation, image-not-found-after-fallback) is wrapped in + * `UnrecoverableError` so BullMQ skips the retry budget and the failed-event + * compensator from spec 015/1 runs once at exhaustion. + * + * The slot is conceptually held by the running container, NOT by the + * dispatcher — `slotReleased()` is called from `cleanupWorker` at container + * exit, never from here. + */ async function guardedSpawn(job: Job): Promise { - // Check if we have capacity. - // This shouldn't happen with proper concurrency settings, - // but just in case, throw to retry later. - if (getActiveWorkerCount() >= routerConfig.maxWorkers) { - throw new Error('No worker slots available'); + await acquireSlot({ timeoutMs: routerConfig.slotWaitTimeoutMs }); + try { + await spawnWorker(job); + } catch (err) { + if (classifyDispatchError(err) === 'terminal') { + throw new UnrecoverableError(err instanceof Error ? err.message : String(err)); + } + throw err; } - await spawnWorker(job); - // Note: We don't wait for the container to complete here. - // The job is considered "processed" once the container starts. - // Container exit is handled asynchronously. } export function startWorkerProcessor(): void { @@ -115,5 +133,9 @@ export async function stopWorkerProcessor(): Promise { // watchdog (src/utils/lifecycle.ts) for timeout enforcement. detachAll(); + // Reject any pending slot waiters so they don't leak timers across the + // shutdown. 
Spec 015/2. + clearAllWaiters(); + logger.info('[WorkerManager] Stopped'); } diff --git a/tests/integration/router/dispatch-failure-compensation.test.ts b/tests/integration/router/dispatch-failure-compensation.test.ts new file mode 100644 index 00000000..c9a945fe --- /dev/null +++ b/tests/integration/router/dispatch-failure-compensation.test.ts @@ -0,0 +1,193 @@ +/** + * Module-integration test for spec 015/1. + * + * Wires the REAL `releaseLocksForFailedJob` compensator + REAL + * `bullmq-workers.ts` failed-event handler + REAL `agent-type-lock.ts` + * and `work-item-lock.ts` modules, mocking only BullMQ's `Worker` + * constructor (so we can drive the `failed` event synthetically) and the + * `worker-env.ts` extractors (so we don't need the manifest registry + + * DB lookups). This is the load-bearing seam from spec 015/1: when + * BullMQ declares a job failed, the lock state acquired during the + * webhook → enqueue path must be released — and a follow-up webhook + * for the same trio must NOT be blocked. 
+ */ + +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('bullmq', () => ({ + Worker: vi.fn().mockImplementation((_queueName, _processFn, _opts) => ({ + on: vi.fn(), + })), +})); + +vi.mock('../../../src/sentry.js', () => ({ + captureException: vi.fn(), +})); + +vi.mock('../../../src/utils/logging.js', () => ({ + logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() }, +})); + +vi.mock('../../../src/router/worker-env.js', () => ({ + extractProjectIdFromJob: vi.fn(), + extractWorkItemId: vi.fn(), + extractAgentType: vi.fn(), +})); + +import { Worker } from 'bullmq'; +import { + clearAllAgentTypeLocks, + markAgentTypeEnqueued, + markRecentlyDispatched, + wasRecentlyDispatched, +} from '../../../src/router/agent-type-lock.js'; +import { createQueueWorker } from '../../../src/router/bullmq-workers.js'; +import { + clearAllWorkItemLocks, + isWorkItemLocked, + markWorkItemEnqueued, +} from '../../../src/router/work-item-lock.js'; +import { + extractAgentType, + extractProjectIdFromJob, + extractWorkItemId, +} from '../../../src/router/worker-env.js'; + +const MockWorker = vi.mocked(Worker); +const mockExtractProjectIdFromJob = vi.mocked(extractProjectIdFromJob); +const mockExtractWorkItemId = vi.mocked(extractWorkItemId); +const mockExtractAgentType = vi.mocked(extractAgentType); + +describe('spec 015/1: dispatch-failure compensation (module-integration)', () => { + beforeEach(() => { + clearAllAgentTypeLocks(); + clearAllWorkItemLocks(); + MockWorker.mockClear(); + MockWorker.mockImplementation( + (_queueName, _processFn, _opts) => + ({ + on: vi.fn(), + }) as never, + ); + mockExtractProjectIdFromJob.mockReset(); + mockExtractWorkItemId.mockReset(); + mockExtractAgentType.mockReset(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('releases work-item + agent-type + recently-dispatched locks when BullMQ declares the job failed', async () => { + // Webhook → enqueue path simulated: marks the locks like 
webhook-processor.ts does. + markWorkItemEnqueued('ucho', 'MNG-350', 'implementation'); + markAgentTypeEnqueued('ucho', 'implementation'); + markRecentlyDispatched('ucho', 'implementation', 'MNG-350'); + + // Sanity — locks are held. + expect((await isWorkItemLocked('ucho', 'MNG-350', 'implementation')).locked).toBe(true); + expect(wasRecentlyDispatched('ucho', 'implementation', 'MNG-350')).toBe(true); + + // Construct a real worker via the factory; capture its registered handlers. + const worker = createQueueWorker({ + queueName: 'cascade-jobs', + label: 'Job', + connection: { host: 'localhost', port: 6379 }, + concurrency: 1, + lockDuration: 60_000, + processFn: vi.fn().mockResolvedValue(undefined), + }); + const failedHandler = vi.mocked(worker.on).mock.calls.find((c) => c[0] === 'failed')?.[1] as ( + job: { id: string; data: unknown } | undefined, + err: Error, + ) => void; + expect(failedHandler).toBeDefined(); + + // Drive the extractors so the compensator resolves to the same trio. + mockExtractProjectIdFromJob.mockResolvedValue('ucho'); + mockExtractWorkItemId.mockReturnValue('MNG-350'); + mockExtractAgentType.mockReturnValue('implementation'); + + // Synthetic failed event — the compensator runs as a side-effect. + failedHandler( + { id: 'linear-1777217350854-2qvhjo', data: { type: 'linear' } }, + new Error('No worker slots available'), + ); + + // Compensator is async; let microtasks drain. + await new Promise((r) => setImmediate(r)); + + // Locks released — a fresh webhook for the same trio is NOT blocked. 
+ expect((await isWorkItemLocked('ucho', 'MNG-350', 'implementation')).locked).toBe(false); + expect(wasRecentlyDispatched('ucho', 'implementation', 'MNG-350')).toBe(false); + }); + + it('does NOT release locks for a job whose extractors return null projectId (foreign provider)', async () => { + markWorkItemEnqueued('ucho', 'MNG-350', 'implementation'); + expect((await isWorkItemLocked('ucho', 'MNG-350', 'implementation')).locked).toBe(true); + + const worker = createQueueWorker({ + queueName: 'cascade-jobs', + label: 'Job', + connection: { host: 'localhost', port: 6379 }, + concurrency: 1, + lockDuration: 60_000, + processFn: vi.fn().mockResolvedValue(undefined), + }); + const failedHandler = vi.mocked(worker.on).mock.calls.find((c) => c[0] === 'failed')?.[1] as ( + job: { id: string; data: unknown } | undefined, + err: Error, + ) => void; + + mockExtractProjectIdFromJob.mockResolvedValue(null); + mockExtractWorkItemId.mockReturnValue('MNG-350'); + mockExtractAgentType.mockReturnValue('implementation'); + + failedHandler({ id: 'foreign-job', data: { type: 'something-else' } }, new Error('boom')); + await new Promise((r) => setImmediate(r)); + + // Lock for ucho/MNG-350 stays — foreign-provider failures never touch + // the trio we care about. (extractors returned null projectId.) 
+ expect((await isWorkItemLocked('ucho', 'MNG-350', 'implementation')).locked).toBe(true); + }); + + it('manual-run job with full trio gets its locks released too (dashboard queue parity)', async () => { + markWorkItemEnqueued('ucho', 'MNG-350', 'implementation'); + markAgentTypeEnqueued('ucho', 'implementation'); + markRecentlyDispatched('ucho', 'implementation', 'MNG-350'); + + const worker = createQueueWorker({ + queueName: 'cascade-dashboard-jobs', + label: 'Dashboard job', + connection: { host: 'localhost', port: 6379 }, + concurrency: 1, + lockDuration: 60_000, + processFn: vi.fn().mockResolvedValue(undefined), + }); + const failedHandler = vi.mocked(worker.on).mock.calls.find((c) => c[0] === 'failed')?.[1] as ( + job: { id: string; data: unknown } | undefined, + err: Error, + ) => void; + + mockExtractProjectIdFromJob.mockResolvedValue('ucho'); + mockExtractWorkItemId.mockReturnValue('MNG-350'); + mockExtractAgentType.mockReturnValue('implementation'); + + failedHandler( + { + id: 'manual-run-1777219028558-jvvxni', + data: { + type: 'manual-run', + projectId: 'ucho', + workItemId: 'MNG-350', + agentType: 'implementation', + }, + }, + new Error('boom'), + ); + await new Promise((r) => setImmediate(r)); + + expect((await isWorkItemLocked('ucho', 'MNG-350', 'implementation')).locked).toBe(false); + expect(wasRecentlyDispatched('ucho', 'implementation', 'MNG-350')).toBe(false); + }); +}); diff --git a/tests/integration/router/dispatch-retry.test.ts b/tests/integration/router/dispatch-retry.test.ts new file mode 100644 index 00000000..2f446e2d --- /dev/null +++ b/tests/integration/router/dispatch-retry.test.ts @@ -0,0 +1,211 @@ +/** + * Module-integration test for spec 015/2. 
+ * + * Validates the dispatch-path retry contract end-to-end: + * - transient errors propagate unchanged so BullMQ retries + * - terminal errors are wrapped in `UnrecoverableError` so retries skip + * - capacity miss waits for a slot rather than failing immediately + * + * Wires the REAL `guardedSpawn` body (via `createQueueWorker` → + * `processFn`) plus REAL `slot-waiter`, REAL `dispatch-error-classifier`, + * REAL `active-workers`, mocking only `spawnWorker` (so we can simulate + * Docker errors deterministically) and BullMQ's `Worker` constructor (so + * we can drive `failed`/process-fn calls synthetically without a real + * Redis). This is the load-bearing seam from spec 015/2. + * + * The full Redis-driven `attempts: 4 + backoff` retry timing is BullMQ's + * own well-tested behavior; we don't re-test it here. We only verify + * the *classification contract* on our side (transient vs terminal), + * because that's the thing this spec changed. + */ + +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('bullmq', async (importOriginal) => { + const real = (await importOriginal()) as Record; + return { + ...real, + Worker: vi.fn().mockImplementation((_queueName, processFn, _opts) => ({ + on: vi.fn(), + close: vi.fn().mockResolvedValue(undefined), + __processFn: processFn, + })), + }; +}); + +vi.mock('../../../src/router/container-manager.js', () => ({ + spawnWorker: vi.fn(), + getActiveWorkerCount: vi.fn().mockReturnValue(0), + getActiveWorkers: vi.fn().mockReturnValue([]), + detachAll: vi.fn(), + startOrphanCleanup: vi.fn(), + stopOrphanCleanup: vi.fn(), + isImageNotFoundError: vi.fn().mockReturnValue(false), +})); + +// `slot-waiter` reads `getActiveWorkerCount` from `active-workers.js`, NOT +// from `container-manager.js` (despite the re-export). We mock both so the +// capacity-miss test can drive the real slot-waiter into the queued state. 
+vi.mock('../../../src/router/active-workers.js', () => ({ + getActiveWorkerCount: vi.fn().mockReturnValue(0), +})); + +vi.mock('../../../src/router/snapshot-cleanup.js', () => ({ + startSnapshotCleanup: vi.fn(), + stopSnapshotCleanup: vi.fn(), +})); + +vi.mock('../../../src/router/snapshot-startup-sync.js', () => ({ + syncSnapshotsFromDocker: vi.fn().mockResolvedValue(undefined), +})); + +vi.mock('../../../src/utils/logging.js', () => ({ + logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() }, +})); + +vi.mock('../../../src/sentry.js', () => ({ + captureException: vi.fn(), +})); + +import { Worker } from 'bullmq'; +import { getActiveWorkerCount } from '../../../src/router/active-workers.js'; +import { spawnWorker } from '../../../src/router/container-manager.js'; +import { syncSnapshotsFromDocker } from '../../../src/router/snapshot-startup-sync.js'; +import { startWorkerProcessor, stopWorkerProcessor } from '../../../src/router/worker-manager.js'; + +const MockWorker = vi.mocked(Worker); +const mockSpawnWorker = vi.mocked(spawnWorker); +const mockGetActiveWorkerCount = vi.mocked(getActiveWorkerCount); +const mockSyncSnapshots = vi.mocked(syncSnapshotsFromDocker); + +interface FakeWorker { + on: ReturnType; + __processFn: (job: unknown) => Promise; +} + +function getProcessFn(queueName: string): (job: unknown) => Promise { + const call = MockWorker.mock.results.find((_r, i) => { + const args = MockWorker.mock.calls[i]; + return args?.[0] === queueName; + }); + const w = call?.value as FakeWorker | undefined; + if (!w?.__processFn) throw new Error(`processFn not captured for queue ${queueName}`); + return w.__processFn; +} + +describe('spec 015/2: dispatch-path retry classification (module-integration)', () => { + beforeEach(async () => { + await stopWorkerProcessor(); + MockWorker.mockReset(); + MockWorker.mockImplementation( + (_queueName, processFn, _opts) => + ({ + on: vi.fn(), + close: vi.fn().mockResolvedValue(undefined), + __processFn: 
processFn, + }) as never, + ); + mockSpawnWorker.mockReset(); + mockGetActiveWorkerCount.mockReturnValue(0); + mockSyncSnapshots.mockResolvedValue(undefined); + startWorkerProcessor(); + }); + + afterEach(async () => { + await stopWorkerProcessor(); + vi.restoreAllMocks(); + }); + + it('cascade-jobs: transient spawn error propagates unchanged so BullMQ retries via attempts/backoff', async () => { + const transientErr = Object.assign(new Error('ECONNREFUSED docker.sock'), { + code: 'ECONNREFUSED', + }); + mockSpawnWorker.mockRejectedValueOnce(transientErr); + + const processFn = getProcessFn('cascade-jobs'); + await expect(processFn({ id: 'j1', data: { type: 'linear', projectId: 'p1' } })).rejects.toBe( + transientErr, + ); + }); + + it('cascade-jobs: terminal spawn error is wrapped in UnrecoverableError so BullMQ skips retries', async () => { + // TypeError is one of the terminal classes the dispatch-error + // classifier recognises. Image-not-found is also terminal but + // requires the real `isImageNotFoundError` predicate which is + // stubbed in this test. 
+ const terminalErr = new TypeError("Cannot read 'foo' of undefined"); + mockSpawnWorker.mockRejectedValueOnce(terminalErr); + + const processFn = getProcessFn('cascade-jobs'); + const rejectionSpy = vi.fn(); + await processFn({ id: 'j2', data: { type: 'linear', projectId: 'p1' } }).catch(rejectionSpy); + + expect(rejectionSpy).toHaveBeenCalledTimes(1); + const thrown = rejectionSpy.mock.calls[0][0]; + expect((thrown as Error).name).toBe('UnrecoverableError'); + }); + + it('cascade-dashboard-jobs: transient spawn error propagates unchanged (parity with main queue)', async () => { + const transientErr = Object.assign(new Error('socket hang up'), { code: 'ECONNRESET' }); + mockSpawnWorker.mockRejectedValueOnce(transientErr); + + const processFn = getProcessFn('cascade-dashboard-jobs'); + await expect( + processFn({ + id: 'manual-run-x', + data: { + type: 'manual-run', + projectId: 'p1', + workItemId: 'MNG-1', + agentType: 'implementation', + }, + }), + ).rejects.toBe(transientErr); + }); + + it('cascade-dashboard-jobs: terminal spawn error is wrapped in UnrecoverableError (parity)', async () => { + const terminalErr = new TypeError("Cannot read 'foo' of undefined"); + mockSpawnWorker.mockRejectedValueOnce(terminalErr); + + const processFn = getProcessFn('cascade-dashboard-jobs'); + const rejectionSpy = vi.fn(); + await processFn({ + id: 'manual-run-y', + data: { + type: 'manual-run', + projectId: 'p1', + workItemId: 'MNG-2', + agentType: 'review', + }, + }).catch(rejectionSpy); + + expect(rejectionSpy).toHaveBeenCalledTimes(1); + const thrown = rejectionSpy.mock.calls[0][0]; + expect((thrown as Error).name).toBe('UnrecoverableError'); + }); + + it('cascade-jobs: capacity miss waits for a slot, then dispatches when one frees', async () => { + // Force "at capacity" — use 999 to be safely above any plausible + // maxWorkers (config default 3 in the test env, real default also 3). 
+ mockGetActiveWorkerCount.mockReturnValue(999); + mockSpawnWorker.mockResolvedValueOnce(undefined); + + const processFn = getProcessFn('cascade-jobs'); + const inflight = processFn({ id: 'j-cap', data: { type: 'linear', projectId: 'p1' } }); + + // Before the slot frees, spawnWorker must NOT have been called. + await Promise.resolve(); + await Promise.resolve(); + expect(mockSpawnWorker).not.toHaveBeenCalled(); + + // Free a slot by importing & calling the real `slotReleased`. + const { slotReleased } = await import('../../../src/router/slot-waiter.js'); + // Capacity is still 999 in the mock, but the waiter doesn't re-check + // — it simply pops the head waiter. So slotReleased() unblocks the + // inflight processFn, which proceeds to spawnWorker. + slotReleased(); + + await inflight; + expect(mockSpawnWorker).toHaveBeenCalledTimes(1); + }); +}); diff --git a/tests/unit/router/active-workers.test.ts b/tests/unit/router/active-workers.test.ts index 8c396e23..fd9d5347 100644 --- a/tests/unit/router/active-workers.test.ts +++ b/tests/unit/router/active-workers.test.ts @@ -11,6 +11,7 @@ const { mockClearAllWorkItemLocks, mockClearAgentTypeEnqueued, mockClearAllAgentTypeLocks, + mockSlotReleased, } = vi.hoisted(() => ({ mockFailOrphanedRun: vi.fn().mockResolvedValue(null), mockFailOrphanedRunFallback: vi.fn().mockResolvedValue(null), @@ -18,6 +19,7 @@ const { mockClearAllWorkItemLocks: vi.fn(), mockClearAgentTypeEnqueued: vi.fn(), mockClearAllAgentTypeLocks: vi.fn(), + mockSlotReleased: vi.fn(), })); // --------------------------------------------------------------------------- @@ -39,6 +41,10 @@ vi.mock('../../../src/router/agent-type-lock.js', () => ({ clearAllAgentTypeLocks: (...args: unknown[]) => mockClearAllAgentTypeLocks(...args), })); +vi.mock('../../../src/router/slot-waiter.js', () => ({ + slotReleased: (...args: unknown[]) => mockSlotReleased(...args), +})); + // --------------------------------------------------------------------------- // Imports (after 
mocks) // --------------------------------------------------------------------------- @@ -86,6 +92,7 @@ describe('active-workers', () => { mockFailOrphanedRunFallback.mockResolvedValue(null); mockClearWorkItemEnqueued.mockClear(); mockClearAgentTypeEnqueued.mockClear(); + mockSlotReleased.mockClear(); }); afterEach(() => { @@ -115,7 +122,41 @@ describe('active-workers', () => { activeWorkers.set('job-1', makeActiveWorker({ jobId: 'job-1', startedAt })); const workers = getActiveWorkers(); expect(workers).toHaveLength(1); - expect(workers[0]).toEqual({ jobId: 'job-1', startedAt }); + // Allow extra (projectId/workItemId/agentType) fields — they're added + // in spec 015/1 so the lock-state classifier can correlate locks with + // active dispatch state. Pin only the load-bearing fields here. + expect(workers[0]).toMatchObject({ jobId: 'job-1', startedAt }); + }); + + it('returns projectId, workItemId, agentType for each tracked worker (spec 015/1)', () => { + const startedAt = new Date(); + activeWorkers.set( + 'job-7', + makeActiveWorker({ + jobId: 'job-7', + startedAt, + projectId: 'ucho', + workItemId: 'MNG-350', + agentType: 'implementation', + }), + ); + const workers = getActiveWorkers(); + expect(workers).toHaveLength(1); + expect(workers[0]).toMatchObject({ + jobId: 'job-7', + startedAt, + projectId: 'ucho', + workItemId: 'MNG-350', + agentType: 'implementation', + }); + }); + + it('omitted projectId/workItemId/agentType remain undefined (no synthetic defaults)', () => { + activeWorkers.set('job-bare', makeActiveWorker({ jobId: 'job-bare' })); + const workers = getActiveWorkers(); + expect(workers[0]?.projectId).toBeUndefined(); + expect(workers[0]?.workItemId).toBeUndefined(); + expect(workers[0]?.agentType).toBeUndefined(); }); }); @@ -173,6 +214,25 @@ describe('active-workers', () => { expect(mockClearAgentTypeEnqueued).toHaveBeenCalledWith('proj-1', 'review'); }); + it('calls slotReleased exactly once per cleanup (spec 015/2)', () => { + 
activeWorkers.set('job-slot', makeActiveWorker({ jobId: 'job-slot' })); + cleanupWorker('job-slot'); + expect(mockSlotReleased).toHaveBeenCalledTimes(1); + }); + + it('calls slotReleased on the crash path (exitCode != 0)', () => { + activeWorkers.set('job-crash', makeActiveWorker({ jobId: 'job-crash', projectId: 'p1' })); + cleanupWorker('job-crash', 137, { oomKilled: true }); + expect(mockSlotReleased).toHaveBeenCalledTimes(1); + }); + + it('does NOT double-call slotReleased on duplicate cleanup invocations', () => { + activeWorkers.set('job-dup', makeActiveWorker({ jobId: 'job-dup' })); + cleanupWorker('job-dup'); + cleanupWorker('job-dup'); // second call: worker already removed + expect(mockSlotReleased).toHaveBeenCalledTimes(1); + }); + it('calls failOrphanedRun on non-zero exit code', () => { mockFailOrphanedRun.mockResolvedValue('run-123'); activeWorkers.set( diff --git a/tests/unit/router/agent-type-lock.test.ts b/tests/unit/router/agent-type-lock.test.ts index c296c091..744514c0 100644 --- a/tests/unit/router/agent-type-lock.test.ts +++ b/tests/unit/router/agent-type-lock.test.ts @@ -19,6 +19,7 @@ import { checkAgentTypeConcurrency, clearAgentTypeEnqueued, clearAllAgentTypeLocks, + clearRecentlyDispatched, isAgentTypeLocked, markAgentTypeEnqueued, markRecentlyDispatched, @@ -194,6 +195,28 @@ describe('agent-type-lock', () => { expect(wasRecentlyDispatched('proj1', 'planning', 'TF-10')).toBe(false); expect(wasRecentlyDispatched('proj1', 'planning')).toBe(false); }); + + it('clearRecentlyDispatched removes the dedup entry for a (projectId, agentType, dedupScope) trio', () => { + markRecentlyDispatched('proj1', 'implementation', 'w1'); + expect(wasRecentlyDispatched('proj1', 'implementation', 'w1')).toBe(true); + clearRecentlyDispatched('proj1', 'implementation', 'w1'); + expect(wasRecentlyDispatched('proj1', 'implementation', 'w1')).toBe(false); + }); + + it('clearRecentlyDispatched is a no-op when the key was not previously marked', () => { + expect(() => 
clearRecentlyDispatched('proj1', 'implementation', 'w1')).not.toThrow(); + expect(wasRecentlyDispatched('proj1', 'implementation', 'w1')).toBe(false); + }); + + it('clearRecentlyDispatched leaves entries for other (agentType, scope) keys untouched', () => { + markRecentlyDispatched('proj1', 'implementation', 'w1'); + markRecentlyDispatched('proj1', 'review', 'w1'); + markRecentlyDispatched('proj1', 'implementation', 'w2'); + clearRecentlyDispatched('proj1', 'implementation', 'w1'); + expect(wasRecentlyDispatched('proj1', 'implementation', 'w1')).toBe(false); + expect(wasRecentlyDispatched('proj1', 'review', 'w1')).toBe(true); + expect(wasRecentlyDispatched('proj1', 'implementation', 'w2')).toBe(true); + }); }); // ======================================================================== diff --git a/tests/unit/router/bullmq-workers.test.ts b/tests/unit/router/bullmq-workers.test.ts index c2f9a78f..bc6ddc41 100644 --- a/tests/unit/router/bullmq-workers.test.ts +++ b/tests/unit/router/bullmq-workers.test.ts @@ -14,6 +14,10 @@ vi.mock('../../../src/sentry.js', () => ({ captureException: vi.fn(), })); +vi.mock('../../../src/router/dispatch-compensator.js', () => ({ + releaseLocksForFailedJob: vi.fn().mockResolvedValue(undefined), +})); + // Mock logger vi.mock('../../../src/utils/logging.js', () => ({ logger: { @@ -30,16 +34,20 @@ vi.mock('../../../src/utils/logging.js', () => ({ import { Worker } from 'bullmq'; import { createQueueWorker, parseRedisUrl } from '../../../src/router/bullmq-workers.js'; +import { releaseLocksForFailedJob } from '../../../src/router/dispatch-compensator.js'; import { captureException } from '../../../src/sentry.js'; import { logger } from '../../../src/utils/logging.js'; const MockWorker = vi.mocked(Worker); const mockCaptureException = vi.mocked(captureException); const mockLogger = vi.mocked(logger); +const mockReleaseLocksForFailedJob = vi.mocked(releaseLocksForFailedJob); beforeEach(() => { MockWorker.mockClear(); 
mockCaptureException.mockClear(); + mockReleaseLocksForFailedJob.mockClear(); + mockReleaseLocksForFailedJob.mockResolvedValue(undefined); // Re-establish default mock so each test gets a fresh mock worker MockWorker.mockImplementation( (_queueName, _processFn, _opts) => @@ -192,4 +200,71 @@ describe('createQueueWorker', () => { }), ); }); + + it("worker.on('failed') invokes releaseLocksForFailedJob with job.data", () => { + const worker = createQueueWorker(baseConfig); + const mockOn = vi.mocked(worker.on); + + const failedCall = mockOn.mock.calls.find((call) => call[0] === 'failed'); + const handler = failedCall?.[1] as ( + job: { id: string; data: unknown } | undefined, + err: Error, + ) => void; + const jobData = { type: 'linear', payload: 'foo' }; + handler({ id: 'job-99', data: jobData }, new Error('boom')); + + expect(mockReleaseLocksForFailedJob).toHaveBeenCalledTimes(1); + expect(mockReleaseLocksForFailedJob).toHaveBeenCalledWith(jobData); + }); + + it("worker.on('failed') still logs and Sentries on top of compensating", () => { + mockLogger.error.mockReset(); + const worker = createQueueWorker(baseConfig); + + const handler = vi.mocked(worker.on).mock.calls.find((c) => c[0] === 'failed')?.[1] as ( + job: { id: string; data: unknown } | undefined, + err: Error, + ) => void; + handler({ id: 'job-100', data: { type: 'github' } }, new Error('nope')); + + expect(mockLogger.error).toHaveBeenCalled(); + expect(mockCaptureException).toHaveBeenCalled(); + expect(mockReleaseLocksForFailedJob).toHaveBeenCalled(); + }); + + it("worker.on('failed') swallows compensator throws", async () => { + mockReleaseLocksForFailedJob.mockRejectedValueOnce(new Error('compensator boom')); + const worker = createQueueWorker(baseConfig); + const mockOn = vi.mocked(worker.on); + + const handler = mockOn.mock.calls.find((c) => c[0] === 'failed')?.[1] as ( + job: { id: string; data: unknown } | undefined, + err: Error, + ) => void; + + // Calling the handler must not propagate the 
compensator rejection. + // We invoke it and let the microtask queue drain — there must be no + // unhandled rejection in test logs. + expect(() => + handler({ id: 'job-101', data: { type: 'github' } }, new Error('x')), + ).not.toThrow(); + // Drain the rejection by giving microtasks a turn. + await new Promise((r) => setImmediate(r)); + // Test passes if we got here without an unhandled rejection killing vitest. + }); + + it("worker.on('failed') does not call compensator when job is undefined", () => { + const worker = createQueueWorker(baseConfig); + const mockOn = vi.mocked(worker.on); + + const handler = mockOn.mock.calls.find((c) => c[0] === 'failed')?.[1] as ( + job: { id: string; data: unknown } | undefined, + err: Error, + ) => void; + handler(undefined, new Error('orphan')); + + expect(mockReleaseLocksForFailedJob).not.toHaveBeenCalled(); + // Existing log + Sentry behavior preserved + expect(mockLogger.error).toHaveBeenCalled(); + }); }); diff --git a/tests/unit/router/dispatch-compensator.test.ts b/tests/unit/router/dispatch-compensator.test.ts new file mode 100644 index 00000000..8ec7441c --- /dev/null +++ b/tests/unit/router/dispatch-compensator.test.ts @@ -0,0 +1,148 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +// Mock the worker-env extractors so we can drive their return values per test +// without standing up the manifest registry / DB lookups they normally consult. 
+vi.mock('../../../src/router/worker-env.js', () => ({ + extractProjectIdFromJob: vi.fn(), + extractWorkItemId: vi.fn(), + extractAgentType: vi.fn(), +})); + +vi.mock('../../../src/router/work-item-lock.js', () => ({ + clearWorkItemEnqueued: vi.fn(), +})); + +vi.mock('../../../src/router/agent-type-lock.js', () => ({ + clearAgentTypeEnqueued: vi.fn(), + clearRecentlyDispatched: vi.fn(), +})); + +vi.mock('../../../src/sentry.js', () => ({ + captureException: vi.fn(), +})); + +vi.mock('../../../src/utils/logging.js', () => ({ + logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() }, +})); + +import { + clearAgentTypeEnqueued, + clearRecentlyDispatched, +} from '../../../src/router/agent-type-lock.js'; +import { releaseLocksForFailedJob } from '../../../src/router/dispatch-compensator.js'; +import { clearWorkItemEnqueued } from '../../../src/router/work-item-lock.js'; +import { + extractAgentType, + extractProjectIdFromJob, + extractWorkItemId, +} from '../../../src/router/worker-env.js'; +import { captureException } from '../../../src/sentry.js'; + +const mockExtractProjectIdFromJob = vi.mocked(extractProjectIdFromJob); +const mockExtractWorkItemId = vi.mocked(extractWorkItemId); +const mockExtractAgentType = vi.mocked(extractAgentType); +const mockClearWorkItemEnqueued = vi.mocked(clearWorkItemEnqueued); +const mockClearAgentTypeEnqueued = vi.mocked(clearAgentTypeEnqueued); +const mockClearRecentlyDispatched = vi.mocked(clearRecentlyDispatched); +const mockCaptureException = vi.mocked(captureException); + +describe('releaseLocksForFailedJob', () => { + beforeEach(() => { + mockExtractProjectIdFromJob.mockReset(); + mockExtractWorkItemId.mockReset(); + mockExtractAgentType.mockReset(); + mockClearWorkItemEnqueued.mockReset(); + mockClearAgentTypeEnqueued.mockReset(); + mockClearRecentlyDispatched.mockReset(); + mockCaptureException.mockReset(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('releases work-item, agent-type, and 
recently-dispatched marks for a CascadeJob with all three identifiers', async () => { + mockExtractProjectIdFromJob.mockResolvedValue('p1'); + mockExtractWorkItemId.mockReturnValue('w1'); + mockExtractAgentType.mockReturnValue('implementation'); + + // biome-ignore lint/suspicious/noExplicitAny: test fixture, shape is irrelevant + await releaseLocksForFailedJob({ type: 'linear' } as any); + + expect(mockClearWorkItemEnqueued).toHaveBeenCalledTimes(1); + expect(mockClearWorkItemEnqueued).toHaveBeenCalledWith('p1', 'w1', 'implementation'); + expect(mockClearAgentTypeEnqueued).toHaveBeenCalledTimes(1); + expect(mockClearAgentTypeEnqueued).toHaveBeenCalledWith('p1', 'implementation'); + expect(mockClearRecentlyDispatched).toHaveBeenCalledTimes(1); + expect(mockClearRecentlyDispatched).toHaveBeenCalledWith('p1', 'implementation', 'w1'); + }); + + it('no-ops cleanly when projectId is null (e.g. foreign-provider payload)', async () => { + mockExtractProjectIdFromJob.mockResolvedValue(null); + mockExtractWorkItemId.mockReturnValue('w1'); + mockExtractAgentType.mockReturnValue('implementation'); + + // biome-ignore lint/suspicious/noExplicitAny: test fixture + await releaseLocksForFailedJob({ type: 'linear' } as any); + + expect(mockClearWorkItemEnqueued).not.toHaveBeenCalled(); + expect(mockClearAgentTypeEnqueued).not.toHaveBeenCalled(); + expect(mockClearRecentlyDispatched).not.toHaveBeenCalled(); + }); + + it('releases agent-type-lock + recently-dispatched even when workItemId is undefined', async () => { + mockExtractProjectIdFromJob.mockResolvedValue('p1'); + mockExtractWorkItemId.mockReturnValue(undefined); + mockExtractAgentType.mockReturnValue('backlog-manager'); + + // biome-ignore lint/suspicious/noExplicitAny: test fixture + await releaseLocksForFailedJob({ type: 'manual-run', projectId: 'p1' } as any); + + expect(mockClearWorkItemEnqueued).not.toHaveBeenCalled(); + expect(mockClearAgentTypeEnqueued).toHaveBeenCalledWith('p1', 'backlog-manager'); + 
expect(mockClearRecentlyDispatched).toHaveBeenCalledWith('p1', 'backlog-manager', undefined); + }); + + it('handles a DashboardJob (manual-run) without throwing', async () => { + mockExtractProjectIdFromJob.mockResolvedValue('p1'); + mockExtractWorkItemId.mockReturnValue('MNG-350'); + mockExtractAgentType.mockReturnValue('implementation'); + + await expect( + releaseLocksForFailedJob({ + type: 'manual-run', + projectId: 'p1', + workItemId: 'MNG-350', + agentType: 'implementation', + }), + ).resolves.toBeUndefined(); + expect(mockClearWorkItemEnqueued).toHaveBeenCalledWith('p1', 'MNG-350', 'implementation'); + }); + + it('captureException when an extractor throws; never propagates', async () => { + mockExtractProjectIdFromJob.mockRejectedValue(new Error('extractor boom')); + + await expect( + // biome-ignore lint/suspicious/noExplicitAny: test fixture + releaseLocksForFailedJob({ type: 'linear' } as any), + ).resolves.toBeUndefined(); + + expect(mockCaptureException).toHaveBeenCalledTimes(1); + const [errArg, ctx] = mockCaptureException.mock.calls[0] ?? 
[]; + expect(errArg).toBeInstanceOf(Error); + expect(ctx?.tags?.source).toBe('dispatch_compensator'); + }); + + it('skips agent-type / recently-dispatched if agentType is undefined', async () => { + mockExtractProjectIdFromJob.mockResolvedValue('p1'); + mockExtractWorkItemId.mockReturnValue('w1'); + mockExtractAgentType.mockReturnValue(undefined); + + // biome-ignore lint/suspicious/noExplicitAny: test fixture + await releaseLocksForFailedJob({ type: 'github' } as any); + + expect(mockClearWorkItemEnqueued).not.toHaveBeenCalled(); + expect(mockClearAgentTypeEnqueued).not.toHaveBeenCalled(); + expect(mockClearRecentlyDispatched).not.toHaveBeenCalled(); + }); +}); diff --git a/tests/unit/router/dispatch-error-classifier.test.ts b/tests/unit/router/dispatch-error-classifier.test.ts new file mode 100644 index 00000000..a026f39b --- /dev/null +++ b/tests/unit/router/dispatch-error-classifier.test.ts @@ -0,0 +1,66 @@ +import { describe, expect, it } from 'vitest'; + +import { classifyDispatchError } from '../../../src/router/dispatch-error-classifier.js'; + +describe('classifyDispatchError', () => { + it("Docker daemon unreachable (ECONNREFUSED) → 'transient'", () => { + const err = Object.assign(new Error('connect ECONNREFUSED /var/run/docker.sock'), { + code: 'ECONNREFUSED', + }); + expect(classifyDispatchError(err)).toBe('transient'); + }); + + it("Docker socket reset (ECONNRESET) → 'transient'", () => { + const err = Object.assign(new Error('socket hang up'), { code: 'ECONNRESET' }); + expect(classifyDispatchError(err)).toBe('transient'); + }); + + it("DNS lookup failure (ENOTFOUND) → 'transient'", () => { + const err = Object.assign(new Error('getaddrinfo ENOTFOUND'), { code: 'ENOTFOUND' }); + expect(classifyDispatchError(err)).toBe('transient'); + }); + + it("registry rate-limit (HTTP 429) → 'transient'", () => { + const err = Object.assign(new Error('toomanyrequests: Rate limit'), { statusCode: 429 }); + expect(classifyDispatchError(err)).toBe('transient'); + }); 
+ + it("container name collision (HTTP 409 'name already in use') → 'transient'", () => { + const err = Object.assign( + new Error('(HTTP code 409) The container name "/x" is already in use'), + { statusCode: 409 }, + ); + expect(classifyDispatchError(err)).toBe('transient'); + }); + + it("image not found after fallback (404 + 'no such image') → 'terminal'", () => { + const err = Object.assign(new Error('(HTTP code 404) No such image: foo:latest'), { + statusCode: 404, + }); + expect(classifyDispatchError(err)).toBe('terminal'); + }); + + it("validation error (TypeError) → 'terminal'", () => { + expect(classifyDispatchError(new TypeError("Cannot read 'foo' of undefined"))).toBe('terminal'); + }); + + it("slot-wait timeout (code: 'SLOT_WAIT_TIMEOUT') → 'transient'", () => { + const err = Object.assign(new Error('Slot wait timed out'), { code: 'SLOT_WAIT_TIMEOUT' }); + expect(classifyDispatchError(err)).toBe('transient'); + }); + + it("unknown error (no recognizable shape) → 'transient' (default-to-retry)", () => { + expect(classifyDispatchError(new Error('something weird'))).toBe('transient'); + }); + + it("ZodError-shaped (name='ZodError') → 'terminal'", () => { + const err = Object.assign(new Error('validation failed'), { name: 'ZodError' }); + expect(classifyDispatchError(err)).toBe('terminal'); + }); + + it("non-Error values → 'transient' (default-to-retry, never crash the classifier)", () => { + expect(classifyDispatchError('plain string')).toBe('transient'); + expect(classifyDispatchError(null)).toBe('transient'); + expect(classifyDispatchError(undefined)).toBe('transient'); + }); +}); diff --git a/tests/unit/router/lock-state-classifier.test.ts b/tests/unit/router/lock-state-classifier.test.ts new file mode 100644 index 00000000..ee436508 --- /dev/null +++ b/tests/unit/router/lock-state-classifier.test.ts @@ -0,0 +1,122 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('../../../src/router/active-workers.js', () => ({ + 
getActiveWorkers: vi.fn(), +})); + +vi.mock('../../../src/router/queue.js', () => ({ + jobQueue: { + getJobs: vi.fn(), + }, +})); + +vi.mock('../../../src/router/worker-env.js', () => ({ + extractProjectIdFromJob: vi.fn(), + extractWorkItemId: vi.fn(), + extractAgentType: vi.fn(), +})); + +vi.mock('../../../src/utils/logging.js', () => ({ + logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() }, +})); + +import { getActiveWorkers } from '../../../src/router/active-workers.js'; +import { classifyLockState } from '../../../src/router/lock-state-classifier.js'; +import { jobQueue } from '../../../src/router/queue.js'; +import { + extractAgentType, + extractProjectIdFromJob, + extractWorkItemId, +} from '../../../src/router/worker-env.js'; + +const mockGetActiveWorkers = vi.mocked(getActiveWorkers); +const mockGetJobs = vi.mocked(jobQueue.getJobs); +const mockExtractProjectIdFromJob = vi.mocked(extractProjectIdFromJob); +const mockExtractWorkItemId = vi.mocked(extractWorkItemId); +const mockExtractAgentType = vi.mocked(extractAgentType); + +describe('classifyLockState', () => { + const trio = { projectId: 'ucho', workItemId: 'MNG-350', agentType: 'implementation' as const }; + + beforeEach(() => { + mockGetActiveWorkers.mockReset(); + mockGetJobs.mockReset(); + mockExtractProjectIdFromJob.mockReset(); + mockExtractWorkItemId.mockReset(); + mockExtractAgentType.mockReset(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it("returns 'awaiting-slot' when an active worker matches the trio", async () => { + mockGetActiveWorkers.mockReturnValue([ + { + jobId: 'job-x', + startedAt: new Date(), + projectId: 'ucho', + workItemId: 'MNG-350', + agentType: 'implementation', + }, + ]); + mockGetJobs.mockResolvedValue([]); + + const result = await classifyLockState(trio); + expect(result).toBe('awaiting-slot'); + }); + + it("returns 'awaiting-slot' when a queued job in waiting/active state matches the trio", async () => { + 
mockGetActiveWorkers.mockReturnValue([]); + // One job with matching extracted trio + mockGetJobs.mockResolvedValue([ + // biome-ignore lint/suspicious/noExplicitAny: BullMQ Job test fixture + { id: 'q-1', data: { type: 'linear' } } as any, + ]); + mockExtractProjectIdFromJob.mockResolvedValue('ucho'); + mockExtractWorkItemId.mockReturnValue('MNG-350'); + mockExtractAgentType.mockReturnValue('implementation'); + + const result = await classifyLockState(trio); + expect(result).toBe('awaiting-slot'); + }); + + it("returns 'wedged' when no active worker and no queued job matches", async () => { + mockGetActiveWorkers.mockReturnValue([]); + mockGetJobs.mockResolvedValue([]); + + const result = await classifyLockState(trio); + expect(result).toBe('wedged'); + }); + + it("returns 'wedged' when active workers and queued jobs exist but for a different trio", async () => { + mockGetActiveWorkers.mockReturnValue([ + { + jobId: 'job-other', + startedAt: new Date(), + projectId: 'ucho', + workItemId: 'MNG-999', + agentType: 'implementation', + }, + ]); + mockGetJobs.mockResolvedValue([ + // biome-ignore lint/suspicious/noExplicitAny: test fixture + { id: 'q-other', data: { type: 'linear' } } as any, + ]); + mockExtractProjectIdFromJob.mockResolvedValue('ucho'); + mockExtractWorkItemId.mockReturnValue('MNG-998'); + mockExtractAgentType.mockReturnValue('implementation'); + + const result = await classifyLockState(trio); + expect(result).toBe('wedged'); + }); + + it("returns 'awaiting-slot' (safe fallback) when the queue lookup throws", async () => { + mockGetActiveWorkers.mockReturnValue([]); + mockGetJobs.mockRejectedValue(new Error('redis hiccup')); + + const result = await classifyLockState(trio); + // Safe fallback: do NOT mis-emit the wedged-lock canary on classifier error. 
+ expect(result).toBe('awaiting-slot'); + }); +}); diff --git a/tests/unit/router/slot-waiter.test.ts b/tests/unit/router/slot-waiter.test.ts new file mode 100644 index 00000000..c46d779f --- /dev/null +++ b/tests/unit/router/slot-waiter.test.ts @@ -0,0 +1,133 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('../../../src/router/active-workers.js', () => ({ + getActiveWorkerCount: vi.fn(), +})); + +vi.mock('../../../src/router/config.js', () => ({ + routerConfig: { maxWorkers: 1 }, +})); + +vi.mock('../../../src/utils/logging.js', () => ({ + logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() }, +})); + +import { getActiveWorkerCount } from '../../../src/router/active-workers.js'; +import { routerConfig } from '../../../src/router/config.js'; +import { acquireSlot, clearAllWaiters, slotReleased } from '../../../src/router/slot-waiter.js'; + +const mockGetActiveWorkerCount = vi.mocked(getActiveWorkerCount); + +describe('slot-waiter', () => { + beforeEach(() => { + mockGetActiveWorkerCount.mockReset(); + clearAllWaiters(); + // Default: maxWorkers=1 + (routerConfig as { maxWorkers: number }).maxWorkers = 1; + }); + + afterEach(() => { + vi.useRealTimers(); + clearAllWaiters(); + }); + + it('resolves immediately when capacity is below max', async () => { + (routerConfig as { maxWorkers: number }).maxWorkers = 3; + mockGetActiveWorkerCount.mockReturnValue(1); + await expect(acquireSlot({ timeoutMs: 1000 })).resolves.toBeUndefined(); + }); + + it('suspends when at capacity, resolves when a slot frees', async () => { + mockGetActiveWorkerCount.mockReturnValue(1); + const acquired = acquireSlot({ timeoutMs: 5000 }); + + // One microtask turn — promise should still be pending + let settled = false; + void acquired.then(() => { + settled = true; + }); + await Promise.resolve(); + expect(settled).toBe(false); + + // Free up a slot — waiter resolves + slotReleased(); + await acquired; + expect(settled).toBe(true); + 
}); + + it('rejects with code SLOT_WAIT_TIMEOUT if no slot frees in time', async () => { + vi.useFakeTimers(); + mockGetActiveWorkerCount.mockReturnValue(1); + const acquired = acquireSlot({ timeoutMs: 50 }); + // Capture the rejection so it doesn't bubble as unhandled + const rejectionSpy = vi.fn(); + acquired.catch(rejectionSpy); + + await vi.advanceTimersByTimeAsync(60); + + expect(rejectionSpy).toHaveBeenCalledTimes(1); + const err = rejectionSpy.mock.calls[0][0]; + expect(err).toBeInstanceOf(Error); + expect((err as { code?: string }).code).toBe('SLOT_WAIT_TIMEOUT'); + }); + + it('multiple waiters resolve FIFO as slots free', async () => { + mockGetActiveWorkerCount.mockReturnValue(1); + const order: number[] = []; + const w1 = acquireSlot({ timeoutMs: 5000 }).then(() => order.push(1)); + const w2 = acquireSlot({ timeoutMs: 5000 }).then(() => order.push(2)); + const w3 = acquireSlot({ timeoutMs: 5000 }).then(() => order.push(3)); + + await Promise.resolve(); + expect(order).toEqual([]); + + slotReleased(); + await Promise.resolve(); + await Promise.resolve(); + expect(order).toEqual([1]); + + slotReleased(); + await Promise.resolve(); + await Promise.resolve(); + expect(order).toEqual([1, 2]); + + slotReleased(); + await Promise.all([w1, w2, w3]); + expect(order).toEqual([1, 2, 3]); + }); + + it('slotReleased called with no waiters is a no-op (does not throw)', () => { + expect(() => slotReleased()).not.toThrow(); + }); + + it('slotReleased does not double-release waiters when called multiple times in rapid succession', async () => { + mockGetActiveWorkerCount.mockReturnValue(1); + const acquired = acquireSlot({ timeoutMs: 5000 }); + const resolveSpy = vi.fn(); + acquired.then(resolveSpy); + + slotReleased(); + slotReleased(); // Extra release — must not double-resolve the same waiter + slotReleased(); + + await acquired; + // Microtask drain + await Promise.resolve(); + expect(resolveSpy).toHaveBeenCalledTimes(1); + }); + + it('clearAllWaiters rejects pending 
waiters with code SHUTDOWN', async () => { + mockGetActiveWorkerCount.mockReturnValue(1); + const acquired = acquireSlot({ timeoutMs: 5000 }); + const rejectionSpy = vi.fn(); + acquired.catch(rejectionSpy); + + clearAllWaiters(); + await Promise.resolve(); + await Promise.resolve(); + + expect(rejectionSpy).toHaveBeenCalledTimes(1); + const err = rejectionSpy.mock.calls[0][0]; + expect((err as { code?: string }).code).toBe('SHUTDOWN'); + }); +}); diff --git a/tests/unit/router/webhook-processor.test.ts b/tests/unit/router/webhook-processor.test.ts index 49373f70..3c7e4701 100644 --- a/tests/unit/router/webhook-processor.test.ts +++ b/tests/unit/router/webhook-processor.test.ts @@ -24,15 +24,23 @@ vi.mock('../../../src/router/action-dedup.js', () => ({ isDuplicateAction: vi.fn().mockReturnValue(false), markActionProcessed: vi.fn(), })); +vi.mock('../../../src/router/lock-state-classifier.js', () => ({ + classifyLockState: vi.fn().mockResolvedValue('awaiting-slot'), +})); +vi.mock('../../../src/sentry.js', () => ({ + captureException: vi.fn(), +})); import { isDuplicateAction, markActionProcessed } from '../../../src/router/action-dedup.js'; import { checkAgentTypeConcurrency } from '../../../src/router/agent-type-lock.js'; import type { RouterProjectConfig } from '../../../src/router/config.js'; +import { classifyLockState } from '../../../src/router/lock-state-classifier.js'; import type { RouterPlatformAdapter } from '../../../src/router/platform-adapter.js'; import type { CascadeJob } from '../../../src/router/queue.js'; import { addJob } from '../../../src/router/queue.js'; import { processRouterWebhook } from '../../../src/router/webhook-processor.js'; import { isWorkItemLocked, markWorkItemEnqueued } from '../../../src/router/work-item-lock.js'; +import { captureException } from '../../../src/sentry.js'; import type { TriggerRegistry } from '../../../src/triggers/registry.js'; const mockProject: RouterProjectConfig = { @@ -348,7 +356,7 @@ 
describe('processRouterWebhook', () => { expect(addJob).toHaveBeenCalled(); }); - it('skips job when work item is locked', async () => { + it("emits 'Awaiting worker slot' when lock held and classifier returns 'awaiting-slot' (spec 015/1)", async () => { const triggerResult = { agentType: 'implementation', agentInput: { cardId: 'card1' }, @@ -356,8 +364,9 @@ describe('processRouterWebhook', () => { }; vi.mocked(isWorkItemLocked).mockResolvedValueOnce({ locked: true, - reason: 'db: active run exists', + reason: 'in-memory same-type: 1 enqueued (max 1 per type)', }); + vi.mocked(classifyLockState).mockResolvedValueOnce('awaiting-slot'); const adapter = makeMockAdapter({ dispatchWithCredentials: vi.fn().mockResolvedValue(triggerResult), }); @@ -365,11 +374,93 @@ describe('processRouterWebhook', () => { const result = await processRouterWebhook(adapter, {}, mockTriggerRegistry); expect(result.shouldProcess).toBe(true); expect(result.projectId).toBe('p1'); - expect(result.decisionReason).toBe('Work item locked: db: active run exists'); + expect(result.decisionReason).toBe( + 'Awaiting worker slot: in-memory same-type: 1 enqueued (max 1 per type)', + ); expect(addJob).not.toHaveBeenCalled(); expect(adapter.postAck).not.toHaveBeenCalled(); }); + it("emits 'Work item locked (no active dispatch)' when classifier returns 'wedged' (spec 015/1)", async () => { + const triggerResult = { + agentType: 'implementation', + agentInput: { cardId: 'card1' }, + workItemId: 'card1', + }; + vi.mocked(isWorkItemLocked).mockResolvedValueOnce({ + locked: true, + reason: 'in-memory same-type: 1 enqueued (max 1 per type)', + }); + vi.mocked(classifyLockState).mockResolvedValueOnce('wedged'); + const adapter = makeMockAdapter({ + dispatchWithCredentials: vi.fn().mockResolvedValue(triggerResult), + }); + + const result = await processRouterWebhook(adapter, {}, mockTriggerRegistry); + expect(result.shouldProcess).toBe(true); + expect(result.decisionReason).toBe( + 'Work item locked (no active 
dispatch): in-memory same-type: 1 enqueued (max 1 per type)', + ); + // Wedged-lock canary fires a Sentry capture so the regression invariant + // is loud — see spec 015/1 AC #6. + expect(captureException).toHaveBeenCalledWith( + expect.any(Error), + expect.objectContaining({ + tags: expect.objectContaining({ source: 'wedged_lock_canary' }), + }), + ); + }); + + it('preserves existing log fields when work item is locked', async () => { + const triggerResult = { + agentType: 'implementation', + agentInput: { cardId: 'card1' }, + workItemId: 'card1', + }; + vi.mocked(isWorkItemLocked).mockResolvedValueOnce({ + locked: true, + reason: 'db: active run exists', + }); + vi.mocked(classifyLockState).mockResolvedValueOnce('awaiting-slot'); + const { logger } = await import('../../../src/utils/logging.js'); + vi.mocked(logger.info).mockClear(); + const adapter = makeMockAdapter({ + dispatchWithCredentials: vi.fn().mockResolvedValue(triggerResult), + }); + + await processRouterWebhook(adapter, {}, mockTriggerRegistry); + // Find the Skipping … log call. Existing structure pins these fields. 
+ const skipCall = vi + .mocked(logger.info) + .mock.calls.find((c) => String(c[0]).includes('work item already locked')); + expect(skipCall).toBeDefined(); + expect(skipCall?.[1]).toMatchObject({ + source: 'trello', + projectId: 'p1', + workItemId: 'card1', + blockedAgentType: 'implementation', + reason: 'db: active run exists', + }); + }); + + it('does not call classifyLockState when work item is not locked (perf invariant)', async () => { + vi.mocked(classifyLockState).mockClear(); + vi.mocked(isWorkItemLocked).mockResolvedValueOnce({ locked: false }); + const triggerResult = { + agentType: 'implementation', + agentInput: { cardId: 'card1' }, + workItemId: 'card1', + }; + vi.mocked(addJob).mockResolvedValueOnce('job-x'); + const adapter = makeMockAdapter({ + dispatchWithCredentials: vi.fn().mockResolvedValue(triggerResult), + }); + + await processRouterWebhook(adapter, {}, mockTriggerRegistry); + // Happy path must not pay the queue-lookup cost. + expect(classifyLockState).not.toHaveBeenCalled(); + }); + it('calls onBlocked when work item is locked', async () => { const onBlocked = vi.fn(); const triggerResult = { diff --git a/tests/unit/router/worker-manager.test.ts b/tests/unit/router/worker-manager.test.ts index f08c723f..fe604052 100644 --- a/tests/unit/router/worker-manager.test.ts +++ b/tests/unit/router/worker-manager.test.ts @@ -18,6 +18,24 @@ vi.mock('../../../src/router/container-manager.js', () => ({ stopOrphanCleanup: vi.fn(), })); +vi.mock('../../../src/router/slot-waiter.js', () => ({ + acquireSlot: vi.fn().mockResolvedValue(undefined), + clearAllWaiters: vi.fn(), +})); + +vi.mock('../../../src/router/dispatch-error-classifier.js', () => ({ + classifyDispatchError: vi.fn().mockReturnValue('transient'), +})); + +vi.mock('bullmq', () => ({ + UnrecoverableError: class extends Error { + constructor(message: string) { + super(message); + this.name = 'UnrecoverableError'; + } + }, +})); + vi.mock('../../../src/router/snapshot-cleanup.js', () => ({ 
startSnapshotCleanup: vi.fn(), stopSnapshotCleanup: vi.fn(), @@ -30,6 +48,7 @@ vi.mock('../../../src/router/config.js', () => ({ workerImage: 'test-worker:latest', workerMemoryMb: 512, workerTimeoutMs: 5000, + slotWaitTimeoutMs: 5 * 60 * 1000, dockerNetwork: 'test-network', }, })); @@ -57,6 +76,8 @@ import { startOrphanCleanup, stopOrphanCleanup, } from '../../../src/router/container-manager.js'; +import { classifyDispatchError } from '../../../src/router/dispatch-error-classifier.js'; +import { acquireSlot } from '../../../src/router/slot-waiter.js'; import { startSnapshotCleanup, stopSnapshotCleanup } from '../../../src/router/snapshot-cleanup.js'; import { startWorkerProcessor, @@ -77,6 +98,8 @@ const mockStopOrphanCleanup = vi.mocked(stopOrphanCleanup); const mockStartSnapshotCleanup = vi.mocked(startSnapshotCleanup); const mockStopSnapshotCleanup = vi.mocked(stopSnapshotCleanup); const mockLogger = vi.mocked(logger); +const mockAcquireSlot = vi.mocked(acquireSlot); +const mockClassifyDispatchError = vi.mocked(classifyDispatchError); // --------------------------------------------------------------------------- // Helpers @@ -176,19 +199,100 @@ describe('startWorkerProcessor', () => { expect(mockSpawnWorker).toHaveBeenCalledWith(fakeJob); }); - it('processFn throws when at capacity', async () => { + // REPLACED in spec 015/2: capacity miss now waits for a slot instead of + // throwing. The previous assertion `processFn throws when at capacity` + // is intentionally gone (per spec AC #9) — preserved here as an + // inverted test pinning the new contract. 
+ it('processFn awaits a slot when at capacity, then dispatches when one frees', async () => { startWorkerProcessor(); const cascadeJobsCall = mockCreateQueueWorker.mock.calls.find( (call) => call[0].queueName === 'cascade-jobs', ); - const processFn = cascadeJobsCall?.[0].processFn; + const processFn = cascadeJobsCall?.[0].processFn as (j: unknown) => Promise; + + // `acquireSlot` resolves once a slot is available — drive that here. + let resolveAcquire: () => void = () => {}; + mockAcquireSlot.mockImplementationOnce( + () => + new Promise((res) => { + resolveAcquire = res; + }), + ); - // At capacity - mockGetActiveWorkerCount.mockReturnValue(3); // equals maxWorkers + mockSpawnWorker.mockClear(); const fakeJob = { id: 'j2', data: { type: 'trello', projectId: 'p1' } }; - await expect(processFn(fakeJob)).rejects.toThrow('No worker slots available'); + const inflight = processFn(fakeJob); + + // Before the slot frees, spawnWorker must NOT have been called. + await Promise.resolve(); expect(mockSpawnWorker).not.toHaveBeenCalled(); + + // Free the slot — processFn proceeds to spawnWorker. + resolveAcquire(); + await inflight; + expect(mockSpawnWorker).toHaveBeenCalledWith(fakeJob); + }); + + it("processFn rejects with code 'SLOT_WAIT_TIMEOUT' when the wait exceeds the timeout", async () => { + startWorkerProcessor(); + + const cascadeJobsCall = mockCreateQueueWorker.mock.calls.find( + (call) => call[0].queueName === 'cascade-jobs', + ); + const processFn = cascadeJobsCall?.[0].processFn as (j: unknown) => Promise; + + const timeoutErr = Object.assign(new Error('Slot wait timed out'), { + code: 'SLOT_WAIT_TIMEOUT', + }); + mockAcquireSlot.mockRejectedValueOnce(timeoutErr); + // Slot timeout classifies as transient → propagates unchanged so + // BullMQ retries via attempts/backoff. 
+ mockClassifyDispatchError.mockReturnValueOnce('transient'); + + mockSpawnWorker.mockClear(); + const fakeJob = { id: 'j2', data: { type: 'trello', projectId: 'p1' } }; + await expect(processFn(fakeJob)).rejects.toMatchObject({ code: 'SLOT_WAIT_TIMEOUT' }); + expect(mockSpawnWorker).not.toHaveBeenCalled(); + }); + + it('processFn propagates a transient spawn error unchanged so BullMQ retries', async () => { + startWorkerProcessor(); + + const cascadeJobsCall = mockCreateQueueWorker.mock.calls.find( + (call) => call[0].queueName === 'cascade-jobs', + ); + const processFn = cascadeJobsCall?.[0].processFn as (j: unknown) => Promise; + + const transientErr = Object.assign(new Error('ECONNREFUSED docker.sock'), { + code: 'ECONNREFUSED', + }); + mockSpawnWorker.mockRejectedValueOnce(transientErr); + mockClassifyDispatchError.mockReturnValueOnce('transient'); + + const fakeJob = { id: 'j3', data: { type: 'trello', projectId: 'p1' } }; + await expect(processFn(fakeJob)).rejects.toBe(transientErr); + }); + + it('processFn wraps a terminal spawn error in UnrecoverableError so retries are skipped', async () => { + startWorkerProcessor(); + + const cascadeJobsCall = mockCreateQueueWorker.mock.calls.find( + (call) => call[0].queueName === 'cascade-jobs', + ); + const processFn = cascadeJobsCall?.[0].processFn as (j: unknown) => Promise; + + const terminalErr = Object.assign(new TypeError("Cannot read 'foo'"), {}); + mockSpawnWorker.mockRejectedValueOnce(terminalErr); + mockClassifyDispatchError.mockReturnValueOnce('terminal'); + + const fakeJob = { id: 'j4', data: { type: 'trello', projectId: 'p1' } }; + const rejectionSpy = vi.fn(); + await processFn(fakeJob).catch(rejectionSpy); + + expect(rejectionSpy).toHaveBeenCalledTimes(1); + const thrown = rejectionSpy.mock.calls[0][0]; + expect((thrown as Error).name).toBe('UnrecoverableError'); }); }); From c310f975af740ff6c623bd37f63df5f1e7eaecd7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 26 Apr 2026 20:47:09 +0200 Subject: [PATCH 5/8] chore(deps): bump postcss from 8.5.8 to 8.5.12 (#1204) Bumps [postcss](https://github.com/postcss/postcss) from 8.5.8 to 8.5.12. - [Release notes](https://github.com/postcss/postcss/releases) - [Changelog](https://github.com/postcss/postcss/blob/main/CHANGELOG.md) - [Commits](https://github.com/postcss/postcss/compare/8.5.8...8.5.12) --- updated-dependencies: - dependency-name: postcss dependency-version: 8.5.12 dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- package-lock.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index 7fa4b7e0..5a2888c4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9261,9 +9261,9 @@ } }, "node_modules/postcss": { - "version": "8.5.8", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.8.tgz", - "integrity": "sha512-OW/rX8O/jXnm82Ey1k44pObPtdblfiuWnrd8X7GJ7emImCOstunGbXUpp7HdBrFQX6rJzn3sPT397Wp5aCwCHg==", + "version": "8.5.12", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.12.tgz", + "integrity": "sha512-W62t/Se6rA0Az3DfCL0AqJwXuKwBeYg6nOaIgzP+xZ7N5BFCI7DYi1qs6ygUYT6rvfi6t9k65UMLJC+PHZpDAA==", "dev": true, "funding": [ { From 1fc5be67f0366f1e2a7a7065897a403e304ef342 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 26 Apr 2026 20:47:14 +0200 Subject: [PATCH 6/8] chore(deps): bump uuid, bullmq and dockerode (#1192) Removes [uuid](https://github.com/uuidjs/uuid). It's no longer used after updating ancestor dependencies [uuid](https://github.com/uuidjs/uuid), [bullmq](https://github.com/taskforcesh/bullmq) and [dockerode](https://github.com/apocas/dockerode). These dependencies need to be updated together. 
Removes `uuid` Updates `bullmq` from 5.72.0 to 5.76.2 - [Release notes](https://github.com/taskforcesh/bullmq/releases) - [Commits](https://github.com/taskforcesh/bullmq/compare/v5.72.0...v5.76.2) Updates `dockerode` from 4.0.10 to 5.0.0 - [Release notes](https://github.com/apocas/dockerode/releases) - [Commits](https://github.com/apocas/dockerode/compare/v4.0.10...v5.0.0) --- updated-dependencies: - dependency-name: uuid dependency-version: dependency-type: indirect - dependency-name: bullmq dependency-version: 5.76.2 dependency-type: direct:production - dependency-name: dockerode dependency-version: 5.0.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- package-lock.json | 49 ++++++++++++++--------------------------------- package.json | 4 ++-- 2 files changed, 16 insertions(+), 37 deletions(-) diff --git a/package-lock.json b/package-lock.json index 5a2888c4..c953ab70 100644 --- a/package-lock.json +++ b/package-lock.json @@ -22,10 +22,10 @@ "@types/archiver": "^7.0.0", "archiver": "^7.0.1", "bcrypt": "^6.0.0", - "bullmq": "^5.66.4", + "bullmq": "^5.76.2", "chalk": "^5.4.1", "diff-match-patch": "^1.0.5", - "dockerode": "^4.0.9", + "dockerode": "^5.0.0", "drizzle-orm": "^0.45.1", "eta": "^4.5.0", "execa": "^9.6.1", @@ -4890,9 +4890,9 @@ } }, "node_modules/bullmq": { - "version": "5.72.0", - "resolved": "https://registry.npmjs.org/bullmq/-/bullmq-5.72.0.tgz", - "integrity": "sha512-1Wmfym7bC8BFxDjKcF4iZNZmqXYo0rgPFlxfi8ET3AaP/vOY/MY33iWsWqAKwe8v/QO/8osipjwTAcFB7egINA==", + "version": "5.76.2", + "resolved": "https://registry.npmjs.org/bullmq/-/bullmq-5.76.2.tgz", + "integrity": "sha512-kkNU6TPAjqV3Ep0kIaYhT79Z2IMoA7vadqjmr/zvmPicg0K/cOAecqZTihD726LbI043yPU0MBv/nMQmd5rNIg==", "license": "MIT", "dependencies": { "cron-parser": "4.9.0", @@ -4900,8 +4900,10 @@ "msgpackr": "1.11.5", "node-abort-controller": "3.1.1", "semver": "7.7.4", - "tslib": "2.8.1", - 
"uuid": "11.1.0" + "tslib": "2.8.1" + }, + "engines": { + "node": ">=12.22.0" } }, "node_modules/bundle-name": { @@ -5826,9 +5828,9 @@ } }, "node_modules/dockerode": { - "version": "4.0.10", - "resolved": "https://registry.npmjs.org/dockerode/-/dockerode-4.0.10.tgz", - "integrity": "sha512-8L/P9JynLBiG7/coiA4FlQXegHltRqS0a+KqI44P1zgQh8QLHTg7FKOwhkBgSJwZTeHsq30WRoVFLuwkfK0YFg==", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/dockerode/-/dockerode-5.0.0.tgz", + "integrity": "sha512-C52mvJ+7lcyhWNfrzVfFsbTrBfy/ezE9FGEYLpu17FUeBcCkxERk9nN7uDl/478ynDiQ4U+5DbQC2vENHkVEtQ==", "license": "Apache-2.0", "dependencies": { "@balena/dockerignore": "^1.0.2", @@ -5836,22 +5838,10 @@ "@grpc/proto-loader": "^0.7.13", "docker-modem": "^5.0.7", "protobufjs": "^7.3.2", - "tar-fs": "^2.1.4", - "uuid": "^10.0.0" + "tar-fs": "^2.1.4" }, "engines": { - "node": ">= 8.0" - } - }, - "node_modules/dockerode/node_modules/uuid": { - "version": "10.0.0", - "funding": [ - "https://github.com/sponsors/broofa", - "https://github.com/sponsors/ctavan" - ], - "license": "MIT", - "bin": { - "uuid": "dist/bin/uuid" + "node": ">= 14.17" } }, "node_modules/dom-accessibility-api": { @@ -10718,17 +10708,6 @@ "version": "1.0.2", "license": "MIT" }, - "node_modules/uuid": { - "version": "11.1.0", - "funding": [ - "https://github.com/sponsors/broofa", - "https://github.com/sponsors/ctavan" - ], - "license": "MIT", - "bin": { - "uuid": "dist/esm/bin/uuid" - } - }, "node_modules/vary": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", diff --git a/package.json b/package.json index e3350543..888931e9 100644 --- a/package.json +++ b/package.json @@ -67,10 +67,10 @@ "@types/archiver": "^7.0.0", "archiver": "^7.0.1", "bcrypt": "^6.0.0", - "bullmq": "^5.66.4", + "bullmq": "^5.76.2", "chalk": "^5.4.1", "diff-match-patch": "^1.0.5", - "dockerode": "^4.0.9", + "dockerode": "^5.0.0", "drizzle-orm": "^0.45.1", "eta": "^4.5.0", "execa": "^9.6.1", From 
1ced61bcc302e6dfbf50acfc30798b156c689541 Mon Sep 17 00:00:00 2001 From: Zbigniew Sobiecki Date: Mon, 27 Apr 2026 00:24:54 +0200 Subject: [PATCH 7/8] spec 016: PM image delivery reliability (#1209) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs(spec/plans): add spec 016 + plans for PM image delivery reliability Closes the silent screenshot-drop bug class verified live on 2026-04-26 (ucho/MNG-357): Linear's user-pasted-image URLs (uploads.linear.app/ with no file extension) were dropped at the pre-download MIME filter because mimeTypeFromUrl returned 'application/octet-stream' and filterImageMedia excluded them. This affected all engines on the disk-write path, regardless of PR #948's Claude-Code SDK delivery fix. Three plans, safety-net-first sequencing matching spec 015: - Plan 1 (boot-path-mime-fix-and-diagnostic-log): defers MIME authority to download response Content-Type via image/* wildcard sentinel; adds the grep-stable diagnostic log line at extract time. Independently fixes MNG-357. - Plan 2 (runtime-gadget-image-delivery): makes the runtime cascade-tools pm read-work-item gadget actually download + write images to disk with file paths returned in text. Closes the mid-run pickup gap. Depends on Plan 1's shared download-and-prepare helper. - Plan 3 (linear-fixture-and-extraction-coverage): captures a Linear GraphQL Issue payload fixture for an issue with a pasted screenshot; pins extraction with a regression test that fails loudly if Linear ever changes the payload shape. Mostly tests + docs. 9 ACs, 0 manual-only. CLAUDE.md not updated (already covered by spec 015's silent-failure → diagnostic-line pattern). Co-Authored-By: Claude Opus 4.7 (1M context) * chore(plan): lock 016/1 boot-path-mime-fix-and-diagnostic-log * feat(pm): plan 016/1 done — boot-path image MIME fix + diagnostic log Closes the silent screenshot-drop bug class verified live on 2026-04-26 (ucho/MNG-357). 
Linear's extension-less pasted-image URLs (uploads.linear.app/) now survive the pre-download MIME filter via an image/* wildcard sentinel. The download response's Content-Type header is the authoritative MIME — wildcard is resolved before bytes are written. What landed: - src/pm/media.ts — new IMAGE_HOST_ALLOWLIST (currently 'uploads.linear.app'); mimeTypeFromUrl returns 'image/*' for extension-less URLs from allowlisted hosts; isImageMimeType accepts the wildcard. - src/pm/download-and-prepare.ts (new) — shared helper for the per-provider download dispatch loop (jira/linear/trello). Returns { images, failures }. Spec 016/2's runtime gadget will import this. - src/agents/definitions/contextSteps.ts — fetchWorkItemStep refactored to use the shared helper; emits the new grep-stable diagnostic line '[image-pipeline] work-item-fetch summary' with stable fields: { provider, workItemId, urlsDetected, urlsAfterFilter, urlsDownloaded, urlsFailed, urlsByMimeType }. Tests: - 6 new unit tests in tests/unit/pm/media.test.ts (wildcard sentinel, Linear extension-less, regression for extensioned + non-PM URLs) - 7 new unit tests in tests/unit/pm/download-and-prepare.test.ts - 3 new diagnostic-log tests in contextSteps.test.ts; existing log message expectations updated to the new helper-prefix - 3 module-integration tests in tests/integration/pm/image-pipeline.test.ts pinning the MNG-357 reproduction end-to-end with real mimeTypeFromUrl + filterImageMedia + extractMarkdownImages PR #948's Claude-Code initial-input ImageBlockParam path is unchanged; existing regression test (claude-code.test.ts:939 'logs image injection and strips images before buildTaskPrompt') confirms. Docs: - CHANGELOG.md entry under Unreleased. - src/integrations/README.md gains a new 'Image delivery contract' section documenting the shared resolution path, allowlist semantics, diagnostic log line schema, and the rule that providers shouldn't write their own MIME-detection. 
Full unit suite: 8521 passed / 23 skipped / 0 failed. Lint + typecheck clean. Co-Authored-By: Claude Opus 4.7 (1M context) * chore(plan): lock 016/2 runtime-gadget-image-delivery * feat(pm): plan 016/2 done — runtime gadget delivers images on disk Closes the mid-run image pickup gap from spec 016. The runtime gadget `cascade-tools pm read-work-item` now downloads any image media and writes it to .cascade/context/images/work-item--img-., returning text whose new "Local Image Files" section lists actual file paths the agent's file-read tool can consume. What landed: - src/gadgets/pm/core/writeRuntimeImages.ts (new) — writes ContextImage arrays to .cascade/context/images/ with stable naming convention (work-item--img-.); extension derived from resolved MIME; falls back to .bin + warn log for unresolved image/* sentinel. - src/gadgets/pm/core/readWorkItem.ts — readWorkItem now calls downloadAndPrepareImages (Plan 1's helper) + writeRuntimeImages (this plan), then mutates the returned text to include the local file paths via formatRuntimeImagePaths. Same diagnostic log line '[image-pipeline] work-item-fetch summary' as the boot path. Failed downloads surface in a "Failed Image Downloads" subsection. Tests: - 8 new unit tests in tests/unit/gadgets/pm/core/writeRuntimeImages.test.ts - 5 new unit tests in tests/unit/gadgets/pm/core/readWorkItem.test.ts (spec 016/2 sub-describe) - 4 new module-integration tests in tests/integration/gadgets/runtime-image-delivery.test.ts pinning the mid-run pickup contract end-to-end. CHANGELOG.md entry added. Full unit suite (single-fork): 8534 passed / 23 skipped / 0 failed. Lint + typecheck clean. Three PM manifest test suites occasionally time out under parallel load on this machine — verified to pass in isolation; not a code regression. 
Co-Authored-By: Claude Opus 4.7 (1M context) * chore(plan): lock 016/3 linear-fixture-and-extraction-coverage * test(pm): plan 016/3 done — Linear fixture + extraction-coverage regression Closes spec 016 with the regression net for the contract Plans 1+2 established. If Linear ever changes its Issue payload shape in a way that loses inline images, the extraction-coverage test fails loudly with a specific URL-missing message. What landed: - tests/fixtures/linear-issue-with-screenshot.json (new) — reconstructed Linear GraphQL Issue payload covering: extension-less uploads.linear.app URL in description, extensioned Linear URL with alt text, external URL with image/svg+xml MIME, non-image markdown link (must NOT be picked up), one comment with a pasted screenshot, one comment without, and three formal Attachment records (Slack/GitHub/Sentry link previews). - tests/unit/pm/linear/extraction-coverage.test.ts (new) — 9 tests: description coverage with explicit expected-URL list, image/* sentinel for extension-less, concrete MIME for extensioned, image/svg+xml for external SVG, non-image link exclusion, comment coverage, comment source field, attachment-NOT-leaked rule, meta-test of regression net. - src/integrations/README.md — new "Linear: GraphQL surface for inline images" subsection documenting the conclusion: Issue.description markdown is canonical for inline-pasted screenshots; Issue.attachments is for formal Attachment records (link previews) and is the wrong surface for inline images. Links to the fixture and the test. No production code change — Plan 1's mimeTypeFromUrl + extractMarkdownImages already cover the cases. This plan ships the regression armor. CHANGELOG.md entry added. Lint + typecheck clean. 9/9 unit tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) * chore(spec): 016 done — pm-image-delivery-reliability, all plans complete Closes the silent screenshot-drop bug class verified live on 2026-04-26 (ucho/MNG-357). 
Plan 1 added the Linear-extension-less MIME wildcard sentinel + diagnostic log line; plan 2 made the runtime cascade-tools pm read-work-item gadget actually deliver images on disk; plan 3 captured a Linear GraphQL fixture and pinned extraction coverage with a regression test. CLAUDE.md untouched by this spec — already covered by spec 015's broader silent-failure → diagnostic-line pattern. Co-Authored-By: Claude Opus 4.7 (1M context) * fix: address code review concerns * test(image-pipeline): supply urlsDetected in readWorkItemWithMedia mocks The diagnostic-line assertion expected urlsDetected on the log payload, but the mocked readWorkItemWithMedia return values omitted it, so the field arrived as undefined and the toHaveBeenCalledWith match failed. Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: Claude Opus 4.7 (1M context) Co-authored-by: Cascade Bot --- CHANGELOG.md | 3 + ...t-path-mime-fix-and-diagnostic-log.md.done | 211 ++++++++++++++++++ .../2-runtime-gadget-image-delivery.md.done | 187 ++++++++++++++++ ...ar-fixture-and-extraction-coverage.md.done | 163 ++++++++++++++ .../_coverage.md | 48 ++++ .../016-pm-image-delivery-reliability.md.done | 129 +++++++++++ src/agents/definitions/contextSteps.ts | 91 +++----- src/gadgets/pm/core/readWorkItem.ts | 98 +++++++- src/gadgets/pm/core/writeRuntimeImages.ts | 129 +++++++++++ src/integrations/README.md | 54 +++++ src/pm/download-and-prepare.ts | 96 ++++++++ src/pm/media.ts | 42 +++- .../linear-issue-with-screenshot.json | 97 ++++++++ .../gadgets/runtime-image-delivery.test.ts | 202 +++++++++++++++++ tests/integration/pm/image-pipeline.test.ts | 157 +++++++++++++ .../agents/definitions/contextSteps.test.ts | 90 +++++++- .../unit/gadgets/pm/core/readWorkItem.test.ts | 180 ++++++++++++++- .../pm/core/writeRuntimeImages.test.ts | 140 ++++++++++++ tests/unit/pm/download-and-prepare.test.ts | 162 ++++++++++++++ .../pm/linear/extraction-coverage.test.ts | 157 +++++++++++++ tests/unit/pm/media.test.ts 
| 40 ++++ 21 files changed, 2399 insertions(+), 77 deletions(-) create mode 100644 docs/plans/016-pm-image-delivery-reliability/1-boot-path-mime-fix-and-diagnostic-log.md.done create mode 100644 docs/plans/016-pm-image-delivery-reliability/2-runtime-gadget-image-delivery.md.done create mode 100644 docs/plans/016-pm-image-delivery-reliability/3-linear-fixture-and-extraction-coverage.md.done create mode 100644 docs/plans/016-pm-image-delivery-reliability/_coverage.md create mode 100644 docs/specs/016-pm-image-delivery-reliability.md.done create mode 100644 src/gadgets/pm/core/writeRuntimeImages.ts create mode 100644 src/pm/download-and-prepare.ts create mode 100644 tests/fixtures/linear-issue-with-screenshot.json create mode 100644 tests/integration/gadgets/runtime-image-delivery.test.ts create mode 100644 tests/integration/pm/image-pipeline.test.ts create mode 100644 tests/unit/gadgets/pm/core/writeRuntimeImages.test.ts create mode 100644 tests/unit/pm/download-and-prepare.test.ts create mode 100644 tests/unit/pm/linear/extraction-coverage.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index cc05c2f8..c57b92da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ All notable user-visible changes to CASCADE are documented here. The format is l ### Changed +- **PM image delivery: Linear GraphQL fixture + extraction-coverage regression test** (spec 016, plan 3 of 3). Captures a reconstructed Linear `Issue` GraphQL payload at `tests/fixtures/linear-issue-with-screenshot.json` containing extension-less and extensioned inline-pasted images (description + comment bodies) plus formal Attachment records (Slack/GitHub/Sentry link previews) that must NOT be mistaken for inline images. The unit test at `tests/unit/pm/linear/extraction-coverage.test.ts` pins the contract and fails loudly with a specific URL-missing message if Linear ever changes its payload shape in a way that loses inline images. 
Documents the conclusion in `src/integrations/README.md`: `Issue.description` markdown is canonical for Linear inline images; `Issue.attachments` is the wrong surface (formal Attachment records, not pastes). No production code change — this plan ships the regression net for the contract Plans 1+2 established. See [spec 016](docs/specs/016-pm-image-delivery-reliability.md). +- **PM image delivery: runtime `cascade-tools pm read-work-item` gadget now delivers images on disk** (spec 016, plan 2 of 3). The runtime gadget that agents call mid-run for a work item used to return text only — its "Pre-fetched Images" section listed URL refs but no local file paths, so an agent that needed to re-read a work item (e.g. after a teammate added a screenshot) had no way to actually see the new image. After this plan, the gadget downloads any image media present and writes it to `.cascade/context/images/work-item--img-.` (extension derived from the resolved Content-Type MIME), then returns text whose new "Local Image Files" section lists actual file paths the agent's file-read tool can consume. Failed downloads are surfaced in a "Failed Image Downloads" subsection so they're never silently dropped. Same diagnostic log line as the boot path (`[image-pipeline] work-item-fetch summary`) — operators see consistent shape across boot and runtime fetches. Closes the mid-run pickup gap. See [spec 016](docs/specs/016-pm-image-delivery-reliability.md). +- **PM image delivery: extension-less Linear pasted-image URLs are no longer dropped at the pre-download MIME filter** (spec 016, plan 1 of 3). Linear's `https://uploads.linear.app/` URLs (with no file extension in the pathname) used to fall through `mimeTypeFromUrl` to `application/octet-stream` and were silently filtered out by `filterImageMedia` before the download loop ran. 
The fix introduces an `image/*` wildcard sentinel for trusted PM-provider upload hosts (allowlisted by hostname); `isImageMimeType` now accepts the wildcard, and the download response's `Content-Type` header resolves it to a concrete MIME (`image/png`, etc.) before any image is written. The shared `downloadAndPrepareImages` helper consolidates the per-provider download dispatch (jira/linear/trello) so both the boot-path and the runtime gadget (spec 016 plan 2) share one code path. Adds AC#5's grep-stable diagnostic line — `[image-pipeline] work-item-fetch summary` — emitted once per work-item-fetch with stable fields (`provider`, `workItemId`, `urlsDetected`, `urlsAfterFilter`, `urlsDownloaded`, `urlsFailed`, `urlsByMimeType`). Closes the silent screenshot-drop bug class verified live on 2026-04-26 (ucho/MNG-357). See [spec 016](docs/specs/016-pm-image-delivery-reliability.md). - **Router dispatch capacity now waits for a slot; transient Docker errors retry; terminal errors fail fast** (spec 015, plan 2 of 2). Replaces `guardedSpawn`'s synchronous "No worker slots available" throw with an in-process slot-waiter (default 5min timeout, configurable via `SLOT_WAIT_TIMEOUT_MS`). Adds a dispatch-error classifier that splits transient (`ECONNREFUSED` / `ECONNRESET` / `ENOTFOUND` / HTTP 429 / container-name 409 / `SLOT_WAIT_TIMEOUT`) from terminal (`TypeError` / `ZodError` / image-not-found-after-fallback). Both `cascade-jobs` and `cascade-dashboard-jobs` queue defaults now specify `attempts: 4` with `backoff: { type: 'exponential', delay: 5000 }` (~75s total before exhaustion). Terminal errors are wrapped in BullMQ's `UnrecoverableError` so retries skip. Combined with plan 015/1, the original silent black-hole failure mode (verified live on 2026-04-26 via ucho/MNG-350) is fully closed: no more lost jobs on transient capacity misses or Docker hiccups, no more wedged locks. CLAUDE.md updated with the new "Dispatch failure semantics" passage. 
See [spec 015](docs/specs/015-router-job-dispatch-failure-recovery.md). - **Router dispatch failures now release in-memory locks via the BullMQ failed event** (spec 015, plan 1 of 2). Hooks `worker.on('failed')` on both `cascade-jobs` and `cascade-dashboard-jobs` queues to call a new `releaseLocksForFailedJob` compensator that releases the work-item lock, agent-type concurrency counter, and recently-dispatched dedup mark for any job whose dispatch fails. Closes the stranded-lock half of the prod incident verified on 2026-04-26 (ucho/MNG-350): a transient capacity miss was leaving the in-memory work-item lock wedged for 30 minutes, silently rejecting subsequent webhooks for the same trio. Also splits the webhook decision-reason vocabulary into three states — `Job queued` (success), `Awaiting worker slot: …` (in-flight, healthy), `Work item locked (no active dispatch): …` (wedged-lock canary, fires a Sentry capture tagged `wedged_lock_canary` so any regression in compensation is loud). Plan 2 closes the lost-job half (wait-for-slot, retry budget, error classifier). See [spec 015](docs/specs/015-router-job-dispatch-failure-recovery.md). - **`cascade-tools scm create-pr-review`: `--comment` alias + `--comments-file` escape hatch** (spec 014, plan 2 of 2). The command now accepts `--comment` (singular) as an alias for `--comments` — the exact muscle-memory mistake from prod run 5d993b04 now resolves correctly. Added `--comments-file ` (and `-` for stdin) as a JSON-parsed file alternative for long payloads that don't survive shell quoting. Zero edits to shared infrastructure (cliCommandFactory, manifestGenerator, nativeToolPrompts, errorEnvelope) — the two declarative fields on `createPRReviewDef.parameters.comments.cliAliases` + `createPRReviewDef.cli.fileInputAlternatives` are everything. Proves spec 014's single-entrypoint invariant: a new or evolved gadget should never need to touch shared machinery. See [spec 014](docs/specs/014-cascade-tools-agent-ergonomics.md). 
diff --git a/docs/plans/016-pm-image-delivery-reliability/1-boot-path-mime-fix-and-diagnostic-log.md.done b/docs/plans/016-pm-image-delivery-reliability/1-boot-path-mime-fix-and-diagnostic-log.md.done new file mode 100644 index 00000000..7bc75cee --- /dev/null +++ b/docs/plans/016-pm-image-delivery-reliability/1-boot-path-mime-fix-and-diagnostic-log.md.done @@ -0,0 +1,211 @@ +--- +id: 016 +slug: pm-image-delivery-reliability +plan: 1 +plan_slug: boot-path-mime-fix-and-diagnostic-log +level: plan +parent_spec: docs/specs/016-pm-image-delivery-reliability.md +depends_on: [] +status: done +--- + +# 016/1: Boot-path MIME fix + diagnostic log line + +> Part 1 of 3 in the 016-pm-image-delivery-reliability plan. See [parent spec](../../specs/016-pm-image-delivery-reliability.md). + +## Summary + +This plan fixes the MNG-357 root cause — Linear's extension-less pasted-image URLs (`https://uploads.linear.app//`) get dropped at the pre-download MIME filter because `mimeTypeFromUrl` returns `application/octet-stream` and `filterImageMedia` excludes anything that isn't an image MIME. The fix defers MIME authority to the download response's `Content-Type` header by introducing an `image/*` wildcard sentinel that survives the filter and is resolved to a concrete MIME at download time. + +It also adds the diagnostic log line that AC#5 requires. Today the only image-pipeline log is the post-download `fetchWorkItemStep: image download complete` summary; the upstream extract-and-filter step is invisible. After this plan ships, every work-item-fetch emits ONE structured log line at `INFO` level with a stable shape: provider, work-item-id, urls-detected, urls-after-filter, urls-downloaded, urls-failed. An operator can grep for it and triage any "no image delivered" report from that line alone. + +This plan does NOT change the runtime read-work-item gadget (Plan 2) and does NOT add the Linear GraphQL fixture (Plan 3). 
It also does NOT touch PR #948's Claude-Code initial-input ImageBlockParam path — but a regression test pins that path so Plan 2 and beyond can't accidentally break it. + +**Components delivered:** +- New behavior in `src/pm/media.ts`: extension-less URLs that PM providers commonly produce (Linear's `uploads.linear.app/` shape) are tagged with `mimeType: 'image/*'` instead of `application/octet-stream`. The wildcard is added to `IMAGE_MIME_TYPES`-equivalent acceptance in `isImageMimeType` (a single check). +- New extraction-step diagnostic log line (one INFO log per call), emitted from a new helper that wraps the existing extract → filter → download → write pipeline. Format pinned by test. +- Regression tests: extension-less Linear URL flows end-to-end (extract → filter → download → write); extension-bearing Trello/JIRA URLs still work (no regression); the new diagnostic log shape is asserted; PR #948's Claude-Code ImageBlockParam path is pinned by a regression test that fails loudly if the boot-path image stripping behavior changes. +- A small refactor extracting the existing download-and-base64 loop in `fetchWorkItemStep` (`src/agents/definitions/contextSteps.ts:107-153`) into a shared module-internal helper. Plan 2 will import this helper for the runtime gadget; Plan 1 only refactors and consumes from one site. + +**Deferred to later plans in this spec:** +- The runtime read-work-item gadget that delivers images mid-run (Plan 2). +- The Linear GraphQL fixture + extraction-coverage regression test (Plan 3). +- `src/integrations/README.md`'s Linear-specific GraphQL surface confirmation (Plan 3). 
+ +--- + +## Spec ACs satisfied by this plan + +- Spec AC #1 (Linear screenshot on-disk for all engines, boot path) — **full** +- Spec AC #2 (Trello/JIRA images regression-safe) — **full** (regression tests) +- Spec AC #5 (single grep-stable diagnostic log line) — **partial** (boot path here; runtime path same format in Plan 2) +- Spec AC #6 (PR #948 Claude-Code path untouched) — **full** (regression test pins it) +- Spec AC #8 (new-provider invariant preserved) — **full** (no provider-specific code added in shared resolution) +- Spec AC #9 (MNG-357 end-to-end reproduces clean) — **full** + +--- + +## Depends On + +None. This plan is the foundation of the spec. + +--- + +## Detailed Task List (TDD) + +### 1. Extension-less PM URL detection + `image/*` wildcard sentinel + +**Tests first** (`tests/unit/pm/media.test.ts` — extend existing file): + +- `mimeTypeFromUrl — returns 'image/*' for extension-less Linear URL` — unit — input `https://uploads.linear.app/abc-123-def-456` (no extension); assert returns `'image/*'`. Expected red: `AssertionError: expected 'application/octet-stream' to be 'image/*'`. +- `mimeTypeFromUrl — returns 'image/png' for extensioned PM URL (regression-safe)` — unit — input `https://example.com/foo.png`; assert returns `'image/png'`. Expected red: passes already (regression pin); fails only if we accidentally regress the extensioned path. +- `mimeTypeFromUrl — returns 'application/octet-stream' for non-PM extension-less URL (no over-broad behavior)` — unit — input `https://example.com/random-file`; assert returns `'application/octet-stream'`. Expected red: passes already; fails if we over-broaden the wildcard logic to all extension-less URLs (which would degrade observability). +- `mimeTypeFromUrl — returns 'image/*' for extension-less Linear comment-pasted URL` — unit — input `https://uploads.linear.app//Screenshot 2026-04-26 at 10.30.png` (some Linear URLs DO have extensions but with spaces; some don't); covers the no-extension fallback. 
Expected red: same as test 1 if the path has no `.` segment. +- `isImageMimeType — accepts the 'image/*' wildcard sentinel` — unit — input `'image/*'`; assert returns `true`. Expected red: `AssertionError: expected false to be true` (`IMAGE_MIME_TYPES` set doesn't contain `'image/*'`). +- `isImageMimeType — preserves existing strict acceptance for known image MIMEs` — unit — input `'image/png'` returns true, `'application/pdf'` returns false. Regression pin. + +**Implementation** (`src/pm/media.ts`): +- Add `IMAGE_HOST_ALLOWLIST` constant: `Set<string>` containing `'uploads.linear.app'`. (Could grow to include trusted hosts for other providers if they prove to need it; today Linear is the only one.) Documented inline comment: this list represents trusted PM-provider upload hosts whose extension-less URLs we should treat as candidate images and resolve at download time. +- Modify `mimeTypeFromUrl(url)`: after computing `ext` from the pathname, if the resolved MIME is `'application/octet-stream'` AND the URL's hostname is in `IMAGE_HOST_ALLOWLIST`, return `'image/*'` instead. +- Modify `isImageMimeType(mime)`: accept `'image/*'` (the wildcard) in addition to the existing concrete-MIME set. +- Do NOT change `filterImageMedia` — it already calls `isImageMimeType`, so widening that predicate flows through. +- Do NOT add a HEAD request; resolve at GET (the existing `downloadMedia` path). + +### 2.
Diagnostic log line at extract-time + +**Tests first** (`tests/unit/agents/definitions/contextSteps.test.ts` — extend existing or create new file if absent): + +- `fetchWorkItemStep — emits diagnostic log line with extracted, post-filter, downloaded, failed counts` — unit — mock `readWorkItemWithMedia` to return 3 image refs (mix of extensioned + extension-less); mock `downloadMedia` to succeed for 2, fail for 1; assert exactly ONE INFO-level log call matches `'work-item-fetch image pipeline'` (or whatever the agreed prefix becomes) with structured fields `{ provider, workItemId, urlsDetected: 3, urlsAfterFilter: 3, urlsDownloaded: 2, urlsFailed: 1 }`. Expected red: `expected logger.info to have been called with object containing 'work-item-fetch image pipeline'` — today no such log exists. +- `fetchWorkItemStep — log line emitted even when no images are present (urlsDetected: 0)` — unit — mock returns no images; assert the log line still fires with all-zero counts. Why: an operator triaging "no image delivered" reports needs to see "0 detected upstream" as positive confirmation, not absence-of-log. Expected red: same. +- `fetchWorkItemStep — log fields include the post-resolve mime distribution` — unit — mock returns 1 extensioned PNG and 1 extension-less Linear; downloads both; assert log includes `urlsByMimeType: { 'image/png': 1, 'image/*': 1 }` (or similar — the test pins the field name and shape, not the exact map keys). Expected red: same. +- `fetchWorkItemStep — log line is INFO level (not DEBUG)` — unit — assert the call is on `logger.info`, not `logger.debug`. Why: AC#5 requires "single grep-stable line in the cascade run log surface" — DEBUG is filtered out by default. Expected red: passes if log doesn't exist (vacuous), fails right reason if implementation uses wrong level. 
+ +**Implementation** (`src/agents/definitions/contextSteps.ts`): +- Add a structured log call inside `fetchWorkItemStep`, AFTER the download Promise.all resolves and BEFORE the function returns. Exact shape: + ``` + logger.info('[image-pipeline] work-item-fetch summary', { + provider: <provider>, + workItemId, + urlsDetected: <count>, + urlsAfterFilter: <count>, + urlsDownloaded: <count>, + urlsFailed: <count>, + urlsByMimeType: <map of mime → count>, + }); + ``` +- The provider type is available via `getPMProviderOrNull()?.type`. +- `urlsByMimeType` is built from the resolved `DownloadMediaResult.mimeType` for successes; failures contribute to a separate `failuresByReason` field if useful (defer to test contract). +- The log line is grep-stable via the literal prefix `[image-pipeline] work-item-fetch summary`. Document this in the integrations README (see Doc Impact below). + +### 3. Refactor: extract download-and-base64 loop into a shared helper + +**Tests first** (`tests/unit/pm/download-and-prepare.test.ts` — new file): + +- `downloadAndPrepareImages — downloads each ref, returns success array + failure array` — unit — pass 3 refs; mock `downloadMedia` per-provider; assert returned shape `{ images: [...], failures: [{url, reason}] }`. Expected red: module not found. +- `downloadAndPrepareImages — preserves base64 + altText + mimeType from the resolved download` — unit — pass extension-less Linear ref; mock download to return `{ buffer, mimeType: 'image/png' }`; assert resulting image's mimeType is `'image/png'` (NOT `'image/*'` — the wildcard was resolved). Expected red: module not found. +- `downloadAndPrepareImages — caps at MAX_IMAGES_PER_WORK_ITEM` — unit — pass 12 refs, MAX_IMAGES_PER_WORK_ITEM=10; assert only 10 attempted. Expected red: module not found. +- `downloadAndPrepareImages — picks per-provider download client (jiraClient / linearClient / trelloClient)` — unit — set provider to 'linear'; assert `linearClient.downloadAttachment` is the one called. Expected red: module not found.
+ +**Implementation** (`src/pm/download-and-prepare.ts` — new file): +- Function signature: `downloadAndPrepareImages(workItemId: string, media: MediaReference[], logWriter: LogWriter): Promise<{ images: ContextInjectionImage[]; failures: { url: string; reason: string }[] }>`. +- Lifts the loop currently at `src/agents/definitions/contextSteps.ts:107-153` into this module unchanged (same provider-dispatch, same Promise.all, same per-failure WARN log). +- The new diagnostic log (task 2 above) calls this helper, then emits the summary. +- Plan 2 will import this helper for the runtime gadget. This plan only consumes it from `fetchWorkItemStep` — the runtime gadget consumer is deliberately out of scope here. + +**Reuse + refactor**: After the helper exists, modify `fetchWorkItemStep` to call it instead of the inline loop. The diagnostic log (task 2) is wired around the helper call. Same external behavior; same returned shape. + +### 4. Regression test: PR #948's Claude-Code ImageBlockParam path stays untouched + +**Tests first** (`tests/unit/backends/claude-code/image-injection.test.ts` — extend existing PR #948 test file or create if absent): + +- `Claude Code backend — initial-input image-strip-before-buildTaskPrompt invariant holds` — unit — feed a `ContextInjection` with `images` populated; assert that `buildTaskPrompt` (or its mock) receives an injection where `images` field is absent/empty AND that `buildPromptWithImages` is called with the original images. Expected red: passes already (regression pin); fails right reason if Plan 1 accidentally regresses the strip-before-buildTaskPrompt logic. +- `Claude Code backend — multimodal SDK content blocks include the prepared base64 + mimeType` — unit — assert the resulting SDK call includes `ImageBlockParam` content blocks matching the input images. Expected red: passes already; fails right reason if Plan 1's MIME-resolution refactor breaks the data flowing into the SDK call. + +**Implementation**: none. 
These are pure regression pins. If they don't already exist (PR #948 may have a different test file), create the minimum coverage to pin the behavior. If PR #948 left no tests on this surface, that's a finding to surface — but the spec's AC#6 cannot be satisfied without a test that pins the behavior, so adding it is in-scope here. + +--- + +## Test Plan + +### Unit tests +- [ ] `tests/unit/pm/media.test.ts`: 6 new tests covering `mimeTypeFromUrl` extension-less Linear URL handling + `isImageMimeType` wildcard acceptance + Trello/JIRA regression pins +- [ ] `tests/unit/agents/definitions/contextSteps.test.ts`: 4 new tests covering the diagnostic log line shape, level, and zero-image case +- [ ] `tests/unit/pm/download-and-prepare.test.ts` (new): 4 tests covering the extracted helper's contract (shape, MIME resolution, cap, per-provider dispatch) +- [ ] `tests/unit/backends/claude-code/image-injection.test.ts`: 2 regression pins for PR #948's strip-before-buildTaskPrompt invariant + +### Integration tests +- [ ] `tests/integration/pm/image-pipeline.test.ts` (new): one happy-path integration that exercises the real `mimeTypeFromUrl` + real `isImageMimeType` + real `filterImageMedia` + real `downloadAndPrepareImages` (with `downloadMedia` stubbed to control Content-Type) + real diagnostic log emission, end-to-end with an extension-less Linear-shaped URL. 
+ +### Acceptance tests +- [ ] AC#1: integration test "extension-less Linear URL flows end-to-end and lands on disk" +- [ ] AC#2: regression tests "Trello PNG URL still flows" + "JIRA attachment URL still flows" +- [ ] AC#5 (boot path): unit test pinning the diagnostic log line shape and level +- [ ] AC#6: regression pins for PR #948's Claude-Code path +- [ ] AC#8: covered by AC#1 + AC#2 (no provider-specific code added) +- [ ] AC#9: integration test that simulates the MNG-357 scenario (extension-less Linear URL → boot path → file on disk + log line shows `urlsDownloaded: 1`) + +--- + +## Manual Verification (for `[manual]`-tagged ACs only) + +n/a — all ACs auto-tested. + +--- + +## Acceptance Criteria (per-plan, testable) + +1. An extension-less Linear-shaped URL (`https://uploads.linear.app//...` with no `.png`/`.jpg` extension) flows through extract → filter → download → write end-to-end. The resulting `MediaReference.mimeType` after download resolves to `'image/png'` (or whatever the response Content-Type header reports), NOT `'image/*'` or `'application/octet-stream'`. +2. Extension-bearing Trello and JIRA URLs continue to flow through with no behavior change. +3. Every `fetchWorkItemStep` invocation emits exactly ONE structured INFO-level log line with the literal prefix `[image-pipeline] work-item-fetch summary` and structured fields `{ provider, workItemId, urlsDetected, urlsAfterFilter, urlsDownloaded, urlsFailed, urlsByMimeType }`. +4. PR #948's Claude-Code initial-input ImageBlockParam path passes a regression test that fails loudly if the strip-before-buildTaskPrompt invariant changes. +5. `downloadAndPrepareImages` is a callable helper module exporting the prep loop with a stable shape `Promise<{ images, failures }>`. `fetchWorkItemStep` uses it (refactor not new code path). +6. 
`mimeTypeFromUrl` returns `'image/*'` for extension-less URLs whose hostname is in `IMAGE_HOST_ALLOWLIST` (currently `uploads.linear.app`); returns the existing extension-derived MIME for everything else; returns `'application/octet-stream'` for unknown extension-less hosts (preserving observability of unrecognized cases). +7. `isImageMimeType('image/*')` returns true; `isImageMimeType('application/pdf')` returns false (regression). +8. All new/modified code has corresponding tests written before the implementation. +9. `npm run build` passes. +10. `npm test` passes. +11. `npm run test:integration` passes for the new integration test. +12. `npm run lint` passes. +13. `npm run typecheck` passes. +14. All documentation listed in this plan's Documentation Impact has been updated. + +--- + +## Documentation Impact (this plan only) + +| File | Change | +|---|---| +| `CHANGELOG.md` | Entry under the next release: "PM image delivery: extension-less PM-provider URLs (Linear `uploads.linear.app/`) are no longer dropped by the pre-download MIME filter. Defers MIME authority to the download response's Content-Type header. Adds a single grep-stable diagnostic log line at extract time: `[image-pipeline] work-item-fetch summary`. Closes the silent screenshot-drop bug class verified live on 2026-04-26 (ucho/MNG-357)." | +| `src/integrations/README.md` | Add a new section titled "Image delivery contract" near the end. Documents: (a) the shared MIME resolution path (Content-Type-first, URL-extension-second, no magic-byte sniffing); (b) the `IMAGE_HOST_ALLOWLIST` for trusted PM-provider upload hosts and how to add a new entry; (c) the `[image-pipeline] work-item-fetch summary` diagnostic log line operators rely on, with field schema; (d) the rule that providers should NOT do their own MIME resolution — let the shared path handle it. Cross-link to spec 016. | + +--- + +## Out of Scope (this plan) + +- The runtime read-work-item gadget downloading + writing images (Plan 2). 
+- Linear GraphQL fixture + extraction-coverage regression test (Plan 3). +- Codex / OpenCode native multimodal SDK delivery — out of scope per spec. +- Magic-byte sniffing — out of scope per spec. +- Backfilling missed screenshots for prior runs — out of scope per spec. +- Image compression / resize / format conversion — out of scope per spec. +- Dashboard surface for "image not delivered" — out of scope per spec. + +--- + +## Progress + + +- [x] AC #1 (extension-less Linear URL flows end-to-end) +- [x] AC #2 (Trello/JIRA regression) +- [x] AC #3 (diagnostic log line shape + level) +- [x] AC #4 (PR #948 regression pin) +- [x] AC #5 (downloadAndPrepareImages shared helper) +- [x] AC #6 (mimeTypeFromUrl wildcard for allowlist hosts) +- [x] AC #7 (isImageMimeType wildcard acceptance) +- [x] AC #8 (TDD discipline) +- [x] AC #9 (build) +- [x] AC #10 (unit tests) +- [x] AC #11 (integration tests) +- [x] AC #12 (lint) +- [x] AC #13 (typecheck) +- [x] AC #14 (docs) diff --git a/docs/plans/016-pm-image-delivery-reliability/2-runtime-gadget-image-delivery.md.done b/docs/plans/016-pm-image-delivery-reliability/2-runtime-gadget-image-delivery.md.done new file mode 100644 index 00000000..7b04a4dd --- /dev/null +++ b/docs/plans/016-pm-image-delivery-reliability/2-runtime-gadget-image-delivery.md.done @@ -0,0 +1,187 @@ +--- +id: 016 +slug: pm-image-delivery-reliability +plan: 2 +plan_slug: runtime-gadget-image-delivery +level: plan +parent_spec: docs/specs/016-pm-image-delivery-reliability.md +depends_on: [1-boot-path-mime-fix-and-diagnostic-log.md] +status: done +--- + +# 016/2: Runtime gadget image delivery (mid-run pickup) + +> Part 2 of 3 in the 016-pm-image-delivery-reliability plan. See [parent spec](../../specs/016-pm-image-delivery-reliability.md). + +## Summary + +This plan closes the mid-run gap. 
Today the runtime gadget `cascade-tools pm read-work-item` (`src/cli/pm/read-work-item.ts` → `src/gadgets/pm/core/readWorkItem.ts:167`) calls `readWorkItem(workItemId, includeComments)` which delegates to `readWorkItemWithMedia` and discards the returned media. The gadget's text output includes a "Pre-fetched Images" section that lists URL refs with descriptive labels but NO local file paths the agent can read. + +After this plan ships, the runtime gadget downloads any image media it discovered and writes the bytes to `.cascade/context/images/work-item-<id>-img-<n>.<ext>`, then returns text whose Pre-fetched Images section lists the actual relative file paths the agent can hand to its file-read tool. The same diagnostic log line introduced in Plan 1 fires here too — same prefix, same fields, same grep — so an operator triaging a "no image after re-read" report sees the boot-path summary AND the runtime-path summary in the run log with consistent shape. + +This plan does NOT change the boot path's behavior (Plan 1 already shipped) and does NOT touch PR #948's Claude-Code initial-input path. The runtime gadget is engine-agnostic — it writes files on disk that any engine's file-read tool can consume. + +**Components delivered:** +- `readWorkItem(workItemId, includeComments)` — the gadget surface — gains a sibling `readWorkItemWithImagesOnDisk` (or modifies the existing function) that downloads + writes images via the shared `downloadAndPrepareImages` helper from Plan 1, then formats the text output's Pre-fetched Images section to list the actual file paths. +- A new image-writer helper that takes the `{ images, failures }` shape returned by `downloadAndPrepareImages` and writes each image to disk using the `work-item-<id>-img-<n>.<ext>` naming convention. Extension is derived from the resolved MIME (`image/png` → `.png`); falls back to `.bin` with a warn log when MIME resolution failed.
+ +- The same diagnostic log line from Plan 1, fired from the runtime gadget code path with `provider: <provider>` and the same field schema. Reuses Plan 1's helper. +- Tests covering: extension-less Linear URL → on-disk file + path returned in text; mid-run change pickup (image added after agent boot, gadget re-fetch picks it up); failed-download case (text says download failed, no orphan path listed); regression for boot-path Codex / OpenCode / Claude Code engines that still go through their existing flows untouched. + +**Deferred to later plans in this spec:** +- Linear GraphQL fixture + extraction-coverage regression test (Plan 3). +- `src/integrations/README.md`'s Linear-specific GraphQL surface confirmation (Plan 3). + +--- + +## Spec ACs satisfied by this plan + +- Spec AC #3 (runtime gadget delivers files-on-disk + paths in text) — **full** +- Spec AC #4 (mid-run image pickup) — **full** +- Spec AC #5 (single grep-stable diagnostic log line, runtime path) — **full** (combined with Plan 1's boot path) +- Spec AC #2 (Trello/JIRA regression-safe, runtime path) — **full** (regression tests) + +--- + +## Depends On + +- **Plan 1** (`1-boot-path-mime-fix-and-diagnostic-log.md`) — provides the shared `downloadAndPrepareImages` helper this plan imports, and the `mimeTypeFromUrl` + `isImageMimeType` widening that lets Linear extension-less URLs survive the filter at runtime as well as boot. Without Plan 1, the runtime gadget would face the exact same MIME-drop problem. + +--- + +## Detailed Task List (TDD) + +### 1.
On-disk image writer for the runtime gadget + +**Tests first** (`tests/unit/gadgets/pm/core/writeRuntimeImages.test.ts` — new file): + +- `writeRuntimeImages — writes each image to .cascade/context/images/ with work-item-<id>-img-<n>.<ext>` — unit — pass `{ workItemId: 'MNG-357', images: [{ base64Data, mimeType: 'image/png', altText }, { base64Data, mimeType: 'image/jpeg', altText }] }`; mock `fs.writeFile`; assert it was called twice with paths matching `work-item-MNG-357-img-0.png` and `work-item-MNG-357-img-1.jpg` (note: `image/jpeg` resolves to `.jpg` extension per a stable map). Expected red: module not found. +- `writeRuntimeImages — derives extension from resolved MIME, NOT from URL` — unit — pass image with `mimeType: 'image/webp'`; assert filename ends `.webp`. Expected red: module not found. +- `writeRuntimeImages — falls back to .bin extension when MIME resolution failed (image/*)` — unit — pass image with `mimeType: 'image/*'` (the unresolved wildcard sentinel); assert filename ends `.bin` AND a warn log is emitted including the workItemId. Expected red: module not found. +- `writeRuntimeImages — returns the list of relative paths it wrote` — unit — assert returned `string[]` matches `[`.cascade/context/images/work-item-MNG-357-img-0.png`, ...]`. Expected red: module not found. +- `writeRuntimeImages — creates the .cascade/context/images directory if it does not exist` — unit — mock `fs.access` to throw; assert `fs.mkdir` is called with `recursive: true` and the correct path. Expected red: module not found. +- `writeRuntimeImages — preserves the same naming convention as Plan 1's boot-path writer` — unit — assert that calling Plan 1's `writeInjectionImages` with equivalent inputs produces a path identical to this helper's output. (If they diverge, the runtime path and boot path have different on-disk contracts — bad. Both should produce `work-item-<id>-img-<n>.<ext>`.) Expected red: ambiguous until Plan 1's writer is examined; the test should fail loudly if either side drifts.
+ +**Implementation** (`src/gadgets/pm/core/writeRuntimeImages.ts` — new file): +- Function signature: `writeRuntimeImages({ workItemId, images, contextDir? }): Promise<{ paths: string[]; failures: { reason: string }[] }>`. +- `contextDir` defaults to `.cascade/context/images` (the existing `IMAGES_SUBDIR` from `src/backends/shared/contextFiles.ts:23`). +- Stable extension map: `image/png → .png`, `image/jpeg → .jpg`, `image/gif → .gif`, `image/webp → .webp`, `image/svg+xml → .svg`, `image/avif → .avif`, etc. — use the inverse of the existing `EXTENSION_MIME_MAP` from `src/pm/media.ts:65`. +- For unresolved `image/*`: extension `.bin` + warn log. +- The function is engine-agnostic — it writes raw bytes; whatever engine reads them later just calls its file-read tool. + +### 2. Wire the writer into the runtime read-work-item gadget + +**Tests first** (`tests/unit/gadgets/pm/core/readWorkItem.test.ts` — extend existing or create new): + +- `readWorkItem — when work item has images, writes them to disk and returns text with relative paths` — unit — mock `readWorkItemWithMedia` to return `{ text: '...\n## Pre-fetched Images\n- [Image: foo.png] (description)\n', media: [{ url, mimeType: 'image/png', altText: 'foo.png', source: 'description' }] }`; mock `downloadAndPrepareImages` to succeed; mock `writeRuntimeImages` to return paths; assert returned text contains `.cascade/context/images/work-item--img-0.png` AND that the new path appears WHERE the existing "Pre-fetched Images" URL list was. Expected red: today the gadget returns text-only; the test asserts a substring that doesn't exist yet. +- `readWorkItem — when work item has no images, returns text unchanged (no Pre-fetched Images section, no disk write)` — unit — mock `readWorkItemWithMedia` to return `{ text: '...', media: [] }`; assert text is unchanged AND `writeRuntimeImages` is NOT called. 
Expected red: passes if the gadget today already gracefully skips the empty case (it does); fails right reason if implementation accidentally calls writer for empty media. +- `readWorkItem — emits the diagnostic log line at runtime path` — unit — assert exactly ONE INFO call matching `'[image-pipeline] work-item-fetch summary'` with `provider`, `workItemId`, `urlsDetected`, `urlsAfterFilter`, `urlsDownloaded`, `urlsFailed`, `urlsByMimeType`. Same prefix and shape as the boot-path log from Plan 1. Expected red: today no log fires from the runtime gadget; assertion fails because spy was never called. +- `readWorkItem — when download fails, the text marks the URL as failed and includes the reason` — unit — mock 2 images, 1 succeeds and 1 fails; assert returned text shows the successful path AND a failed-marker for the second URL (e.g. `- [Image: bar.png] download failed: `). Expected red: today the gadget has no failure handling. +- `readWorkItem — backward-compatible text shape: agents that don't read paths still see usable text` — unit — assert returned text still contains the existing `## Description`, `## Comments`, etc. sections; Pre-fetched Images section is the only one mutated. Expected red: today text shape is fixed; if Plan 2 accidentally drops a section, this fires. + +**Implementation** (`src/gadgets/pm/core/readWorkItem.ts`): +- Modify `readWorkItem(workItemId, includeComments)` to: + 1. Call `readWorkItemWithMedia` (already returns `{ text, media }`). + 2. If `media.length > 0`: call `downloadAndPrepareImages` (Plan 1's shared helper) to get `{ images, failures }`. + 3. If `images.length > 0`: call `writeRuntimeImages` to write each to disk, getting back `{ paths, failures: writerFailures }`. + 4. Mutate the text's "Pre-fetched Images" section: replace each URL-ref line with the corresponding local file path. For download failures, replace with a failed-marker line. For writer failures (e.g. 
disk full), append a separate `## Failed to Write Images` section listing the URLs. + 5. Emit the diagnostic log line with all counts. + 6. Return the mutated text. +- Preserve `readWorkItemWithMedia` as-is (boot path uses it; this plan doesn't touch boot path). +- The new helper `downloadAndPrepareImages` is imported from `src/pm/download-and-prepare.ts` (Plan 1's location). +- `writeRuntimeImages` is imported from `src/gadgets/pm/core/writeRuntimeImages.ts` (this plan, task 1). + +### 3. Mid-run image pickup integration test + +**Tests first** (`tests/integration/gadgets/runtime-image-delivery.test.ts` — new file): + +- `runtime gadget — when an image is added to the issue between two read-work-item calls, the second call delivers it on disk` — integration — first call: mock provider returns work item with 0 images; assert `readWorkItem` returns text without `.cascade/context/images/` paths. Second call: mock provider returns work item with 1 image; assert `readWorkItem` writes the image AND returns text with the local path. Expected red: today the runtime gadget writes nothing; second call returns same text-only shape. +- `runtime gadget — extension-less Linear URL flows end-to-end via the runtime path` — integration — mock provider returns `MediaReference` with `url: 'https://uploads.linear.app//file'` and `mimeType: 'image/*'`; mock `downloadMedia` to return `{ buffer, mimeType: 'image/png' }`; assert resulting on-disk file has `.png` extension AND the file content equals the buffer. Expected red: today the gadget doesn't write to disk at all. +- `runtime gadget — Trello PNG and JIRA attachment URLs flow through the runtime path (regression)` — integration — same as above but with extensioned URLs; assert filenames have correct extensions and writes succeed. Expected red: today the gadget doesn't write at all. + +**Implementation**: covered by tasks 1 + 2 above. This task is purely integration-level coverage. 
+ +--- + +## Test Plan + +### Unit tests +- [ ] `tests/unit/gadgets/pm/core/writeRuntimeImages.test.ts` (new): 6 tests for the writer +- [ ] `tests/unit/gadgets/pm/core/readWorkItem.test.ts`: 5 new tests for the gadget surface (extend existing if present) + +### Integration tests +- [ ] `tests/integration/gadgets/runtime-image-delivery.test.ts` (new): 3 scenarios (mid-run pickup, extension-less Linear, Trello/JIRA regression) + +### Acceptance tests +- [ ] AC#3: runtime gadget integration test "extension-less Linear URL via runtime path lands on disk + path in text" +- [ ] AC#4: integration test "image added mid-run is picked up on re-read" +- [ ] AC#5 (runtime): unit test "diagnostic log line emitted from runtime gadget with same prefix and shape" +- [ ] AC#2 (runtime): regression test "Trello/JIRA images via runtime path still work" + +--- + +## Manual Verification (for `[manual]`-tagged ACs only) + +n/a — all ACs auto-tested. + +--- + +## Acceptance Criteria (per-plan, testable) + +1. The runtime gadget `readWorkItem(workItemId)` returns text whose "Pre-fetched Images" section lists actual relative file paths (e.g. `.cascade/context/images/work-item-MNG-357-img-0.png`) when images are present. +2. The files at those paths exist on disk after the gadget call returns; the bytes match what `downloadMedia` returned. +3. When an image is added to a work item between two runtime gadget calls, the second call delivers it on disk; the first call does not. +4. The diagnostic log line from Plan 1 (`[image-pipeline] work-item-fetch summary`) is emitted from the runtime gadget code path with the same field schema. +5. The disk file naming convention `work-item-<id>-img-<index>.<ext>` is consistent between Plan 1's boot path and Plan 2's runtime path. A regression test pins this consistency. +6. When `mimeType` was unresolved (`image/*`), the file extension falls back to `.bin` and a warn log fires; the file is still written. +7. 
The text response is backward-compatible: agents that don't parse the new file paths see usable text with the existing `## Description`, `## Comments`, etc. sections preserved. +8. Failed downloads are marked in the text response (not silently dropped) AND counted in the diagnostic log's `urlsFailed` field. +9. All new/modified code has corresponding tests written before the implementation. +10. `npm run build` passes. +11. `npm test` passes. +12. `npm run test:integration` passes for the new integration tests. +13. `npm run lint` passes. +14. `npm run typecheck` passes. +15. All documentation listed in this plan's Documentation Impact has been updated. + +--- + +## Documentation Impact (this plan only) + +| File | Change | +|---|---| +| `CHANGELOG.md` | Entry under the next release: "PM image delivery: the runtime `cascade-tools pm read-work-item` gadget now downloads work-item images and writes them to `.cascade/context/images/work-item-<id>-img-<index>.<ext>`. The gadget's text response lists actual local file paths the agent can read with its file-read tool. Closes the mid-run image pickup gap (image added to a work item after agent boot is now delivered on the next gadget call)." | + +`src/integrations/README.md` is NOT updated by this plan — Plan 1 already established the "Image delivery contract" section; this plan's changes are consistent with that contract and don't require new documentation in the provider-onboarding guide. + +--- + +## Out of Scope (this plan) + +- The boot-path MIME fix and the shared `downloadAndPrepareImages` helper — already shipped in Plan 1. +- Linear GraphQL fixture + extraction-coverage regression test (Plan 3). +- Codex / OpenCode native multimodal SDK delivery — out of scope per spec. +- Magic-byte sniffing — out of scope per spec. +- Backfilling missed screenshots for prior runs — out of scope per spec. +- Image compression / resize / format conversion — out of scope per spec. 
+- Dashboard surface for "image not delivered" — out of scope per spec. + +--- + +## Progress + + +- [x] AC #1 (text contains real file paths) +- [x] AC #2 (files exist on disk + bytes match) +- [x] AC #3 (mid-run pickup) +- [x] AC #4 (diagnostic log line at runtime) +- [x] AC #5 (naming convention consistency boot/runtime) +- [x] AC #6 (.bin fallback + warn for unresolved MIME) +- [x] AC #7 (text shape backward-compatible) +- [x] AC #8 (failed downloads marked + counted) +- [x] AC #9 (TDD discipline) +- [x] AC #10 (build) +- [x] AC #11 (unit tests) +- [x] AC #12 (integration tests) +- [x] AC #13 (lint) +- [x] AC #14 (typecheck) +- [x] AC #15 (docs) diff --git a/docs/plans/016-pm-image-delivery-reliability/3-linear-fixture-and-extraction-coverage.md.done b/docs/plans/016-pm-image-delivery-reliability/3-linear-fixture-and-extraction-coverage.md.done new file mode 100644 index 00000000..50031ae7 --- /dev/null +++ b/docs/plans/016-pm-image-delivery-reliability/3-linear-fixture-and-extraction-coverage.md.done @@ -0,0 +1,163 @@ +--- +id: 016 +slug: pm-image-delivery-reliability +plan: 3 +plan_slug: linear-fixture-and-extraction-coverage +level: plan +parent_spec: docs/specs/016-pm-image-delivery-reliability.md +depends_on: [] +status: done +--- + +# 016/3: Linear payload fixture + extraction-coverage regression test + +> Part 3 of 3 in the 016-pm-image-delivery-reliability plan. See [parent spec](../../specs/016-pm-image-delivery-reliability.md). + +## Summary + +This plan ships the regression net for the spec's image-delivery contract: a captured Linear GraphQL `Issue` payload for an issue with at least one user-pasted screenshot, plus a unit test that asserts our extraction picks up every image in it. If Linear ever changes the payload shape in a way that loses inline images (renames the field, replaces markdown with a structured JSON tree, drops the upload host), the test fails loudly with a clear message. 
+ +It also confirms the Linear GraphQL surface for inline images. The hypothesis from spec 016 is that `Issue.description` markdown is the canonical surface — `Issue.attachments` returns formal Attachment records (link previews, integration cards) and is NOT where pasted images live. This plan probes Linear's API to verify, captures the result in the fixture, and documents the conclusion in `src/integrations/README.md`. If Linear exposes a previously-unknown surface (e.g. an `attachments(includeInline: true)` filter, or a `descriptionData` rich-text JSON tree), this plan integrates that surface under Plan 1's shared resolution path. + +This plan has no code dependency on Plan 1 or Plan 2 in production code (it lives entirely in tests + docs), but it logically follows them because the regression test exercises the new contract Plan 1 + Plan 2 establish. If Plan 1 or Plan 2's contract changed before this plan ships, the regression test would need to be updated to match. + +**Components delivered:** +- `tests/fixtures/linear-issue-with-screenshot.json` — a captured Linear GraphQL `Issue` payload for a real test issue (or a faithfully reconstructed equivalent if we don't want to commit a real one) with at least one user-pasted screenshot in the description AND at least one in a comment. The fixture covers the common cases: extension-less `uploads.linear.app/` URL, extensioned URL with a filename, image embedded in markdown with alt text, image embedded with no alt text. +- A unit test that loads the fixture, runs the Linear adapter's extraction path on it, and asserts every image in the fixture is detected by `extractMarkdownImages` (or whatever extraction surface ends up canonical for Linear). Test fails with a specific message if a fixture image is missed. +- An optional Linear API probe (one-shot, manual or scripted) that captures the fixture from a real Linear issue. Not part of the test run; a tools script. 
+- `src/integrations/README.md` updates: a new subsection under "Image delivery contract" titled "Linear: GraphQL surface for inline images" that documents the conclusion of the investigation. Either: (a) confirms `Issue.description` markdown is canonical and points to the fixture; OR (b) describes the new GraphQL surface integrated under Plan 1's shared path. + +**Deferred to later plans in this spec:** +- None — this is the last plan. + +--- + +## Spec ACs satisfied by this plan + +- Spec AC #7 (Linear GraphQL fixture + regression test) — **full** + +--- + +## Depends On + +None in code. Logically follows Plan 1 + Plan 2 because the regression net is for the contract those plans establish, but the test itself only depends on `extractMarkdownImages` (the Linear adapter's existing surface) and the fixture. + +--- + +## Detailed Task List (TDD) + +### 1. Capture or construct the Linear fixture + +**Tests first** (no — this task is fixture authorship; the test in task 2 is the gating fail-loud). + +**Implementation** (`tests/fixtures/linear-issue-with-screenshot.json` — new file): + +Two modes for capturing: + +- **Mode A: capture from real Linear API.** Run a one-shot probe script (tooled in `tools/capture-linear-fixture.ts` or via a `cascade` admin command) that calls the Linear GraphQL `Issue` query for a real test issue with a pasted screenshot. Save the JSON response verbatim. The fixture should be sanitized to remove team/user identifying data but preserve URL shapes, markdown, and any structural fields Linear returns. +- **Mode B: faithfully reconstruct.** Build the fixture manually from Linear's documented GraphQL schema. Less authoritative but commits no real production data. + +**Recommendation: Mode A with sanitization.** A real-API capture is the only way to catch Linear's actual payload quirks (field ordering, optional fields present-but-null vs absent, URL hostname canonicalization, etc.). 
Sanitize team IDs, user emails, and any free-form text that might leak. Keep the URL hosts (`uploads.linear.app`), the markdown image syntax, and any structural fields Linear returns even if we don't read them today. + +Fixture must contain: +1. At least one extension-less `uploads.linear.app/` markdown image in the issue description. +2. At least one extensioned URL (e.g. `https://example.com/foo.png`) in the issue description. +3. At least one comment with a pasted screenshot. +4. The Linear `Issue.attachments` connection populated (with non-image-paste attachments — link previews, etc.) to confirm we DON'T mistake them for inline images. + +### 2. Extraction-coverage regression test + +**Tests first** (`tests/unit/pm/linear/extraction-coverage.test.ts` — new file): + +- `Linear extraction — picks up every inline image in the fixture issue description` — unit — load `tests/fixtures/linear-issue-with-screenshot.json`; pass `description` field through `extractMarkdownImages`; assert returned `MediaReference[]` contains every URL in the fixture's description that we expect to be picked up. Expected red: depends on whether Plan 1 has been merged. If Plan 1 is merged: passes (no implementation change needed for this plan). If Plan 1 is NOT yet merged in the dev branch this is being implemented against: this test fails with `expected length 2, got 0` because the extension-less Linear URL was filtered out — making this test a useful check against shipping Plan 3 ahead of Plan 1. +- `Linear extraction — picks up every inline image in fixture comments` — unit — same as above but for comments (each comment's body passed through `extractMarkdownImages`). Expected red: same as above. +- `Linear extraction — does NOT mistake Issue.attachments link previews for inline images` — unit — assert that the formal `Issue.attachments` records in the fixture (link previews, integration cards) are NOT included in the inline-image MediaReference list. 
Expected red: passes if the adapter correctly separates `attachments` from inline media (the existing code does); fails right reason if Plan 3 accidentally widens extraction to include them. +- `Linear extraction — fails LOUDLY if a fixture image is missed (regression net)` — unit — manually omit one URL from the expected list AND assert the test fails. (This is a meta-test confirming the test mechanism works; included once and then removed in cleanup.) Documents the failure-message format the spec AC#7 requires. + +**Implementation**: none — this is pure regression testing. If Plan 1 + Plan 2 have shipped correctly, the tests pass. If they haven't, the tests fail and the regression net catches it. + +### 3. `src/integrations/README.md` Linear-specific update + +**Tests first**: n/a (documentation). + +**Implementation**: After completing the Linear API probe (task 1, Mode A), append the conclusion to `src/integrations/README.md`'s "Image delivery contract" section (Plan 1 introduced this section). The new subsection: + +- **Title**: "Linear: GraphQL surface for inline images" +- **Body**: Document what Linear's API actually exposes for user-pasted screenshots. Confirm that `Issue.description` markdown is canonical (or describe the alternative if found). Cite the fixture path. Note that `Issue.attachments` is for formal Attachment records (link previews, integration cards) and should not be queried for inline images. +- **If a new surface was found** (e.g. `descriptionData`, an `attachments(includeInline: true)` filter): document the integration; otherwise, document the rule "use `extractMarkdownImages` over `Issue.description` and over each `Comment.body`." + +If task 1's Linear API probe finds a surprise, update Plan 1's `extractMarkdownImages` call site in `src/pm/linear/adapter.ts` to also probe the new surface — but this should be RARE; the likely outcome is "description markdown is canonical, no production code change." 
+ +--- + +## Test Plan + +### Unit tests +- [ ] `tests/unit/pm/linear/extraction-coverage.test.ts` (new): 4 tests covering description extraction, comment extraction, attachment-record exclusion, and the meta-test for the regression net. + +### Integration tests +- [ ] None new — this plan is fixture + extraction unit tests + docs. + +### Acceptance tests +- [ ] AC#7: covered by the extraction-coverage tests + the fixture file. + +--- + +## Manual Verification (for `[manual]`-tagged ACs only) + +n/a — all ACs auto-tested. + +--- + +## Acceptance Criteria (per-plan, testable) + +1. `tests/fixtures/linear-issue-with-screenshot.json` exists and contains at least one extension-less `uploads.linear.app/` markdown image in the description, at least one extensioned URL, at least one comment with a pasted image, AND populated `Issue.attachments` records (link previews — NOT inline images). +2. The extraction-coverage regression test loads the fixture and asserts every inline image is detected; fails LOUDLY (with a specific message identifying the missing URL) if any are dropped. +3. The fixture is sanitized — no team IDs, user emails, or free-form leaky text. +4. `src/integrations/README.md` has a new "Linear: GraphQL surface for inline images" subsection under "Image delivery contract" documenting the investigation conclusion, referring to the fixture path. +5. If the Linear API probe found a previously-unknown inline-image surface, the Linear adapter's extraction path is widened to query it (still under Plan 1's shared resolution); otherwise, no production code changes. +6. All new/modified code has corresponding tests written before the implementation. +7. `npm run build` passes. +8. `npm test` passes. +9. `npm run lint` passes. +10. `npm run typecheck` passes. +11. All documentation listed in this plan's Documentation Impact has been updated. 
+ +--- + +## Documentation Impact (this plan only) + +| File | Change | +|---|---| +| `CHANGELOG.md` | Entry under the next release: "PM image delivery: captured Linear GraphQL fixture (`tests/fixtures/linear-issue-with-screenshot.json`) plus regression test pinning our inline-image extraction. Fails loudly if Linear ever changes its issue payload shape in a way that loses inline images. Documents the canonical Linear GraphQL surface for inline images in the integrations README." | +| `src/integrations/README.md` | New subsection "Linear: GraphQL surface for inline images" under the "Image delivery contract" section (Plan 1 introduced this top-level section). Documents the investigation conclusion. | + +--- + +## Out of Scope (this plan) + +- The boot-path MIME fix and diagnostic log line (Plan 1). +- The runtime gadget image delivery (Plan 2). +- Trello/JIRA fixture captures for analogous regression coverage — they don't have the extension-less URL bug Linear has; deferred to a future spec if they ever exhibit similar drift. +- Codex / OpenCode native multimodal SDK delivery — out of scope per spec. +- Magic-byte sniffing — out of scope per spec. +- Backfilling missed screenshots for prior runs — out of scope per spec. +- Image compression / resize / format conversion — out of scope per spec. +- Dashboard surface for "image not delivered" — out of scope per spec. 
+ +--- + +## Progress + + +- [x] AC #1 (fixture file exists with correct shape) +- [x] AC #2 (regression test fails loudly on missed images) +- [x] AC #3 (fixture sanitization) +- [x] AC #4 (README "Linear GraphQL surface" subsection) +- [x] AC #5 (new-surface integration if found, else no-op — confirmed no-op: description markdown is canonical) +- [x] AC #6 (TDD discipline) +- [x] AC #7 (build) +- [x] AC #8 (unit tests) +- [x] AC #9 (lint) +- [x] AC #10 (typecheck) +- [x] AC #11 (docs) diff --git a/docs/plans/016-pm-image-delivery-reliability/_coverage.md b/docs/plans/016-pm-image-delivery-reliability/_coverage.md new file mode 100644 index 00000000..0b426ee7 --- /dev/null +++ b/docs/plans/016-pm-image-delivery-reliability/_coverage.md @@ -0,0 +1,48 @@ +# Coverage map for spec 016-pm-image-delivery-reliability + +Auto-generated by /plan. Tracks which plans satisfy which spec ACs. + +## Spec ACs + +| # | Spec AC (short) | Satisfied by | Status | +|---|---|---|---| +| 1 | Linear screenshot on-disk for all engines (boot path) | plan 1 (boot-path-mime-fix-and-diagnostic-log) | full | +| 2 | Trello/JIRA images regression-safe | plan 1 (boot regression) + plan 2 (runtime regression) | full | +| 3 | Runtime gadget delivers files-on-disk + paths in text | plan 2 (runtime-gadget-image-delivery) | full | +| 4 | Mid-run image pickup | plan 2 | full | +| 5 | Single grep-stable diagnostic log line per fetch | plan 1 (boot path) + plan 2 (runtime path, same shape) | partial chain | +| 6 | PR #948 Claude-Code path untouched | plan 1 (regression pin) | full | +| 7 | Linear GraphQL fixture + extraction-coverage test | plan 3 (linear-fixture-and-extraction-coverage) | full | +| 8 | New-provider invariant preserved | plan 1 (no provider-specific code in shared resolution) | full | +| 9 | MNG-357 end-to-end reproduces clean | plan 1 (root-cause fix) | full | + +## Coverage summary + +- **9 spec ACs** mapped to **3 plans** +- **8 plans-x-AC pairs full coverage** (each AC fully 
satisfied by its assigned plan(s)) +- **1 spec AC** with partial-chain coverage (AC #5 — boot path emits the diagnostic line in plan 1; runtime path emits the same shape in plan 2; AC is fully covered only after both ship) + +## Plan dependency graph + +``` +1-boot-path-mime-fix-and-diagnostic-log ──→ 2-runtime-gadget-image-delivery + └──→ 3-linear-fixture-and-extraction-coverage +``` + +Plan 1 ships the safety-net (MIME drop fix + shared `downloadAndPrepareImages` helper + diagnostic log line). It alone fixes MNG-357. + +Plan 2 imports Plan 1's shared helper and closes the runtime mid-run gap. It alone is not enough to fix MNG-357 (which is a boot-path failure), but it is the second half of AC#5 and provides AC#3 + AC#4 entirely. + +Plan 3 is the regression net for the new contract Plans 1+2 establish. No code dependency on Plans 1+2; the fixture and extraction-coverage test would still pass against today's `extractMarkdownImages` because the Linear regex matches on URL alone, not on MIME. But shipping it before Plans 1+2 would mean the regression net is for an old, broken contract — so logical order is 1 → 2 → 3. + +## Documentation impact distribution + +| Spec doc | Plan | What gets added | +|---|---|---| +| `CHANGELOG.md` | Plan 1 | "PM image delivery: extension-less URL fix + diagnostic log" | +| `CHANGELOG.md` | Plan 2 | "PM image delivery: runtime gadget mid-run delivery" | +| `CHANGELOG.md` | Plan 3 | "PM image delivery: Linear fixture + extraction regression" | +| `src/integrations/README.md` | Plan 1 | New "Image delivery contract" section (general — applies to all providers) | +| `src/integrations/README.md` | Plan 3 | New "Linear: GraphQL surface for inline images" subsection (Linear-specific findings) | + +CLAUDE.md is NOT updated by this spec — already covered by spec 015's broader "silent-failure → single-line diagnostic" pattern, and the diagnostic log line introduced here is a concrete instance of that pattern, not a new cross-cutting rule. 
diff --git a/docs/specs/016-pm-image-delivery-reliability.md.done b/docs/specs/016-pm-image-delivery-reliability.md.done new file mode 100644 index 00000000..7de01f0a --- /dev/null +++ b/docs/specs/016-pm-image-delivery-reliability.md.done @@ -0,0 +1,129 @@ +--- +id: 016 +slug: pm-image-delivery-reliability +level: spec +title: PM image delivery reliability +created: 2026-04-26 +status: done +--- + +# 016: PM image delivery reliability + +## Problem & Motivation + +CASCADE's image-injection pipeline silently drops user-pasted screenshots from Linear work items, leaving agent workers running with empty `.cascade/context/images/` directories. The agent then proceeds without the visual context the user provided, often reporting back to the user that "the image asset was not present in this workspace" or guessing at the bug being reported. For a product whose value proposition is "you describe what's broken, the agent fixes it," a silently-missing screenshot is a credibility-class failure. + +The bug surfaced in production on 2026-04-26 against Linear card MNG-357 (ucho project). A user attached a screenshot to the card. The planning agent ran on the Codex engine, observed the image was missing, and re-wrote the issue description telling the next agent in the pipeline to "translate the screenshot into a concrete route, viewport, and failing assertion before editing code" — bypassing the user-attached visual context entirely. The cascade run log shows `hasOffloadedContext: false` at agent startup; the agent's first tool call was a directory probe (`find .cascade/context/images -maxdepth 2 -type f`) that returned empty. + +The root cause is a stack of three independent gaps. First, Linear's user-pasted-image URLs are extension-less (`https://uploads.linear.app/<uuid>/<uuid>`); the URL-based MIME-type heuristic returns `application/octet-stream` for them, and the pre-download image-only filter drops them as non-images. 
Second, the runtime gadget that lets agents fetch a work item mid-run is text-only — it discards the media references the underlying provider returns, so even if the boot-path delivered images, an agent re-reading the work item finds nothing. Third, the existing instrumentation only logs *post*-download outcomes ("downloaded N of M") — the upstream extract/filter step that drops Linear screenshots is invisible in run logs, making this exact incident class essentially undiagnosable without source diving. + +This spec closes all three gaps. It mirrors the lesson from spec 015 (router job dispatch failure recovery): a silent failure mode in the agent platform must be replaced with a single, grep-stable log line that makes the failure observable, and the contract that produced the failure must be hardened so the failure can't recur. + +--- + +## Goals + +1. A user pastes a screenshot into a Linear work item. Any agent type — planning, implementation, review, backlog-manager — running for that work item sees the image as a readable file in its workspace, regardless of which engine (Codex, OpenCode, Claude Code) executes the agent. +2. An agent that re-reads a work item mid-run via the runtime read-work-item gadget receives the same image-on-disk treatment, so a teammate adding a screenshot after the agent has started can still be picked up. +3. Every work-item fetch emits a single structured log line that summarizes detected URLs, post-filter URLs, downloads attempted, downloads successful, and downloads failed — enough to diagnose any future "no image delivered" report by grepping one line in the run log. +4. The fix generalizes beyond Linear: Trello and JIRA flow through the same MIME-resolution path and gain the same Content-Type-first authority and runtime-gadget delivery — without any provider-specific URL hostname matching. +5. 
Existing healthy paths continue to behave identically: Trello and JIRA images that *did* work before still work; PR #948's Claude-Code initial-input ImageBlockParam delivery is untouched; the existing MediaReference shape consumed by downstream code remains compatible. + +--- + +## Non-goals + +- Migrating Codex / OpenCode to native multimodal SDK delivery (analogous to PR #948 for Claude Code) — separate spec when the SDKs offer the surface. +- Backfilling missed screenshots for prior agent runs. Agents that already finished without their image stay finished; no retroactive re-dispatch. +- Trello/JIRA-specific MIME detection improvements beyond what the shared resolution path already covers. +- A dashboard UI surfacing "image was not delivered to this run" — operational, separate effort. +- Magic-byte sniffing as a third MIME-resolution tier. The auth'd PM endpoints (Linear, Trello, JIRA) are trusted to return correct Content-Type on the response; adding a sniffer adds a dep with no observable benefit. +- Compressing, resizing, format-converting images on the fly. The worker reads what was uploaded, byte-for-byte. + +--- + +## Constraints + +- The fix must not change the wire shape of `MediaReference` consumed by downstream code. Adding a sentinel value to its existing `mimeType` field is acceptable; renaming or removing the field is not. +- The fix must not introduce a new pre-download HTTP round-trip per image. Resolving MIME via the existing GET response is acceptable; a separate HEAD request before the GET is not. +- The diagnostic log line must be observable through the standard `cascade runs logs` and Loki paths used today. No new log sink, no new dashboard. +- The runtime gadget's behavioral change must be backward-compatible with agents that simply read the gadget's text output. Agents that ignore the new file paths still get a usable text response. 
+- File-on-disk delivery must work for engines that do *not* speak multimodal SDK content blocks (Codex today; possibly others tomorrow). The disk-write path is the lowest-common-denominator delivery contract and stays the canonical one. +- The on-disk file naming must encode the work item identifier so an agent that traverses `.cascade/context/images/` can correlate files to the work item without inspecting metadata. + +--- + +## User stories / Requirements + +1. **As a CASCADE user**, when I paste a screenshot into a Linear issue and move it to a triggering state, the agent that runs has my screenshot available as a file it can read. I never have to describe the screenshot in words to compensate. +2. **As a CASCADE user**, when I add a screenshot to a Linear issue *after* the agent has already started running, the agent can pick it up by re-reading the work item mid-run. +3. **As an operator diagnosing a "no image delivered" report**, I can read the agent's run log, find a single line that tells me how many images were detected on the work item, how many survived filtering, how many were downloaded, and how many failed — and I can correlate failures to URLs without reading source. +4. **As a maintainer adding a new PM provider**, the image-delivery pipeline works for my provider with no provider-specific MIME-detection branching. Whatever URLs my adapter exposes flow through the same resolution path that Linear, Trello, and JIRA use. +5. **As a CASCADE engineering team**, we have a captured fixture of the Linear GraphQL `Issue` payload for a card with a pasted screenshot, so future changes to Linear's API surface are detectable as a regression. + +--- + +## Research Notes + +- HTTP/1.1 RFC 7231 §3.1.1.5 specifies that the `Content-Type` header is authoritative for the media type of the returned representation. URL-extension-based inference is a heuristic, not a contract. 
([RFC 7231](https://www.rfc-editor.org/rfc/rfc7231#section-3.1.1.5)) +- WHATWG MIME Sniffing Standard codifies authority order: `Content-Type` header first, resource metadata second, structural sniffing third. Our case never needs the third tier because PM hosts are trusted. ([mimesniff.spec.whatwg.org](https://mimesniff.spec.whatwg.org/)) +- The "trust file extensions, fail closed on unknown" pattern is well-known to drop legitimate content from extension-less URLs. CDN platforms (Cloudflare, S3-with-CloudFront) document this exact failure mode in their content-handling guides. +- BullMQ's failed-event compensation pattern from spec 015 — "every silent failure mode must surface a single grep-stable diagnostic log line" — generalizes here. Same product credibility argument: agents that silently miss user context look broken even when every line of code is technically correct. +- Linear's developer documentation describes `Issue.description` as a Markdown field; user-pasted images are stored as standard Markdown image syntax pointing at `uploads.linear.app/` URLs. The `Issue.attachments` GraphQL connection serves a different purpose (link previews, integration cards) and is not where pasted images live. + +--- + +## Open Source Decisions + +| Tool | Solves | Decision | Reason | +|------|--------|----------|--------| +| Node `fetch` built-in `Content-Type` parsing | Authoritative MIME from response header | **Use** | Already in the worker runtime; zero new dep. | +| [`file-type`](https://www.npmjs.com/package/file-type) (magic-byte sniffer) | Tier-3 MIME fallback when extension AND Content-Type both fail | **Skip** | Trusted PM hosts return correct Content-Type. Adds dep weight with no observable benefit for our use case; revisit only if a provider proves untrustworthy. | +| Linear GraphQL fixture capture | Regression detection if Linear's payload shape ever changes | **Use** (one-shot capture, not a runtime dep) | Standard testing pattern; lives in test fixtures. 
| + +--- + +## Strategic decisions + +1. **MIME authority order: Content-Type-first, URL-extension-second, no magic-byte sniffing.** Pre-download URL-extension inference becomes a hint, not a verdict. The download response's `Content-Type` resolves the actual MIME. Magic-byte sniffing is a deliberate non-goal — trusted PM hosts return correct Content-Type, the dep cost isn't justified. +2. **Pre-download MIME representation: `image/*` wildcard sentinel.** When the URL-extension heuristic returns `application/octet-stream` AND the URL came from a path that PM providers commonly produce extension-less (Linear's `uploads.linear.app/`), the `MediaReference.mimeType` is set to `image/*`. The image-only filter is extended to accept the wildcard. The wildcard never reaches disk — it's resolved to a concrete MIME at download time. This is smaller-surface and MIME-spec-valid (`image/*` is a legal Accept-range MIME) compared to introducing a sibling `mimeTypeIsHint` field. +3. **Runtime read-work-item gadget delivers files on disk, not base64-inline.** The agent's file-read tool consumes paths; base64 in text doesn't fit. The boot path and the runtime path produce identical artifacts: files under `.cascade/context/images/` named to encode the work item identifier. +4. **Disk file naming: `work-item-<workItemId>-img-<index>.<ext>`.** Extension is derived from the *resolved* MIME (image/png → `.png`). When MIME resolution fails entirely (download succeeded but Content-Type was missing/unparseable), use `.bin` and emit a warn log — never silently degrade. +5. **Linear payload investigation always ships the fixture and the coverage test, regardless of what Linear's API exposes.** If we find no surprise (likely), document the description-markdown contract in the integrations README. If we find a new surface (e.g. an `attachments(includeInline: true)` argument), integrate it under the same shared resolution path. Either way, the fixture is the regression net for future Linear API drift. +6.
**Diagnostic log line is required for AC sign-off, not optional.** This spec includes a structured one-liner at extraction time, mirroring the wedged-lock canary from spec 015. Without it, the next "no image delivered" incident will be just as opaque as MNG-357 was. + +--- + +## Acceptance Criteria (outcome-level) + +1. A user attaches a screenshot to a Linear issue (description paste or comment paste). Triggering an agent run for that issue results in the agent finding the screenshot as a readable file in its workspace, regardless of engine — Codex, OpenCode, or Claude Code. No engine-specific re-implementation; the disk-write path remains the canonical lowest-common-denominator. +2. The same flow works for Trello and JIRA work items that contain images (regression-safe — these worked before; they continue to work). +3. An agent that calls the runtime read-work-item gadget mid-run for a work item that has images receives a text response whose pre-fetched-images section lists actual local file paths the agent can read with its file-read tool. The files exist on disk at the listed paths. +4. A teammate adds an image to a work item *after* an agent has started running. When the agent re-reads the work item via the runtime gadget, the new image is downloaded and made available on disk in the same way as boot-time images. +5. Every work-item fetch (boot or runtime) emits a single structured log line summarizing: provider, work-item identifier, URLs detected, URLs that survived filtering, URLs successfully downloaded, URLs that failed. The format is grep-stable: it appears verbatim in the cascade run log surface and in production log aggregation. An operator can filter for it with one expression and triage any "no image delivered" report from the resulting line alone. +6. PR #948's Claude-Code initial-input ImageBlockParam path is untouched and still works for Claude-Code engines. Tests that pinned that behavior continue to pass without modification. +7. 
A captured fixture file exists for a Linear `Issue` GraphQL payload containing a user-pasted screenshot, plus a regression test that asserts our extraction picks up every image in it. The test fails loudly if Linear changes its payload shape in a way that loses inline images. +8. A new PM provider added later (e.g. Asana, GitLab) inherits the working pipeline by writing only its adapter — no new code in the shared MIME-resolution, download-and-write, or runtime-gadget surfaces. The single-entrypoint invariant from spec 009 is preserved. +9. The end-to-end MNG-357 scenario reproduces clean: a fresh Linear issue with a pasted screenshot, a planning agent run on Codex, and `.cascade/context/images/work-item-<workItemId>-img-0.<ext>` exists in the worker container; the cascade run log shows `hasOffloadedContext: true` and the new diagnostic line shows non-zero downloads. + +--- + +## Documentation Impact (high-level) + +- `CHANGELOG.md` — entry under the next release per shipping plan: one for the boot-path MIME fix, one for the runtime-gadget mid-run delivery, one for the Linear-payload investigation outcome. +- `src/integrations/README.md` — the canonical adding-a-new-PM-provider doc gains a section on image delivery: the contract a provider must satisfy (extract URLs from descriptions and comments; expose `getAttachments` only for non-inline attachment-style media; trust the shared MIME resolution path; do nothing extra for image delivery), and the diagnostic log line operators rely on. Also: confirmed scope of Linear's GraphQL surface for inline images, captured fixture path, and what to do if a provider's host serves untrustworthy `Content-Type` headers. + +CLAUDE.md is not updated by this spec. The diagnostic log line is observable, but the *invariant* is provider-adapter-level (a property of `src/integrations/README.md`'s contract).
The dispatch-failure semantics from spec 015 already established the broader "silent-failure → single-line diagnostic" pattern in CLAUDE.md; this spec is a concrete instance of that pattern, not a new cross-cutting rule. + +--- + +## Out of Scope + +- Codex / OpenCode native multimodal SDK delivery analogous to PR #948 — separate future spec. +- Magic-byte sniffing fallback for MIME resolution. +- Backfilling already-finished agent runs that missed images. +- Image compression / resize / format conversion on the worker side. +- A dashboard surface for "image was not delivered to this run" notifications. +- Trello/JIRA-specific URL-detection improvements beyond what the shared Content-Type-first resolution naturally covers. +- Replacing the `cascade runs logs` log surface with a structured event stream. +- Cross-router-instance lock coordination for image downloads (not a real concern — each worker container has its own filesystem). diff --git a/src/agents/definitions/contextSteps.ts b/src/agents/definitions/contextSteps.ts index d47b0a43..069e276d 100644 --- a/src/agents/definitions/contextSteps.ts +++ b/src/agents/definitions/contextSteps.ts @@ -18,7 +18,7 @@ import { } from '../../gadgets/todo/storage.js'; import { githubClient } from '../../github/client.js'; import { getJiraConfig, getLinearConfig, getTrelloConfig } from '../../pm/config.js'; -import { getPMProviderOrNull, MAX_IMAGES_PER_WORK_ITEM } from '../../pm/index.js'; +import { getPMProviderOrNull } from '../../pm/index.js'; import { getSentryClient } from '../../sentry/client.js'; import type { AgentInput, ProjectConfig } from '../../types/index.js'; import { parseRepoFullName } from '../../utils/repo.js'; @@ -85,7 +85,11 @@ export function fetchContextFilesStep(params: FetchContextParams): ContextInject export async function fetchWorkItemStep(params: FetchContextParams): Promise { if (!params.input.workItemId) return []; try { - const { text: cardData, media } = await 
readWorkItemWithMedia(params.input.workItemId, true); + const { + text: cardData, + media, + urlsDetected, + } = await readWorkItemWithMedia(params.input.workItemId, true); const injection: ContextInjection = { toolName: 'ReadWorkItem', @@ -94,62 +98,37 @@ export async function fetchWorkItemStep(params: FetchContextParams): Promise 0) { - const provider = getPMProviderOrNull(); - const limited = media.slice(0, MAX_IMAGES_PER_WORK_ITEM); + // Spec 016/1: defer the actual download + base64 prep to the shared + // `downloadAndPrepareImages` helper so the runtime gadget (spec 016/2) + // uses the same code path. + const { downloadAndPrepareImages } = await import('../../pm/download-and-prepare.js'); + const { images, failures } = await downloadAndPrepareImages( + params.input.workItemId, + media, + params.logWriter, + ); - params.logWriter('INFO', 'fetchWorkItemStep: downloading work item images', { - workItemId: params.input.workItemId, - count: limited.length, - }); + // Spec 016/1 AC#5: single grep-stable diagnostic log line summarising + // the entire boot-path image pipeline outcome. Operators triage any + // "no image delivered" report by grepping for `[image-pipeline] + // work-item-fetch summary`. + const provider = getPMProviderOrNull(); + const urlsByMimeType: Record = {}; + for (const ref of media) { + urlsByMimeType[ref.mimeType] = (urlsByMimeType[ref.mimeType] ?? 0) + 1; + } + params.logWriter('INFO', '[image-pipeline] work-item-fetch summary', { + provider: provider?.type ?? 
'unknown', + workItemId: params.input.workItemId, + urlsDetected, + urlsAfterFilter: media.length, + urlsDownloaded: images.length, + urlsFailed: failures.length, + urlsByMimeType, + }); - const { jiraClient } = await import('../../jira/client.js'); - const { trelloClient } = await import('../../trello/client.js'); - const { linearClient } = await import('../../linear/client.js'); - - const results = await Promise.all( - limited.map(async (ref) => { - try { - let downloaded: { buffer: Buffer; mimeType: string } | null = null; - if (provider?.type === 'jira') { - downloaded = await jiraClient.downloadAttachment(ref.url); - } else if (provider?.type === 'linear') { - downloaded = await linearClient.downloadAttachment(ref.url); - } else { - downloaded = await trelloClient.downloadAttachment(ref.url); - } - if (!downloaded) { - params.logWriter('WARN', 'fetchWorkItemStep: image download returned null', { - url: ref.url.split('?')[0], - }); - return null; - } - return { - base64Data: downloaded.buffer.toString('base64'), - mimeType: downloaded.mimeType, - altText: ref.altText, - }; - } catch (err) { - params.logWriter('WARN', 'fetchWorkItemStep: failed to download image', { - url: ref.url.split('?')[0], - error: err instanceof Error ? 
err.message : String(err), - }); - return null; - } - }), - ); - - const images = results.filter((r) => r !== null); - params.logWriter('INFO', 'fetchWorkItemStep: image download complete', { - workItemId: params.input.workItemId, - attempted: limited.length, - downloaded: images.length, - skipped: limited.length - images.length, - }); - if (images.length > 0) { - injection.images = images; - } + if (images.length > 0) { + injection.images = images; } return [injection]; diff --git a/src/gadgets/pm/core/readWorkItem.ts b/src/gadgets/pm/core/readWorkItem.ts index 674198da..fcaf1656 100644 --- a/src/gadgets/pm/core/readWorkItem.ts +++ b/src/gadgets/pm/core/readWorkItem.ts @@ -1,5 +1,6 @@ import type { Attachment, MediaReference } from '../../../pm/index.js'; -import { filterImageMedia, getPMProvider } from '../../../pm/index.js'; +import { filterImageMedia, getPMProvider, getPMProviderOrNull } from '../../../pm/index.js'; +import { logger } from '../../../utils/logging.js'; interface Label { name: string; @@ -33,6 +34,8 @@ export interface WorkItemWithMedia { text: string; /** All image media references discovered in the work item description, card attachments, and comments (deduplicated by URL) */ media: MediaReference[]; + /** The total number of media and attachment references found before MIME-type filtering */ + urlsDetected: number; } function formatLabels(labels: Label[]): string { @@ -117,11 +120,15 @@ export async function readWorkItemWithMedia( // Collect all image media references const allMedia: MediaReference[] = []; + let urlsDetected = 0; + if (item.inlineMedia && item.inlineMedia.length > 0) { + urlsDetected += item.inlineMedia.length; allMedia.push(...filterImageMedia(item.inlineMedia)); } // Add image-type card attachments as media references + urlsDetected += attachments.length; allMedia.push( ...filterImageMedia( attachments.map((att) => ({ @@ -142,6 +149,7 @@ export async function readWorkItemWithMedia( const comments = await 
provider.getWorkItemComments(workItemId); for (const comment of comments) { if (comment.inlineMedia && comment.inlineMedia.length > 0) { + urlsDetected += comment.inlineMedia.length; allMedia.push(...filterImageMedia(comment.inlineMedia)); } } @@ -161,13 +169,95 @@ export async function readWorkItemWithMedia( // Append pre-fetched images section listing discovered images text += formatPreFetchedImages(dedupedMedia); - return { text, media: dedupedMedia }; + return { text, media: dedupedMedia, urlsDetected }; +} + +/** + * Format the on-disk paths for a successfully-written batch of runtime + * images. Replaces the existing "Pre-fetched Images" URL list with a + * "Local Image Files" section that the agent can hand to its file-read + * tool. Failed downloads (if any) are surfaced inline so the agent + * doesn't silently miss missing context. + */ +function formatRuntimeImagePaths( + paths: string[], + failures: { url: string; reason: string }[], +): string { + if (paths.length === 0 && failures.length === 0) return ''; + const lines: string[] = ['## Local Image Files', '']; + for (const path of paths) { + lines.push(`- ${path}`); + } + if (failures.length > 0) { + lines.push('', '### Failed Image Downloads'); + for (const f of failures) { + lines.push(`- ${f.url} — ${f.reason}`); + } + } + lines.push(''); + return `${lines.join('\n')}\n`; } +/** + * Spec 016/2: runtime gadget downloads + writes images to disk so the + * agent can read them mid-run. Returns text whose pre-fetched-images + * section now lists local file paths the agent can hand to its + * file-read tool, not just URL refs. + * + * Each fetch emits the AC#5 grep-stable diagnostic line — same format + * as the boot-path emission in `fetchWorkItemStep`. 
+ */ export async function readWorkItem(workItemId: string, includeComments = true): Promise { try { - const { text } = await readWorkItemWithMedia(workItemId, includeComments); - return text; + const { text, media, urlsDetected } = await readWorkItemWithMedia(workItemId, includeComments); + + // Spec 016/2: download + write any image media so agent can Read them. + const { downloadAndPrepareImages } = await import('../../../pm/download-and-prepare.js'); + const { writeRuntimeImages } = await import('./writeRuntimeImages.js'); + + const provider = getPMProviderOrNull(); + const logWriter = ( + level: 'INFO' | 'WARN' | 'ERROR', + message: string, + meta?: Record, + ) => { + logger[level.toLowerCase() as 'info' | 'warn' | 'error'](message, meta); + }; + + const { images, failures } = await downloadAndPrepareImages(workItemId, media, logWriter); + + let writePaths: string[] = []; + let writeFailures: { path: string; reason: string }[] = []; + if (images.length > 0) { + const writeResult = await writeRuntimeImages({ workItemId, images }); + writePaths = writeResult.paths; + writeFailures = writeResult.failures; + } + // AC#5 diagnostic line — same prefix as the boot-path emission. + const urlsByMimeType: Record = {}; + for (const ref of media) { + urlsByMimeType[ref.mimeType] = (urlsByMimeType[ref.mimeType] ?? 0) + 1; + } + logger.info('[image-pipeline] work-item-fetch summary', { + provider: provider?.type ?? 'unknown', + workItemId, + urlsDetected, + urlsAfterFilter: media.length, + urlsDownloaded: images.length, + urlsFailed: failures.length + writeFailures.length, + urlsByMimeType, + }); + + // Append the local file paths section so agents can Read them. 
+ const downloadFailures: { url: string; reason: string }[] = failures.map((f) => ({ + url: f.url, + reason: f.reason, + })); + for (const f of writeFailures) { + downloadFailures.push({ url: f.path, reason: `Local write failed: ${f.reason}` }); + } + const augmented = text + formatRuntimeImagePaths(writePaths, downloadFailures); + return augmented; } catch (error) { const message = error instanceof Error ? error.message : String(error); return `Error reading work item: ${message}`; diff --git a/src/gadgets/pm/core/writeRuntimeImages.ts b/src/gadgets/pm/core/writeRuntimeImages.ts new file mode 100644 index 00000000..64c75911 --- /dev/null +++ b/src/gadgets/pm/core/writeRuntimeImages.ts @@ -0,0 +1,129 @@ +/** + * Write runtime work-item images to `.cascade/context/images/` so the + * agent can read them with its file-read tool. + * + * Spec 016/2: this is the runtime sibling of the boot-path writer + * (`writeInjectionImages` in `src/backends/shared/contextFiles.ts`). + * Both produce the same on-disk filename convention: + * `.cascade/context/images/work-item--img-.` + * + * Extension is derived from the resolved MIME type. When MIME resolution + * failed (the `image/*` wildcard sentinel from spec 016/1 was never resolved + * because download response Content-Type was missing), the extension falls + * back to `.bin` and a warn log fires — never silently degrade. + */ + +import { mkdir, writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import type { ContextImage } from '../../../agents/contracts/index.js'; +import { logger } from '../../../utils/logging.js'; + +/** Default location where runtime images are written, relative to the repo root. */ +export const DEFAULT_CONTEXT_IMAGES_RELATIVE = '.cascade/context/images'; + +/** + * Map MIME types to file extensions. + * Mirrors the Plan 1 boot-path convention (image/jpeg → .jpg) so the boot- + * path and runtime-path produce identical artifacts. 
+ */ +const MIME_TO_EXTENSION: Record<string, string> = { + 'image/png': 'png', + 'image/jpeg': 'jpg', + 'image/jpg': 'jpg', + 'image/gif': 'gif', + 'image/webp': 'webp', + 'image/svg+xml': 'svg', + 'image/avif': 'avif', + 'image/apng': 'apng', + 'image/bmp': 'bmp', + 'image/tiff': 'tiff', + 'image/x-icon': 'ico', +}; + +/** + * Resolve a file extension for the given MIME type. Returns `bin` for the + * unresolved wildcard sentinel `image/*` AND for any unknown MIME, with a + * caller-provided warn log for the wildcard case. + */ +function resolveExtension(mimeType: string, workItemId: string): string { + const normalized = mimeType.toLowerCase().trim(); + const ext = MIME_TO_EXTENSION[normalized]; + if (ext) return ext; + if (normalized === 'image/*') { + logger.warn('writeRuntimeImages: unresolved MIME — falling back to .bin extension', { + workItemId, + mimeType, + }); + return 'bin'; + } + logger.warn('writeRuntimeImages: unknown MIME — falling back to .bin extension', { + workItemId, + mimeType, + }); + return 'bin'; +} + +export interface WriteRuntimeImagesArgs { + workItemId: string; + images: ContextImage[]; + /** Optional repo root; defaults to the current working directory. */ + repoDir?: string; +} + +export interface WriteRuntimeImagesResult { + /** Repo-relative paths of successfully-written image files. */ + paths: string[]; + /** Per-image write failures (if any). */ + failures: { path: string; reason: string }[]; +} + +/** + * Write each {@link ContextImage} to `.cascade/context/images/` with the + * stable naming convention `work-item-<workItemId>-img-<index>.<ext>`. Idempotent + * — running twice with the same workItemId overwrites the prior files + * (caller is responsible for re-running if it wants fresh bytes). + */ +export async function writeRuntimeImages( + args: WriteRuntimeImagesArgs, +): Promise<WriteRuntimeImagesResult> { + const { workItemId, images, repoDir } = args; + if (images.length === 0) return { paths: [], failures: [] }; + + const baseDir = repoDir + ?
join(repoDir, DEFAULT_CONTEXT_IMAGES_RELATIVE) + : DEFAULT_CONTEXT_IMAGES_RELATIVE; + + // Always mkdir -p; cheap, idempotent. + await mkdir(baseDir, { recursive: true }); + + const paths: string[] = []; + const failures: { path: string; reason: string }[] = []; + + for (let i = 0; i < images.length; i++) { + const img = images[i]; + const ext = resolveExtension(img.mimeType, workItemId); + const filename = `work-item-${workItemId}-img-${i}.${ext}`; + const absolutePath = join(baseDir, filename); + // Repo-relative path is what we return to the caller for inclusion in + // the agent's text response — the agent's Read tool consumes paths + // relative to its workspace root. + const relativePath = `${DEFAULT_CONTEXT_IMAGES_RELATIVE}/${filename}`; + + try { + const buffer = Buffer.from(img.base64Data, 'base64'); + await writeFile(absolutePath, buffer); + paths.push(relativePath); + } catch (err) { + const reason = err instanceof Error ? err.message : String(err); + logger.warn('writeRuntimeImages: failed to write image', { + workItemId, + index: i, + path: relativePath, + reason, + }); + failures.push({ path: relativePath, reason }); + } + } + + return { paths, failures }; +} diff --git a/src/integrations/README.md b/src/integrations/README.md index 4514a825..b037bf37 100644 --- a/src/integrations/README.md +++ b/src/integrations/README.md @@ -241,3 +241,57 @@ Different PM providers have different native concepts of "checklist". The `PMPro **Why inline markdown for Linear and JIRA?** Both providers support markdown checkboxes natively in their description editors but lack a dedicated lightweight checklist primitive — sub-issues and subtasks are full work items, which clutters boards when used for things like acceptance criteria or implementation steps. Inline markdown matches Trello's lightweight semantics without creating orphan issues. See [spec 008](../../docs/specs/008-inline-checklists.md) for full rationale.
The shared engine that parses, appends, toggles, and removes inline checklist items lives at `src/pm/_shared/inline-checklist.ts` and is consumed by both the Linear and JIRA adapters. + +--- + +## Image delivery contract + +Spec 016 hardened the work-item-image pipeline so user-pasted screenshots (Linear especially, but the rules generalize) reliably reach the agent worker as files on disk. New PM providers should follow this contract; do nothing extra and image delivery just works. + +### How the shared resolution path works + +1. **Extract URL refs** from the work-item description and each comment via `extractMarkdownImages()` (`src/pm/media.ts`). A `MediaReference` is produced for every `![alt](url)` match. The provider does NOT need its own extraction logic. +2. **Pre-download MIME inference** (a hint, not a verdict): `mimeTypeFromUrl()` derives a MIME from the URL pathname's extension. For URLs whose hostname is in `IMAGE_HOST_ALLOWLIST` (currently `uploads.linear.app`) AND whose pathname has no recognised extension, the inference returns `'image/*'` — a wildcard sentinel that survives the image-only filter. Add a host to the allowlist only if its `Content-Type` headers are reliable. +3. **Filter** via `filterImageMedia()` — drops anything that isn't an image MIME or the `image/*` wildcard. +4. **Download** via `downloadAndPrepareImages()` (`src/pm/download-and-prepare.ts`) — the shared per-provider dispatch loop. The download response's `Content-Type` header is the AUTHORITATIVE MIME — it resolves the wildcard and overrides any URL-extension-derived guess. +5. **Write to disk** at `.cascade/context/images/work-item-<workItemId>-img-<index>.<ext>` — extension is derived from the resolved MIME. + +### What providers should NOT do + +- Don't write your own MIME-detection logic. The shared resolution path covers all known PM provider URL shapes. +- Don't download images yourself in your adapter — let `downloadAndPrepareImages` do it.
+- Don't surface `getAttachments()` for inline-pasted images. That method is for formal Attachment records (Slack/GitHub link previews, integration cards) — distinct from inline pastes which live in description / comment markdown. + +### Diagnostic log line + +Every work-item fetch (boot path AND runtime read-work-item gadget) emits one INFO-level log line with the literal prefix `[image-pipeline] work-item-fetch summary` and the field schema: + +``` +{ + provider: 'linear' | 'trello' | 'jira' | 'unknown', + workItemId: string, + urlsDetected: number, // pre-filter count + urlsAfterFilter: number, // post-filterImageMedia count + urlsDownloaded: number, + urlsFailed: number, + urlsByMimeType: Record<string, number>, +} +``` + +Operators triaging a "no image delivered" report grep for the literal prefix in `cascade runs logs <run-id>` output. One line per fetch tells the whole story. + +### When a provider's host serves untrustworthy `Content-Type` + +If your provider's upload host returns `application/octet-stream` (or wrong) on the actual GET response, the download-time resolution can't recover. Two options: (a) don't add the host to `IMAGE_HOST_ALLOWLIST` — let the URL-extension path do its job; (b) if URLs are also extension-less, file an issue describing the host's behavior so we can layer in a per-host content-type override. Don't hard-code MIMEs in your adapter — keep MIME resolution shared. + +### Linear: GraphQL surface for inline images + +Spec 016/3 captured a fixture and pinned the rule for Linear specifically. The findings: + +- **`Issue.description` (markdown) is the canonical surface for inline-pasted images.** When a user pastes a screenshot into Linear's issue editor, Linear stores the upload at `https://uploads.linear.app/<uuid>` (often extension-less) and inserts standard markdown image syntax `![alt](url)` into the description.
The Linear adapter's `extractMarkdownImages(issue.description)` is the right call — it's what `getWorkItem` already does at `src/pm/linear/adapter.ts:61`. +- **Comment bodies follow the same convention.** Each `Comment.body` field is markdown; pasted screenshots show up as `![](https://uploads.linear.app/)` exactly like the description. `extractMarkdownImages(comment.body, 'comment')` covers them. +- **`Issue.attachments` is the WRONG surface for inline images.** The Linear GraphQL `Issue.attachments` connection holds formal Attachment records — link previews from Slack threads, GitHub PRs, Sentry alerts, and other integration cards. They have `url` fields but they are NOT user-pasted screenshots. The Linear adapter's `getAttachments(issueId)` (at `src/linear/client.ts:542`) correctly returns these as `LinearAttachment` for the dedicated attachment surface; do NOT extract images from this connection. +- **Regression net.** The captured fixture lives at `tests/fixtures/linear-issue-with-screenshot.json`. The unit test at `tests/unit/pm/linear/extraction-coverage.test.ts` loads the fixture and asserts every inline image is extracted — fails LOUDLY with a clear message if Linear ever changes payload shape in a way that loses inline images. +- **No new GraphQL surface to query.** As of spec 016/3 the Linear API exposes inline-pasted images only via the `description` and `Comment.body` markdown fields. There is no `descriptionData` rich-text JSON tree that would expose them differently, and no `attachments(includeInline: true)` filter. Future Linear API drift would surface as a fixture-test failure. + +See [spec 016](../../docs/specs/016-pm-image-delivery-reliability.md) for the full rationale and the live incident this contract closed. 
diff --git a/src/pm/download-and-prepare.ts b/src/pm/download-and-prepare.ts new file mode 100644 index 00000000..a1914ad6 --- /dev/null +++ b/src/pm/download-and-prepare.ts @@ -0,0 +1,96 @@ +/** + * Download-and-prepare helper for work-item images. + * + * Lifted from `src/agents/definitions/contextSteps.ts` (the inline loop in + * `fetchWorkItemStep`) into a shared module so spec 016/2's runtime gadget + * can call it too. Both call sites get the same per-provider dispatch, the + * same Promise.all, and the same per-failure WARN log. + * + * Spec 016/1. + */ + +import type { ContextImage } from '../agents/contracts/index.js'; +import { getPMProviderOrNull } from './index.js'; +import { MAX_IMAGES_PER_WORK_ITEM } from './media.js'; +import type { MediaReference } from './types.js'; + +export type LogWriter = ( + level: 'INFO' | 'WARN' | 'ERROR', + message: string, + meta?: Record, +) => void; + +export interface DownloadAndPrepareResult { + images: ContextImage[]; + failures: { url: string; reason: string }[]; +} + +/** + * Downloads each {@link MediaReference} via the appropriate per-provider + * client (jira / linear / trello) and prepares them as {@link ContextImage} + * entries with base64 bytes and the resolved Content-Type-derived MIME. + * + * Caps at {@link MAX_IMAGES_PER_WORK_ITEM}. + * + * Failures are returned as a parallel array, never thrown — so the caller + * can always surface a stable success/failure summary in its diagnostic log. 
+ */ +export async function downloadAndPrepareImages( + workItemId: string, + media: MediaReference[], + logWriter: LogWriter, +): Promise<DownloadAndPrepareResult> { + if (media.length === 0) return { images: [], failures: [] }; + + const provider = getPMProviderOrNull(); + const limited = media.slice(0, MAX_IMAGES_PER_WORK_ITEM); + + const { jiraClient } = await import('../jira/client.js'); + const { trelloClient } = await import('../trello/client.js'); + const { linearClient } = await import('../linear/client.js'); + + const failures: { url: string; reason: string }[] = []; + + const results = await Promise.all( + limited.map(async (ref) => { + try { + let downloaded: { buffer: Buffer; mimeType: string } | null = null; + if (provider?.type === 'jira') { + downloaded = await jiraClient.downloadAttachment(ref.url); + } else if (provider?.type === 'linear') { + downloaded = await linearClient.downloadAttachment(ref.url); + } else { + downloaded = await trelloClient.downloadAttachment(ref.url); + } + if (!downloaded) { + logWriter('WARN', 'downloadAndPrepareImages: download returned null', { + workItemId, + url: ref.url.split('?')[0], + }); + failures.push({ url: ref.url, reason: 'download returned null' }); + return null; + } + return { + base64Data: downloaded.buffer.toString('base64'), + mimeType: downloaded.mimeType, + altText: ref.altText, + }; + } catch (err) { + const reason = err instanceof Error ? 
err.message : String(err); + logWriter('WARN', 'downloadAndPrepareImages: failed to download image', { + workItemId, + url: ref.url.split('?')[0], + error: reason, + }); + failures.push({ url: ref.url, reason }); + return null; + } + }), + ); + + const images: ContextImage[] = []; + for (const r of results) { + if (r !== null) images.push(r); + } + return { images, failures }; +} diff --git a/src/pm/media.ts b/src/pm/media.ts index b6e33f4c..1eceab65 100644 --- a/src/pm/media.ts +++ b/src/pm/media.ts @@ -41,10 +41,17 @@ const IMAGE_MIME_TYPES = new Set([ /** * Returns true when the supplied MIME type represents a common image format. * + * Also accepts the `'image/*'` wildcard sentinel — used by spec 016/1 for + * extension-less PM-provider URLs whose MIME is resolved at download-time + * via the response's Content-Type header. The wildcard never reaches disk; + * `downloadMedia` resolves it to a concrete MIME before the bytes are written. + * * @param mime - The MIME type string to test (e.g. `'image/png'`). */ export function isImageMimeType(mime: string): boolean { - return IMAGE_MIME_TYPES.has(mime.toLowerCase().trim()); + const normalized = mime.toLowerCase().trim(); + if (normalized === 'image/*') return true; + return IMAGE_MIME_TYPES.has(normalized); } /** @@ -77,19 +84,44 @@ const EXTENSION_MIME_MAP: Record = { webp: 'image/webp', }; +/** + * Trusted PM-provider upload hosts whose extension-less URLs we treat as + * candidate images and resolve at download-time via the response's + * Content-Type header. Spec 016/1. + * + * Linear's user-pasted-screenshot URLs (`https://uploads.linear.app/`) + * have no file extension in the pathname; before this allowlist they fell + * through to `'application/octet-stream'` and were silently filtered out by + * `filterImageMedia`. To add a new trusted host: append the bare hostname + * here. 
Do NOT add hosts whose Content-Type headers are unreliable — the + * wildcard sentinel skips the URL-extension verdict and trusts the response. + */ +const IMAGE_HOST_ALLOWLIST: ReadonlySet = new Set(['uploads.linear.app']); + /** * Infers a MIME type from the file extension in a URL. - * Returns `'application/octet-stream'` when the extension is unknown. + * + * Returns `'application/octet-stream'` when the extension is unknown — except + * for hosts in {@link IMAGE_HOST_ALLOWLIST}, where extension-less URLs return + * the `'image/*'` wildcard sentinel so they survive the pre-download image + * filter. Spec 016/1. * * @param url - The URL to examine. */ function mimeTypeFromUrl(url: string): string { try { - const pathname = new URL(url).pathname; + const parsed = new URL(url); + const pathname = parsed.pathname; const ext = pathname.split('.').pop()?.toLowerCase() ?? ''; - return EXTENSION_MIME_MAP[ext] ?? 'application/octet-stream'; + const fromExt = EXTENSION_MIME_MAP[ext]; + if (fromExt) return fromExt; + // Spec 016/1: trusted PM upload hosts return `image/*` for extension-less + // URLs so the download path can resolve the real MIME from the response. + if (IMAGE_HOST_ALLOWLIST.has(parsed.hostname)) return 'image/*'; + return 'application/octet-stream'; } catch { - // Relative URL or malformed URL — try a simple extension check + // Relative URL or malformed URL — try a simple extension check; no host, + // so cannot apply the allowlist. const ext = url.split('?')[0].split('.').pop()?.toLowerCase() ?? ''; return EXTENSION_MIME_MAP[ext] ?? 'application/octet-stream'; } diff --git a/tests/fixtures/linear-issue-with-screenshot.json b/tests/fixtures/linear-issue-with-screenshot.json new file mode 100644 index 00000000..6523ef59 --- /dev/null +++ b/tests/fixtures/linear-issue-with-screenshot.json @@ -0,0 +1,97 @@ +{ + "_comment": "Reconstructed Linear GraphQL Issue payload for spec 016/3 regression coverage. 
Models a real issue with multiple inline-pasted screenshots (description + comment) plus formal Attachment records (link previews) that must NOT be picked up as inline images. Sanitized: synthetic team/user/issue IDs, no leaky free-form text. Test fixture only — never imported by production code.", + "_purpose": "Pinned by tests/unit/pm/linear/extraction-coverage.test.ts. If Linear changes the Issue payload shape in a way that loses inline images, that test fails loudly.", + "issue": { + "id": "issue-test-fixture-uuid-0001", + "identifier": "MNG-FIXTURE", + "title": "Bug from screenshot fixture", + "url": "https://linear.app/example/issue/MNG-FIXTURE/bug-from-screenshot-fixture", + "createdAt": "2026-04-26T00:00:00.000Z", + "updatedAt": "2026-04-26T00:00:00.000Z", + "description": "## Repro\n\nThe Tasks Hub mobile layout breaks when the title wraps to a third line.\n\n![](https://uploads.linear.app/abc-123-def-456-extension-less-uuid)\n\n## Expected\n\nLayout should match the design in this annotated screenshot:\n\n![Annotated mockup](https://uploads.linear.app/xyz-789-with-alt-text/Mockup.png)\n\nReference image hosted externally:\n\n![External logo](https://example.com/logo.svg)\n\nA non-image link (should NOT be picked up as media):\n\n[See related ticket](https://linear.app/example/issue/MNG-100)", + "state": { + "id": "state-todo-uuid-0001", + "name": "Todo", + "type": "unstarted" + }, + "team": { + "id": "team-uuid-0001", + "key": "MNG", + "name": "Mongrel" + }, + "labelIds": ["label-bug-uuid-0001"], + "labels": { + "nodes": [ + { + "id": "label-bug-uuid-0001", + "name": "bug", + "color": "#ff0000" + } + ] + }, + "attachments": { + "_comment": "Linear's Issue.attachments connection holds formal Attachment records — link previews from Slack/GitHub/Sentry/etc. — NOT inline-pasted screenshots. 
The extraction-coverage test asserts these are NOT mistaken for inline images.", + "nodes": [ + { + "id": "attachment-slack-uuid-0001", + "title": "Discussion in #engineering", + "url": "https://acme.slack.com/archives/C123/p1700000000000000", + "subtitle": "Slack thread", + "metadata": { + "type": "slack-thread" + }, + "createdAt": "2026-04-26T00:00:00.000Z", + "updatedAt": "2026-04-26T00:00:00.000Z" + }, + { + "id": "attachment-github-uuid-0001", + "title": "Related PR #1199", + "url": "https://github.com/example/repo/pull/1199", + "subtitle": "GitHub PR", + "metadata": { + "type": "github-pr", + "prNumber": 1199 + }, + "createdAt": "2026-04-26T00:00:00.000Z", + "updatedAt": "2026-04-26T00:00:00.000Z" + }, + { + "id": "attachment-sentry-uuid-0001", + "title": "Sentry issue PROJ-1234", + "url": "https://sentry.io/organizations/example/issues/1234/", + "subtitle": "Sentry alert", + "metadata": { + "type": "sentry-issue", + "level": "error" + }, + "createdAt": "2026-04-26T00:00:00.000Z", + "updatedAt": "2026-04-26T00:00:00.000Z" + } + ] + }, + "comments": { + "nodes": [ + { + "id": "comment-uuid-0001", + "body": "Repro happens on iOS Safari 17. Here's a screenshot from my phone:\n\n![](https://uploads.linear.app/comment-screenshot-uuid)", + "createdAt": "2026-04-26T00:01:00.000Z", + "user": { + "id": "user-uuid-0001", + "name": "Test User", + "displayName": "Test User" + } + }, + { + "id": "comment-uuid-0002", + "body": "I confirmed the same on Android Chrome. 
No screenshot — text-only comment.", + "createdAt": "2026-04-26T00:02:00.000Z", + "user": { + "id": "user-uuid-0002", + "name": "Another User", + "displayName": "Another User" + } + } + ] + } + } +} diff --git a/tests/integration/gadgets/runtime-image-delivery.test.ts b/tests/integration/gadgets/runtime-image-delivery.test.ts new file mode 100644 index 00000000..c33562fd --- /dev/null +++ b/tests/integration/gadgets/runtime-image-delivery.test.ts @@ -0,0 +1,202 @@ +/** + * Module-integration test for spec 016/2. + * + * Wires the REAL `readWorkItem` gadget + REAL `downloadAndPrepareImages` + * (Plan 1's helper) + REAL `writeRuntimeImages` (Plan 2's writer), plus the + * real `extractMarkdownImages`/`filterImageMedia`/`mimeTypeFromUrl` chain + * inside `readWorkItemWithMedia`. Mocks: filesystem (we don't write to a + * real disk during tests) and the per-provider download client (we control + * the response Content-Type). + * + * Pins the mid-run pickup contract: an image added between two `readWorkItem` + * calls is delivered on the second call as a file path in the returned text. 
+ */ + +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +const { mockMkdir, mockWriteFile, mockLinearDownload, mockTrelloDownload, mockJiraDownload } = + vi.hoisted(() => ({ + mockMkdir: vi.fn().mockResolvedValue(undefined), + mockWriteFile: vi.fn().mockResolvedValue(undefined), + mockLinearDownload: vi.fn(), + mockTrelloDownload: vi.fn(), + mockJiraDownload: vi.fn(), + })); + +vi.mock('node:fs/promises', () => ({ + mkdir: mockMkdir, + writeFile: mockWriteFile, +})); + +vi.mock('../../../src/linear/client.js', () => ({ + linearClient: { downloadAttachment: mockLinearDownload }, +})); +vi.mock('../../../src/trello/client.js', () => ({ + trelloClient: { downloadAttachment: mockTrelloDownload }, +})); +vi.mock('../../../src/jira/client.js', () => ({ + jiraClient: { downloadAttachment: mockJiraDownload }, +})); + +vi.mock('../../../src/utils/logging.js', () => ({ + logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() }, +})); + +import { createMockPMProvider } from '../../helpers/mockPMProvider.js'; + +const mockProvider = createMockPMProvider(); + +vi.mock('../../../src/pm/index.js', async () => { + const real = await vi.importActual( + '../../../src/pm/index.js', + ); + return { + ...real, + getPMProvider: () => mockProvider, + getPMProviderOrNull: () => mockProvider, + }; +}); + +import { readWorkItem } from '../../../src/gadgets/pm/core/readWorkItem.js'; + +describe('spec 016/2 — runtime image delivery (module-integration)', () => { + beforeEach(() => { + mockMkdir.mockReset(); + mockMkdir.mockResolvedValue(undefined); + mockWriteFile.mockReset(); + mockWriteFile.mockResolvedValue(undefined); + mockLinearDownload.mockReset(); + mockTrelloDownload.mockReset(); + mockJiraDownload.mockReset(); + mockProvider.getWorkItem.mockReset(); + mockProvider.getChecklists.mockReset(); + mockProvider.getAttachments.mockReset(); + mockProvider.getWorkItemComments.mockReset(); + mockProvider.getChecklists.mockResolvedValue([]); + 
mockProvider.getAttachments.mockResolvedValue([]); + mockProvider.getWorkItemComments.mockResolvedValue([]); + (mockProvider as unknown as { type: string }).type = 'linear'; + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('extension-less Linear URL → on-disk file path appears in text', async () => { + mockProvider.getWorkItem.mockResolvedValue({ + id: 'MNG-357', + title: 'Bug from screenshot', + url: 'https://linear.app/x/MNG-357', + description: '![](https://uploads.linear.app/abc-123)', + labels: [], + inlineMedia: [ + { + url: 'https://uploads.linear.app/abc-123', + mimeType: 'image/*', + altText: undefined, + source: 'description', + }, + ], + }); + mockLinearDownload.mockResolvedValue({ + buffer: Buffer.from('PNG-bytes'), + mimeType: 'image/png', + }); + + const text = await readWorkItem('MNG-357', false); + + // On-disk path with PNG extension (resolved from Content-Type). + expect(text).toContain('.cascade/context/images/work-item-MNG-357-img-0.png'); + // File was actually written. + expect(mockWriteFile).toHaveBeenCalledTimes(1); + }); + + it('mid-run pickup: image added after first call is delivered on second call', async () => { + // First call — no images. + mockProvider.getWorkItem.mockResolvedValueOnce({ + id: 'MNG-1', + title: 'Bug', + url: 'https://linear.app/x/MNG-1', + description: 'Empty description', + labels: [], + inlineMedia: [], + }); + + const firstText = await readWorkItem('MNG-1', false); + expect(firstText).not.toContain('.cascade/context/images/'); + expect(mockWriteFile).not.toHaveBeenCalled(); + + // Second call — teammate has now uploaded a screenshot. 
+ mockProvider.getWorkItem.mockResolvedValueOnce({ + id: 'MNG-1', + title: 'Bug', + url: 'https://linear.app/x/MNG-1', + description: '![](https://uploads.linear.app/new-screenshot)', + labels: [], + inlineMedia: [ + { + url: 'https://uploads.linear.app/new-screenshot', + mimeType: 'image/*', + source: 'description', + }, + ], + }); + mockLinearDownload.mockResolvedValue({ + buffer: Buffer.from('NEW'), + mimeType: 'image/png', + }); + + const secondText = await readWorkItem('MNG-1', false); + expect(secondText).toContain('.cascade/context/images/work-item-MNG-1-img-0.png'); + expect(mockWriteFile).toHaveBeenCalledTimes(1); + }); + + it('Trello extensioned URL regression: still delivered on disk', async () => { + (mockProvider as unknown as { type: string }).type = 'trello'; + mockProvider.getWorkItem.mockResolvedValue({ + id: 'card-1', + title: 'Card', + url: 'https://trello.com/c/card-1', + description: '![](https://trello.com/foo.png)', + labels: [], + inlineMedia: [ + { url: 'https://trello.com/foo.png', mimeType: 'image/png', source: 'description' }, + ], + }); + mockTrelloDownload.mockResolvedValue({ + buffer: Buffer.from('TRELLO'), + mimeType: 'image/png', + }); + + const text = await readWorkItem('card-1', false); + expect(text).toContain('.cascade/context/images/work-item-card-1-img-0.png'); + expect(mockTrelloDownload).toHaveBeenCalledWith('https://trello.com/foo.png'); + }); + + it('failed download: failure surfaced in text, no orphan path', async () => { + mockProvider.getWorkItem.mockResolvedValue({ + id: 'MNG-2', + title: 'Bug', + url: 'https://linear.app/x/MNG-2', + description: '![](https://uploads.linear.app/will-fail)', + labels: [], + inlineMedia: [ + { + url: 'https://uploads.linear.app/will-fail', + mimeType: 'image/*', + source: 'description', + }, + ], + }); + mockLinearDownload.mockRejectedValue(new Error('upstream 500')); + + const text = await readWorkItem('MNG-2', false); + // No on-disk path mentioned. 
+ expect(text).not.toContain('.cascade/context/images/work-item'); + // Failure visible. + expect(text).toContain('Failed Image Downloads'); + expect(text).toContain('upstream 500'); + // No file was written. + expect(mockWriteFile).not.toHaveBeenCalled(); + }); +}); diff --git a/tests/integration/pm/image-pipeline.test.ts b/tests/integration/pm/image-pipeline.test.ts new file mode 100644 index 00000000..32ead02f --- /dev/null +++ b/tests/integration/pm/image-pipeline.test.ts @@ -0,0 +1,157 @@ +/** + * Module-integration test for spec 016/1. + * + * Wires the REAL `mimeTypeFromUrl` + REAL `isImageMimeType` + REAL + * `filterImageMedia` + REAL `extractMarkdownImages` + REAL + * `downloadAndPrepareImages` (via dynamic import in `fetchWorkItemStep`), + * mocking only the per-provider download client (so we control the + * Content-Type response without needing a real Linear endpoint) and the + * upstream `readWorkItemWithMedia` (so we don't need a real PM provider). + * + * Pins the end-to-end MNG-357 reproduction: extension-less Linear URL flows + * through extract → filter → download → ContextInjection.images without + * being dropped. Pre-spec-016 behavior would fail this test because + * `filterImageMedia` would drop the `application/octet-stream` ref. 
+ */ + +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const { mockLinearDownload, mockTrelloDownload, mockJiraDownload } = vi.hoisted(() => ({ + mockLinearDownload: vi.fn(), + mockTrelloDownload: vi.fn(), + mockJiraDownload: vi.fn(), +})); + +vi.mock('../../../src/linear/client.js', () => ({ + linearClient: { downloadAttachment: mockLinearDownload }, +})); +vi.mock('../../../src/trello/client.js', () => ({ + trelloClient: { downloadAttachment: mockTrelloDownload }, +})); +vi.mock('../../../src/jira/client.js', () => ({ + jiraClient: { downloadAttachment: mockJiraDownload }, +})); + +vi.mock('../../../src/gadgets/pm/core/readWorkItem.js', () => ({ + readWorkItemWithMedia: vi.fn(), + readWorkItem: vi.fn(), +})); + +vi.mock('../../../src/pm/index.js', async () => { + // Need to keep MAX_IMAGES_PER_WORK_ITEM real so the cap matches production + const real = await vi.importActual( + '../../../src/pm/index.js', + ); + return { + ...real, + getPMProviderOrNull: vi.fn(), + }; +}); + +import { fetchWorkItemStep } from '../../../src/agents/definitions/contextSteps.js'; +import { readWorkItemWithMedia } from '../../../src/gadgets/pm/core/readWorkItem.js'; +import { getPMProviderOrNull } from '../../../src/pm/index.js'; +import { extractMarkdownImages, filterImageMedia } from '../../../src/pm/media.js'; +import type { AgentInput } from '../../../src/types/index.js'; + +const mockReadWorkItemWithMedia = vi.mocked(readWorkItemWithMedia); +const mockGetPMProviderOrNull = vi.mocked(getPMProviderOrNull); + +describe('spec 016/1 — boot-path image pipeline (module-integration)', () => { + beforeEach(() => { + mockLinearDownload.mockReset(); + mockTrelloDownload.mockReset(); + mockJiraDownload.mockReset(); + mockReadWorkItemWithMedia.mockReset(); + mockGetPMProviderOrNull.mockReset(); + }); + + function makeParams(input: Partial) { + return { + input: input as AgentInput, + repoDir: '/tmp/repo', + contextFiles: [], + logWriter: vi.fn(), + }; + } + + it('MNG-357 
reproduction: extension-less Linear URL extracted via real path lands as image with resolved MIME', async () => { + // Step 1: real extraction — pin that Linear-shaped URLs survive the filter via image/* sentinel. + const description = '![](https://uploads.linear.app/abc-123-def-456)'; + const refs = extractMarkdownImages(description); + expect(refs).toHaveLength(1); + expect(refs[0].mimeType).toBe('image/*'); + const filtered = filterImageMedia(refs); + expect(filtered).toHaveLength(1); // wildcard survived + + // Step 2: real fetchWorkItemStep flow with that ref + a stubbed download. + mockReadWorkItemWithMedia.mockResolvedValue({ + text: '# MNG-357\n\n![](https://uploads.linear.app/abc-123-def-456)', + media: filtered, // pass the real-extracted refs + urlsDetected: filtered.length, + }); + mockGetPMProviderOrNull.mockReturnValue({ type: 'linear' } as never); + mockLinearDownload.mockResolvedValue({ + buffer: Buffer.from('PNG-bytes-here'), + mimeType: 'image/png', // server-side Content-Type — this is the ground truth + }); + + const result = await fetchWorkItemStep(makeParams({ workItemId: 'MNG-357' })); + + // Image was delivered; MIME resolved to the Content-Type, not the wildcard. 
+ expect(result).toHaveLength(1); + expect(result[0].images).toHaveLength(1); + expect(result[0].images?.[0].mimeType).toBe('image/png'); + expect(result[0].images?.[0].base64Data).toBe(Buffer.from('PNG-bytes-here').toString('base64')); + }); + + it('emits the diagnostic line `[image-pipeline] work-item-fetch summary` with non-zero downloads', async () => { + const refs = extractMarkdownImages('![](https://uploads.linear.app/abc-123)'); + mockReadWorkItemWithMedia.mockResolvedValue({ + text: '# x', + media: refs, + urlsDetected: 1, + }); + mockGetPMProviderOrNull.mockReturnValue({ type: 'linear' } as never); + mockLinearDownload.mockResolvedValue({ + buffer: Buffer.from('x'), + mimeType: 'image/png', + }); + + const params = makeParams({ workItemId: 'MNG-357' }); + await fetchWorkItemStep(params); + + expect(params.logWriter).toHaveBeenCalledWith( + 'INFO', + '[image-pipeline] work-item-fetch summary', + expect.objectContaining({ + provider: 'linear', + workItemId: 'MNG-357', + urlsDetected: 1, + urlsAfterFilter: 1, + urlsDownloaded: 1, + urlsFailed: 0, + }), + ); + }); + + it('Trello PNG URL regression: extensioned URL still resolves and downloads', async () => { + const refs = extractMarkdownImages('![](https://trello.com/foo.png)'); + expect(refs[0].mimeType).toBe('image/png'); // extension-resolved + + mockReadWorkItemWithMedia.mockResolvedValue({ + text: '# t', + media: refs, + urlsDetected: refs.length, + }); + mockGetPMProviderOrNull.mockReturnValue({ type: 'trello' } as never); + mockTrelloDownload.mockResolvedValue({ + buffer: Buffer.from('y'), + mimeType: 'image/png', + }); + + const result = await fetchWorkItemStep(makeParams({ workItemId: 'card-1' })); + expect(result[0].images).toHaveLength(1); + expect(result[0].images?.[0].mimeType).toBe('image/png'); + }); +}); diff --git a/tests/unit/agents/definitions/contextSteps.test.ts b/tests/unit/agents/definitions/contextSteps.test.ts index 78ab8217..f119c062 100644 --- 
a/tests/unit/agents/definitions/contextSteps.test.ts +++ b/tests/unit/agents/definitions/contextSteps.test.ts @@ -231,6 +231,7 @@ describe('fetchWorkItemStep', () => { mockReadWorkItemWithMedia.mockResolvedValue({ text: '# Card Title\n\nDescription', media: [], + urlsDetected: 0, }); mockGetPMProviderOrNull.mockReturnValue({ type: 'trello' } as never); @@ -353,11 +354,12 @@ describe('fetchWorkItemStep', () => { expect(result[0].images).toHaveLength(1); expect(result[0].images?.[0].base64Data).toBe(Buffer.from('ok').toString('base64')); - // WARN for the null return, URL sanitized (no query params) + // WARN for the null return, URL sanitized (no query params); spec 016/1 + // renamed the helper so the message prefix is now `downloadAndPrepareImages`. expect(params.logWriter).toHaveBeenCalledWith( 'WARN', - 'fetchWorkItemStep: image download returned null', - { url: 'https://trello.com/fail.png' }, + 'downloadAndPrepareImages: download returned null', + { workItemId: 'card-1', url: 'https://trello.com/fail.png' }, ); }); @@ -381,12 +383,15 @@ describe('fetchWorkItemStep', () => { expect(result[0].images).toBeUndefined(); expect(params.logWriter).toHaveBeenCalledWith( 'WARN', - 'fetchWorkItemStep: failed to download image', - { url: 'https://trello.com/err.png', error: 'network failure' }, + 'downloadAndPrepareImages: failed to download image', + { workItemId: 'card-1', url: 'https://trello.com/err.png', error: 'network failure' }, ); }); - it('emits INFO logs before and after download with correct counts', async () => { + // Spec 016/1 AC#5: single grep-stable INFO line per work-item-fetch with + // the `[image-pipeline] work-item-fetch summary` literal prefix and a + // stable field schema. Replaces the prior pre/post WARN/INFO log pair. 
+ it("emits the '[image-pipeline] work-item-fetch summary' INFO line with correct counts", async () => { mockReadWorkItemWithMedia.mockResolvedValue({ text: '# Card', media: [ @@ -394,6 +399,7 @@ describe('fetchWorkItemStep', () => { { url: 'https://trello.com/b.png', mimeType: 'image/png', source: 'description' }, { url: 'https://trello.com/c.png', mimeType: 'image/png', source: 'description' }, ], + urlsDetected: 3, }); mockGetPMProviderOrNull.mockReturnValue({ type: 'trello' } as never); mockTrelloDownload @@ -406,13 +412,71 @@ describe('fetchWorkItemStep', () => { expect(params.logWriter).toHaveBeenCalledWith( 'INFO', - 'fetchWorkItemStep: downloading work item images', - { workItemId: 'card-1', count: 3 }, + '[image-pipeline] work-item-fetch summary', + expect.objectContaining({ + provider: 'trello', + workItemId: 'card-1', + urlsDetected: 3, + urlsAfterFilter: 3, + urlsDownloaded: 1, + urlsFailed: 2, + urlsByMimeType: { 'image/png': 3 }, + }), ); + }); + + it('emits the diagnostic line even when no images are present (urlsDetected: 0)', async () => { + mockReadWorkItemWithMedia.mockResolvedValue({ text: '# Card', media: [], urlsDetected: 0 }); + mockGetPMProviderOrNull.mockReturnValue({ type: 'trello' } as never); + + const params = makeParams({ workItemId: 'card-1' }); + await fetchWorkItemStep(params); + + expect(params.logWriter).toHaveBeenCalledWith( + 'INFO', + '[image-pipeline] work-item-fetch summary', + expect.objectContaining({ + provider: 'trello', + workItemId: 'card-1', + urlsDetected: 0, + urlsAfterFilter: 0, + urlsDownloaded: 0, + urlsFailed: 0, + urlsByMimeType: {}, + }), + ); + }); + + it('reports MIME distribution covering both extensioned and image/* wildcard refs', async () => { + mockReadWorkItemWithMedia.mockResolvedValue({ + text: '# Card', + media: [ + { url: 'https://trello.com/a.png', mimeType: 'image/png', source: 'description' }, + { + url: 'https://uploads.linear.app/abc', + mimeType: 'image/*', + source: 'description', + }, + ], + 
urlsDetected: 2, + }); + mockGetPMProviderOrNull.mockReturnValue({ type: 'linear' } as never); + mockLinearDownload.mockResolvedValue({ buffer: Buffer.from('x'), mimeType: 'image/png' }); + + const params = makeParams({ workItemId: 'MNG-357' }); + await fetchWorkItemStep(params); + expect(params.logWriter).toHaveBeenCalledWith( 'INFO', - 'fetchWorkItemStep: image download complete', - { workItemId: 'card-1', attempted: 3, downloaded: 1, skipped: 2 }, + '[image-pipeline] work-item-fetch summary', + expect.objectContaining({ + provider: 'linear', + workItemId: 'MNG-357', + urlsDetected: 2, + urlsAfterFilter: 2, + urlsDownloaded: 2, + urlsByMimeType: { 'image/png': 1, 'image/*': 1 }, + }), ); }); @@ -422,7 +486,11 @@ describe('fetchWorkItemStep', () => { mimeType: 'image/png', source: 'description' as const, })); - mockReadWorkItemWithMedia.mockResolvedValue({ text: '# Card', media: manyMedia }); + mockReadWorkItemWithMedia.mockResolvedValue({ + text: '# Card', + media: manyMedia, + urlsDetected: 35, + }); mockGetPMProviderOrNull.mockReturnValue({ type: 'trello' } as never); mockTrelloDownload.mockResolvedValue({ buffer: Buffer.from('data'), mimeType: 'image/png' }); diff --git a/tests/unit/gadgets/pm/core/readWorkItem.test.ts b/tests/unit/gadgets/pm/core/readWorkItem.test.ts index eb88cfaf..4767c99f 100644 --- a/tests/unit/gadgets/pm/core/readWorkItem.test.ts +++ b/tests/unit/gadgets/pm/core/readWorkItem.test.ts @@ -1,12 +1,30 @@ -import { describe, expect, it, vi } from 'vitest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { createMockPMProvider } from '../../../../helpers/mockPMProvider.js'; const mockProvider = createMockPMProvider(); +const { mockDownloadAndPrepareImages, mockWriteRuntimeImages } = vi.hoisted(() => ({ + mockDownloadAndPrepareImages: vi.fn().mockResolvedValue({ images: [], failures: [] }), + mockWriteRuntimeImages: vi.fn().mockResolvedValue({ paths: [], failures: [] }), +})); + 
vi.mock('../../../../../src/pm/index.js', () => ({ getPMProvider: vi.fn(() => mockProvider), filterImageMedia: vi.fn((refs) => refs.filter((r) => r.mimeType.startsWith('image/'))), + getPMProviderOrNull: vi.fn(() => mockProvider), +})); + +vi.mock('../../../../../src/pm/download-and-prepare.js', () => ({ + downloadAndPrepareImages: mockDownloadAndPrepareImages, +})); + +vi.mock('../../../../../src/gadgets/pm/core/writeRuntimeImages.js', () => ({ + writeRuntimeImages: mockWriteRuntimeImages, +})); + +vi.mock('../../../../../src/utils/logging.js', () => ({ + logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() }, })); import { @@ -207,6 +225,166 @@ describe('readWorkItem', () => { // Second comment appears first (reversed order) expect(secondPos).toBeLessThan(firstPos); }); + + // ===================================================================== + // Spec 016/2: runtime gadget downloads + writes images to disk + // ===================================================================== + describe('spec 016/2 — runtime image delivery', () => { + beforeEach(() => { + mockDownloadAndPrepareImages.mockReset(); + mockDownloadAndPrepareImages.mockResolvedValue({ images: [], failures: [] }); + mockWriteRuntimeImages.mockReset(); + mockWriteRuntimeImages.mockResolvedValue({ paths: [], failures: [] }); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('when work item has images, downloads + writes them and inlines paths into text', async () => { + mockProvider.getWorkItem.mockResolvedValue({ + ...baseItem, + inlineMedia: [ + { + url: 'https://uploads.linear.app/abc', + mimeType: 'image/*', + altText: 'Screenshot.png', + source: 'description', + }, + ], + }); + mockProvider.getChecklists.mockResolvedValue([]); + mockProvider.getAttachments.mockResolvedValue([]); + mockDownloadAndPrepareImages.mockResolvedValue({ + images: [ + { + base64Data: Buffer.from('PNG').toString('base64'), + mimeType: 'image/png', + altText: 'Screenshot.png', + }, + ], 
+ failures: [], + }); + mockWriteRuntimeImages.mockResolvedValue({ + paths: ['.cascade/context/images/work-item-item1-img-0.png'], + failures: [], + }); + + const result = await readWorkItem('item1', false); + + // Text should mention the on-disk path the agent can Read. + expect(result).toContain('.cascade/context/images/work-item-item1-img-0.png'); + expect(mockDownloadAndPrepareImages).toHaveBeenCalledTimes(1); + expect(mockWriteRuntimeImages).toHaveBeenCalledTimes(1); + expect(mockWriteRuntimeImages).toHaveBeenCalledWith( + expect.objectContaining({ + workItemId: 'item1', + images: expect.arrayContaining([expect.objectContaining({ mimeType: 'image/png' })]), + }), + ); + }); + + it('when work item has no images, returns text unchanged (no disk write)', async () => { + mockProvider.getWorkItem.mockResolvedValue(baseItem); + mockProvider.getChecklists.mockResolvedValue([]); + mockProvider.getAttachments.mockResolvedValue([]); + + const result = await readWorkItem('item1', false); + + expect(result).toContain('# Test Work Item'); + expect(mockWriteRuntimeImages).not.toHaveBeenCalled(); + }); + + it('emits the diagnostic log line at runtime path with same prefix as boot path', async () => { + const { logger } = await import('../../../../../src/utils/logging.js'); + vi.mocked(logger.info).mockClear(); + + mockProvider.getWorkItem.mockResolvedValue({ + ...baseItem, + inlineMedia: [{ url: 'https://x/a.png', mimeType: 'image/png', source: 'description' }], + }); + mockProvider.getChecklists.mockResolvedValue([]); + mockProvider.getAttachments.mockResolvedValue([]); + mockDownloadAndPrepareImages.mockResolvedValue({ + images: [{ base64Data: 'aGk=', mimeType: 'image/png', altText: undefined }], + failures: [], + }); + mockWriteRuntimeImages.mockResolvedValue({ + paths: ['.cascade/context/images/work-item-item1-img-0.png'], + failures: [], + }); + + await readWorkItem('item1', false); + + expect(logger.info).toHaveBeenCalledWith( + '[image-pipeline] work-item-fetch 
summary', + expect.objectContaining({ + workItemId: 'item1', + urlsDetected: expect.any(Number), + urlsAfterFilter: expect.any(Number), + urlsDownloaded: 1, + urlsFailed: 0, + }), + ); + }); + + it('when download fails, the failure is recorded in the diagnostic log; no path appears in text', async () => { + const { logger } = await import('../../../../../src/utils/logging.js'); + vi.mocked(logger.info).mockClear(); + + mockProvider.getWorkItem.mockResolvedValue({ + ...baseItem, + inlineMedia: [{ url: 'https://x/fail.png', mimeType: 'image/png', source: 'description' }], + }); + mockProvider.getChecklists.mockResolvedValue([]); + mockProvider.getAttachments.mockResolvedValue([]); + mockDownloadAndPrepareImages.mockResolvedValue({ + images: [], + failures: [{ url: 'https://x/fail.png', reason: 'network error' }], + }); + + const result = await readWorkItem('item1', false); + + // No on-disk path included. + expect(result).not.toContain('.cascade/context/images/work-item'); + // Failure was visible in the diagnostic log line. 
+ expect(logger.info).toHaveBeenCalledWith( + '[image-pipeline] work-item-fetch summary', + expect.objectContaining({ urlsDownloaded: 0, urlsFailed: 1 }), + ); + }); + + it('text shape preserved: existing sections (Description, Comments) remain', async () => { + mockProvider.getWorkItem.mockResolvedValue({ + ...baseItem, + inlineMedia: [{ url: 'https://x/a.png', mimeType: 'image/png', source: 'description' }], + }); + mockProvider.getChecklists.mockResolvedValue([]); + mockProvider.getAttachments.mockResolvedValue([]); + mockProvider.getWorkItemComments.mockResolvedValue([ + { + id: 'c1', + author: { name: 'A', id: 'u', username: 'a' }, + date: '2024-01-01T00:00:00Z', + text: 'a comment', + }, + ]); + mockDownloadAndPrepareImages.mockResolvedValue({ + images: [{ base64Data: 'aGk=', mimeType: 'image/png', altText: undefined }], + failures: [], + }); + mockWriteRuntimeImages.mockResolvedValue({ + paths: ['.cascade/context/images/work-item-item1-img-0.png'], + failures: [], + }); + + const result = await readWorkItem('item1', true); + + expect(result).toContain('## Description'); + expect(result).toContain('a comment'); + expect(result).toContain('.cascade/context/images/work-item-item1-img-0.png'); + }); + }); }); describe('readWorkItemWithMedia', () => { diff --git a/tests/unit/gadgets/pm/core/writeRuntimeImages.test.ts b/tests/unit/gadgets/pm/core/writeRuntimeImages.test.ts new file mode 100644 index 00000000..b90544b6 --- /dev/null +++ b/tests/unit/gadgets/pm/core/writeRuntimeImages.test.ts @@ -0,0 +1,140 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +const { mockMkdir, mockWriteFile, mockAccess } = vi.hoisted(() => ({ + mockMkdir: vi.fn().mockResolvedValue(undefined), + mockWriteFile: vi.fn().mockResolvedValue(undefined), + mockAccess: vi.fn().mockRejectedValue(new Error('ENOENT')), +})); + +vi.mock('node:fs/promises', () => ({ + mkdir: mockMkdir, + writeFile: mockWriteFile, + access: mockAccess, +})); + 
+vi.mock('../../../../../src/utils/logging.js', () => ({ + logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() }, +})); + +import { writeRuntimeImages } from '../../../../../src/gadgets/pm/core/writeRuntimeImages.js'; +import { logger } from '../../../../../src/utils/logging.js'; + +const mockLogger = vi.mocked(logger); + +describe('writeRuntimeImages', () => { + beforeEach(() => { + mockMkdir.mockReset(); + mockMkdir.mockResolvedValue(undefined); + mockWriteFile.mockReset(); + mockWriteFile.mockResolvedValue(undefined); + mockAccess.mockReset(); + mockAccess.mockRejectedValue(new Error('ENOENT')); + mockLogger.info.mockReset(); + mockLogger.warn.mockReset(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('writes each image with work-item--img-.', async () => { + const result = await writeRuntimeImages({ + workItemId: 'MNG-357', + images: [ + { base64Data: Buffer.from('a').toString('base64'), mimeType: 'image/png' }, + { base64Data: Buffer.from('b').toString('base64'), mimeType: 'image/jpeg' }, + ], + }); + + expect(mockWriteFile).toHaveBeenCalledTimes(2); + const firstPath = mockWriteFile.mock.calls[0][0] as string; + const secondPath = mockWriteFile.mock.calls[1][0] as string; + expect(firstPath).toContain('work-item-MNG-357-img-0.png'); + expect(secondPath).toContain('work-item-MNG-357-img-1.jpg'); + expect(result.paths).toHaveLength(2); + }); + + it('derives extension from resolved MIME, NOT from URL', async () => { + await writeRuntimeImages({ + workItemId: 'card-1', + images: [{ base64Data: Buffer.from('x').toString('base64'), mimeType: 'image/webp' }], + }); + + const path = mockWriteFile.mock.calls[0][0] as string; + expect(path).toMatch(/work-item-card-1-img-0\.webp$/); + }); + + it('falls back to .bin extension when MIME resolution failed (image/* sentinel)', async () => { + await writeRuntimeImages({ + workItemId: 'MNG-1', + images: [{ base64Data: Buffer.from('x').toString('base64'), mimeType: 'image/*' }], + }); + + 
const path = mockWriteFile.mock.calls[0][0] as string; + expect(path).toMatch(/work-item-MNG-1-img-0\.bin$/); + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.stringContaining('writeRuntimeImages: unresolved MIME'), + expect.objectContaining({ workItemId: 'MNG-1', mimeType: 'image/*' }), + ); + }); + + it('returns the list of relative paths it wrote', async () => { + const result = await writeRuntimeImages({ + workItemId: 'w1', + images: [ + { base64Data: Buffer.from('a').toString('base64'), mimeType: 'image/png' }, + { base64Data: Buffer.from('b').toString('base64'), mimeType: 'image/png' }, + ], + }); + + expect(result.paths).toEqual([ + '.cascade/context/images/work-item-w1-img-0.png', + '.cascade/context/images/work-item-w1-img-1.png', + ]); + }); + + it('creates the .cascade/context/images directory if it does not exist', async () => { + await writeRuntimeImages({ + workItemId: 'w1', + images: [{ base64Data: Buffer.from('x').toString('base64'), mimeType: 'image/png' }], + }); + + // mkdir called with recursive: true at least once + expect(mockMkdir).toHaveBeenCalled(); + const firstCall = mockMkdir.mock.calls[0]; + expect(firstCall[1]).toEqual({ recursive: true }); + }); + + it('returns empty paths when given no images', async () => { + const result = await writeRuntimeImages({ workItemId: 'w1', images: [] }); + expect(result.paths).toHaveLength(0); + expect(mockWriteFile).not.toHaveBeenCalled(); + }); + + it('captures write failure as a failure entry, does not throw', async () => { + mockWriteFile.mockRejectedValueOnce(new Error('disk full')); + + const result = await writeRuntimeImages({ + workItemId: 'w1', + images: [ + { base64Data: Buffer.from('a').toString('base64'), mimeType: 'image/png' }, + { base64Data: Buffer.from('b').toString('base64'), mimeType: 'image/png' }, + ], + }); + + expect(result.paths).toHaveLength(1); // only the second succeeded + expect(result.failures).toHaveLength(1); + expect(result.failures[0].reason).toContain('disk 
full'); + }); + + it('uses repoDir-relative path when repoDir option provided', async () => { + await writeRuntimeImages({ + workItemId: 'w1', + images: [{ base64Data: Buffer.from('x').toString('base64'), mimeType: 'image/png' }], + repoDir: '/tmp/my-repo', + }); + + const path = mockWriteFile.mock.calls[0][0] as string; + expect(path).toContain('/tmp/my-repo/.cascade/context/images/work-item-w1-img-0.png'); + }); +}); diff --git a/tests/unit/pm/download-and-prepare.test.ts b/tests/unit/pm/download-and-prepare.test.ts new file mode 100644 index 00000000..3f27b5fd --- /dev/null +++ b/tests/unit/pm/download-and-prepare.test.ts @@ -0,0 +1,162 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +// --------------------------------------------------------------------------- +// Hoisted mocks — the helper dispatches to per-provider download clients +// that we mock at the module level. +// --------------------------------------------------------------------------- + +const { + mockJiraDownloadAttachment, + mockLinearDownloadAttachment, + mockTrelloDownloadAttachment, + mockGetPMProviderOrNull, +} = vi.hoisted(() => ({ + mockJiraDownloadAttachment: vi.fn(), + mockLinearDownloadAttachment: vi.fn(), + mockTrelloDownloadAttachment: vi.fn(), + mockGetPMProviderOrNull: vi.fn(), +})); + +vi.mock('../../../src/jira/client.js', () => ({ + jiraClient: { downloadAttachment: mockJiraDownloadAttachment }, +})); +vi.mock('../../../src/linear/client.js', () => ({ + linearClient: { downloadAttachment: mockLinearDownloadAttachment }, +})); +vi.mock('../../../src/trello/client.js', () => ({ + trelloClient: { downloadAttachment: mockTrelloDownloadAttachment }, +})); +vi.mock('../../../src/pm/index.js', () => ({ + getPMProviderOrNull: mockGetPMProviderOrNull, +})); +vi.mock('../../../src/utils/logging.js', () => ({ + logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() }, +})); + +import { downloadAndPrepareImages } from 
'../../../src/pm/download-and-prepare.js'; +import type { MediaReference } from '../../../src/pm/types.js'; + +describe('downloadAndPrepareImages', () => { + const noopLogWriter = vi.fn(); + + const ref = ( + url: string, + mimeType = 'image/png', + altText?: string, + source: 'description' | 'comment' | 'attachment' = 'description', + ): MediaReference => ({ url, mimeType, altText, source }); + + beforeEach(() => { + mockJiraDownloadAttachment.mockReset(); + mockLinearDownloadAttachment.mockReset(); + mockTrelloDownloadAttachment.mockReset(); + mockGetPMProviderOrNull.mockReset(); + noopLogWriter.mockReset(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('downloads each ref and returns success array + failure array', async () => { + mockGetPMProviderOrNull.mockReturnValue({ type: 'linear' }); + mockLinearDownloadAttachment + .mockResolvedValueOnce({ buffer: Buffer.from('one'), mimeType: 'image/png' }) + .mockResolvedValueOnce({ buffer: Buffer.from('two'), mimeType: 'image/jpeg' }) + .mockResolvedValueOnce(null); // failure + + const result = await downloadAndPrepareImages( + 'MNG-357', + [ + ref('https://uploads.linear.app/a'), + ref('https://uploads.linear.app/b'), + ref('https://uploads.linear.app/c'), + ], + noopLogWriter, + ); + + expect(result.images).toHaveLength(2); + expect(result.failures).toHaveLength(1); + }); + + it('preserves base64 + altText + RESOLVED mimeType (not the input wildcard)', async () => { + mockGetPMProviderOrNull.mockReturnValue({ type: 'linear' }); + mockLinearDownloadAttachment.mockResolvedValueOnce({ + buffer: Buffer.from('hello'), + mimeType: 'image/png', + }); + + const result = await downloadAndPrepareImages( + 'MNG-1', + [ref('https://uploads.linear.app/abc', 'image/*', 'Screenshot.png')], + noopLogWriter, + ); + + expect(result.images).toHaveLength(1); + expect(result.images[0]).toEqual({ + base64Data: Buffer.from('hello').toString('base64'), + mimeType: 'image/png', // resolved, NOT the wildcard input + 
altText: 'Screenshot.png', + }); + }); + + it('caps at MAX_IMAGES_PER_WORK_ITEM (10)', async () => { + mockGetPMProviderOrNull.mockReturnValue({ type: 'linear' }); + mockLinearDownloadAttachment.mockResolvedValue({ + buffer: Buffer.from('x'), + mimeType: 'image/png', + }); + + const refs: MediaReference[] = Array.from({ length: 12 }, (_, i) => + ref(`https://uploads.linear.app/${i}`), + ); + await downloadAndPrepareImages('MNG-357', refs, noopLogWriter); + + expect(mockLinearDownloadAttachment).toHaveBeenCalledTimes(10); + }); + + it('dispatches to the correct per-provider download client', async () => { + mockGetPMProviderOrNull.mockReturnValue({ type: 'linear' }); + mockLinearDownloadAttachment.mockResolvedValue({ + buffer: Buffer.from('x'), + mimeType: 'image/png', + }); + await downloadAndPrepareImages('MNG-1', [ref('https://x.com/a.png')], noopLogWriter); + expect(mockLinearDownloadAttachment).toHaveBeenCalledTimes(1); + expect(mockJiraDownloadAttachment).not.toHaveBeenCalled(); + expect(mockTrelloDownloadAttachment).not.toHaveBeenCalled(); + }); + + it('falls back to trello when provider type is not jira or linear', async () => { + mockGetPMProviderOrNull.mockReturnValue({ type: 'trello' }); + mockTrelloDownloadAttachment.mockResolvedValue({ + buffer: Buffer.from('x'), + mimeType: 'image/png', + }); + await downloadAndPrepareImages('w1', [ref('https://x.com/a.png')], noopLogWriter); + expect(mockTrelloDownloadAttachment).toHaveBeenCalledTimes(1); + }); + + it('captures failure reason for download exceptions', async () => { + mockGetPMProviderOrNull.mockReturnValue({ type: 'linear' }); + mockLinearDownloadAttachment.mockRejectedValue(new Error('network blip')); + + const result = await downloadAndPrepareImages( + 'MNG-1', + [ref('https://uploads.linear.app/a')], + noopLogWriter, + ); + + expect(result.images).toHaveLength(0); + expect(result.failures).toHaveLength(1); + expect(result.failures[0]).toEqual({ + url: 'https://uploads.linear.app/a', + reason: 
'network blip', + }); + }); + + it('returns empty arrays when given no refs', async () => { + const result = await downloadAndPrepareImages('w1', [], noopLogWriter); + expect(result).toEqual({ images: [], failures: [] }); + }); +}); diff --git a/tests/unit/pm/linear/extraction-coverage.test.ts b/tests/unit/pm/linear/extraction-coverage.test.ts new file mode 100644 index 00000000..a966ea0e --- /dev/null +++ b/tests/unit/pm/linear/extraction-coverage.test.ts @@ -0,0 +1,157 @@ +/** + * Regression net for spec 016/3 AC#7. + * + * Loads the captured/reconstructed Linear Issue GraphQL fixture and asserts + * our extraction picks up every inline image in it. Fails LOUDLY if Linear + * ever changes its payload shape in a way that loses inline images. + * + * Also pins the rule that `Issue.attachments` records (link previews from + * Slack/GitHub/Sentry) are NOT mistaken for inline images. + */ + +import { readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { extractMarkdownImages } from '../../../../src/pm/media.js'; + +interface FixtureIssue { + id: string; + identifier: string; + description: string; + attachments: { + nodes: Array<{ id: string; title: string; url: string }>; + }; + comments: { + nodes: Array<{ id: string; body: string }>; + }; +} + +interface Fixture { + issue: FixtureIssue; +} + +function loadFixture(): FixtureIssue { + const fixturePath = join( + __dirname, + '..', + '..', + '..', + 'fixtures', + 'linear-issue-with-screenshot.json', + ); + const raw = readFileSync(fixturePath, 'utf-8'); + const fixture = JSON.parse(raw) as Fixture; + return fixture.issue; +} + +// The exact set of URLs that the fixture's description embeds via markdown +// image syntax. This is the regression-truth that Plan 1's extraction must +// always be able to recover from the fixture description string. 
+const EXPECTED_DESCRIPTION_IMAGE_URLS = [ + 'https://uploads.linear.app/abc-123-def-456-extension-less-uuid', + 'https://uploads.linear.app/xyz-789-with-alt-text/Mockup.png', + 'https://example.com/logo.svg', +]; + +const EXPECTED_COMMENT_IMAGE_URLS = ['https://uploads.linear.app/comment-screenshot-uuid']; + +describe('Linear extraction-coverage regression', () => { + it('description: extracts every inline image from the fixture description', () => { + const issue = loadFixture(); + const refs = extractMarkdownImages(issue.description); + const urls = refs.map((r) => r.url); + + // Every expected URL must be present. + for (const expectedUrl of EXPECTED_DESCRIPTION_IMAGE_URLS) { + expect( + urls, + `Linear description image MISSED: ${expectedUrl} — Linear payload may have changed shape; update fixture or extraction.`, + ).toContain(expectedUrl); + } + expect(refs).toHaveLength(EXPECTED_DESCRIPTION_IMAGE_URLS.length); + }); + + it('description: assigns image/* sentinel to extension-less Linear URLs', () => { + const issue = loadFixture(); + const refs = extractMarkdownImages(issue.description); + const linearExtensionless = refs.find( + (r) => r.url === 'https://uploads.linear.app/abc-123-def-456-extension-less-uuid', + ); + expect(linearExtensionless?.mimeType).toBe('image/*'); + }); + + it('description: assigns concrete MIME for extensioned Linear URL', () => { + const issue = loadFixture(); + const refs = extractMarkdownImages(issue.description); + const mockup = refs.find( + (r) => r.url === 'https://uploads.linear.app/xyz-789-with-alt-text/Mockup.png', + ); + expect(mockup?.mimeType).toBe('image/png'); + expect(mockup?.altText).toBe('Annotated mockup'); + }); + + it('description: external SVG URL extracted with image/svg+xml MIME', () => { + const issue = loadFixture(); + const refs = extractMarkdownImages(issue.description); + const svg = refs.find((r) => r.url === 'https://example.com/logo.svg'); + expect(svg?.mimeType).toBe('image/svg+xml'); + }); + + 
it('description: non-image markdown links are NOT extracted', () => { + const issue = loadFixture(); + const refs = extractMarkdownImages(issue.description); + // "[See related ticket](...)" is a regular markdown link, not an image. + expect(refs.find((r) => r.url.includes('MNG-100'))).toBeUndefined(); + }); + + it('comments: extracts inline images from each comment body', () => { + const issue = loadFixture(); + const allCommentRefs: string[] = []; + for (const comment of issue.comments.nodes) { + const refs = extractMarkdownImages(comment.body, 'comment'); + allCommentRefs.push(...refs.map((r) => r.url)); + } + + for (const expectedUrl of EXPECTED_COMMENT_IMAGE_URLS) { + expect(allCommentRefs, `Linear comment image MISSED: ${expectedUrl}`).toContain(expectedUrl); + } + expect(allCommentRefs).toHaveLength(EXPECTED_COMMENT_IMAGE_URLS.length); + }); + + it('comments: source field marks them as comment-origin', () => { + const issue = loadFixture(); + const refs = extractMarkdownImages(issue.comments.nodes[0].body, 'comment'); + expect(refs[0].source).toBe('comment'); + }); + + it('attachments: formal Attachment records (Slack/GitHub/Sentry link previews) are NOT mistaken for inline images', () => { + const issue = loadFixture(); + // The Linear adapter's getAttachments returns these. They have URLs but + // they're link previews, not inline images. Our inline-image extraction + // only reads the description and comment bodies — never the attachments + // connection. This test pins that contract by asserting that none of + // the attachment URLs appear in the description-extracted set. 
+ const descRefs = extractMarkdownImages(issue.description); + const descUrls = new Set(descRefs.map((r) => r.url)); + for (const att of issue.attachments.nodes) { + expect( + descUrls.has(att.url), + `Linear attachment leaked into description extraction: ${att.url}`, + ).toBe(false); + } + }); + + it('regression net: meta-test confirms the test mechanism works (assertion fires when fixture is wrong)', () => { + // Sanity: prove that .toContain() actually fails when an expected URL + // is missing. If the fixture were stripped of all images, this test's + // guarantee (the spec AC#7 "fails loudly" promise) would still hold — + // the meta-check confirms the negative case. + const refs = extractMarkdownImages('No images here, just text.'); + const urls = refs.map((r) => r.url); + expect(() => { + for (const expectedUrl of EXPECTED_DESCRIPTION_IMAGE_URLS) { + expect(urls).toContain(expectedUrl); + } + }).toThrow(); + }); +}); diff --git a/tests/unit/pm/media.test.ts b/tests/unit/pm/media.test.ts index cf095a73..a5751238 100644 --- a/tests/unit/pm/media.test.ts +++ b/tests/unit/pm/media.test.ts @@ -72,6 +72,16 @@ describe('isImageMimeType', () => { it('trims whitespace before checking', () => { expect(isImageMimeType(' image/png ')).toBe(true); }); + + // Spec 016/1: image/* wildcard sentinel for extension-less PM URLs whose + // MIME is resolved at download-time via Content-Type header. 
+ it("accepts the 'image/*' wildcard sentinel", () => { + expect(isImageMimeType('image/*')).toBe(true); + }); + + it("preserves strict acceptance: 'application/*' is NOT an image wildcard", () => { + expect(isImageMimeType('application/*')).toBe(false); + }); }); // --------------------------------------------------------------------------- @@ -119,6 +129,36 @@ describe('filterImageMedia', () => { // --------------------------------------------------------------------------- describe('extractMarkdownImages', () => { + // Spec 016/1: extension-less Linear pasted-image URLs must survive the + // pre-download MIME filter via the image/* wildcard sentinel. + describe('spec 016/1 — extension-less PM URLs', () => { + it("returns mimeType 'image/*' for extension-less Linear uploads.linear.app URL", () => { + const refs = extractMarkdownImages('![](https://uploads.linear.app/abc-123-def-456)'); + expect(refs).toHaveLength(1); + expect(refs[0].mimeType).toBe('image/*'); + }); + + it('returns concrete mimeType for extensioned Linear URL (regression-safe)', () => { + const refs = extractMarkdownImages( + '![Screenshot](https://uploads.linear.app/abc/Screenshot.png)', + ); + expect(refs).toHaveLength(1); + expect(refs[0].mimeType).toBe('image/png'); + }); + + it("returns 'application/octet-stream' for extension-less non-PM URL (no over-broad wildcard)", () => { + const refs = extractMarkdownImages('![](https://example.com/random-file)'); + expect(refs).toHaveLength(1); + expect(refs[0].mimeType).toBe('application/octet-stream'); + }); + + it("Trello PNG URL still returns 'image/png' (regression)", () => { + const refs = extractMarkdownImages('![](https://trello.com/foo.png)'); + expect(refs).toHaveLength(1); + expect(refs[0].mimeType).toBe('image/png'); + }); + }); + // Basic happy path it('extracts a single image', () => { const refs = extractMarkdownImages('Hello ![logo](https://example.com/logo.png)'); From a86eb5352d45fc42f676244b0d4a30daf8c1d9af Mon Sep 17 00:00:00 2001 
From: Zbigniew Sobiecki Date: Mon, 27 Apr 2026 00:37:07 +0200 Subject: [PATCH 8/8] fix(linear): drop comment-mention planning-state gate that prod payload never satisfies (#1210) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #1201 added a `currentStateId !== planningStateId` gate to the Linear comment @mention trigger that read `data.issue.stateId` from the webhook payload. Linear's Comment webhook does not ship `stateId` on the nested issue (verified across four prod payloads on 2026-04-26 — 8cd0108a, b93e4925, 6548cd14, 3d95b210). The gate therefore always evaluated to true and silently dropped every legitimate bot @mention, including the one on MNG-346 that motivated this fix. The agent (respond-to-planning-comment) is now responsible for any planning-only behavior; the trigger no longer gates on state and avoids an extra Linear GraphQL round-trip per comment. Also corrects `LinearWebhookCommentTriggerData.issue` to match what Linear actually ships (six keys, no `stateId`, optional `team`) — the old type lied and PR #1201 trusted it. Tests pin a real prod-shape Comment payload as a regression. JIRA's equivalent gate is unaffected (its `comment_created` payload does ship `issue.fields.status.name`). Co-authored-by: Claude Opus 4.7 (1M context) --- src/triggers/linear/comment-mention.ts | 30 ++------- src/triggers/linear/types.ts | 5 +- .../triggers/linear-comment-mention.test.ts | 64 ++++++++++++------- 3 files changed, 52 insertions(+), 47 deletions(-) diff --git a/src/triggers/linear/comment-mention.ts b/src/triggers/linear/comment-mention.ts index 49f498c7..d7fe9864 100644 --- a/src/triggers/linear/comment-mention.ts +++ b/src/triggers/linear/comment-mention.ts @@ -1,8 +1,11 @@ /** * Linear comment @mention trigger. * - * Fires when someone @mentions the CASCADE bot user in a Linear issue comment - * on an issue in the PLANNING state. Runs the respond-to-planning-comment agent. 
+ * Fires when someone @mentions the CASCADE bot user in a Linear issue comment. + * Runs the respond-to-planning-comment agent, which is itself responsible for + * any planning-only behavior — Linear's Comment webhook payload does not ship + * the issue's current state, so the trigger cannot gate on it without an extra + * GraphQL round-trip. * * Linear webhook structure for comment creation: * action: 'create', type: 'Comment' @@ -12,7 +15,6 @@ * data.issue.identifier: the issue identifier (e.g. TEAM-123) */ -import { getLinearConfig } from '../../pm/config.js'; import { resolveLinearBotIdentity } from '../../router/bot-identity-resolvers.js'; import type { TriggerContext, TriggerHandler, TriggerResult } from '../../types/index.js'; import { logger } from '../../utils/logging.js'; @@ -131,29 +133,9 @@ export class LinearCommentMentionTrigger implements TriggerHandler { return null; } - // Gate on PLANNING state — only respond to comments on PLANNING issues - const linearConfig = getLinearConfig(ctx.project); - const planningStateId = linearConfig?.statuses.planning; - if (!planningStateId) { - logger.debug( - 'Planning state not configured for Linear project, skipping comment mention trigger', - { projectId: ctx.project.id }, - ); - return null; - } - const currentStateId = issue?.stateId; - if (currentStateId !== planningStateId) { - logger.debug('Linear issue not in planning state, skipping comment mention trigger', { - issueIdentifier, - currentStateId, - planningStateId, - }); - return null; - } - const issueUrl = issue?.url; - logger.info('Linear comment @mention detected on PLANNING issue, triggering agent', { + logger.info('Linear comment @mention detected, triggering agent', { issueIdentifier, commentAuthorId, botUserId, diff --git a/src/triggers/linear/types.ts b/src/triggers/linear/types.ts index 718ed812..52c620d2 100644 --- a/src/triggers/linear/types.ts +++ b/src/triggers/linear/types.ts @@ -29,13 +29,16 @@ export interface 
LinearWebhookCommentTriggerData { userId: string; createdAt: string; updatedAt: string; + // Linear's Comment webhook ships only these six fields on the nested + // issue — no `stateId`, no `state`, no labels. Verified against prod + // payloads on 2026-04-26. issue?: { id: string; identifier: string; title: string; teamId: string; url: string; - stateId: string; + team?: { id: string; key: string; name: string }; }; } diff --git a/tests/unit/triggers/linear-comment-mention.test.ts b/tests/unit/triggers/linear-comment-mention.test.ts index 40a5efdf..4a2a9045 100644 --- a/tests/unit/triggers/linear-comment-mention.test.ts +++ b/tests/unit/triggers/linear-comment-mention.test.ts @@ -56,6 +56,9 @@ function buildCtx( noIssue?: boolean; } = {}, ): TriggerContext { + // Linear's Comment webhook payload does NOT include `stateId` on the + // nested issue object — see prod payloads from 2026-04-26. Mirror that + // shape here so the test fixture matches reality. const issue = overrides.noIssue ? undefined : { @@ -64,7 +67,6 @@ function buildCtx( title: 'Test issue', teamId: 'team-abc', url: overrides.issueUrl ?? 
'https://linear.app/org/issue/TEAM-99', - stateId: 'state-todo', }; return { @@ -242,7 +244,6 @@ describe('LinearCommentMentionTrigger', () => { id: 'fallback-issue-id', // no identifier url: 'https://linear.app/org/issue/fallback', - stateId: 'state-todo', // must be in planning state }; const result = await trigger.handle(ctx); expect(result?.workItemId).toBe('fallback-issue-id'); @@ -265,29 +266,48 @@ describe('LinearCommentMentionTrigger', () => { expect(result?.agentInput.linearIssueId).toBe('issue-uuid-99'); }); - it('returns null when issue is not in PLANNING state', async () => { - const ctx = buildCtx(); - const data = ctx.payload as Record; - (data.data as Record).issue = { - id: ISSUE_ID, - identifier: ISSUE_IDENTIFIER, - title: 'Test issue', - teamId: 'team-abc', - url: 'https://linear.app/org/issue/TEAM-99', - stateId: 'state-in-progress', // not planning + // Regression: PR #1201 added a `currentStateId !== planningStateId` + // gate that read `data.issue.stateId`. Linear's Comment webhook + // payload does not ship `stateId` on the nested issue (verified + // across prod payloads 8cd0108a / b93e4925 / 6548cd14 / 3d95b210 + // on 2026-04-26), so the gate always failed and the trigger + // silently dropped every legitimate @mention. The agent itself + // is responsible for any planning-only behavior. 
+ it('fires on a real prod-shape Comment payload that omits issue.stateId', async () => { + const ctx: TriggerContext = { + project: mockProject, + source: 'linear', + payload: { + action: 'create', + type: 'Comment', + organizationId: 'org-mongrel', + webhookTimestamp: 1777242085749, + data: { + id: '733cf70a-e145-4fa1-ad9b-c03bda0c73fb', + body: '@cascade let\'s keep naming closer to "sandbox" so we stay container-provider agnostic', + issueId: ISSUE_ID, + userId: OTHER_USER_ID, + createdAt: '2026-04-26T22:21:25.354Z', + updatedAt: '2026-04-26T22:21:25.338Z', + // Six keys, no stateId — matches prod + issue: { + id: ISSUE_ID, + identifier: ISSUE_IDENTIFIER, + title: 'Test issue', + teamId: 'team-abc', + url: 'https://linear.app/org/issue/TEAM-99', + team: { id: 'team-abc', key: 'TEAM', name: 'team' }, + } as unknown as Record, + }, + url: 'https://linear.app', + }, }; - const result = await trigger.handle(ctx); - expect(result).toBeNull(); - }); - it('returns null when planning state is not configured in project', async () => { - const ctx = buildCtx(); - (ctx as Record).project = { - ...mockProject, - linear: { teamId: 'team-abc', statuses: {} }, // no planning state - }; const result = await trigger.handle(ctx); - expect(result).toBeNull(); + + expect(result).not.toBeNull(); + expect(result?.agentType).toBe('respond-to-planning-comment'); + expect(result?.workItemId).toBe(ISSUE_IDENTIFIER); }); it('includes triggerCommentBody (canonical) in agentInput', async () => {