diff --git a/src/agents/definitions/backlog-manager.yaml b/src/agents/definitions/backlog-manager.yaml index e8646c5f..c502c6c3 100644 --- a/src/agents/definitions/backlog-manager.yaml +++ b/src/agents/definitions/backlog-manager.yaml @@ -54,6 +54,15 @@ triggers: defaultEnabled: false contextPipeline: [pipelineSnapshot] +# Required context — runs for EVERY invocation regardless of trigger source. +# Manual `cascade runs trigger --agent-type backlog-manager` would otherwise +# skip the per-trigger contextPipeline (triggerEvent is undefined), and the +# agent would freelance with no snapshot. Empty pipelineSnapshot result +# aborts the run with a structured error + Sentry capture under tag +# `context_pipeline_required_step_failed` (closes the 2026-04-29 incident +# where MNG-422 was pulled from SPLITTING into TODO). +requiredContext: [pipelineSnapshot] + strategies: {} prompts: diff --git a/src/agents/definitions/profiles.ts b/src/agents/definitions/profiles.ts index e08355ff..3a0b94ab 100644 --- a/src/agents/definitions/profiles.ts +++ b/src/agents/definitions/profiles.ts @@ -5,6 +5,7 @@ * Capabilities determine tools, gadgets, and integration requirements. */ +import { captureException } from '../../sentry.js'; import type { AgentInput } from '../../types/index.js'; import type { Capability, IntegrationChecker } from '../capabilities/index.js'; import { @@ -174,6 +175,10 @@ function buildProfileFromDefinition(def: AgentDefinition, agentType: string): Ag // Get triggers for dynamic context pipeline resolution const triggers = def.triggers ?? []; + // Agent-level required context steps — run regardless of trigger source, + // must produce >0 injections, and abort the run otherwise. + const requiredContext = def.requiredContext ?? []; + // Get task prompt template from prompts (required by schema) const taskPromptTemplate = def.prompts.taskPrompt; @@ -204,11 +209,44 @@ function buildProfileFromDefinition(def: AgentDefinition, agentType: string): Ag const contextPipeline = resolveContextPipeline(triggers, params.input.triggerEvent); const injections: ContextInjection[] = []; + const ranSteps = new Set(); + + // Phase 1: required steps. Always run, regardless of trigger source. + // Empty result OR thrown error aborts the run — the safety net for + // manual retriggers and missing PM provider scope. + for (const step of requiredContext) { + const stepFn = resolveRegistry(CONTEXT_STEP_REGISTRY, step, 'requiredContext step'); + let result: ContextInjection[]; + try { + result = await stepFn(params); + } catch (err) { + captureException(err, { + tags: { source: 'context_pipeline_required_step_failed', step, agent: agentType }, + }); + throw err; + } + if (result.length === 0) { + const err = new Error( + `Required context step "${step}" produced no injections. Agent cannot run safely without it.`, + ); + captureException(err, { + tags: { source: 'context_pipeline_required_step_failed', step, agent: agentType }, + }); + throw err; + } + injections.push(...result); + ranSteps.add(step); + } + + // Phase 2: trigger pipeline. Skip steps already executed in phase 1. for (const step of contextPipeline) { + if (ranSteps.has(step)) continue; const stepFn = resolveRegistry(CONTEXT_STEP_REGISTRY, step, 'contextPipeline step'); const result = await stepFn(params); injections.push(...result); + ranSteps.add(step); } + return injections; }, buildTaskPrompt: (input) => diff --git a/src/agents/definitions/schema.ts b/src/agents/definitions/schema.ts index 8124bc20..b4a417f7 100644 --- a/src/agents/definitions/schema.ts +++ b/src/agents/definitions/schema.ts @@ -309,6 +309,24 @@ export const AgentDefinitionSchema = z.object({ hint: z.string(), /** Custom prompts (taskPrompt required, systemPrompt optional) */ prompts: PromptsSchema, + /** + * Context steps that MUST run AND succeed for every invocation of this + * agent — regardless of trigger source (manual, webhook, internal). A + * step is considered to have "succeeded" when it returns one or more + * context injections; an empty result or a thrown error aborts the run + * with a structured error and a Sentry capture under tag + * `context_pipeline_required_step_failed`. + * + * Required steps run BEFORE the per-trigger `contextPipeline` and are + * deduped (a step listed in both runs once). When `triggerEvent` is + * undefined (manual `cascade runs trigger`), the per-trigger pipeline + * is empty by definition — required steps are the only safety net. + * + * Use case: backlog-manager declares `requiredContext: [pipelineSnapshot]` + * so manual retriggers cannot run with a missing pipeline snapshot and + * freelance into selecting cards from non-BACKLOG lists. + */ + requiredContext: z.array(ContextStepNameSchema).default([]), }); /** diff --git a/src/agents/prompts/templates/backlog-manager.eta b/src/agents/prompts/templates/backlog-manager.eta index 70ef4baa..344a563d 100644 --- a/src/agents/prompts/templates/backlog-manager.eta +++ b/src/agents/prompts/templates/backlog-manager.eta @@ -96,6 +96,7 @@ Manual intervention may be needed to unblock the backlog. ## Rules +- **HARD CONSTRAINT — NEVER MOVE A <%= it.workItemNounCap || 'CARD' %> NOT IN BACKLOG.** The only valid source list is BACKLOG, and the only valid destination is TODO. Never move <%= it.workItemNounPlural || 'cards' %> from SPLITTING, PLANNING, TODO, IN_PROGRESS, IN_REVIEW, DONE, or MERGED — those <%= it.workItemNounPlural || 'cards' %> are already mid-pipeline and another agent or human owns their state. If you don't see a <%= it.workItemNoun || 'card' %> in the BACKLOG section of the Pipeline Snapshot, you CANNOT select it. NEVER call `ListWorkItems` against non-BACKLOG containers to discover candidates — the snapshot's BACKLOG section is the only valid source. If the snapshot is missing or its BACKLOG section is empty, ABORT — do not improvise by listing other lists. - ALWAYS check pipeline status FIRST before scanning the backlog - NEVER move <%= it.workItemNounPlural || 'cards' %> if the active pipeline is at capacity (<%= it.maxInFlightItems ?? 1 %> item(s)) - EXIT SILENTLY if pipeline is at capacity - do not post comments diff --git a/tests/unit/agents/definitions/profiles.test.ts b/tests/unit/agents/definitions/profiles.test.ts index 170b867b..6688a45f 100644 --- a/tests/unit/agents/definitions/profiles.test.ts +++ b/tests/unit/agents/definitions/profiles.test.ts @@ -25,8 +25,13 @@ vi.mock('../../../../src/agents/prompts/index.js', () => ({ validateTemplate: vi.fn().mockReturnValue({ valid: true }), })); +const mockPipelineSnapshotStep = vi.fn(); +const mockWorkItemStep = vi.fn(); vi.mock('../../../../src/agents/definitions/strategies.js', () => ({ - CONTEXT_STEP_REGISTRY: {}, + CONTEXT_STEP_REGISTRY: { + pipelineSnapshot: (...args: unknown[]) => mockPipelineSnapshotStep(...args), + workItem: (...args: unknown[]) => mockWorkItemStep(...args), + }, })); import { @@ -223,6 +228,177 @@ describe('getAgentProfile', () => { expect(profile.finishHooks.requiresReview).toBe(false); expect(profile.finishHooks.requiresPushedChanges).toBe(true); }); + + // ============================================================================ + // requiredContext (Fix A + Fix C — backlog-manager scope safety) + // + // `requiredContext` is an agent-level array of context steps that: + // 1. ALWAYS run, regardless of whether the trigger has its own contextPipeline + // or whether triggerEvent is undefined (manual trigger). + // 2. MUST produce >0 injections, otherwise the agent run aborts with a + // structured error. + // + // Closes the prod incident on 2026-04-29 where backlog-manager was manually + // triggered with `triggerEvent: undefined`, ran with no pipelineSnapshot + // pre-load, freelanced by listing all PM containers, and moved cards from + // SPLITTING to TODO. + // ============================================================================ + + describe('requiredContext (always-run, fail-closed)', () => { + beforeEach(() => { + mockPipelineSnapshotStep.mockReset(); + mockWorkItemStep.mockReset(); + }); + + it('runs requiredContext steps even when triggerEvent is undefined (manual trigger)', async () => { + // Regression pin against the 2026-04-29 incident: manual `cascade runs + // trigger --agent-type backlog-manager` ran with NO pipelineSnapshot + // because resolveContextPipeline returned [] for undefined triggerEvent. + mockPipelineSnapshotStep.mockResolvedValue([ + { toolName: 'PipelineSnapshot', params: {}, result: 'ok', description: 'snapshot' }, + ]); + mockResolveAgentDefinition.mockResolvedValue( + makeDefinition({ requiredContext: ['pipelineSnapshot'] }), + ); + + const profile = await getAgentProfile('backlog-manager'); + const result = await profile.fetchContext({ + input: {}, // no triggerEvent — manual trigger + } as Parameters[0]); + + expect(mockPipelineSnapshotStep).toHaveBeenCalledOnce(); + expect(result).toHaveLength(1); + expect(result[0].toolName).toBe('PipelineSnapshot'); + }); + + it('aborts with a structured error when a requiredContext step returns 0 injections', async () => { + // Fix C: fail-closed. Today fetchPipelineSnapshotStep returns [] when + // no PM provider is in scope — agent runs with no snapshot and + // freelances. Required-step empty result must abort the run. + mockPipelineSnapshotStep.mockResolvedValue([]); + mockResolveAgentDefinition.mockResolvedValue( + makeDefinition({ requiredContext: ['pipelineSnapshot'] }), + ); + + const profile = await getAgentProfile('backlog-manager'); + + await expect( + profile.fetchContext({ + input: {}, + } as Parameters[0]), + ).rejects.toThrow(/required context step.*pipelineSnapshot/i); + }); + + it('aborts with a structured error when a requiredContext step throws', async () => { + mockPipelineSnapshotStep.mockRejectedValue(new Error('PM provider unavailable')); + mockResolveAgentDefinition.mockResolvedValue( + makeDefinition({ requiredContext: ['pipelineSnapshot'] }), + ); + + const profile = await getAgentProfile('backlog-manager'); + + await expect( + profile.fetchContext({ + input: {}, + } as Parameters[0]), + ).rejects.toThrow(/PM provider unavailable|required context step/i); + }); + + it('does not double-run a step that is in BOTH requiredContext and the trigger pipeline', async () => { + // Avoid duplicate snapshot fetch when a webhook trigger (e.g. + // scm:pr-merged) lists pipelineSnapshot in its contextPipeline AND + // the agent declares it as requiredContext. + mockPipelineSnapshotStep.mockResolvedValue([ + { toolName: 'PipelineSnapshot', params: {}, result: 'ok', description: 'snapshot' }, + ]); + mockResolveAgentDefinition.mockResolvedValue( + makeDefinition({ + requiredContext: ['pipelineSnapshot'], + triggers: [ + { + event: 'scm:pr-merged', + label: 'PR Merged', + defaultEnabled: false, + parameters: [], + contextPipeline: ['pipelineSnapshot'], + }, + ], + }), + ); + + const profile = await getAgentProfile('backlog-manager'); + await profile.fetchContext({ + input: { triggerEvent: 'scm:pr-merged' }, + } as Parameters[0]); + + expect(mockPipelineSnapshotStep).toHaveBeenCalledOnce(); + }); + + it('runs requiredContext FIRST, then non-required trigger pipeline steps', async () => { + const order: string[] = []; + mockPipelineSnapshotStep.mockImplementation(async () => { + order.push('pipelineSnapshot'); + return [ + { + toolName: 'PipelineSnapshot', + params: {}, + result: 'ok', + description: 'snapshot', + }, + ]; + }); + mockWorkItemStep.mockImplementation(async () => { + order.push('workItem'); + return []; + }); + mockResolveAgentDefinition.mockResolvedValue( + makeDefinition({ + requiredContext: ['pipelineSnapshot'], + triggers: [ + { + event: 'pm:status-changed', + label: 'Status Changed', + defaultEnabled: true, + parameters: [], + contextPipeline: ['workItem'], + }, + ], + }), + ); + + const profile = await getAgentProfile('backlog-manager'); + await profile.fetchContext({ + input: { triggerEvent: 'pm:status-changed' }, + } as Parameters[0]); + + expect(order).toEqual(['pipelineSnapshot', 'workItem']); + }); + + it('preserves existing behavior: agents without requiredContext are unaffected', async () => { + mockResolveAgentDefinition.mockResolvedValue( + makeDefinition({ + // No requiredContext field + triggers: [ + { + event: 'pm:status-changed', + label: 'Status Changed', + defaultEnabled: true, + parameters: [], + contextPipeline: [], + }, + ], + }), + ); + + const profile = await getAgentProfile('implementation'); + const result = await profile.fetchContext({ + input: { triggerEvent: 'pm:status-changed' }, + } as Parameters[0]); + + expect(result).toEqual([]); + expect(mockPipelineSnapshotStep).not.toHaveBeenCalled(); + }); + }); }); describe('needsGitStateStopHooks', () => { diff --git a/tests/unit/api/routers/agentDefinitions.test.ts b/tests/unit/api/routers/agentDefinitions.test.ts index be4aa5eb..fd760b71 100644 --- a/tests/unit/api/routers/agentDefinitions.test.ts +++ b/tests/unit/api/routers/agentDefinitions.test.ts @@ -101,6 +101,7 @@ function createMockDefinition(overrides?: Partial): AgentDefini taskPrompt: 'Analyze and process the work item with ID: <%= it.cardId %>. The work item data has been pre-loaded.', }, + requiredContext: [], ...overrides, } as AgentDefinition; } diff --git a/web/src/components/settings/agent-definition-shared.tsx b/web/src/components/settings/agent-definition-shared.tsx index 63c8ad08..087a63f1 100644 --- a/web/src/components/settings/agent-definition-shared.tsx +++ b/web/src/components/settings/agent-definition-shared.tsx @@ -68,6 +68,7 @@ export const EMPTY_DEFINITION: AgentDefinition = { taskPrompt: 'Analyze and process the work item with ID: <%= it.workItemId %>. The work item data has been pre-loaded.', }, + requiredContext: [], }; // ─────────────────────────────────────────────────────────────────────────────