Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/agents/definitions/backlog-manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,15 @@ triggers:
defaultEnabled: false
contextPipeline: [pipelineSnapshot]

# Required context — runs for EVERY invocation regardless of trigger source.
# Manual `cascade runs trigger --agent-type backlog-manager` would otherwise
# skip the per-trigger contextPipeline (triggerEvent is undefined), and the
# agent would freelance with no snapshot. Empty pipelineSnapshot result
# aborts the run with a structured error + Sentry capture under tag
# `context_pipeline_required_step_failed` (closes the 2026-04-29 incident
# where MNG-422 was pulled from SPLITTING into TODO).
requiredContext: [pipelineSnapshot]

strategies: {}

prompts:
Expand Down
38 changes: 38 additions & 0 deletions src/agents/definitions/profiles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* Capabilities determine tools, gadgets, and integration requirements.
*/

import { captureException } from '../../sentry.js';
import type { AgentInput } from '../../types/index.js';
import type { Capability, IntegrationChecker } from '../capabilities/index.js';
import {
Expand Down Expand Up @@ -174,6 +175,10 @@ function buildProfileFromDefinition(def: AgentDefinition, agentType: string): Ag
// Get triggers for dynamic context pipeline resolution
const triggers = def.triggers ?? [];

// Agent-level required context steps — run regardless of trigger source,
// must produce >0 injections, and abort the run otherwise.
const requiredContext = def.requiredContext ?? [];

// Get task prompt template from prompts (required by schema)
const taskPromptTemplate = def.prompts.taskPrompt;

Expand Down Expand Up @@ -204,11 +209,44 @@ function buildProfileFromDefinition(def: AgentDefinition, agentType: string): Ag
const contextPipeline = resolveContextPipeline(triggers, params.input.triggerEvent);

const injections: ContextInjection[] = [];
const ranSteps = new Set<ContextStepName>();

// Phase 1: required steps. Always run, regardless of trigger source.
// Empty result OR thrown error aborts the run — the safety net for
// manual retriggers and missing PM provider scope.
for (const step of requiredContext) {
const stepFn = resolveRegistry(CONTEXT_STEP_REGISTRY, step, 'requiredContext step');
let result: ContextInjection[];
try {
result = await stepFn(params);
} catch (err) {
captureException(err, {
tags: { source: 'context_pipeline_required_step_failed', step, agent: agentType },
});
throw err;
}
if (result.length === 0) {
const err = new Error(
`Required context step "${step}" produced no injections. Agent cannot run safely without it.`,
);
captureException(err, {
tags: { source: 'context_pipeline_required_step_failed', step, agent: agentType },
});
throw err;
}
injections.push(...result);
ranSteps.add(step);
}

// Phase 2: trigger pipeline. Skip steps already executed in phase 1.
for (const step of contextPipeline) {
if (ranSteps.has(step)) continue;
const stepFn = resolveRegistry(CONTEXT_STEP_REGISTRY, step, 'contextPipeline step');
const result = await stepFn(params);
injections.push(...result);
ranSteps.add(step);
}

return injections;
},
buildTaskPrompt: (input) =>
Expand Down
18 changes: 18 additions & 0 deletions src/agents/definitions/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,24 @@ export const AgentDefinitionSchema = z.object({
hint: z.string(),
/** Custom prompts (taskPrompt required, systemPrompt optional) */
prompts: PromptsSchema,
/**
* Context steps that MUST run AND succeed for every invocation of this
* agent — regardless of trigger source (manual, webhook, internal). A
* step is considered to have "succeeded" when it returns one or more
* context injections; an empty result or a thrown error aborts the run
* with a structured error and a Sentry capture under tag
* `context_pipeline_required_step_failed`.
*
* Required steps run BEFORE the per-trigger `contextPipeline` and are
* deduped (a step listed in both runs once). When `triggerEvent` is
* undefined (manual `cascade runs trigger`), the per-trigger pipeline
* is empty by definition — required steps are the only safety net.
*
* Use case: backlog-manager declares `requiredContext: [pipelineSnapshot]`
* so manual retriggers cannot run with a missing pipeline snapshot and
* freelance into selecting cards from non-BACKLOG lists.
*/
requiredContext: z.array(ContextStepNameSchema).default([]),
});

/**
Expand Down
1 change: 1 addition & 0 deletions src/agents/prompts/templates/backlog-manager.eta
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ Manual intervention may be needed to unblock the backlog.

## Rules

- **HARD CONSTRAINT — NEVER MOVE A <%= it.workItemNounCap || 'CARD' %> NOT IN BACKLOG.** The only valid source list is BACKLOG, and the only valid destination is TODO. Never move <%= it.workItemNounPlural || 'cards' %> from SPLITTING, PLANNING, TODO, IN_PROGRESS, IN_REVIEW, DONE, or MERGED — those <%= it.workItemNounPlural || 'cards' %> are already mid-pipeline and another agent or human owns their state. If you don't see a <%= it.workItemNoun || 'card' %> in the BACKLOG section of the Pipeline Snapshot, you CANNOT select it. NEVER call `ListWorkItems` against non-BACKLOG containers to discover candidates — the snapshot's BACKLOG section is the only valid source. If the snapshot is missing or its BACKLOG section is empty, ABORT — do not improvise by listing other lists.
- ALWAYS check pipeline status FIRST before scanning the backlog
- NEVER move <%= it.workItemNounPlural || 'cards' %> if the active pipeline is at capacity (<%= it.maxInFlightItems ?? 1 %> item(s))
- EXIT SILENTLY if pipeline is at capacity - do not post comments
Expand Down
178 changes: 177 additions & 1 deletion tests/unit/agents/definitions/profiles.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,13 @@ vi.mock('../../../../src/agents/prompts/index.js', () => ({
validateTemplate: vi.fn().mockReturnValue({ valid: true }),
}));

const mockPipelineSnapshotStep = vi.fn();
const mockWorkItemStep = vi.fn();
vi.mock('../../../../src/agents/definitions/strategies.js', () => ({
CONTEXT_STEP_REGISTRY: {},
CONTEXT_STEP_REGISTRY: {
pipelineSnapshot: (...args: unknown[]) => mockPipelineSnapshotStep(...args),
workItem: (...args: unknown[]) => mockWorkItemStep(...args),
},
}));

import {
Expand Down Expand Up @@ -223,6 +228,177 @@ describe('getAgentProfile', () => {
expect(profile.finishHooks.requiresReview).toBe(false);
expect(profile.finishHooks.requiresPushedChanges).toBe(true);
});

// ============================================================================
// requiredContext (Fix A + Fix C — backlog-manager scope safety)
//
// `requiredContext` is an agent-level array of context steps that:
// 1. ALWAYS run, regardless of whether the trigger has its own contextPipeline
// or whether triggerEvent is undefined (manual trigger).
// 2. MUST produce >0 injections, otherwise the agent run aborts with a
// structured error.
//
// Closes the prod incident on 2026-04-29 where backlog-manager was manually
// triggered with `triggerEvent: undefined`, ran with no pipelineSnapshot
// pre-load, freelanced by listing all PM containers, and moved cards from
// SPLITTING to TODO.
// ============================================================================

describe('requiredContext (always-run, fail-closed)', () => {
beforeEach(() => {
mockPipelineSnapshotStep.mockReset();
mockWorkItemStep.mockReset();
});

it('runs requiredContext steps even when triggerEvent is undefined (manual trigger)', async () => {
// Regression pin against the 2026-04-29 incident: manual `cascade runs
// trigger --agent-type backlog-manager` ran with NO pipelineSnapshot
// because resolveContextPipeline returned [] for undefined triggerEvent.
mockPipelineSnapshotStep.mockResolvedValue([
{ toolName: 'PipelineSnapshot', params: {}, result: 'ok', description: 'snapshot' },
]);
mockResolveAgentDefinition.mockResolvedValue(
makeDefinition({ requiredContext: ['pipelineSnapshot'] }),
);

const profile = await getAgentProfile('backlog-manager');
const result = await profile.fetchContext({
input: {}, // no triggerEvent — manual trigger
} as Parameters<typeof profile.fetchContext>[0]);

expect(mockPipelineSnapshotStep).toHaveBeenCalledOnce();
expect(result).toHaveLength(1);
expect(result[0].toolName).toBe('PipelineSnapshot');
});

it('aborts with a structured error when a requiredContext step returns 0 injections', async () => {
// Fix C: fail-closed. Today fetchPipelineSnapshotStep returns [] when
// no PM provider is in scope — agent runs with no snapshot and
// freelances. Required-step empty result must abort the run.
mockPipelineSnapshotStep.mockResolvedValue([]);
mockResolveAgentDefinition.mockResolvedValue(
makeDefinition({ requiredContext: ['pipelineSnapshot'] }),
);

const profile = await getAgentProfile('backlog-manager');

await expect(
profile.fetchContext({
input: {},
} as Parameters<typeof profile.fetchContext>[0]),
).rejects.toThrow(/required context step.*pipelineSnapshot/i);
});

it('aborts with a structured error when a requiredContext step throws', async () => {
mockPipelineSnapshotStep.mockRejectedValue(new Error('PM provider unavailable'));
mockResolveAgentDefinition.mockResolvedValue(
makeDefinition({ requiredContext: ['pipelineSnapshot'] }),
);

const profile = await getAgentProfile('backlog-manager');

await expect(
profile.fetchContext({
input: {},
} as Parameters<typeof profile.fetchContext>[0]),
).rejects.toThrow(/PM provider unavailable|required context step/i);
});

it('does not double-run a step that is in BOTH requiredContext and the trigger pipeline', async () => {
// Avoid duplicate snapshot fetch when a webhook trigger (e.g.
// scm:pr-merged) lists pipelineSnapshot in its contextPipeline AND
// the agent declares it as requiredContext.
mockPipelineSnapshotStep.mockResolvedValue([
{ toolName: 'PipelineSnapshot', params: {}, result: 'ok', description: 'snapshot' },
]);
mockResolveAgentDefinition.mockResolvedValue(
makeDefinition({
requiredContext: ['pipelineSnapshot'],
triggers: [
{
event: 'scm:pr-merged',
label: 'PR Merged',
defaultEnabled: false,
parameters: [],
contextPipeline: ['pipelineSnapshot'],
},
],
}),
);

const profile = await getAgentProfile('backlog-manager');
await profile.fetchContext({
input: { triggerEvent: 'scm:pr-merged' },
} as Parameters<typeof profile.fetchContext>[0]);

expect(mockPipelineSnapshotStep).toHaveBeenCalledOnce();
});

it('runs requiredContext FIRST, then non-required trigger pipeline steps', async () => {
const order: string[] = [];
mockPipelineSnapshotStep.mockImplementation(async () => {
order.push('pipelineSnapshot');
return [
{
toolName: 'PipelineSnapshot',
params: {},
result: 'ok',
description: 'snapshot',
},
];
});
mockWorkItemStep.mockImplementation(async () => {
order.push('workItem');
return [];
});
mockResolveAgentDefinition.mockResolvedValue(
makeDefinition({
requiredContext: ['pipelineSnapshot'],
triggers: [
{
event: 'pm:status-changed',
label: 'Status Changed',
defaultEnabled: true,
parameters: [],
contextPipeline: ['workItem'],
},
],
}),
);

const profile = await getAgentProfile('backlog-manager');
await profile.fetchContext({
input: { triggerEvent: 'pm:status-changed' },
} as Parameters<typeof profile.fetchContext>[0]);

expect(order).toEqual(['pipelineSnapshot', 'workItem']);
});

it('preserves existing behavior: agents without requiredContext are unaffected', async () => {
mockResolveAgentDefinition.mockResolvedValue(
makeDefinition({
// No requiredContext field
triggers: [
{
event: 'pm:status-changed',
label: 'Status Changed',
defaultEnabled: true,
parameters: [],
contextPipeline: [],
},
],
}),
);

const profile = await getAgentProfile('implementation');
const result = await profile.fetchContext({
input: { triggerEvent: 'pm:status-changed' },
} as Parameters<typeof profile.fetchContext>[0]);

expect(result).toEqual([]);
expect(mockPipelineSnapshotStep).not.toHaveBeenCalled();
});
});
});

describe('needsGitStateStopHooks', () => {
Expand Down
1 change: 1 addition & 0 deletions tests/unit/api/routers/agentDefinitions.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ function createMockDefinition(overrides?: Partial<AgentDefinition>): AgentDefini
taskPrompt:
'Analyze and process the work item with ID: <%= it.cardId %>. The work item data has been pre-loaded.',
},
requiredContext: [],
...overrides,
} as AgentDefinition;
}
Expand Down
1 change: 1 addition & 0 deletions web/src/components/settings/agent-definition-shared.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ export const EMPTY_DEFINITION: AgentDefinition = {
taskPrompt:
'Analyze and process the work item with ID: <%= it.workItemId %>. The work item data has been pre-loaded.',
},
requiredContext: [],
};

// ─────────────────────────────────────────────────────────────────────────────
Expand Down
Loading