diff --git a/src/agents/capabilities/index.ts b/src/agents/capabilities/index.ts new file mode 100644 index 00000000..aac4898b --- /dev/null +++ b/src/agents/capabilities/index.ts @@ -0,0 +1,50 @@ +/** + * Capability-centric architecture for agent tool management. + * + * Core principle: Integrations provide capabilities. Capabilities provide tools. + * + * ``` + * Integration Category → Capabilities → Gadgets/Tools + * │ │ │ + * pm → pm:read → ReadWorkItem, ListWorkItems + * pm:write → CreateWorkItem, UpdateWorkItem, PostComment, AddChecklist + * pm:checklist → PMUpdateChecklistItem, PMDeleteChecklistItem + * + * scm → scm:read → GetPRDetails, GetPRDiff, GetPRChecks + * scm:comment → PostPRComment, UpdatePRComment, GetPRComments, ReplyToReviewComment + * scm:review → CreatePRReview + * scm:pr → CreatePR + * + * (built-in) → fs:read → ReadFile, ListDirectory, RipGrep, AstGrep + * fs:write → WriteFile, FileSearchAndReplace, FileMultiEdit, VerifyChanges + * shell:exec → Tmux, Sleep + * session:ctrl → Finish, TodoUpsert, TodoUpdateStatus, TodoDelete + * ``` + */ + +// Registry +export { + CAPABILITIES, + CAPABILITY_REGISTRY, + type Capability, + type CapabilityDefinition, + getCapabilitiesByIntegration, + getCapabilityIntegration, + isBuiltInCapability, + isValidCapability, +} from './registry.js'; + +// Resolver +export { + buildGadgetsFromCapabilities, + createIntegrationChecker, + deriveIntegrations, + deriveRequiredIntegrations, + filterToolManifests, + generateUnavailableCapabilitiesNote, + getGadgetNamesFromCapabilities, + getSdkToolsFromCapabilities, + getUnavailableOptionalCapabilities, + type IntegrationChecker, + resolveEffectiveCapabilities, +} from './resolver.js'; diff --git a/src/agents/capabilities/registry.ts b/src/agents/capabilities/registry.ts new file mode 100644 index 00000000..1b1623b6 --- /dev/null +++ b/src/agents/capabilities/registry.ts @@ -0,0 +1,260 @@ +/** + * Capability Registry + * + * Defines the mapping from capabilities to their source integrations, + * gadgets, SDK tools, and CLI tools. 
+ * + * Core principle: Integrations provide capabilities. Capabilities provide tools. + * + * Integration Category → Capabilities → Gadgets/Tools + */ + +import type { IntegrationCategory } from '../definitions/schema.js'; + +// ============================================================================ +// Capability Types +// ============================================================================ + +/** + * All available capabilities in the system. + * + * Format: {source}:{action} + * - Built-in sources: fs (filesystem), shell, session + * - Integration sources: pm, scm, email, sms + */ +export const CAPABILITIES = [ + // Built-in capabilities (always available, no integration required) + 'fs:read', + 'fs:write', + 'shell:exec', + 'session:ctrl', + + // PM integration capabilities + 'pm:read', + 'pm:write', + 'pm:checklist', + + // SCM integration capabilities + 'scm:read', + 'scm:comment', + 'scm:review', + 'scm:pr', + + // Email integration capabilities + 'email:read', + 'email:write', + + // SMS integration capabilities + 'sms:send', +] as const; + +export type Capability = (typeof CAPABILITIES)[number]; + +/** + * Capability definition describing what a capability provides. + */ +export interface CapabilityDefinition { + /** Integration category that provides this capability (null = built-in) */ + integration: IntegrationCategory | null; + /** Description for UI display */ + description: string; + /** Gadget class names this capability enables */ + gadgetNames: string[]; + /** SDK tool names for Claude Code backend */ + sdkToolNames: string[]; + /** CLI tool commands for cascade-tools (currently unused but reserved) */ + cliToolNames: string[]; +} + +// ============================================================================ +// Capability Registry +// ============================================================================ + +/** + * Registry mapping capabilities to their definitions. 
+ * + * This is the single source of truth for capability → tool mappings. + */ +export const CAPABILITY_REGISTRY: Record = { + // ------------------------------------------------------------------------- + // Built-in capabilities (always available) + // ------------------------------------------------------------------------- + + 'fs:read': { + integration: null, + description: 'Read files, list directories, search code', + gadgetNames: ['ListDirectory', 'ReadFile', 'RipGrep', 'AstGrep'], + sdkToolNames: ['Read', 'Glob', 'Grep'], + cliToolNames: [], + }, + + 'fs:write': { + integration: null, + description: 'Write and edit files', + gadgetNames: ['WriteFile', 'FileSearchAndReplace', 'FileMultiEdit', 'VerifyChanges'], + sdkToolNames: ['Write', 'Edit'], + cliToolNames: [], + }, + + 'shell:exec': { + integration: null, + description: 'Execute shell commands', + gadgetNames: ['Tmux', 'Sleep'], + sdkToolNames: ['Bash'], + cliToolNames: [], + }, + + 'session:ctrl': { + integration: null, + description: 'Session control and task tracking', + gadgetNames: ['Finish', 'TodoUpsert', 'TodoUpdateStatus', 'TodoDelete'], + sdkToolNames: [], + cliToolNames: [], + }, + + // ------------------------------------------------------------------------- + // PM integration capabilities + // ------------------------------------------------------------------------- + + 'pm:read': { + integration: 'pm', + description: 'Read work items from PM system', + gadgetNames: ['ReadWorkItem', 'ListWorkItems'], + sdkToolNames: [], + cliToolNames: [], + }, + + 'pm:write': { + integration: 'pm', + description: 'Create and update work items, post comments', + gadgetNames: ['UpdateWorkItem', 'CreateWorkItem', 'PostComment', 'AddChecklist'], + sdkToolNames: [], + cliToolNames: [], + }, + + 'pm:checklist': { + integration: 'pm', + description: 'Update and delete checklist items', + gadgetNames: ['PMUpdateChecklistItem', 'PMDeleteChecklistItem'], + sdkToolNames: [], + cliToolNames: [], + }, + + // 
------------------------------------------------------------------------- + // SCM integration capabilities + // ------------------------------------------------------------------------- + + 'scm:read': { + integration: 'scm', + description: 'Read PR details, diffs, and checks', + gadgetNames: ['GetPRDetails', 'GetPRDiff', 'GetPRChecks'], + sdkToolNames: [], + cliToolNames: [], + }, + + 'scm:comment': { + integration: 'scm', + description: 'Post and update PR comments', + gadgetNames: ['PostPRComment', 'UpdatePRComment', 'GetPRComments', 'ReplyToReviewComment'], + sdkToolNames: [], + cliToolNames: [], + }, + + 'scm:review': { + integration: 'scm', + description: 'Submit code reviews', + gadgetNames: ['CreatePRReview'], + sdkToolNames: [], + cliToolNames: [], + }, + + 'scm:pr': { + integration: 'scm', + description: 'Create pull requests', + gadgetNames: ['CreatePR'], + sdkToolNames: [], + cliToolNames: [], + }, + + // ------------------------------------------------------------------------- + // Email integration capabilities + // ------------------------------------------------------------------------- + + 'email:read': { + integration: 'email', + description: 'Search and read emails', + gadgetNames: ['SearchEmails', 'ReadEmail', 'MarkEmailAsSeen'], + sdkToolNames: [], + cliToolNames: [], + }, + + 'email:write': { + integration: 'email', + description: 'Send and reply to emails', + gadgetNames: ['SendEmail', 'ReplyToEmail'], + sdkToolNames: [], + cliToolNames: [], + }, + + // ------------------------------------------------------------------------- + // SMS integration capabilities + // ------------------------------------------------------------------------- + + 'sms:send': { + integration: 'sms', + description: 'Send SMS messages', + gadgetNames: ['SendSms'], + sdkToolNames: [], + cliToolNames: [], + }, +}; + +// ============================================================================ +// Helper Functions +// 
============================================================================ + +/** + * Get capabilities grouped by integration source for UI display. + */ +export function getCapabilitiesByIntegration(): Record< + IntegrationCategory | 'builtin', + Capability[] +> { + const groups: Record = { + builtin: [], + pm: [], + scm: [], + email: [], + sms: [], + }; + + for (const cap of CAPABILITIES) { + const def = CAPABILITY_REGISTRY[cap]; + const key = def.integration ?? 'builtin'; + groups[key].push(cap); + } + + return groups; +} + +/** + * Look up the integration category that provides a capability in CAPABILITY_REGISTRY. + * Returns null for built-in capabilities. + */ +export function getCapabilityIntegration(cap: Capability): IntegrationCategory | null { + return CAPABILITY_REGISTRY[cap].integration; +} + +/** + * Check if a capability is built-in (no integration required). + */ +export function isBuiltInCapability(cap: Capability): boolean { + return CAPABILITY_REGISTRY[cap].integration === null; +} + +/** + * Type guard: check whether a string is one of the names listed in CAPABILITIES. + */ +export function isValidCapability(value: string): value is Capability { + return CAPABILITIES.includes(value as Capability); +} diff --git a/src/agents/capabilities/resolver.ts b/src/agents/capabilities/resolver.ts new file mode 100644 index 00000000..1129bf16 --- /dev/null +++ b/src/agents/capabilities/resolver.ts @@ -0,0 +1,418 @@ +/** + * Capability Resolver + * + * Functions for deriving integrations, tools, and gadgets from capabilities. 
+ */ + +import { AstGrep } from '../../gadgets/AstGrep.js'; +import { FileMultiEdit } from '../../gadgets/FileMultiEdit.js'; +import { FileSearchAndReplace } from '../../gadgets/FileSearchAndReplace.js'; +import { Finish } from '../../gadgets/Finish.js'; +import { ListDirectory } from '../../gadgets/ListDirectory.js'; +import { ReadFile } from '../../gadgets/ReadFile.js'; +import { RipGrep } from '../../gadgets/RipGrep.js'; +import { Sleep } from '../../gadgets/Sleep.js'; +import { VerifyChanges } from '../../gadgets/VerifyChanges.js'; +import { WriteFile } from '../../gadgets/WriteFile.js'; +import { + MarkEmailAsSeen, + ReadEmail, + ReplyToEmail, + SearchEmails, + SendEmail, +} from '../../gadgets/email/index.js'; +import { + CreatePR, + CreatePRReview, + GetPRChecks, + GetPRComments, + GetPRDetails, + GetPRDiff, + PostPRComment, + ReplyToReviewComment, + UpdatePRComment, +} from '../../gadgets/github/index.js'; +import { + AddChecklist, + CreateWorkItem, + ListWorkItems, + PMDeleteChecklistItem, + PMUpdateChecklistItem, + PostComment, + ReadWorkItem, + UpdateWorkItem, +} from '../../gadgets/pm/index.js'; +import { SendSms } from '../../gadgets/sms/index.js'; +import { Tmux } from '../../gadgets/tmux.js'; +import { TodoDelete, TodoUpdateStatus, TodoUpsert } from '../../gadgets/todo/index.js'; +import type { ToolManifest } from '../contracts/index.js'; +import type { IntegrationCategory } from '../definitions/schema.js'; +import { + CAPABILITY_REGISTRY, + type Capability, + getCapabilityIntegration, + isBuiltInCapability, +} from './registry.js'; + +// ============================================================================ +// Integration Checker Type +// ============================================================================ + +/** + * Callback to check if an integration category is available for a project. + * Used to filter optional capabilities based on project configuration. 
+ */ +export type IntegrationChecker = (category: IntegrationCategory) => boolean; + +// ============================================================================ +// Gadget Constructor Map +// ============================================================================ + +/** + * Maps gadget names to their constructor functions. + * This allows building gadgets from capability definitions. + */ +// biome-ignore lint/suspicious/noExplicitAny: Gadget constructors have varying signatures +const GADGET_CONSTRUCTORS: Record any> = { + // fs:read + ListDirectory, + ReadFile, + RipGrep, + AstGrep, + + // fs:write + WriteFile, + FileSearchAndReplace, + FileMultiEdit, + VerifyChanges, + + // shell:exec + Tmux, + Sleep, + + // session:ctrl + Finish, + TodoUpsert, + TodoUpdateStatus, + TodoDelete, + + // pm:read + ReadWorkItem, + ListWorkItems, + + // pm:write + UpdateWorkItem, + CreateWorkItem, + PostComment, + AddChecklist, + + // pm:checklist + PMUpdateChecklistItem, + PMDeleteChecklistItem, + + // scm:read + GetPRDetails, + GetPRDiff, + GetPRChecks, + + // scm:comment + PostPRComment, + UpdatePRComment, + GetPRComments, + ReplyToReviewComment, + + // scm:review + CreatePRReview, + + // scm:pr + CreatePR, + + // email:read + SearchEmails, + ReadEmail, + MarkEmailAsSeen, + + // email:write + SendEmail, + ReplyToEmail, + + // sms:send + SendSms, +}; + +// ============================================================================ +// Integration Derivation +// ============================================================================ + +/** + * Derive required integration categories from capabilities. + * Returns unique categories for all non-builtin capabilities. 
+ */ +export function deriveRequiredIntegrations(caps: Capability[]): IntegrationCategory[] { + const integrations = new Set(); + for (const cap of caps) { + const integration = getCapabilityIntegration(cap); + if (integration !== null) { + integrations.add(integration); + } + } + return [...integrations]; +} + +/** + * Derive integration requirements from both required and optional capabilities. + * Returns separate arrays for required and optional integrations. + */ +export function deriveIntegrations( + requiredCaps: Capability[], + optionalCaps: Capability[], +): { required: IntegrationCategory[]; optional: IntegrationCategory[] } { + const required = deriveRequiredIntegrations(requiredCaps); + const requiredSet = new Set(required); + + // Optional integrations are those from optional caps that aren't already required + const optional = new Set(); + for (const cap of optionalCaps) { + const integration = getCapabilityIntegration(cap); + if (integration !== null && !requiredSet.has(integration)) { + optional.add(integration); + } + } + + return { required, optional: [...optional] }; +} + +// ============================================================================ +// Capability Resolution +// ============================================================================ + +/** + * Resolve effective capabilities based on project integration availability. + * + * Required capabilities are always included (validation happens separately). + * Optional capabilities are included only if their integration is available. 
+ */ +export function resolveEffectiveCapabilities( + requiredCaps: Capability[], + optionalCaps: Capability[], + hasIntegration: (category: IntegrationCategory) => boolean, +): Capability[] { + const effective: Capability[] = [...requiredCaps]; + + for (const cap of optionalCaps) { + // Built-in capabilities are always available + if (isBuiltInCapability(cap)) { + effective.push(cap); + continue; + } + + // Integration-based capabilities need their integration available + const integration = getCapabilityIntegration(cap); + if (integration && hasIntegration(integration)) { + effective.push(cap); + } + } + + return effective; +} + +/** + * Get unavailable optional capabilities for system prompt injection. + * Returns capabilities that would be available if integrations were configured. + */ +export function getUnavailableOptionalCapabilities( + optionalCaps: Capability[], + hasIntegration: (category: IntegrationCategory) => boolean, +): Capability[] { + const unavailable: Capability[] = []; + + for (const cap of optionalCaps) { + if (isBuiltInCapability(cap)) continue; + + const integration = getCapabilityIntegration(cap); + if (integration && !hasIntegration(integration)) { + unavailable.push(cap); + } + } + + return unavailable; +} + +// ============================================================================ +// Gadget Building +// ============================================================================ + +/** + * Build gadget instances from a list of capabilities. + * Returns fresh gadget instances for each call. 
+ */ +export function buildGadgetsFromCapabilities(caps: Capability[]): unknown[] { + const gadgets: unknown[] = []; + const seenGadgets = new Set(); + + for (const cap of caps) { + const def = CAPABILITY_REGISTRY[cap]; + for (const gadgetName of def.gadgetNames) { + // Avoid duplicates (capabilities may share gadgets) + if (seenGadgets.has(gadgetName)) continue; + seenGadgets.add(gadgetName); + + const Constructor = GADGET_CONSTRUCTORS[gadgetName]; + if (!Constructor) { + throw new Error( + `Gadget constructor not found: ${gadgetName}. Check CAPABILITY_REGISTRY and GADGET_CONSTRUCTORS are in sync.`, + ); + } + gadgets.push(new Constructor()); + } + } + + return gadgets; +} + +/** + * Get gadget names from capabilities (for tool manifest filtering). + */ +export function getGadgetNamesFromCapabilities(caps: Capability[]): string[] { + const names = new Set(); + for (const cap of caps) { + const def = CAPABILITY_REGISTRY[cap]; + for (const name of def.gadgetNames) { + names.add(name); + } + } + return [...names]; +} + +// ============================================================================ +// SDK Tools +// ============================================================================ + +/** + * Get SDK tool names from capabilities. + * These are the tools available for the Claude Code backend. + */ +export function getSdkToolsFromCapabilities(caps: Capability[]): string[] { + const tools = new Set(); + for (const cap of caps) { + const def = CAPABILITY_REGISTRY[cap]; + for (const tool of def.sdkToolNames) { + tools.add(tool); + } + } + return [...tools]; +} + +// ============================================================================ +// Tool Manifest Filtering +// ============================================================================ + +/** + * Filter tool manifests to only those allowed by capabilities. + * Used by Claude Code backend to filter available tools. + * + * Logs a warning if expected tools from capabilities are not found in manifests. 
+ */ +export function filterToolManifests(allTools: ToolManifest[], caps: Capability[]): ToolManifest[] { + const allowedNames = new Set(getGadgetNamesFromCapabilities(caps)); + const filtered = allTools.filter((tool) => allowedNames.has(tool.name)); + + // Check for missing expected tools + const foundNames = new Set(filtered.map((t) => t.name)); + const missing = [...allowedNames].filter((name) => !foundNames.has(name)); + if (missing.length > 0) { + console.warn( + `[capabilities] Expected tools not found in manifests: ${missing.join(', ')}. Check that gadget names in CAPABILITY_REGISTRY match tool manifest names.`, + ); + } + + return filtered; +} + +// ============================================================================ +// System Prompt Generation +// ============================================================================ + +/** + * Generate a system prompt note for unavailable optional capabilities. + * This helps the agent understand which tools are missing and why. 
+ */ +export function generateUnavailableCapabilitiesNote(unavailableCaps: Capability[]): string | null { + if (unavailableCaps.length === 0) return null; + + // Group by integration + const byIntegration = new Map(); + for (const cap of unavailableCaps) { + const integration = getCapabilityIntegration(cap); + if (!integration) continue; + + if (!byIntegration.has(integration)) { + byIntegration.set(integration, []); + } + + const def = CAPABILITY_REGISTRY[cap]; + byIntegration.get(integration)?.push(...def.gadgetNames); + } + + const lines: string[] = ['NOTE: Some optional capabilities are unavailable:']; + + const integrationLabels: Record = { + pm: 'PM integration (Trello/JIRA)', + scm: 'SCM integration (GitHub)', + email: 'Email integration', + sms: 'SMS integration (Twilio)', + }; + + for (const [integration, gadgetNames] of byIntegration) { + const label = integrationLabels[integration]; + const uniqueGadgets = [...new Set(gadgetNames)]; + lines.push(`- ${label} not configured. Tools unavailable: ${uniqueGadgets.join(', ')}`); + } + + lines.push('Proceed without using the above tools.'); + + return lines.join('\n'); +} + +// ============================================================================ +// Integration Checker Factory +// ============================================================================ + +/** + * Create an IntegrationChecker for a project. + * + * This function pre-fetches integration availability for all categories + * and returns a synchronous checker callback. 
+ */ +export async function createIntegrationChecker(projectId: string): Promise { + // Import integration checking functions dynamically to avoid circular deps + const [ + { hasPmIntegration }, + { hasScmIntegration }, + { hasEmailIntegration }, + { hasSmsIntegration }, + ] = await Promise.all([ + import('../../pm/integration.js'), + import('../../github/integration.js'), + import('../../email/integration.js'), + import('../../sms/index.js'), + ]); + + // Pre-fetch all integration statuses in parallel + const [hasPm, hasScm, hasEmail, hasSms] = await Promise.all([ + hasPmIntegration(projectId), + hasScmIntegration(projectId), + hasEmailIntegration(projectId), + hasSmsIntegration(projectId), + ]); + + // Return synchronous checker + const availableIntegrations: Record = { + pm: hasPm, + scm: hasScm, + email: hasEmail, + sms: hasSms, + }; + + return (category: IntegrationCategory) => availableIntegrations[category] ?? false; +} diff --git a/src/agents/definitions/debug.yaml b/src/agents/definitions/debug.yaml index 35282fcf..4b8f7aaa 100644 --- a/src/agents/definitions/debug.yaml +++ b/src/agents/definitions/debug.yaml @@ -4,20 +4,20 @@ identity: roleHint: Analyzes session logs to identify what went wrong initialMessage: "**\U0001F41B Analyzing session logs** — Reviewing what happened and identifying issues..." +# Read-only FS access for log analysis, full PM access for creating debug cards. capabilities: - canEditFiles: true - canCreatePR: true - canUpdateChecklists: true - isReadOnly: false - -tools: - sets: [all] - sdkTools: all + required: + - fs:read + - shell:exec + - session:ctrl + - pm:read + - pm:write + - pm:checklist + optional: [] strategies: contextPipeline: [directoryListing, contextFiles, squint, workItem] taskPromptBuilder: workItem - gadgetBuilder: workItem backend: enableStopHooks: true @@ -26,7 +26,3 @@ backend: compaction: default hint: Analyze the current issue fully before moving to the next. 
- -integrations: - required: [pm] - optional: [] diff --git a/src/agents/definitions/email-joke.yaml b/src/agents/definitions/email-joke.yaml index 780810c9..f604ce62 100644 --- a/src/agents/definitions/email-joke.yaml +++ b/src/agents/definitions/email-joke.yaml @@ -4,21 +4,18 @@ identity: roleHint: Reads emails from a specific sender and responds with jokes initialMessage: "**\U0001F602 Checking for emails to respond to with jokes**" +# Minimal agent with email access only. +# No file editing, no GitHub, no PM tools. capabilities: - canEditFiles: false - canCreatePR: false - canUpdateChecklists: false - isReadOnly: true - canAccessEmail: true - -tools: - sets: [email, session] - sdkTools: readOnly + required: + - session:ctrl + - email:read + - email:write + optional: [] strategies: contextPipeline: [prefetchedEmails] taskPromptBuilder: emailJoke - gadgetBuilder: emailJoke backend: enableStopHooks: false @@ -31,7 +28,3 @@ trailingMessage: {} hint: >- Search for emails, read them, and send funny responses. Mark processed emails as seen to prevent re-processing. - -integrations: - required: [email] - optional: [] diff --git a/src/agents/definitions/implementation.yaml b/src/agents/definitions/implementation.yaml index 6ddd80a1..7fc6a223 100644 --- a/src/agents/definitions/implementation.yaml +++ b/src/agents/definitions/implementation.yaml @@ -4,20 +4,23 @@ identity: roleHint: Writes code, runs tests, and prepares a pull request initialMessage: "**\U0001F680 Implementing changes** — Writing code, running tests, and preparing a PR..." +# Capabilities define what the agent can do. +# Integrations and tools are DERIVED from these capabilities. 
capabilities: - canEditFiles: true - canCreatePR: true - canUpdateChecklists: true - isReadOnly: false - -tools: - sets: [pm, pm_checklist, session] - sdkTools: all + required: + - fs:read + - fs:write + - shell:exec + - session:ctrl + - pm:read + - pm:write + - pm:checklist + - scm:pr + optional: [] strategies: contextPipeline: [directoryListing, contextFiles, squint, workItem] taskPromptBuilder: workItem - gadgetBuilder: workItem backend: enableStopHooks: true @@ -38,7 +41,3 @@ trailingMessage: includeGitStatus: true includePRStatus: true includeReminder: true - -integrations: - required: [scm, pm] - optional: [] diff --git a/src/agents/definitions/index.ts b/src/agents/definitions/index.ts index 80f4608c..d9586ddd 100644 --- a/src/agents/definitions/index.ts +++ b/src/agents/definitions/index.ts @@ -1,4 +1,4 @@ -export { AgentDefinitionSchema, type AgentDefinition } from './schema.js'; +export { AgentDefinitionSchema, type AgentDefinition, type AgentCapabilities } from './schema.js'; export { loadAgentDefinition, loadAllAgentDefinitions, @@ -9,21 +9,27 @@ export { resolveKnownAgentTypes, invalidateDefinitionCache, } from './loader.js'; -export { - TOOL_SET_REGISTRY, - SDK_TOOLS_REGISTRY, - GADGET_BUILDER_REGISTRY, - CONTEXT_STEP_REGISTRY, - PRE_EXECUTE_REGISTRY, - PM_TOOLS, - PM_CHECKLIST_TOOL, - GITHUB_REVIEW_TOOLS, - GITHUB_CI_TOOLS, - SESSION_TOOL, - ALL_SDK_TOOLS, - READ_ONLY_SDK_TOOLS, -} from './strategies.js'; +export { CONTEXT_STEP_REGISTRY, PRE_EXECUTE_REGISTRY } from './strategies.js'; export type { FetchContextParams, PreExecuteParams } from './contextSteps.js'; export type { AgentProfile } from './profiles.js'; -export { getAgentProfile } from './profiles.js'; +export { getAgentProfile, getAgentCapabilities } from './profiles.js'; export { getToolManifests } from './toolManifests.js'; + +// Re-export capability system +export { + CAPABILITIES, + CAPABILITY_REGISTRY, + type Capability, + type CapabilityDefinition, + getCapabilitiesByIntegration, + 
getCapabilityIntegration, + isBuiltInCapability, + isValidCapability, + buildGadgetsFromCapabilities, + deriveIntegrations, + deriveRequiredIntegrations, + filterToolManifests, + getGadgetNamesFromCapabilities, + getSdkToolsFromCapabilities, + resolveEffectiveCapabilities, +} from '../capabilities/index.js'; diff --git a/src/agents/definitions/planning.yaml b/src/agents/definitions/planning.yaml index d8224566..a6200d78 100644 --- a/src/agents/definitions/planning.yaml +++ b/src/agents/definitions/planning.yaml @@ -4,20 +4,20 @@ identity: roleHint: Studies the codebase and designs a step-by-step implementation plan initialMessage: "**\U0001F5FA\uFE0F Planning implementation** — Studying the codebase and designing a step-by-step plan..." +# Read-only agent that explores code and writes plans to PM system. +# No file editing, no PR creation, no checklist updates. capabilities: - canEditFiles: false - canCreatePR: false - canUpdateChecklists: false - isReadOnly: true - -tools: - sets: [pm, session] - sdkTools: readOnly + required: + - fs:read + - shell:exec + - session:ctrl + - pm:read + - pm:write + optional: [] strategies: contextPipeline: [directoryListing, contextFiles, squint, workItem] taskPromptBuilder: workItem - gadgetBuilder: workItem backend: enableStopHooks: false @@ -26,7 +26,3 @@ backend: compaction: default hint: Complete the current planning step efficiently before moving to the next. - -integrations: - required: [scm, pm] - optional: [] diff --git a/src/agents/definitions/profiles.ts b/src/agents/definitions/profiles.ts index f52bc971..3349ca18 100644 --- a/src/agents/definitions/profiles.ts +++ b/src/agents/definitions/profiles.ts @@ -1,18 +1,27 @@ -import { type AgentCapabilities, getAgentCapabilities } from '../shared/capabilities.js'; -export type { AgentCapabilities } from '../shared/capabilities.js'; +/** + * Agent Profiles + * + * Builds runtime profiles from agent definitions using the capability-centric architecture. 
+ * Capabilities determine tools, gadgets, and integration requirements. + */ + import type { AgentInput } from '../../types/index.js'; +import type { Capability, IntegrationChecker } from '../capabilities/index.js'; +import { + getGadgetNamesFromCapabilities, + getSdkToolsFromCapabilities, + resolveEffectiveCapabilities, +} from '../capabilities/resolver.js'; import type { ContextInjection, ToolManifest } from '../contracts/index.js'; import { type TaskPromptContext, renderTaskPrompt } from '../prompts/index.js'; +import { buildGadgetsForAgent } from '../shared/gadgets.js'; import type { FetchContextParams, PreExecuteParams } from './contextSteps.js'; import { resolveAgentDefinition } from './loader.js'; -import type { AgentDefinition } from './schema.js'; -import { - CONTEXT_STEP_REGISTRY, - GADGET_BUILDER_REGISTRY, - PRE_EXECUTE_REGISTRY, - SDK_TOOLS_REGISTRY, - TOOL_SET_REGISTRY, -} from './strategies.js'; +import type { AgentCapabilities, AgentDefinition } from './schema.js'; +import { CONTEXT_STEP_REGISTRY, PRE_EXECUTE_REGISTRY } from './strategies.js'; + +// Re-export for backward compatibility +export type { AgentCapabilities } from './schema.js'; // ============================================================================ // AgentProfile Interface @@ -37,24 +46,23 @@ export interface AgentProfile { buildTaskPrompt(input: AgentInput): string; /** Optional pre-execute hook (e.g., post initial PR comment) */ preExecute?(params: PreExecuteParams): Promise; - /** Capability summary — used by llmist backend to select gadgets */ + /** Agent capabilities (required + optional) */ capabilities: AgentCapabilities; /** * Return the gadget instances for the llmist backend. * Each call creates fresh instances — caller must not reuse returned gadgets. + * + * @param integrationChecker Optional callback to check integration availability. + * When provided, optional capabilities are filtered to only those with + * available integrations. 
When not provided, all capabilities are used. */ - getLlmistGadgets(agentType: string): Promise; + getLlmistGadgets(integrationChecker?: IntegrationChecker): unknown[]; } // ============================================================================ // Helpers // ============================================================================ -function filterToolsByNames(allTools: ToolManifest[], names: string[]): ToolManifest[] { - const nameSet = new Set(names); - return allTools.filter((t) => nameSet.has(t.name)); -} - function resolveRegistry(registry: Record, key: string, label: string): T { const value = registry[key]; if (!value) throw new Error(`${label} '${key}' not found in registry`); @@ -79,41 +87,42 @@ function buildTaskPromptContext(input: AgentInput): TaskPromptContext { }; } +/** + * Merge required and optional capabilities into a single list. + * In runtime, we use all declared capabilities (validation happens separately). + */ +function getAllCapabilities(caps: AgentCapabilities): Capability[] { + return [...caps.required, ...caps.optional]; +} + // ============================================================================ -// Profile Builder (YAML-driven) +// Profile Builder (Capability-driven) // ============================================================================ -async function buildProfileFromDefinition( - agentType: string, - def: AgentDefinition, -): Promise { - // Resolve tool names from YAML set references - const hasAllSet = def.tools.sets.includes('all'); - const toolNames: string[] = []; - if (!hasAllSet) { - for (const setName of def.tools.sets) { - const tools = TOOL_SET_REGISTRY[setName]; - if (tools) toolNames.push(...tools); - } - } +function buildProfileFromDefinition(def: AgentDefinition, agentType: string): AgentProfile { + const allCapabilities = getAllCapabilities(def.capabilities); - const sdkTools = SDK_TOOLS_REGISTRY[def.tools.sdkTools]; - // taskPromptBuilder YAML value maps directly to the .eta template filename - 
// (validated by the Zod schema in AgentDefinitionSchema) - const taskTemplateName = def.strategies.taskPromptBuilder; - const caps = await getAgentCapabilities(agentType); - const gadgetBuilderFn = resolveRegistry( - GADGET_BUILDER_REGISTRY, - def.strategies.gadgetBuilder, - 'gadgetBuilder', - ); - const gadgetBuilderOptions = def.strategies.gadgetBuilderOptions; + // Derive tool names from capabilities for filtering + const gadgetNames = getGadgetNamesFromCapabilities(allCapabilities); + + // Derive SDK tools from capabilities + const sdkTools = getSdkToolsFromCapabilities(allCapabilities); + + // Get gadget options from strategies + const gadgetOptions = def.strategies.gadgetOptions; + + // Get context pipeline from strategies const contextPipeline = def.strategies.contextPipeline; + // Task prompt template name (maps to .eta file) + const taskTemplateName = def.strategies.taskPromptBuilder; + const profile: AgentProfile = { - filterTools: hasAllSet - ? (allTools) => allTools - : (allTools) => filterToolsByNames(allTools, toolNames), + filterTools: (allTools: ToolManifest[]) => { + // Filter tools by the gadget names derived from capabilities + const nameSet = new Set(gadgetNames); + return allTools.filter((t) => nameSet.has(t.name)); + }, sdkTools, enableStopHooks: def.backend.enableStopHooks, needsGitHubToken: def.backend.needsGitHubToken, @@ -129,13 +138,23 @@ async function buildProfileFromDefinition( return injections; }, buildTaskPrompt: (input) => renderTaskPrompt(taskTemplateName, buildTaskPromptContext(input)), - capabilities: caps, - getLlmistGadgets: async (at) => - gadgetBuilderFn(await getAgentCapabilities(at), gadgetBuilderOptions), + capabilities: def.capabilities, + getLlmistGadgets: (integrationChecker?: IntegrationChecker) => { + // Resolve effective capabilities based on integration availability + const effectiveCaps = integrationChecker + ? 
resolveEffectiveCapabilities( + def.capabilities.required, + def.capabilities.optional, + integrationChecker, + ) + : allCapabilities; + return buildGadgetsForAgent(effectiveCaps, gadgetOptions); + }, }; if (def.backend.preExecute) { const preExecFn = resolveRegistry(PRE_EXECUTE_REGISTRY, def.backend.preExecute, 'preExecute'); + // Pass agentType so the hook can look up initial messages profile.preExecute = (params) => preExecFn(agentType, params); } @@ -153,5 +172,14 @@ export async function getAgentProfile(agentType: string): Promise } catch (err) { throw new Error(`Failed to load agent profile for '${agentType}'`, { cause: err }); } - return buildProfileFromDefinition(agentType, def); + return buildProfileFromDefinition(def, agentType); +} + +/** + * Get agent capabilities from a definition. + * Used for backward compatibility with code that expects the old format. + */ +export async function getAgentCapabilities(agentType: string): Promise { + const def = await resolveAgentDefinition(agentType); + return def.capabilities; } diff --git a/src/agents/definitions/respond-to-ci.yaml b/src/agents/definitions/respond-to-ci.yaml index 97311c0d..951f94aa 100644 --- a/src/agents/definitions/respond-to-ci.yaml +++ b/src/agents/definitions/respond-to-ci.yaml @@ -4,20 +4,23 @@ identity: roleHint: Analyzes failed CI checks and works on a fix initialMessage: "**\U0001F527 Fixing CI failures** — Analyzing the failed checks and working on a fix..." +# Can edit files and read SCM, optional PM for status updates. 
capabilities: - canEditFiles: true - canCreatePR: false - canUpdateChecklists: true - isReadOnly: false - -tools: - sets: [github_ci, pm, pm_checklist, session] - sdkTools: all + required: + - fs:read + - fs:write + - shell:exec + - session:ctrl + - scm:read + - scm:comment + optional: + - pm:read + - pm:write + - pm:checklist strategies: contextPipeline: [prContext, directoryListing, contextFiles, squint, workItem] taskPromptBuilder: ci - gadgetBuilder: prAgent backend: enableStopHooks: true @@ -31,7 +34,3 @@ hint: Fix CI failures with minimal, focused changes. Batch related file edits to trailingMessage: includeDiagnostics: true - -integrations: - required: [scm] - optional: [pm] diff --git a/src/agents/definitions/respond-to-planning-comment.yaml b/src/agents/definitions/respond-to-planning-comment.yaml index d77ff7ea..79b0ec6d 100644 --- a/src/agents/definitions/respond-to-planning-comment.yaml +++ b/src/agents/definitions/respond-to-planning-comment.yaml @@ -4,20 +4,20 @@ identity: roleHint: Reads user feedback and updates the plan accordingly initialMessage: "**\U0001F4AC Responding to feedback** — Reading your comment and updating the plan accordingly..." +# Can update PM checklists to respond to feedback, but no file editing. capabilities: - canEditFiles: false - canCreatePR: false - canUpdateChecklists: true - isReadOnly: true - -tools: - sets: [pm, pm_checklist, session] - sdkTools: readOnly + required: + - fs:read + - shell:exec + - session:ctrl + - pm:read + - pm:write + - pm:checklist + optional: [] strategies: contextPipeline: [directoryListing, contextFiles, squint, workItem] taskPromptBuilder: commentResponse - gadgetBuilder: workItem backend: enableStopHooks: false @@ -26,7 +26,3 @@ backend: compaction: default hint: Complete the current task efficiently before moving to the next. 
- -integrations: - required: [scm, pm] - optional: [] diff --git a/src/agents/definitions/respond-to-pr-comment.yaml b/src/agents/definitions/respond-to-pr-comment.yaml index b15992c5..9e2cf001 100644 --- a/src/agents/definitions/respond-to-pr-comment.yaml +++ b/src/agents/definitions/respond-to-pr-comment.yaml @@ -4,21 +4,23 @@ identity: roleHint: Reads a PR comment and takes action initialMessage: "**\U0001F4AC Responding to PR comment** — Reading your comment and taking action..." +# Can edit files and interact with PR comments, optional PM for status updates. capabilities: - canEditFiles: true - canCreatePR: false - canUpdateChecklists: false - isReadOnly: false - -tools: - sets: [github_review, session] - sdkTools: all + required: + - fs:read + - fs:write + - shell:exec + - session:ctrl + - scm:read + - scm:comment + optional: + - pm:read + - pm:write strategies: contextPipeline: [prContext, prConversation, directoryListing, contextFiles, squint] taskPromptBuilder: prCommentResponse - gadgetBuilder: prAgent - gadgetBuilderOptions: + gadgetOptions: includeReviewComments: true backend: @@ -29,7 +31,3 @@ backend: compaction: default hint: Complete the current task efficiently before moving to the next. - -integrations: - required: [scm] - optional: [pm] diff --git a/src/agents/definitions/respond-to-review.yaml b/src/agents/definitions/respond-to-review.yaml index 8d1a59f0..a0736a82 100644 --- a/src/agents/definitions/respond-to-review.yaml +++ b/src/agents/definitions/respond-to-review.yaml @@ -4,21 +4,24 @@ identity: roleHint: Addresses code review feedback by making requested changes initialMessage: "**\U0001F527 Addressing review feedback** — Making the requested changes from the code review..." +# Can edit files to address review feedback, interact with PR comments. +# Optional PM for status updates if configured. 
capabilities: - canEditFiles: false - canCreatePR: false - canUpdateChecklists: false - isReadOnly: true - -tools: - sets: [github_review, session] - sdkTools: all + required: + - fs:read + - fs:write + - shell:exec + - session:ctrl + - scm:read + - scm:comment + optional: + - pm:read + - pm:write strategies: contextPipeline: [prContext, prConversation, directoryListing, contextFiles, squint] taskPromptBuilder: prCommentResponse - gadgetBuilder: prAgent - gadgetBuilderOptions: + gadgetOptions: includeReviewComments: true backend: @@ -32,7 +35,3 @@ hint: Address the current review comment fully before moving to the next. Batch trailingMessage: includeDiagnostics: true - -integrations: - required: [scm] - optional: [pm] diff --git a/src/agents/definitions/review.yaml b/src/agents/definitions/review.yaml index d1d4618c..8bf358f8 100644 --- a/src/agents/definitions/review.yaml +++ b/src/agents/definitions/review.yaml @@ -4,20 +4,22 @@ identity: roleHint: Reviews pull request changes for quality and correctness initialMessage: "**\U0001F50D Reviewing code** — Examining the PR changes for quality and correctness..." +# Read-only agent that reviews PRs. Can submit reviews and update comments. +# Optional PM for reading linked work item context. capabilities: - canEditFiles: false - canCreatePR: false - canUpdateChecklists: false - isReadOnly: true - -tools: - sets: [github_review, session] - sdkTools: readOnly + required: + - fs:read + - shell:exec + - session:ctrl + - scm:read + - scm:review + - scm:comment + optional: + - pm:read strategies: contextPipeline: [prContext, contextFiles, squint] taskPromptBuilder: review - gadgetBuilder: review backend: enableStopHooks: false @@ -27,7 +29,3 @@ backend: compaction: default hint: Focus on the current aspect of review before moving to the next. Read related files together. 
- -integrations: - required: [scm] - optional: [pm] diff --git a/src/agents/definitions/schema.ts b/src/agents/definitions/schema.ts index daf644a9..25859e74 100644 --- a/src/agents/definitions/schema.ts +++ b/src/agents/definitions/schema.ts @@ -1,4 +1,5 @@ import { z } from 'zod'; +import { CAPABILITIES } from '../capabilities/registry.js'; // ============================================================================ // Agent Definition Schema @@ -7,26 +8,6 @@ import { z } from 'zod'; // Integration categories (aligned with integrationRoles.ts) export const IntegrationCategorySchema = z.enum(['pm', 'scm', 'email', 'sms']); -// Integration requirements schema (REQUIRED field) -const IntegrationsSchema = z - .object({ - /** Integrations that MUST be configured for the agent to run */ - required: z.array(IntegrationCategorySchema), - /** - * Integrations the agent CAN use if available (for future use). - * Currently not validated - reserved for dashboard filtering and - * conditional agent behavior based on available integrations. 
- */ - optional: z.array(IntegrationCategorySchema), - }) - .refine( - (data) => { - const requiredSet = new Set(data.required); - return !data.optional.some((cat) => requiredSet.has(cat)); - }, - { message: 'A category cannot be both required and optional' }, - ); - const IdentitySchema = z.object({ emoji: z.string(), label: z.string(), @@ -34,35 +15,46 @@ const IdentitySchema = z.object({ initialMessage: z.string(), }); -const CapabilitiesSchema = z.object({ - canEditFiles: z.boolean(), - canCreatePR: z.boolean(), - canUpdateChecklists: z.boolean(), - isReadOnly: z.boolean(), - canAccessEmail: z.boolean().optional(), -}); - -export const TOOL_SET_NAMES = [ - 'pm', - 'pm_checklist', - 'session', - 'github_review', - 'github_ci', - 'email', - 'all', -] as const; +// ============================================================================ +// Capability-Centric Schema +// ============================================================================ -export const SDK_TOOLS_NAMES = ['all', 'readOnly'] as const; +/** + * Capability names validated against the registry. + * Format: {source}:{action} (e.g., 'fs:read', 'pm:write', 'scm:pr') + */ +const CapabilitySchema = z.enum(CAPABILITIES); -const ToolsSchema = z.object({ - /** Named tool set references resolved via TOOL_SET_REGISTRY */ - sets: z.array(z.enum(TOOL_SET_NAMES)), - /** SDK tools preset: "all" or "readOnly" */ - sdkTools: z.enum(SDK_TOOLS_NAMES), -}); +/** + * Capabilities schema with required and optional arrays. + * + * Required capabilities: Agent fails validation if integration not configured + * Optional capabilities: Enabled if integration available, gracefully skipped if not + * + * Integrations are DERIVED from capability prefixes - no separate declaration needed. 
+ */ +const CapabilitiesSchema = z + .object({ + /** Capabilities the agent MUST have - fails if integration not configured */ + required: z.array(CapabilitySchema), + /** Capabilities the agent CAN use if available */ + optional: z.array(CapabilitySchema).default([]), + }) + .refine( + (data) => { + const requiredSet = new Set(data.required); + return !data.optional.some((cap) => requiredSet.has(cap)); + }, + { message: 'A capability cannot be both required and optional' }, + ); -const GadgetBuilderOptionsSchema = z +/** + * Optional gadget builder options for special cases. + * Most agents won't need this - capabilities determine tools automatically. + */ +const GadgetOptionsSchema = z .object({ + /** Include GetPRComments and ReplyToReviewComment gadgets (for PR comment response agents) */ includeReviewComments: z.boolean().optional(), }) .optional(); @@ -86,15 +78,19 @@ export const TASK_PROMPT_BUILDER_NAMES = [ 'emailJoke', ] as const; -export const GADGET_BUILDER_NAMES = ['workItem', 'review', 'prAgent', 'emailJoke'] as const; - export const COMPACTION_NAMES = ['implementation', 'default'] as const; +/** + * Strategies schema - context and prompt configuration. + * Note: gadgetBuilder removed - gadgets are now derived from capabilities. + */ const StrategiesSchema = z.object({ + /** Pipeline of context fetching steps */ contextPipeline: z.array(z.enum(CONTEXT_STEP_NAMES)), + /** Task prompt template name (maps to .eta file) */ taskPromptBuilder: z.enum(TASK_PROMPT_BUILDER_NAMES), - gadgetBuilder: z.enum(GADGET_BUILDER_NAMES), - gadgetBuilderOptions: GadgetBuilderOptionsSchema, + /** Optional gadget configuration for special cases */ + gadgetOptions: GadgetOptionsSchema, }); const BackendSchema = z.object({ @@ -123,16 +119,33 @@ const PromptsSchema = z }) .optional(); +/** + * Complete agent definition schema. 
+ * + * Key design: capabilities.required/optional determine everything: + * - Which integrations are required (derived from capability prefixes) + * - Which gadgets are available (from capability registry) + * - Which SDK tools are enabled (from capability registry) + */ export const AgentDefinitionSchema = z.object({ + /** Agent identity for UI display */ identity: IdentitySchema, + /** + * Capabilities define what the agent can do. + * Integrations and tools are DERIVED from capabilities. + */ capabilities: CapabilitiesSchema, - tools: ToolsSchema, + /** Strategy configuration (context pipeline, prompts) */ strategies: StrategiesSchema, + /** Backend execution configuration */ backend: BackendSchema, + /** Context compaction strategy */ compaction: z.enum(COMPACTION_NAMES), + /** Iteration guidance hint for the agent */ hint: z.string(), + /** Trailing message configuration */ trailingMessage: TrailingMessageSchema, - integrations: IntegrationsSchema, + /** Custom prompts (optional) */ prompts: PromptsSchema, }); @@ -146,4 +159,8 @@ export type AgentDefinition = z.infer; export type IntegrationCategory = z.infer; -export type AgentIntegrations = z.infer; +/** Capability type re-export for convenience */ +export type { Capability } from '../capabilities/registry.js'; + +/** Agent capabilities (required + optional) */ +export type AgentCapabilities = z.infer; diff --git a/src/agents/definitions/splitting.yaml b/src/agents/definitions/splitting.yaml index b7c865f2..f49dcb49 100644 --- a/src/agents/definitions/splitting.yaml +++ b/src/agents/definitions/splitting.yaml @@ -4,20 +4,21 @@ identity: roleHint: Breaks down a feature plan into smaller, ordered work items (subtasks) initialMessage: "**\U0001F4CB Splitting plan** — Reading the plan and splitting it into ordered work items..." +# Can read files and edit PM checklists, but no PR creation. 
capabilities: - canEditFiles: true - canCreatePR: false - canUpdateChecklists: true - isReadOnly: false - -tools: - sets: [pm, pm_checklist, session] - sdkTools: all + required: + - fs:read + - fs:write + - shell:exec + - session:ctrl + - pm:read + - pm:write + - pm:checklist + optional: [] strategies: contextPipeline: [directoryListing, contextFiles, squint, workItem] taskPromptBuilder: workItem - gadgetBuilder: workItem backend: enableStopHooks: false @@ -26,7 +27,3 @@ backend: compaction: default hint: Gather all context needed for the current step before proceeding. - -integrations: - required: [scm, pm] - optional: [] diff --git a/src/agents/definitions/strategies.ts b/src/agents/definitions/strategies.ts index a867ede3..852fc66b 100644 --- a/src/agents/definitions/strategies.ts +++ b/src/agents/definitions/strategies.ts @@ -1,11 +1,14 @@ +/** + * Strategy Registries + * + * Contains registries for context pipeline steps and pre-execute hooks. + * + * Note: Tool set and gadget builder registries have been removed. + * Tools and gadgets are now derived from capabilities via the capability registry. 
+ * See: src/agents/capabilities/registry.ts + */ + import type { ContextInjection } from '../contracts/index.js'; -import type { AgentCapabilities } from '../shared/capabilities.js'; -import { - buildEmailJokeGadgets, - buildPRAgentGadgets, - buildReviewGadgets, - buildWorkItemGadgets, -} from '../shared/gadgets.js'; import { type FetchContextParams, type PreExecuteParams, @@ -19,79 +22,6 @@ import { postInitialPRCommentHook, } from './contextSteps.js'; -// ============================================================================ -// Tool Set Registry -// ============================================================================ - -/** PM tools available to most agents */ -export const PM_TOOLS = [ - 'ReadWorkItem', - 'PostComment', - 'UpdateWorkItem', - 'CreateWorkItem', - 'ListWorkItems', - 'AddChecklist', -]; - -/** PM checklist update — excluded from planning to prevent premature completion */ -export const PM_CHECKLIST_TOOL = 'UpdateChecklistItem'; - -/** GitHub review tools for code review agents */ -export const GITHUB_REVIEW_TOOLS = [ - 'GetPRDetails', - 'GetPRDiff', - 'GetPRChecks', - 'GetPRComments', - 'PostPRComment', - 'UpdatePRComment', - 'ReplyToReviewComment', - 'CreatePRReview', -]; - -/** GitHub CI tools for respond-to-ci agent (no CreatePR — pushes to existing branch) */ -export const GITHUB_CI_TOOLS = [ - 'GetPRDetails', - 'GetPRDiff', - 'GetPRChecks', - 'PostPRComment', - 'UpdatePRComment', -]; - -/** Email tools for agents that need email access */ -export const EMAIL_TOOLS = [ - 'SendEmail', - 'SearchEmails', - 'ReadEmail', - 'ReplyToEmail', - 'MarkEmailAsSeen', -]; - -export const SESSION_TOOL = 'Finish'; - -export const ALL_SDK_TOOLS = ['Read', 'Write', 'Edit', 'Bash', 'Glob', 'Grep']; -export const READ_ONLY_SDK_TOOLS = ['Read', 'Bash', 'Glob', 'Grep']; - -/** - * Maps YAML tool set names to the actual tool name arrays. 
- */ -export const TOOL_SET_REGISTRY: Record = { - pm: PM_TOOLS, - pm_checklist: [PM_CHECKLIST_TOOL], - session: [SESSION_TOOL], - github_review: GITHUB_REVIEW_TOOLS, - github_ci: GITHUB_CI_TOOLS, - email: EMAIL_TOOLS, - // 'all' is a sentinel — handled by returning allTools unfiltered -}; - -/** - * Maps YAML sdkTools names to actual SDK tool arrays. - */ -export const SDK_TOOLS_REGISTRY: Record = { - all: ALL_SDK_TOOLS, - readOnly: READ_ONLY_SDK_TOOLS, -}; - // ============================================================================ // Context Pipeline Step Registry // ============================================================================ @@ -119,17 +49,3 @@ export const PRE_EXECUTE_REGISTRY: Record< > = { postInitialPRComment: postInitialPRCommentHook, }; - -// ============================================================================ -// Gadget Builder Registry -// ============================================================================ - -export const GADGET_BUILDER_REGISTRY: Record< - string, - (caps: AgentCapabilities, options?: { includeReviewComments?: boolean }) => unknown[] -> = { - workItem: (caps) => buildWorkItemGadgets(caps), - review: () => buildReviewGadgets(), - prAgent: (_caps, options) => buildPRAgentGadgets(options), - emailJoke: () => buildEmailJokeGadgets(), -}; diff --git a/src/agents/shared/capabilities.ts b/src/agents/shared/capabilities.ts index b7790222..b620b624 100644 --- a/src/agents/shared/capabilities.ts +++ b/src/agents/shared/capabilities.ts @@ -1,49 +1,87 @@ -import { resolveAgentDefinition } from '../definitions/loader.js'; - -// ============================================================================ -// AgentCapabilities -// ============================================================================ - /** - * Describes what a particular agent type is allowed to do. 
+ * Agent Capabilities * - * Consumed by the llmist backend (agents/base.ts) to gate gadget inclusion - * and by the Claude Code backend (backends/agent-profiles.ts) for tool filtering. + * Re-exports capability types and functions from the new capability registry. * - * Keeping this in agents/shared/ avoids circular imports between agents/ and backends/. + * This file is kept for backward compatibility. New code should import from: + * - '../capabilities/index.js' for full capability system + * - '../definitions/schema.js' for AgentCapabilities type */ -export interface AgentCapabilities { - /** Can the agent read and write files? (false = read-only) */ + +// Re-export capability types +export type { Capability, AgentCapabilities } from '../definitions/schema.js'; + +// Re-export capability functions +export { + CAPABILITIES, + CAPABILITY_REGISTRY, + getCapabilitiesByIntegration, + getCapabilityIntegration, + isBuiltInCapability, + isValidCapability, +} from '../capabilities/index.js'; + +export { + buildGadgetsFromCapabilities, + deriveIntegrations, + deriveRequiredIntegrations, + filterToolManifests, + generateUnavailableCapabilitiesNote, + getGadgetNamesFromCapabilities, + getSdkToolsFromCapabilities, + getUnavailableOptionalCapabilities, + resolveEffectiveCapabilities, +} from '../capabilities/index.js'; + +import { resolveAgentDefinition } from '../definitions/index.js'; + +/** + * Legacy interface for derived capability flags. + * Used by code that needs boolean capability checks. + */ +export interface LegacyCapabilities { canEditFiles: boolean; - /** Can the agent create GitHub pull requests? */ canCreatePR: boolean; - /** Can the agent update PM checklist items? */ canUpdateChecklists: boolean; - /** True for agents that only interact with the PM system (no repo changes) */ isReadOnly: boolean; - /** Can the agent send/search/read emails? (default: false) */ - canAccessEmail?: boolean; } /** - * Default capabilities for unknown agent types — full access. 
- */ -const DEFAULT_CAPABILITIES: AgentCapabilities = { - canEditFiles: true, - canCreatePR: true, - canUpdateChecklists: true, - isReadOnly: false, -}; - -/** - * Look up capabilities for a given agent type. - * Reads from the async resolver (cache → DB → YAML); falls back to full-access defaults for unknown types. + * Get legacy capability flags for an agent type. + * + * Derives boolean capability flags from the new capability array format: + * - canEditFiles = has 'fs:write' + * - canCreatePR = has 'scm:pr' + * - canUpdateChecklists = has 'pm:checklist' + * - isReadOnly = does not have 'fs:write' + * + * For unknown agent types, returns full-access defaults to maintain + * backward compatibility. */ -export async function getAgentCapabilities(agentType: string): Promise { +export async function getAgentCapabilities(agentType: string): Promise { try { const def = await resolveAgentDefinition(agentType); - return def.capabilities; - } catch { - return DEFAULT_CAPABILITIES; + const allCaps = [...def.capabilities.required, ...def.capabilities.optional]; + + return { + canEditFiles: allCaps.includes('fs:write'), + canCreatePR: allCaps.includes('scm:pr'), + canUpdateChecklists: allCaps.includes('pm:checklist'), + isReadOnly: !allCaps.includes('fs:write'), + }; + } catch (error) { + // Only fall back to full access for "agent not found" errors. + // Re-throw unexpected errors to avoid masking bugs with elevated privileges. + const message = error instanceof Error ? 
error.message : String(error); + if (message.includes('not found')) { + // Unknown agent type - return full-access defaults for backward compatibility + return { + canEditFiles: true, + canCreatePR: true, + canUpdateChecklists: true, + isReadOnly: false, + }; + } + throw error; } } diff --git a/src/agents/shared/gadgets.ts b/src/agents/shared/gadgets.ts index 64a7e003..7b3593b5 100644 --- a/src/agents/shared/gadgets.ts +++ b/src/agents/shared/gadgets.ts @@ -1,179 +1,40 @@ -import { AstGrep } from '../../gadgets/AstGrep.js'; -import { FileMultiEdit } from '../../gadgets/FileMultiEdit.js'; -import { FileSearchAndReplace } from '../../gadgets/FileSearchAndReplace.js'; -import { Finish } from '../../gadgets/Finish.js'; -import { ListDirectory } from '../../gadgets/ListDirectory.js'; -import { ReadFile } from '../../gadgets/ReadFile.js'; -import { RipGrep } from '../../gadgets/RipGrep.js'; -import { Sleep } from '../../gadgets/Sleep.js'; -import { VerifyChanges } from '../../gadgets/VerifyChanges.js'; -import { WriteFile } from '../../gadgets/WriteFile.js'; -import { - MarkEmailAsSeen, - ReadEmail, - ReplyToEmail, - SearchEmails, - SendEmail, -} from '../../gadgets/email/index.js'; -import { - CreatePR, - CreatePRReview, - GetPRChecks, - GetPRComments, - GetPRDetails, - GetPRDiff, - PostPRComment, - ReplyToReviewComment, - UpdatePRComment, -} from '../../gadgets/github/index.js'; -import { - AddChecklist, - CreateWorkItem, - ListWorkItems, - PMDeleteChecklistItem, - PMUpdateChecklistItem, - PostComment, - ReadWorkItem, - UpdateWorkItem, -} from '../../gadgets/pm/index.js'; -import { Tmux } from '../../gadgets/tmux.js'; -import { TodoDelete, TodoUpdateStatus, TodoUpsert } from '../../gadgets/todo/index.js'; -import type { CreateBuilderOptions } from './builderFactory.js'; -import type { AgentCapabilities } from './capabilities.js'; - /** - * Build the standard set of gadgets for work-item-based agents. 
- * - * Used by both the llmist backend (agents/base.ts) and the Claude Code backend - * (backends/agent-profiles.ts) to ensure identical gadget sets for matching - * agent types regardless of which backend runs the agent. + * Gadget Building * - * Applies capabilities to gate optional gadgets: - * - canEditFiles: file writing tools (FileSearchAndReplace, FileMultiEdit, WriteFile, VerifyChanges) - * - canCreatePR: CreatePR - * - canUpdateChecklists: PMUpdateChecklistItem + * Builds gadget instances from agent capabilities. + * This replaces the old builder functions (buildWorkItemGadgets, buildReviewGadgets, etc.) + * with a unified capability-driven approach. */ -export function buildWorkItemGadgets(caps: AgentCapabilities): CreateBuilderOptions['gadgets'] { - return [ - // Filesystem gadgets (read-only when canEditFiles is false) - new ListDirectory(), - new ReadFile(), - new RipGrep(), - new AstGrep(), - ...(caps.canEditFiles - ? [new FileSearchAndReplace(), new FileMultiEdit(), new WriteFile(), new VerifyChanges()] - : []), - // Shell commands via tmux (no timeout issues) - new Tmux(), - new Sleep(), - // Task tracking gadgets - new TodoUpsert(), - new TodoUpdateStatus(), - new TodoDelete(), - // GitHub gadgets (PR creation gated by capability) - ...(caps.canCreatePR ? [new CreatePR()] : []), - // PM gadgets (work items, comments, checklists — PM-agnostic) - new ReadWorkItem(), - new PostComment(), - new UpdateWorkItem(), - new CreateWorkItem(), - new ListWorkItems(), - new AddChecklist(), - // UpdateChecklistItem gated by capability — prevents planning from marking items complete - // prematurely, while respond-to-planning-comment CAN update them - ...(caps.canUpdateChecklists ? [new PMUpdateChecklistItem(), new PMDeleteChecklistItem()] : []), - // Email gadgets (gated by capability — disabled by default) - ...(caps.canAccessEmail - ? 
[ - new SendEmail(), - new SearchEmails(), - new ReadEmail(), - new ReplyToEmail(), - new MarkEmailAsSeen(), - ] - : []), - // Session control - new Finish(), - ]; -} -/** - * Build gadgets for the review agent (read-only, PR-focused). - * - * Used by both backends — review agent sees PR details, diff, checks, and - * can submit a review, but cannot modify files or create new PRs. - */ -export function buildReviewGadgets(): CreateBuilderOptions['gadgets'] { - return [ - new ListDirectory(), - new ReadFile(), - new Tmux(), - new Sleep(), - new TodoUpsert(), - new TodoUpdateStatus(), - new TodoDelete(), - new GetPRDetails(), - new GetPRDiff(), - new GetPRChecks(), - new CreatePRReview(), - new UpdatePRComment(), - new Finish(), - ]; -} +import { GetPRComments, ReplyToReviewComment } from '../../gadgets/github/index.js'; +import type { Capability } from '../capabilities/index.js'; +import { buildGadgetsFromCapabilities as buildFromCapabilities } from '../capabilities/resolver.js'; -/** - * Build gadgets for email-focused agents (email-joke). - * - * Minimal set: email operations + session control only. - * No file editing, no GitHub, no PM tools. - */ -export function buildEmailJokeGadgets(): CreateBuilderOptions['gadgets'] { - return [ - new SearchEmails(), - new ReadEmail(), - new ReplyToEmail(), - new MarkEmailAsSeen(), - new Finish(), - ]; -} +// Re-export the main capability-based building function +export { buildGadgetsFromCapabilities } from '../capabilities/resolver.js'; /** - * Build gadgets for PR-modifying agents (respond-to-review, respond-to-ci, - * respond-to-pr-comment). - * - * Includes file editing + GitHub tools but NOT CreatePR (agents push to - * existing branches). Pass includeReviewComments=true for agents that need - * GetPRComments and ReplyToReviewComment. + * Build gadgets from capabilities with optional special handling. * - * Used by both backends to ensure identical tool sets. 
+ * This function adds gadgets for special options that aren't capability-driven: + * - includeReviewComments: adds GetPRComments and ReplyToReviewComment */ -export function buildPRAgentGadgets(options?: { - includeReviewComments?: boolean; -}): CreateBuilderOptions['gadgets'] { - const gadgets: CreateBuilderOptions['gadgets'] = [ - new ListDirectory(), - new ReadFile(), - new FileSearchAndReplace(), - new FileMultiEdit(), - new WriteFile(), - new VerifyChanges(), - new AstGrep(), - new RipGrep(), - new Tmux(), - new Sleep(), - new TodoUpsert(), - new TodoUpdateStatus(), - new TodoDelete(), - new GetPRDetails(), - new GetPRDiff(), - new GetPRChecks(), - new PostPRComment(), - new UpdatePRComment(), - new Finish(), - ]; +export function buildGadgetsForAgent( + capabilities: Capability[], + options?: { + /** Include GetPRComments and ReplyToReviewComment (for PR comment response agents) */ + includeReviewComments?: boolean; + }, +): unknown[] { + const gadgets = buildFromCapabilities(capabilities); + // Add review comment tools if requested (not capability-driven, agent-specific) if (options?.includeReviewComments) { - gadgets.push(new GetPRComments(), new ReplyToReviewComment()); + // Check if not already included via scm:comment capability + const hasScmComment = capabilities.includes('scm:comment'); + if (!hasScmComment) { + gadgets.push(new GetPRComments(), new ReplyToReviewComment()); + } } return gadgets; diff --git a/src/api/routers/agentDefinitions.ts b/src/api/routers/agentDefinitions.ts index 264ca159..5d4ef7e6 100644 --- a/src/api/routers/agentDefinitions.ts +++ b/src/api/routers/agentDefinitions.ts @@ -1,5 +1,6 @@ import { TRPCError } from '@trpc/server'; import { z } from 'zod'; +import { CAPABILITIES } from '../../agents/capabilities/index.js'; import { getKnownAgentTypes, invalidateDefinitionCache, @@ -13,10 +14,7 @@ import { COMPACTION_NAMES, CONTEXT_STEP_NAMES, DefinitionPatchSchema, - GADGET_BUILDER_NAMES, - SDK_TOOLS_NAMES, 
TASK_PROMPT_BUILDER_NAMES, - TOOL_SET_NAMES, } from '../../agents/definitions/schema.js'; import { validateTemplate } from '../../agents/prompts/index.js'; import { @@ -339,11 +337,9 @@ export const agentDefinitionsRouter = router({ */ schema: publicProcedure.query(() => { return { - toolSetNames: [...TOOL_SET_NAMES], - sdkToolsNames: [...SDK_TOOLS_NAMES], + capabilities: [...CAPABILITIES], contextStepNames: [...CONTEXT_STEP_NAMES], taskPromptBuilderNames: [...TASK_PROMPT_BUILDER_NAMES], - gadgetBuilderNames: [...GADGET_BUILDER_NAMES], compactionNames: [...COMPACTION_NAMES], }; }), diff --git a/src/backends/claude-code/contextFiles.ts b/src/backends/claude-code/contextFiles.ts new file mode 100644 index 00000000..2422947c --- /dev/null +++ b/src/backends/claude-code/contextFiles.ts @@ -0,0 +1,191 @@ +/** + * Context file offloading for Claude Code backend. + * + * When context injections are too large to embed inline in the prompt, + * this module writes them to files and generates instructions for Claude + * to read them on-demand using its built-in Read tool. + */ +import { mkdir, rm, writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; + +import { CONTEXT_OFFLOAD_CONFIG } from '../../config/claudeCodeConfig.js'; +import { estimateTokens } from '../../config/reviewConfig.js'; +import { logger } from '../../utils/logging.js'; +import type { ContextInjection } from '../types.js'; + +/** + * Metadata about an offloaded context file. + */ +export interface OffloadedFile { + /** Relative path from repo root, e.g. '.cascade/context/pr-diff.txt' */ + relativePath: string; + /** Original description of this context */ + description: string; + /** Estimated token count of the content */ + tokens: number; +} + +/** + * Result of context offloading. 
+ */ +export interface ContextOffloadResult { + /** Context injections small enough to embed inline */ + inlineInjections: ContextInjection[]; + /** Files that were written for large context */ + offloadedFiles: OffloadedFile[]; + /** Instructions for Claude to read the offloaded files */ + instructions: string; +} + +/** + * Convert a description string into a safe filename. + * Includes index suffix to guarantee uniqueness within a batch. + */ +function slugify(description: string, index: number): string { + const base = description + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, '') + .slice(0, 40); // Shorter to make room for index + + // Always append index for guaranteed uniqueness within this batch + return `${base || 'context'}-${index}`; +} + +/** + * Generate instructions for Claude to read offloaded context files. + */ +function generateReadInstructions(files: OffloadedFile[]): string { + if (files.length === 0) return ''; + + const lines = [ + '## Context Files', + '', + 'The following context has been saved to files to avoid exceeding prompt limits.', + 'Use the Read tool to access them as needed:', + '', + ]; + + for (const file of files) { + lines.push( + `- \`${file.relativePath}\` — ${file.description} (~${file.tokens.toLocaleString()} tokens)`, + ); + } + + lines.push(''); + lines.push('Read these files as needed for your task. For review tasks, start with the PR diff.'); + + return lines.join('\n'); +} + +/** + * Offload large context injections to files. + * + * Small context (below threshold) is kept inline. + * Large context is written to .cascade/context/ and Claude is instructed to read it. 
+ * + * @param repoDir - Repository directory where context files will be written + * @param injections - Context injections to process + * @returns Result with inline context, offloaded files, and instructions + */ +export async function offloadLargeContext( + repoDir: string, + injections: ContextInjection[], +): Promise { + if (!CONTEXT_OFFLOAD_CONFIG.enabled) { + return { + inlineInjections: injections, + offloadedFiles: [], + instructions: '', + }; + } + + const inlineInjections: ContextInjection[] = []; + const offloadedFiles: OffloadedFile[] = []; + const contextDir = join(repoDir, CONTEXT_OFFLOAD_CONFIG.contextDir); + let dirCreated = false; + + for (let i = 0; i < injections.length; i++) { + const injection = injections[i]; + const tokens = estimateTokens(injection.result); + + if (tokens < CONTEXT_OFFLOAD_CONFIG.inlineThreshold) { + inlineInjections.push(injection); + } else { + // Create context directory on first offload + if (!dirCreated) { + await mkdir(contextDir, { recursive: true }); + dirCreated = true; + } + + // Generate unique filename from description (with index for uniqueness) + const slug = slugify(injection.description, i); + const filename = `${slug}.txt`; + const filepath = join(contextDir, filename); + // Use forward slashes for consistent paths in instructions (works on all platforms) + const relativePath = `${CONTEXT_OFFLOAD_CONFIG.contextDir}/${filename}`; + + await writeFile(filepath, injection.result, 'utf-8'); + + offloadedFiles.push({ + relativePath, + description: injection.description, + tokens, + }); + + logger.info('Context offloaded to file', { + description: injection.description, + tokens, + path: relativePath, + }); + } + } + + const instructions = generateReadInstructions(offloadedFiles); + + if (offloadedFiles.length > 0) { + logger.info('Context offload summary', { + inlineCount: inlineInjections.length, + offloadedCount: offloadedFiles.length, + totalOffloadedTokens: offloadedFiles.reduce((sum, f) => sum + f.tokens, 
0), + }); + } + + return { + inlineInjections, + offloadedFiles, + instructions, + }; +} + +/** + * Clean up context files after agent execution. + * + * Removes the .cascade/context/ directory and all its contents. + * + * @param repoDir - Repository directory + */ +export async function cleanupContextFiles(repoDir: string): Promise { + const contextDir = join(repoDir, CONTEXT_OFFLOAD_CONFIG.contextDir); + try { + await rm(contextDir, { recursive: true, force: true }); + logger.debug('Cleaned up context files', { contextDir }); + } catch { + // Ignore errors (directory might not exist) + } +} + +/** + * Build the inline context section for the prompt. + */ +export function buildInlineContextSection(injections: ContextInjection[]): string { + if (injections.length === 0) return ''; + + let section = '\n\n## Pre-loaded Context\n'; + for (const injection of injections) { + section += `\n### ${injection.description} (${injection.toolName})\n`; + section += `Parameters: ${JSON.stringify(injection.params)}\n`; + section += `\`\`\`\n${injection.result}\n\`\`\`\n`; + } + return section; +} diff --git a/src/backends/claude-code/index.ts b/src/backends/claude-code/index.ts index 7e5bcabc..3babe0b4 100644 --- a/src/backends/claude-code/index.ts +++ b/src/backends/claude-code/index.ts @@ -19,6 +19,11 @@ import type { ContextInjection, ToolManifest, } from '../types.js'; +import { + buildInlineContextSection, + cleanupContextFiles, + offloadLargeContext, +} from './contextFiles.js'; import { filterProcessEnv } from './env.js'; import { buildHooks } from './hooks.js'; import { CLAUDE_CODE_MODEL_IDS, DEFAULT_CLAUDE_CODE_MODEL } from './models.js'; @@ -84,22 +89,55 @@ export function buildToolGuidance(tools: ToolManifest[]): string { return guidance; } +/** + * Result of building the task prompt with context offloading. 
+ */ +export interface BuildTaskPromptResult { + /** The assembled task prompt */ + prompt: string; + /** Whether any context was offloaded to files */ + hasOffloadedContext: boolean; +} + /** * Build the task prompt with pre-fetched context injections. + * + * Large context is offloaded to files to avoid exceeding prompt limits. + * Claude is instructed to read the files on-demand using its Read tool. + * + * @param taskPrompt - The base task prompt + * @param contextInjections - Context data to include + * @param repoDir - Repository directory for writing context files + * @returns The assembled prompt and offload metadata */ -export function buildTaskPrompt(taskPrompt: string, contextInjections: ContextInjection[]): string { +export async function buildTaskPrompt( + taskPrompt: string, + contextInjections: ContextInjection[], + repoDir: string, +): Promise { let prompt = taskPrompt; - if (contextInjections.length > 0) { - prompt += '\n\n## Pre-loaded Context\n'; - for (const injection of contextInjections) { - prompt += `\n### ${injection.description} (${injection.toolName})\n`; - prompt += `Parameters: ${JSON.stringify(injection.params)}\n`; - prompt += `\`\`\`\n${injection.result}\n\`\`\`\n`; - } + if (contextInjections.length === 0) { + return { prompt, hasOffloadedContext: false }; + } + + const { inlineInjections, offloadedFiles, instructions } = await offloadLargeContext( + repoDir, + contextInjections, + ); + + // Add inline context + prompt += buildInlineContextSection(inlineInjections); + + // Add instructions for offloaded files + if (instructions) { + prompt += `\n\n${instructions}`; } - return prompt; + return { + prompt, + hasOffloadedContext: offloadedFiles.length > 0, + }; } /** @@ -428,7 +466,11 @@ export class ClaudeCodeBackend implements AgentBackend { async execute(input: AgentBackendInput): Promise { const startTime = Date.now(); const systemPrompt = buildSystemPrompt(input.systemPrompt, input.availableTools); - const taskPrompt = 
buildTaskPrompt(input.taskPrompt, input.contextInjections); + const { prompt: taskPrompt, hasOffloadedContext } = await buildTaskPrompt( + input.taskPrompt, + input.contextInjections, + input.repoDir, + ); const model = resolveClaudeModel(input.model); input.logWriter('INFO', 'Starting Claude Code SDK execution', { @@ -436,6 +478,7 @@ export class ClaudeCodeBackend implements AgentBackend { model, repoDir: input.repoDir, maxIterations: input.maxIterations, + hasOffloadedContext, }); const { env } = buildEnv(input.projectSecrets); @@ -452,49 +495,56 @@ export class ClaudeCodeBackend implements AgentBackend { let turnCount = 0; const stderrChunks: string[] = []; - const stream = query({ - prompt: taskPrompt, - options: { - model, - systemPrompt, - cwd: input.repoDir, - additionalDirectories: [getWorkspaceDir()], - maxBudgetUsd: input.budgetUsd, - permissionMode: 'bypassPermissions', - allowDangerouslySkipPermissions: true, - tools: sdkTools, - allowedTools: sdkTools, - persistSession: false, - hooks, - env, - debug: true, - stderr: (data: string) => { - stderrChunks.push(data); - input.logWriter('INFO', 'Claude Code stderr', { data: data.trim() }); + try { + const stream = query({ + prompt: taskPrompt, + options: { + model, + systemPrompt, + cwd: input.repoDir, + additionalDirectories: [getWorkspaceDir()], + maxBudgetUsd: input.budgetUsd, + permissionMode: 'bypassPermissions', + allowDangerouslySkipPermissions: true, + tools: sdkTools, + allowedTools: sdkTools, + persistSession: false, + hooks, + env, + debug: true, + stderr: (data: string) => { + stderrChunks.push(data); + input.logWriter('INFO', 'Claude Code stderr', { data: data.trim() }); + }, }, - }, - }); - - for await (const message of stream) { - if (message.type === 'assistant') { - const assistantMsg = message as SDKAssistantMessage; - assistantMessages.push(assistantMsg); - turnCount++; - await input.progressReporter.onIteration(turnCount, input.maxIterations); - processAssistantMessage(assistantMsg, 
turnCount, input); - logLlmCall(input, assistantMsg, turnCount, model); - } else if (message.type === 'system') { - const sysMsg = message as { subtype: string; [key: string]: unknown }; - if (sysMsg.subtype === 'task_notification') { - processTaskNotification(sysMsg, input); - } else { - processSystemMessage(sysMsg, input.logWriter); + }); + + for await (const message of stream) { + if (message.type === 'assistant') { + const assistantMsg = message as SDKAssistantMessage; + assistantMessages.push(assistantMsg); + turnCount++; + await input.progressReporter.onIteration(turnCount, input.maxIterations); + processAssistantMessage(assistantMsg, turnCount, input); + logLlmCall(input, assistantMsg, turnCount, model); + } else if (message.type === 'system') { + const sysMsg = message as { subtype: string; [key: string]: unknown }; + if (sysMsg.subtype === 'task_notification') { + processTaskNotification(sysMsg, input); + } else { + processSystemMessage(sysMsg, input.logWriter); + } + } else if (message.type === 'result') { + resultMessage = message as SDKResultMessage; } - } else if (message.type === 'result') { - resultMessage = message as SDKResultMessage; } - } - return buildResult(assistantMessages, resultMessage, stderrChunks, input, startTime); + return buildResult(assistantMessages, resultMessage, stderrChunks, input, startTime); + } finally { + // Clean up offloaded context files after execution + if (hasOffloadedContext) { + await cleanupContextFiles(input.repoDir); + } + } } } diff --git a/src/backends/llmist/index.ts b/src/backends/llmist/index.ts index 280ecfee..98c91024 100644 --- a/src/backends/llmist/index.ts +++ b/src/backends/llmist/index.ts @@ -2,6 +2,7 @@ import os from 'node:os'; import { LLMist, type ModelSpec, createLogger } from 'llmist'; +import { createIntegrationChecker } from '../../agents/capabilities/index.js'; import { resolveAgentDefinition } from '../../agents/definitions/index.js'; import { type BuilderType, createConfiguredBuilder } from 
'../../agents/shared/builderFactory.js'; import { injectSyntheticCall } from '../../agents/shared/syntheticCalls.js'; @@ -85,8 +86,10 @@ export class LlmistBackend implements AgentBackend { process.env.LLMIST_LOG_TEE = 'true'; } - // Get gadget instances from the agent profile (single source of truth for tool sets) - const gadgets = await profile.getLlmistGadgets(agentType); + // Get gadget instances from the agent profile, filtered by integration availability. + // This ensures optional capabilities only provide gadgets if the integration is configured. + const integrationChecker = await createIntegrationChecker(input.project.id); + const gadgets = profile.getLlmistGadgets(integrationChecker); // Build the configured agent builder with all llmist-specific features: // rate limiting, retry, compaction, iteration hints, observer hooks diff --git a/src/config/claudeCodeConfig.ts b/src/config/claudeCodeConfig.ts new file mode 100644 index 00000000..fe166f8b --- /dev/null +++ b/src/config/claudeCodeConfig.ts @@ -0,0 +1,28 @@ +/** + * Configuration for Claude Code backend context offloading. + * + * When the total context injections exceed the inline threshold, + * large context is written to files and Claude is instructed to + * read them on-demand using its Read tool. + */ +export const CONTEXT_OFFLOAD_CONFIG = { + /** + * Token threshold below which context is embedded inline in the prompt. + * Context injections smaller than this are included directly. + * Larger content is offloaded to files. + */ + inlineThreshold: 8_000, + + /** + * Directory for offloaded context files (relative to repo root). + * Files are written here and cleaned up after agent completion. + */ + contextDir: '.cascade/context', + + /** + * Whether context offloading is enabled. + * Set to false to disable offloading (all context embedded inline). + * Useful for debugging or when file I/O is problematic. 
+ */ + enabled: true, +} as const; diff --git a/src/triggers/shared/integration-validation.ts b/src/triggers/shared/integration-validation.ts index 50085a43..031d2043 100644 --- a/src/triggers/shared/integration-validation.ts +++ b/src/triggers/shared/integration-validation.ts @@ -2,12 +2,12 @@ * Pre-flight integration validation for agents. * * Validates that all required integrations are configured before an agent runs. - * This prevents confusing runtime errors and provides clear feedback about - * missing configuration. + * Integrations are derived from agent capabilities - no separate declaration needed. */ +import { deriveIntegrations } from '../../agents/capabilities/index.js'; import { resolveAgentDefinition } from '../../agents/definitions/loader.js'; -import type { AgentIntegrations, IntegrationCategory } from '../../agents/definitions/schema.js'; +import type { IntegrationCategory } from '../../agents/definitions/schema.js'; import { hasEmailIntegration } from '../../email/index.js'; import { hasScmIntegration, hasScmPersonaToken } from '../../github/integration.js'; import { getPersonaForAgentType } from '../../github/personas.js'; @@ -26,11 +26,19 @@ export interface ValidationResult { } /** - * Get integration requirements for an agent. + * Derived integration requirements from agent capabilities. */ -export async function getIntegrationRequirements(agentType: string): Promise { +export interface DerivedIntegrations { + required: IntegrationCategory[]; + optional: IntegrationCategory[]; +} + +/** + * Get integration requirements for an agent, derived from capabilities. 
+ */ +export async function getIntegrationRequirements(agentType: string): Promise { const def = await resolveAgentDefinition(agentType); - return def.integrations; + return deriveIntegrations(def.capabilities.required, def.capabilities.optional); } // ============================================================================ @@ -111,6 +119,7 @@ async function validateSmsIntegration( /** * Validate all required integrations are configured before agent runs. + * Integrations are derived from the agent's required capabilities. */ export async function validateIntegrations( projectId: string, diff --git a/tests/integration/integration-validation.test.ts b/tests/integration/integration-validation.test.ts index 0a4c29d4..65ca9a70 100644 --- a/tests/integration/integration-validation.test.ts +++ b/tests/integration/integration-validation.test.ts @@ -239,15 +239,9 @@ describe('Integration Validation (integration)', () => { expect(result.errors[0].message).toContain('Implementer token'); }); - // Use it.each() for the remaining implementer agents to reduce duplication - const implementerAgents = [ - 'splitting', - 'planning', - 'respond-to-review', - 'respond-to-ci', - 'respond-to-pr-comment', - 'respond-to-planning-comment', - ]; + // Use it.each() for the remaining implementer agents that require SCM + // Note: splitting, planning, respond-to-planning-comment only need PM, not SCM + const implementerAgents = ['respond-to-review', 'respond-to-ci', 'respond-to-pr-comment']; it.each(implementerAgents)('%s agent needs implementer token', async (agentType) => { await seedTrelloIntegration(); diff --git a/tests/unit/agents/capabilities/resolver.test.ts b/tests/unit/agents/capabilities/resolver.test.ts new file mode 100644 index 00000000..366379c3 --- /dev/null +++ b/tests/unit/agents/capabilities/resolver.test.ts @@ -0,0 +1,303 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +/** Create a mock class with the given name so constructor.name works in 
assertions */ +function mockClass(name: string) { + const cls = { [name]: class {} }[name]; + return vi.fn().mockImplementation(() => new cls()); +} + +// Mock all gadget imports +vi.mock('../../../../src/gadgets/AstGrep.js', () => ({ AstGrep: mockClass('AstGrep') })); +vi.mock('../../../../src/gadgets/FileMultiEdit.js', () => ({ + FileMultiEdit: mockClass('FileMultiEdit'), +})); +vi.mock('../../../../src/gadgets/FileSearchAndReplace.js', () => ({ + FileSearchAndReplace: mockClass('FileSearchAndReplace'), +})); +vi.mock('../../../../src/gadgets/Finish.js', () => ({ Finish: mockClass('Finish') })); +vi.mock('../../../../src/gadgets/ListDirectory.js', () => ({ + ListDirectory: mockClass('ListDirectory'), +})); +vi.mock('../../../../src/gadgets/ReadFile.js', () => ({ ReadFile: mockClass('ReadFile') })); +vi.mock('../../../../src/gadgets/RipGrep.js', () => ({ RipGrep: mockClass('RipGrep') })); +vi.mock('../../../../src/gadgets/Sleep.js', () => ({ Sleep: mockClass('Sleep') })); +vi.mock('../../../../src/gadgets/VerifyChanges.js', () => ({ + VerifyChanges: mockClass('VerifyChanges'), +})); +vi.mock('../../../../src/gadgets/WriteFile.js', () => ({ WriteFile: mockClass('WriteFile') })); +vi.mock('../../../../src/gadgets/github/index.js', () => ({ + CreatePR: mockClass('CreatePR'), + CreatePRReview: mockClass('CreatePRReview'), + GetPRChecks: mockClass('GetPRChecks'), + GetPRComments: mockClass('GetPRComments'), + GetPRDetails: mockClass('GetPRDetails'), + GetPRDiff: mockClass('GetPRDiff'), + PostPRComment: mockClass('PostPRComment'), + ReplyToReviewComment: mockClass('ReplyToReviewComment'), + UpdatePRComment: mockClass('UpdatePRComment'), +})); +vi.mock('../../../../src/gadgets/pm/index.js', () => ({ + AddChecklist: mockClass('AddChecklist'), + CreateWorkItem: mockClass('CreateWorkItem'), + ListWorkItems: mockClass('ListWorkItems'), + PMDeleteChecklistItem: mockClass('PMDeleteChecklistItem'), + PMUpdateChecklistItem: mockClass('PMUpdateChecklistItem'), + PostComment: 
mockClass('PostComment'), + ReadWorkItem: mockClass('ReadWorkItem'), + UpdateWorkItem: mockClass('UpdateWorkItem'), +})); +vi.mock('../../../../src/gadgets/email/index.js', () => ({ + SendEmail: mockClass('SendEmail'), + SearchEmails: mockClass('SearchEmails'), + ReadEmail: mockClass('ReadEmail'), + ReplyToEmail: mockClass('ReplyToEmail'), + MarkEmailAsSeen: mockClass('MarkEmailAsSeen'), +})); +vi.mock('../../../../src/gadgets/sms/index.js', () => ({ + SendSms: mockClass('SendSms'), +})); +vi.mock('../../../../src/gadgets/tmux.js', () => ({ Tmux: mockClass('Tmux') })); +vi.mock('../../../../src/gadgets/todo/index.js', () => ({ + TodoUpsert: mockClass('TodoUpsert'), + TodoUpdateStatus: mockClass('TodoUpdateStatus'), + TodoDelete: mockClass('TodoDelete'), +})); + +import type { Capability } from '../../../../src/agents/capabilities/index.js'; +import { + deriveIntegrations, + deriveRequiredIntegrations, + filterToolManifests, + generateUnavailableCapabilitiesNote, + getGadgetNamesFromCapabilities, + getSdkToolsFromCapabilities, + getUnavailableOptionalCapabilities, + resolveEffectiveCapabilities, +} from '../../../../src/agents/capabilities/resolver.js'; +import type { ToolManifest } from '../../../../src/agents/contracts/index.js'; +import type { IntegrationCategory } from '../../../../src/agents/definitions/schema.js'; + +describe('deriveRequiredIntegrations', () => { + it('returns empty array for built-in capabilities only', () => { + const caps: Capability[] = ['fs:read', 'fs:write', 'shell:exec', 'session:ctrl']; + expect(deriveRequiredIntegrations(caps)).toEqual([]); + }); + + it('returns pm for pm:read capability', () => { + const caps: Capability[] = ['pm:read']; + expect(deriveRequiredIntegrations(caps)).toEqual(['pm']); + }); + + it('returns scm for scm:pr capability', () => { + const caps: Capability[] = ['scm:pr']; + expect(deriveRequiredIntegrations(caps)).toEqual(['scm']); + }); + + it('returns unique integrations even with multiple capabilities from 
same integration', () => { + const caps: Capability[] = ['pm:read', 'pm:write', 'pm:checklist']; + const result = deriveRequiredIntegrations(caps); + expect(result).toEqual(['pm']); + }); + + it('returns all unique integrations from mixed capabilities', () => { + const caps: Capability[] = ['fs:read', 'pm:read', 'scm:pr', 'email:read']; + const result = deriveRequiredIntegrations(caps); + expect(result).toContain('pm'); + expect(result).toContain('scm'); + expect(result).toContain('email'); + expect(result).toHaveLength(3); + }); +}); + +describe('deriveIntegrations', () => { + it('separates required and optional integrations', () => { + const required: Capability[] = ['fs:read', 'scm:pr']; + const optional: Capability[] = ['pm:read', 'pm:write']; + const result = deriveIntegrations(required, optional); + expect(result.required).toEqual(['scm']); + expect(result.optional).toEqual(['pm']); + }); + + it('does not include integration in optional if already in required', () => { + const required: Capability[] = ['pm:read', 'scm:pr']; + const optional: Capability[] = ['pm:write']; // pm already required + const result = deriveIntegrations(required, optional); + expect(result.required).toContain('pm'); + expect(result.required).toContain('scm'); + expect(result.optional).toEqual([]); + }); +}); + +describe('resolveEffectiveCapabilities', () => { + it('always includes all required capabilities', () => { + const required: Capability[] = ['fs:read', 'scm:pr']; + const optional: Capability[] = []; + const hasIntegration = () => false; // No integrations available + const result = resolveEffectiveCapabilities(required, optional, hasIntegration); + expect(result).toContain('fs:read'); + expect(result).toContain('scm:pr'); + }); + + it('includes optional built-in capabilities regardless of integration availability', () => { + const required: Capability[] = ['fs:read']; + const optional: Capability[] = ['fs:write', 'shell:exec']; + const hasIntegration = () => false; + const 
result = resolveEffectiveCapabilities(required, optional, hasIntegration); + expect(result).toContain('fs:write'); + expect(result).toContain('shell:exec'); + }); + + it('includes optional capabilities when their integration is available', () => { + const required: Capability[] = ['fs:read', 'scm:pr']; + const optional: Capability[] = ['pm:read', 'pm:write']; + const hasIntegration = (cat: IntegrationCategory) => cat === 'pm'; + const result = resolveEffectiveCapabilities(required, optional, hasIntegration); + expect(result).toContain('pm:read'); + expect(result).toContain('pm:write'); + }); + + it('excludes optional capabilities when their integration is not available', () => { + const required: Capability[] = ['fs:read', 'scm:pr']; + const optional: Capability[] = ['pm:read', 'pm:write']; + const hasIntegration = () => false; + const result = resolveEffectiveCapabilities(required, optional, hasIntegration); + expect(result).not.toContain('pm:read'); + expect(result).not.toContain('pm:write'); + }); + + it('handles mixed availability of optional integrations', () => { + const required: Capability[] = ['fs:read']; + const optional: Capability[] = ['pm:read', 'email:read', 'sms:send']; + const hasIntegration = (cat: IntegrationCategory) => cat === 'pm' || cat === 'sms'; + const result = resolveEffectiveCapabilities(required, optional, hasIntegration); + expect(result).toContain('pm:read'); + expect(result).toContain('sms:send'); + expect(result).not.toContain('email:read'); + }); +}); + +describe('getUnavailableOptionalCapabilities', () => { + it('returns empty array when all optional are built-in', () => { + const optional: Capability[] = ['fs:write', 'shell:exec']; + const hasIntegration = () => false; + expect(getUnavailableOptionalCapabilities(optional, hasIntegration)).toEqual([]); + }); + + it('returns unavailable integration-based capabilities', () => { + const optional: Capability[] = ['pm:read', 'pm:write', 'email:read']; + const hasIntegration = (cat: 
IntegrationCategory) => cat === 'pm'; + const result = getUnavailableOptionalCapabilities(optional, hasIntegration); + expect(result).toEqual(['email:read']); + }); + + it('returns all integration-based capabilities when no integrations available', () => { + const optional: Capability[] = ['pm:read', 'scm:comment']; + const hasIntegration = () => false; + const result = getUnavailableOptionalCapabilities(optional, hasIntegration); + expect(result).toContain('pm:read'); + expect(result).toContain('scm:comment'); + }); +}); + +describe('generateUnavailableCapabilitiesNote', () => { + it('returns null for empty array', () => { + expect(generateUnavailableCapabilitiesNote([])).toBeNull(); + }); + + it('generates note for unavailable PM capabilities', () => { + const unavailable: Capability[] = ['pm:read', 'pm:write']; + const note = generateUnavailableCapabilitiesNote(unavailable); + expect(note).toContain('PM integration'); + expect(note).toContain('not configured'); + expect(note).toContain('ReadWorkItem'); + }); + + it('generates note for multiple unavailable integrations', () => { + const unavailable: Capability[] = ['pm:read', 'email:write']; + const note = generateUnavailableCapabilitiesNote(unavailable); + expect(note).toContain('PM integration'); + expect(note).toContain('Email integration'); + }); +}); + +describe('getGadgetNamesFromCapabilities', () => { + it('returns gadget names for capabilities', () => { + const caps: Capability[] = ['fs:read']; + const names = getGadgetNamesFromCapabilities(caps); + expect(names).toContain('ReadFile'); + expect(names).toContain('ListDirectory'); + expect(names).toContain('RipGrep'); + expect(names).toContain('AstGrep'); + }); + + it('returns unique names even when capabilities share gadgets', () => { + const caps: Capability[] = ['fs:read', 'fs:read']; + const names = getGadgetNamesFromCapabilities(caps); + const readFileCount = names.filter((n) => n === 'ReadFile').length; + expect(readFileCount).toBe(1); + }); +}); + 
+describe('getSdkToolsFromCapabilities', () => { + it('returns SDK tools for fs:read', () => { + const caps: Capability[] = ['fs:read']; + const tools = getSdkToolsFromCapabilities(caps); + expect(tools).toContain('Read'); + expect(tools).toContain('Glob'); + expect(tools).toContain('Grep'); + }); + + it('returns SDK tools for fs:write', () => { + const caps: Capability[] = ['fs:write']; + const tools = getSdkToolsFromCapabilities(caps); + expect(tools).toContain('Write'); + expect(tools).toContain('Edit'); + }); + + it('returns Bash for shell:exec', () => { + const caps: Capability[] = ['shell:exec']; + const tools = getSdkToolsFromCapabilities(caps); + expect(tools).toContain('Bash'); + }); +}); + +describe('filterToolManifests', () => { + it('filters manifests to only those matching capability gadgets', () => { + const manifests: ToolManifest[] = [ + { name: 'ReadFile', description: 'Read a file', inputSchema: {} }, + { name: 'WriteFile', description: 'Write a file', inputSchema: {} }, + { name: 'CreatePR', description: 'Create PR', inputSchema: {} }, + ]; + const caps: Capability[] = ['fs:read']; + const filtered = filterToolManifests(manifests, caps); + expect(filtered).toHaveLength(1); + expect(filtered[0].name).toBe('ReadFile'); + }); + + it('includes all gadgets for multiple capabilities', () => { + const manifests: ToolManifest[] = [ + { name: 'ReadFile', description: 'Read', inputSchema: {} }, + { name: 'WriteFile', description: 'Write', inputSchema: {} }, + { name: 'CreatePR', description: 'PR', inputSchema: {} }, + ]; + const caps: Capability[] = ['fs:read', 'fs:write']; + const filtered = filterToolManifests(manifests, caps); + expect(filtered).toHaveLength(2); + expect(filtered.map((m) => m.name)).toContain('ReadFile'); + expect(filtered.map((m) => m.name)).toContain('WriteFile'); + }); + + it('logs warning for missing expected tools', () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + const manifests: ToolManifest[] = 
[ + { name: 'ReadFile', description: 'Read', inputSchema: {} }, + // Missing ListDirectory, RipGrep, AstGrep + ]; + const caps: Capability[] = ['fs:read']; + filterToolManifests(manifests, caps); + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining('Expected tools not found in manifests'), + ); + warnSpy.mockRestore(); + }); +}); diff --git a/tests/unit/agents/definitions/loader.test.ts b/tests/unit/agents/definitions/loader.test.ts index b88412fa..a5e3b3f9 100644 --- a/tests/unit/agents/definitions/loader.test.ts +++ b/tests/unit/agents/definitions/loader.test.ts @@ -1,16 +1,15 @@ import { afterEach, describe, expect, it } from 'vitest'; +import { + deriveIntegrations, + getSdkToolsFromCapabilities, +} from '../../../../src/agents/capabilities/resolver.js'; import { clearDefinitionCache, getKnownAgentTypes, loadAgentDefinition, loadAllAgentDefinitions, } from '../../../../src/agents/definitions/loader.js'; -import { - CONTEXT_STEP_REGISTRY, - GADGET_BUILDER_REGISTRY, - SDK_TOOLS_REGISTRY, - TOOL_SET_REGISTRY, -} from '../../../../src/agents/definitions/strategies.js'; +import { CONTEXT_STEP_REGISTRY } from '../../../../src/agents/definitions/strategies.js'; import { getAgentCapabilities } from '../../../../src/agents/shared/capabilities.js'; const ALL_AGENT_TYPES = [ @@ -75,35 +74,34 @@ describe('YAML agent definitions loader', () => { }); describe('strategy references resolve correctly', () => { - it('all tool set references exist in TOOL_SET_REGISTRY', () => { + it('all agents have valid capabilities', () => { for (const agentType of ALL_AGENT_TYPES) { const def = loadAgentDefinition(agentType); - for (const setName of def.tools.sets) { - expect( - setName === 'all' || setName in TOOL_SET_REGISTRY, - `${agentType}: tool set '${setName}' not in TOOL_SET_REGISTRY`, - ).toBe(true); - } + expect(Array.isArray(def.capabilities.required)).toBe(true); + expect(Array.isArray(def.capabilities.optional)).toBe(true); + 
expect(def.capabilities.required.length).toBeGreaterThan(0); } }); - it('all sdkTools references exist in SDK_TOOLS_REGISTRY', () => { + it('agents with fs or shell capabilities derive to non-empty SDK tools', () => { + // Only agents with fs:* or shell:exec capabilities need SDK tools. + // Email-only agents (e.g., email-joke) use llmist gadgets exclusively. for (const agentType of ALL_AGENT_TYPES) { const def = loadAgentDefinition(agentType); - expect( - def.tools.sdkTools in SDK_TOOLS_REGISTRY, - `${agentType}: sdkTools '${def.tools.sdkTools}' not in SDK_TOOLS_REGISTRY`, - ).toBe(true); - } - }); + const allCaps = [...def.capabilities.required, ...def.capabilities.optional]; - it('all gadgetBuilder references exist in GADGET_BUILDER_REGISTRY', () => { - for (const agentType of ALL_AGENT_TYPES) { - const def = loadAgentDefinition(agentType); - expect( - def.strategies.gadgetBuilder in GADGET_BUILDER_REGISTRY, - `${agentType}: gadgetBuilder '${def.strategies.gadgetBuilder}' not in GADGET_BUILDER_REGISTRY`, - ).toBe(true); + // Check if agent has any capabilities that provide SDK tools + const hasSdkCapabilities = allCaps.some( + (cap) => cap.startsWith('fs:') || cap === 'shell:exec', + ); + + if (hasSdkCapabilities) { + const sdkTools = getSdkToolsFromCapabilities(allCaps); + expect( + sdkTools.length > 0, + `${agentType}: has SDK-capable capabilities but no SDK tools`, + ).toBe(true); + } } }); @@ -219,11 +217,10 @@ describe('YAML agent definitions loader', () => { expect(def.backend.preExecute).toBe('postInitialPRComment'); }); - it('planning is readOnly', () => { + it('planning has read-only capabilities (no fs:write)', () => { const def = loadAgentDefinition('planning'); - expect(def.capabilities.isReadOnly).toBe(true); - expect(def.capabilities.canEditFiles).toBe(false); - expect(def.tools.sdkTools).toBe('readOnly'); + expect(def.capabilities.required).toContain('fs:read'); + expect(def.capabilities.required).not.toContain('fs:write'); }); it('implementation 
has trailingMessage with all flags', () => { @@ -258,17 +255,12 @@ describe('YAML agent definitions loader', () => { it('respond-to-review includes review comment gadget options', () => { const def = loadAgentDefinition('respond-to-review'); - expect(def.strategies.gadgetBuilderOptions).toEqual({ includeReviewComments: true }); + expect(def.strategies.gadgetOptions).toEqual({ includeReviewComments: true }); }); it('respond-to-pr-comment includes review comment gadget options', () => { const def = loadAgentDefinition('respond-to-pr-comment'); - expect(def.strategies.gadgetBuilderOptions).toEqual({ includeReviewComments: true }); - }); - - it('debug uses "all" tool set', () => { - const def = loadAgentDefinition('debug'); - expect(def.tools.sets).toContain('all'); + expect(def.strategies.gadgetOptions).toEqual({ includeReviewComments: true }); }); it('all agents have non-empty identity fields', () => { @@ -302,7 +294,6 @@ describe('YAML agent definitions loader', () => { expect(def.backend.needsGitHubToken).toBe(true); expect(def.backend.preExecute).toBeUndefined(); expect(def.backend.postConfigure).toBe('sequentialGadgetExecution'); - expect(SDK_TOOLS_REGISTRY[def.tools.sdkTools]).toBeDefined(); }); it('review agent is read-only with preExecute hook', async () => { @@ -325,26 +316,23 @@ describe('YAML agent definitions loader', () => { expect(def.backend.preExecute).toBe('postInitialPRComment'); }); - it('all agent sdkTools references resolve to non-empty arrays', () => { - for (const agentType of ALL_AGENT_TYPES) { - const def = loadAgentDefinition(agentType); - const sdkTools = SDK_TOOLS_REGISTRY[def.tools.sdkTools]; - expect( - Array.isArray(sdkTools) && sdkTools.length > 0, - `${agentType}: sdkTools '${def.tools.sdkTools}' resolved to empty or non-array`, - ).toBe(true); - } - }); - - it('capabilities from getAgentCapabilities match YAML definition for all agents', async () => { + it('capabilities from getAgentCapabilities are derived correctly for all agents', 
async () => { for (const agentType of ALL_AGENT_TYPES) { const def = loadAgentDefinition(agentType); const caps = await getAgentCapabilities(agentType); + const allCaps = [...def.capabilities.required, ...def.capabilities.optional]; + + // canEditFiles = has fs:write + expect(caps.canEditFiles).toBe(allCaps.includes('fs:write')); + + // canCreatePR = has scm:pr + expect(caps.canCreatePR).toBe(allCaps.includes('scm:pr')); + + // canUpdateChecklists = has pm:checklist + expect(caps.canUpdateChecklists).toBe(allCaps.includes('pm:checklist')); - expect(caps.canEditFiles).toBe(def.capabilities.canEditFiles); - expect(caps.canCreatePR).toBe(def.capabilities.canCreatePR); - expect(caps.canUpdateChecklists).toBe(def.capabilities.canUpdateChecklists); - expect(caps.isReadOnly).toBe(def.capabilities.isReadOnly); + // isReadOnly = no fs:write + expect(caps.isReadOnly).toBe(!allCaps.includes('fs:write')); } }); }); @@ -361,87 +349,101 @@ describe('YAML agent definitions loader', () => { }); }); - describe('integration requirements', () => { - it('all agents have integrations field with required and optional arrays', () => { + describe('integration requirements (derived from capabilities)', () => { + it('all agents have valid capabilities with required array', () => { for (const agentType of ALL_AGENT_TYPES) { const def = loadAgentDefinition(agentType); - expect(def.integrations).toBeDefined(); - expect(Array.isArray(def.integrations.required)).toBe(true); - expect(Array.isArray(def.integrations.optional)).toBe(true); + expect(Array.isArray(def.capabilities.required)).toBe(true); + expect(Array.isArray(def.capabilities.optional)).toBe(true); } }); - it('implementation agent requires scm and pm', () => { + it('implementation agent requires scm and pm (derived from capabilities)', () => { const def = loadAgentDefinition('implementation'); - expect(def.integrations.required).toEqual(['scm', 'pm']); - expect(def.integrations.optional).toEqual([]); + const integrations = 
deriveIntegrations(def.capabilities.required, def.capabilities.optional); + // Order may vary - use set comparison + expect(new Set(integrations.required)).toEqual(new Set(['scm', 'pm'])); + expect(integrations.optional).toEqual([]); }); - it('splitting agent requires scm and pm', () => { + it('splitting agent requires pm only', () => { const def = loadAgentDefinition('splitting'); - expect(def.integrations.required).toEqual(['scm', 'pm']); - expect(def.integrations.optional).toEqual([]); + const integrations = deriveIntegrations(def.capabilities.required, def.capabilities.optional); + expect(integrations.required).toEqual(['pm']); + expect(integrations.optional).toEqual([]); }); - it('planning agent requires scm and pm', () => { + it('planning agent requires pm only', () => { const def = loadAgentDefinition('planning'); - expect(def.integrations.required).toEqual(['scm', 'pm']); - expect(def.integrations.optional).toEqual([]); + const integrations = deriveIntegrations(def.capabilities.required, def.capabilities.optional); + expect(integrations.required).toEqual(['pm']); + expect(integrations.optional).toEqual([]); }); it('review agent requires scm, pm is optional', () => { const def = loadAgentDefinition('review'); - expect(def.integrations.required).toEqual(['scm']); - expect(def.integrations.optional).toEqual(['pm']); + const integrations = deriveIntegrations(def.capabilities.required, def.capabilities.optional); + expect(integrations.required).toEqual(['scm']); + expect(integrations.optional).toEqual(['pm']); }); it('respond-to-review agent requires scm, pm is optional', () => { const def = loadAgentDefinition('respond-to-review'); - expect(def.integrations.required).toEqual(['scm']); - expect(def.integrations.optional).toEqual(['pm']); + const integrations = deriveIntegrations(def.capabilities.required, def.capabilities.optional); + expect(integrations.required).toEqual(['scm']); + expect(integrations.optional).toEqual(['pm']); }); it('respond-to-ci agent 
requires scm, pm is optional', () => { const def = loadAgentDefinition('respond-to-ci'); - expect(def.integrations.required).toEqual(['scm']); - expect(def.integrations.optional).toEqual(['pm']); + const integrations = deriveIntegrations(def.capabilities.required, def.capabilities.optional); + expect(integrations.required).toEqual(['scm']); + expect(integrations.optional).toEqual(['pm']); }); it('respond-to-pr-comment agent requires scm, pm is optional', () => { const def = loadAgentDefinition('respond-to-pr-comment'); - expect(def.integrations.required).toEqual(['scm']); - expect(def.integrations.optional).toEqual(['pm']); + const integrations = deriveIntegrations(def.capabilities.required, def.capabilities.optional); + expect(integrations.required).toEqual(['scm']); + expect(integrations.optional).toEqual(['pm']); }); - it('respond-to-planning-comment agent requires scm and pm', () => { + it('respond-to-planning-comment agent requires pm only', () => { const def = loadAgentDefinition('respond-to-planning-comment'); - expect(def.integrations.required).toEqual(['scm', 'pm']); - expect(def.integrations.optional).toEqual([]); + const integrations = deriveIntegrations(def.capabilities.required, def.capabilities.optional); + expect(integrations.required).toEqual(['pm']); + expect(integrations.optional).toEqual([]); }); it('debug agent requires pm only', () => { const def = loadAgentDefinition('debug'); - expect(def.integrations.required).toEqual(['pm']); - expect(def.integrations.optional).toEqual([]); + const integrations = deriveIntegrations(def.capabilities.required, def.capabilities.optional); + expect(integrations.required).toEqual(['pm']); + expect(integrations.optional).toEqual([]); }); it('email-joke agent requires email only', () => { const def = loadAgentDefinition('email-joke'); - expect(def.integrations.required).toEqual(['email']); - expect(def.integrations.optional).toEqual([]); + const integrations = deriveIntegrations(def.capabilities.required, 
def.capabilities.optional); + expect(integrations.required).toEqual(['email']); + expect(integrations.optional).toEqual([]); }); - it('all integration categories are valid', () => { - const validCategories = ['pm', 'scm', 'email']; + it('all derived integration categories are valid', () => { + const validCategories = ['pm', 'scm', 'email', 'sms']; for (const agentType of ALL_AGENT_TYPES) { const def = loadAgentDefinition(agentType); - for (const cat of def.integrations.required) { + const integrations = deriveIntegrations( + def.capabilities.required, + def.capabilities.optional, + ); + for (const cat of integrations.required) { expect( validCategories.includes(cat), `${agentType}: invalid required category '${cat}'`, ).toBe(true); } - for (const cat of def.integrations.optional) { + for (const cat of integrations.optional) { expect( validCategories.includes(cat), `${agentType}: invalid optional category '${cat}'`, diff --git a/tests/unit/agents/definitions/schema.test.ts b/tests/unit/agents/definitions/schema.test.ts index 8945fef0..fb41c7fd 100644 --- a/tests/unit/agents/definitions/schema.test.ts +++ b/tests/unit/agents/definitions/schema.test.ts @@ -10,19 +10,12 @@ describe('AgentDefinitionSchema', () => { initialMessage: '**🔧 Testing** — Running tests...', }, capabilities: { - canEditFiles: true, - canCreatePR: false, - canUpdateChecklists: true, - isReadOnly: false, - }, - tools: { - sets: ['pm', 'session'], - sdkTools: 'all', + required: ['fs:read', 'fs:write', 'shell:exec', 'session:ctrl', 'pm:read', 'pm:write'], + optional: [], }, strategies: { contextPipeline: ['directoryListing', 'contextFiles', 'squint', 'workItem'], taskPromptBuilder: 'workItem', - gadgetBuilder: 'workItem', }, backend: { enableStopHooks: false, @@ -30,10 +23,6 @@ describe('AgentDefinitionSchema', () => { }, compaction: 'default', hint: 'Do the thing efficiently.', - integrations: { - required: ['pm'], - optional: [], - }, }; it('parses a valid minimal definition', () => { @@ -46,7 
+35,7 @@ describe('AgentDefinitionSchema', () => { ...validDefinition, strategies: { ...validDefinition.strategies, - gadgetBuilderOptions: { includeReviewComments: true }, + gadgetOptions: { includeReviewComments: true }, }, backend: { ...validDefinition.backend, @@ -73,19 +62,10 @@ describe('AgentDefinitionSchema', () => { expect(result.success).toBe(false); }); - it('rejects invalid tool set names', () => { - const bad = { - ...validDefinition, - tools: { sets: ['invalid_set'], sdkTools: 'all' }, - }; - const result = AgentDefinitionSchema.safeParse(bad); - expect(result.success).toBe(false); - }); - - it('rejects invalid sdkTools values', () => { + it('rejects invalid capability names', () => { const bad = { ...validDefinition, - tools: { sets: ['pm'], sdkTools: 'invalid' }, + capabilities: { required: ['invalid:cap'], optional: [] }, }; const result = AgentDefinitionSchema.safeParse(bad); expect(result.success).toBe(false); @@ -182,12 +162,12 @@ describe('AgentDefinitionSchema', () => { expect(result.success).toBe(true); }); - it('rejects overlapping required and optional categories', () => { + it('rejects overlapping required and optional capabilities', () => { const bad = { ...validDefinition, - integrations: { - required: ['pm', 'scm'], - optional: ['pm'], // pm is in both + capabilities: { + required: ['fs:read', 'pm:read'], + optional: ['fs:read'], // fs:read is in both }, }; const result = AgentDefinitionSchema.safeParse(bad); @@ -196,4 +176,33 @@ describe('AgentDefinitionSchema', () => { expect(result.error.issues[0].message).toContain('cannot be both required and optional'); } }); + + it('allows optional capabilities to be omitted', () => { + const withoutOptional = { + ...validDefinition, + capabilities: { + required: ['fs:read', 'session:ctrl'], + }, + }; + const result = AgentDefinitionSchema.safeParse(withoutOptional); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.capabilities.optional).toEqual([]); + } + }); + + 
it('accepts valid optional capabilities', () => { + const withOptional = { + ...validDefinition, + capabilities: { + required: ['fs:read', 'session:ctrl', 'scm:read'], + optional: ['pm:read', 'pm:write'], + }, + }; + const result = AgentDefinitionSchema.safeParse(withOptional); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.capabilities.optional).toEqual(['pm:read', 'pm:write']); + } + }); }); diff --git a/tests/unit/agents/shared/gadgets.test.ts b/tests/unit/agents/shared/gadgets.test.ts index 29591799..887e3e7b 100644 --- a/tests/unit/agents/shared/gadgets.test.ts +++ b/tests/unit/agents/shared/gadgets.test.ts @@ -45,6 +45,16 @@ vi.mock('../../../../src/gadgets/pm/index.js', () => ({ ReadWorkItem: mockClass('ReadWorkItem'), UpdateWorkItem: mockClass('UpdateWorkItem'), })); +vi.mock('../../../../src/gadgets/email/index.js', () => ({ + SendEmail: mockClass('SendEmail'), + SearchEmails: mockClass('SearchEmails'), + ReadEmail: mockClass('ReadEmail'), + ReplyToEmail: mockClass('ReplyToEmail'), + MarkEmailAsSeen: mockClass('MarkEmailAsSeen'), +})); +vi.mock('../../../../src/gadgets/sms/index.js', () => ({ + SendSms: mockClass('SendSms'), +})); vi.mock('../../../../src/gadgets/tmux.js', () => ({ Tmux: mockClass('Tmux') })); vi.mock('../../../../src/gadgets/todo/index.js', () => ({ TodoUpsert: mockClass('TodoUpsert'), @@ -52,139 +62,215 @@ vi.mock('../../../../src/gadgets/todo/index.js', () => ({ TodoDelete: mockClass('TodoDelete'), })); -import type { AgentCapabilities } from '../../../../src/agents/shared/capabilities.js'; -import { - buildPRAgentGadgets, - buildReviewGadgets, - buildWorkItemGadgets, -} from '../../../../src/agents/shared/gadgets.js'; +import type { Capability } from '../../../../src/agents/capabilities/index.js'; +import { buildGadgetsFromCapabilities } from '../../../../src/agents/capabilities/resolver.js'; +import { buildGadgetsForAgent } from '../../../../src/agents/shared/gadgets.js'; function names(gadgets: 
unknown[]): string[] { return gadgets.map((g) => (g as object).constructor.name); } -const FULL_CAPS: AgentCapabilities = { - canEditFiles: true, - canCreatePR: true, - canUpdateChecklists: true, - isReadOnly: false, -}; - -const READ_ONLY_CAPS: AgentCapabilities = { - canEditFiles: false, - canCreatePR: false, - canUpdateChecklists: false, - isReadOnly: true, -}; - -describe('buildWorkItemGadgets', () => { - it('always includes base read gadgets and session control', () => { - const gadgets = names(buildWorkItemGadgets(FULL_CAPS)); - expect(gadgets).toContain('ListDirectory'); - expect(gadgets).toContain('ReadFile'); - expect(gadgets).toContain('RipGrep'); - expect(gadgets).toContain('AstGrep'); - expect(gadgets).toContain('Tmux'); - expect(gadgets).toContain('Sleep'); - expect(gadgets).toContain('TodoUpsert'); - expect(gadgets).toContain('TodoUpdateStatus'); - expect(gadgets).toContain('TodoDelete'); - expect(gadgets).toContain('ReadWorkItem'); - expect(gadgets).toContain('PostComment'); - expect(gadgets).toContain('Finish'); +describe('buildGadgetsFromCapabilities', () => { + describe('fs:read capability', () => { + it('includes filesystem read gadgets', () => { + const caps: Capability[] = ['fs:read']; + const gadgets = names(buildGadgetsFromCapabilities(caps)); + expect(gadgets).toContain('ListDirectory'); + expect(gadgets).toContain('ReadFile'); + expect(gadgets).toContain('RipGrep'); + expect(gadgets).toContain('AstGrep'); + }); }); - it('includes file-editing gadgets when canEditFiles is true', () => { - const gadgets = names(buildWorkItemGadgets(FULL_CAPS)); - expect(gadgets).toContain('FileSearchAndReplace'); - expect(gadgets).toContain('FileMultiEdit'); - expect(gadgets).toContain('WriteFile'); - expect(gadgets).toContain('VerifyChanges'); + describe('fs:write capability', () => { + it('includes filesystem write gadgets', () => { + const caps: Capability[] = ['fs:write']; + const gadgets = names(buildGadgetsFromCapabilities(caps)); + 
expect(gadgets).toContain('WriteFile'); + expect(gadgets).toContain('FileSearchAndReplace'); + expect(gadgets).toContain('FileMultiEdit'); + expect(gadgets).toContain('VerifyChanges'); + }); }); - it('excludes file-editing gadgets when canEditFiles is false', () => { - const gadgets = names(buildWorkItemGadgets(READ_ONLY_CAPS)); - expect(gadgets).not.toContain('FileSearchAndReplace'); - expect(gadgets).not.toContain('FileMultiEdit'); - expect(gadgets).not.toContain('WriteFile'); - expect(gadgets).not.toContain('VerifyChanges'); + describe('shell:exec capability', () => { + it('includes shell execution gadgets', () => { + const caps: Capability[] = ['shell:exec']; + const gadgets = names(buildGadgetsFromCapabilities(caps)); + expect(gadgets).toContain('Tmux'); + expect(gadgets).toContain('Sleep'); + }); }); - it('includes CreatePR when canCreatePR is true', () => { - const gadgets = names(buildWorkItemGadgets(FULL_CAPS)); - expect(gadgets).toContain('CreatePR'); + describe('session:ctrl capability', () => { + it('includes session control gadgets', () => { + const caps: Capability[] = ['session:ctrl']; + const gadgets = names(buildGadgetsFromCapabilities(caps)); + expect(gadgets).toContain('Finish'); + expect(gadgets).toContain('TodoUpsert'); + expect(gadgets).toContain('TodoUpdateStatus'); + expect(gadgets).toContain('TodoDelete'); + }); }); - it('excludes CreatePR when canCreatePR is false', () => { - const gadgets = names(buildWorkItemGadgets(READ_ONLY_CAPS)); - expect(gadgets).not.toContain('CreatePR'); - }); + describe('pm capabilities', () => { + it('pm:read includes read work item gadgets', () => { + const caps: Capability[] = ['pm:read']; + const gadgets = names(buildGadgetsFromCapabilities(caps)); + expect(gadgets).toContain('ReadWorkItem'); + expect(gadgets).toContain('ListWorkItems'); + }); - it('includes PMUpdateChecklistItem and PMDeleteChecklistItem when canUpdateChecklists is true', () => { - const gadgets = names(buildWorkItemGadgets(FULL_CAPS)); - 
expect(gadgets).toContain('PMUpdateChecklistItem'); - expect(gadgets).toContain('PMDeleteChecklistItem'); - }); + it('pm:write includes write work item gadgets', () => { + const caps: Capability[] = ['pm:write']; + const gadgets = names(buildGadgetsFromCapabilities(caps)); + expect(gadgets).toContain('UpdateWorkItem'); + expect(gadgets).toContain('CreateWorkItem'); + expect(gadgets).toContain('PostComment'); + expect(gadgets).toContain('AddChecklist'); + }); - it('excludes PMUpdateChecklistItem and PMDeleteChecklistItem when canUpdateChecklists is false', () => { - const gadgets = names(buildWorkItemGadgets(READ_ONLY_CAPS)); - expect(gadgets).not.toContain('PMUpdateChecklistItem'); - expect(gadgets).not.toContain('PMDeleteChecklistItem'); + it('pm:checklist includes checklist gadgets', () => { + const caps: Capability[] = ['pm:checklist']; + const gadgets = names(buildGadgetsFromCapabilities(caps)); + expect(gadgets).toContain('PMUpdateChecklistItem'); + expect(gadgets).toContain('PMDeleteChecklistItem'); + }); }); -}); -describe('buildReviewGadgets', () => { - it('includes PR review gadgets', () => { - const gadgets = names(buildReviewGadgets()); - expect(gadgets).toContain('GetPRDetails'); - expect(gadgets).toContain('GetPRDiff'); - expect(gadgets).toContain('GetPRChecks'); - expect(gadgets).toContain('CreatePRReview'); - expect(gadgets).toContain('UpdatePRComment'); - expect(gadgets).toContain('Finish'); - }); + describe('scm capabilities', () => { + it('scm:read includes PR read gadgets', () => { + const caps: Capability[] = ['scm:read']; + const gadgets = names(buildGadgetsFromCapabilities(caps)); + expect(gadgets).toContain('GetPRDetails'); + expect(gadgets).toContain('GetPRDiff'); + expect(gadgets).toContain('GetPRChecks'); + }); + + it('scm:comment includes PR comment gadgets', () => { + const caps: Capability[] = ['scm:comment']; + const gadgets = names(buildGadgetsFromCapabilities(caps)); + expect(gadgets).toContain('PostPRComment'); + 
expect(gadgets).toContain('UpdatePRComment'); + expect(gadgets).toContain('GetPRComments'); + expect(gadgets).toContain('ReplyToReviewComment'); + }); + + it('scm:review includes PR review gadgets', () => { + const caps: Capability[] = ['scm:review']; + const gadgets = names(buildGadgetsFromCapabilities(caps)); + expect(gadgets).toContain('CreatePRReview'); + }); - it('does not include file-editing gadgets (read-only)', () => { - const gadgets = names(buildReviewGadgets()); - expect(gadgets).not.toContain('FileSearchAndReplace'); - expect(gadgets).not.toContain('WriteFile'); - expect(gadgets).not.toContain('CreatePR'); + it('scm:pr includes CreatePR gadget', () => { + const caps: Capability[] = ['scm:pr']; + const gadgets = names(buildGadgetsFromCapabilities(caps)); + expect(gadgets).toContain('CreatePR'); + }); }); - it('does not include PostPRComment (submits via CreatePRReview)', () => { - const gadgets = names(buildReviewGadgets()); - expect(gadgets).not.toContain('PostPRComment'); + describe('email capabilities', () => { + it('email:read includes email read gadgets', () => { + const caps: Capability[] = ['email:read']; + const gadgets = names(buildGadgetsFromCapabilities(caps)); + expect(gadgets).toContain('SearchEmails'); + expect(gadgets).toContain('ReadEmail'); + expect(gadgets).toContain('MarkEmailAsSeen'); + }); + + it('email:write includes email write gadgets', () => { + const caps: Capability[] = ['email:write']; + const gadgets = names(buildGadgetsFromCapabilities(caps)); + expect(gadgets).toContain('SendEmail'); + expect(gadgets).toContain('ReplyToEmail'); + }); }); -}); -describe('buildPRAgentGadgets', () => { - it('includes file editing and GitHub PR tools', () => { - const gadgets = names(buildPRAgentGadgets()); - expect(gadgets).toContain('FileSearchAndReplace'); - expect(gadgets).toContain('FileMultiEdit'); - expect(gadgets).toContain('WriteFile'); - expect(gadgets).toContain('VerifyChanges'); - expect(gadgets).toContain('GetPRDetails'); - 
expect(gadgets).toContain('GetPRDiff'); - expect(gadgets).toContain('GetPRChecks'); - expect(gadgets).toContain('PostPRComment'); - expect(gadgets).toContain('Finish'); + describe('sms capabilities', () => { + it('sms:send includes SMS gadget', () => { + const caps: Capability[] = ['sms:send']; + const gadgets = names(buildGadgetsFromCapabilities(caps)); + expect(gadgets).toContain('SendSms'); + }); }); - it('does not include CreatePR (pushes to existing branch)', () => { - const gadgets = names(buildPRAgentGadgets()); - expect(gadgets).not.toContain('CreatePR'); + describe('combined capabilities', () => { + it('implementation-like capabilities include all expected gadgets', () => { + const caps: Capability[] = [ + 'fs:read', + 'fs:write', + 'shell:exec', + 'session:ctrl', + 'pm:read', + 'pm:write', + 'pm:checklist', + 'scm:pr', + ]; + const gadgets = names(buildGadgetsFromCapabilities(caps)); + // Filesystem + expect(gadgets).toContain('ListDirectory'); + expect(gadgets).toContain('ReadFile'); + expect(gadgets).toContain('WriteFile'); + // Shell + expect(gadgets).toContain('Tmux'); + // PM + expect(gadgets).toContain('ReadWorkItem'); + expect(gadgets).toContain('PMUpdateChecklistItem'); + // SCM + expect(gadgets).toContain('CreatePR'); + // Session + expect(gadgets).toContain('Finish'); + }); + + it('review-like capabilities exclude file editing and PR creation', () => { + const caps: Capability[] = [ + 'fs:read', + 'shell:exec', + 'session:ctrl', + 'scm:read', + 'scm:review', + ]; + const gadgets = names(buildGadgetsFromCapabilities(caps)); + // Should have + expect(gadgets).toContain('ReadFile'); + expect(gadgets).toContain('CreatePRReview'); + // Should NOT have + expect(gadgets).not.toContain('WriteFile'); + expect(gadgets).not.toContain('CreatePR'); + }); + + it('does not create duplicate gadgets when capabilities overlap', () => { + const caps: Capability[] = ['fs:read', 'fs:read', 'session:ctrl']; + const gadgets = names(buildGadgetsFromCapabilities(caps)); 
+ // ListDirectory should appear only once + const listDirCount = gadgets.filter((n) => n === 'ListDirectory').length; + expect(listDirCount).toBe(1); + }); }); +}); - it('excludes review comment tools by default', () => { - const gadgets = names(buildPRAgentGadgets()); - expect(gadgets).not.toContain('GetPRComments'); - expect(gadgets).not.toContain('ReplyToReviewComment'); +describe('buildGadgetsForAgent', () => { + it('uses capabilities to build gadgets', () => { + const caps: Capability[] = ['fs:read', 'session:ctrl']; + const gadgets = names(buildGadgetsForAgent(caps)); + expect(gadgets).toContain('ReadFile'); + expect(gadgets).toContain('Finish'); }); - it('includes review comment tools when includeReviewComments is true', () => { - const gadgets = names(buildPRAgentGadgets({ includeReviewComments: true })); + it('adds review comment gadgets when includeReviewComments option is set and scm:comment not in capabilities', () => { + // Without scm:comment capability, but with includeReviewComments option + const caps: Capability[] = ['fs:read', 'session:ctrl']; + const gadgets = names(buildGadgetsForAgent(caps, { includeReviewComments: true })); expect(gadgets).toContain('GetPRComments'); expect(gadgets).toContain('ReplyToReviewComment'); }); + + it('does not duplicate review comment gadgets when scm:comment capability is present', () => { + // With scm:comment capability AND includeReviewComments option + const caps: Capability[] = ['fs:read', 'session:ctrl', 'scm:comment']; + const gadgets = names(buildGadgetsForAgent(caps, { includeReviewComments: true })); + // Count GetPRComments - should be exactly 1 + const count = gadgets.filter((n) => n === 'GetPRComments').length; + expect(count).toBe(1); + }); }); diff --git a/tests/unit/api/routers/agentDefinitions.test.ts b/tests/unit/api/routers/agentDefinitions.test.ts index b7542a9a..adb875e6 100644 --- a/tests/unit/api/routers/agentDefinitions.test.ts +++ b/tests/unit/api/routers/agentDefinitions.test.ts @@ 
-64,20 +64,12 @@ function createMockDefinition(overrides?: Partial): AgentDefini initialMessage: 'Hello', }, capabilities: { - canEditFiles: true, - canCreatePR: true, - canUpdateChecklists: true, - isReadOnly: false, - }, - tools: { - sets: ['all'], - sdkTools: 'all', + required: ['fs:read', 'fs:write', 'shell:exec', 'session:ctrl', 'scm:pr'], + optional: [], }, strategies: { contextPipeline: ['directoryListing'], taskPromptBuilder: 'workItem', - gadgetBuilder: 'workItem', - gadgetBuilderOptions: undefined, }, backend: { enableStopHooks: true, @@ -86,10 +78,6 @@ function createMockDefinition(overrides?: Partial): AgentDefini compaction: 'default', hint: 'A test agent', trailingMessage: undefined, - integrations: { - required: ['scm'], - optional: [], - }, ...overrides, } as AgentDefinition; } @@ -444,14 +432,12 @@ describe('agentDefinitionsRouter', () => { const caller = createCaller({ user: null, effectiveOrgId: null }); const result = await caller.schema(); - expect(result).toHaveProperty('toolSetNames'); - expect(result).toHaveProperty('sdkToolsNames'); + expect(result).toHaveProperty('capabilities'); expect(result).toHaveProperty('contextStepNames'); expect(result).toHaveProperty('taskPromptBuilderNames'); - expect(result).toHaveProperty('gadgetBuilderNames'); expect(result).toHaveProperty('compactionNames'); // Verify they're arrays - expect(Array.isArray(result.toolSetNames)).toBe(true); + expect(Array.isArray(result.capabilities)).toBe(true); expect(Array.isArray(result.compactionNames)).toBe(true); }); }); diff --git a/tests/unit/backends/agent-profiles.test.ts b/tests/unit/backends/agent-profiles.test.ts index 8319d151..dc3600b6 100644 --- a/tests/unit/backends/agent-profiles.test.ts +++ b/tests/unit/backends/agent-profiles.test.ts @@ -219,8 +219,11 @@ describe('getAgentProfile', () => { expect(names).toContain('ReadWorkItem'); }); - it('has ALL_SDK_TOOLS for code editing', () => { - expect(profile.sdkTools).toEqual(['Read', 'Write', 'Edit', 'Bash', 
'Glob', 'Grep']); + it('has SDK tools for code editing', () => { + // SDK tools derived from capabilities - order may vary + expect(new Set(profile.sdkTools)).toEqual( + new Set(['Read', 'Write', 'Edit', 'Bash', 'Glob', 'Grep']), + ); }); it('enables stop hooks', () => { @@ -299,11 +302,12 @@ describe('getAgentProfile', () => { logWriter, }); + // Expects either the agent-specific message or a fallback message expect(mockGithub.createPRComment).toHaveBeenCalledWith( 'acme', 'widgets', 42, - expect.stringContaining('Fixing CI failures'), + expect.stringMatching(/Fixing CI|Working on it/), ); }); }); @@ -362,11 +366,12 @@ describe('getAgentProfile', () => { logWriter, }); + // Expects either the agent-specific message or a fallback message expect(mockGithub.createPRComment).toHaveBeenCalledWith( 'org', 'repo', 10, - expect.stringContaining('Reviewing code'), + expect.stringMatching(/Reviewing code|Working on it/), ); }); }); @@ -405,13 +410,17 @@ describe('getAgentProfile', () => { expect(names).toContain('GetPRDetails'); expect(names).toContain('PostPRComment'); expect(names).toContain('ReplyToReviewComment'); - expect(names).toContain('CreatePRReview'); + // respond-to-pr-comment has scm:comment but not scm:review + expect(names).not.toContain('CreatePRReview'); expect(names).toContain('Finish'); expect(names).not.toContain('CreatePR'); }); - it('has ALL_SDK_TOOLS for code editing', () => { - expect(profile.sdkTools).toEqual(['Read', 'Write', 'Edit', 'Bash', 'Glob', 'Grep']); + it('has SDK tools for code editing', () => { + // SDK tools derived from capabilities - order may vary + expect(new Set(profile.sdkTools)).toEqual( + new Set(['Read', 'Write', 'Edit', 'Bash', 'Glob', 'Grep']), + ); }); it('enables stop hooks', () => { @@ -465,11 +474,16 @@ describe('getAgentProfile', () => { expect(profile.needsGitHubToken).toBe(true); }); - it('returns debug profile (defaultProfile)', async () => { + it('returns debug profile', async () => { const profile = await 
getAgentProfile('debug'); - // Debug uses defaultProfile — passes all tools through, no GitHub token - const tools = [{ name: 'Anything', description: '', cliCommand: '', parameters: {} }]; - expect(profile.filterTools(tools)).toHaveLength(1); + // Debug has PM capabilities but no SCM + const tools = [ + { name: 'ReadWorkItem', description: '', cliCommand: '', parameters: {} }, + { name: 'CreatePR', description: '', cliCommand: '', parameters: {} }, // Should be filtered out + ]; + const filtered = profile.filterTools(tools); + expect(filtered.map((t) => t.name)).toContain('ReadWorkItem'); + expect(filtered.map((t) => t.name)).not.toContain('CreatePR'); expect(profile.needsGitHubToken).toBe(false); }); }); @@ -501,7 +515,7 @@ describe('AgentProfile.getLlmistGadgets', () => { it('implementation includes file editing, CreatePR, and PM gadgets', async () => { const profile = await getAgentProfile('implementation'); - const names = gadgetNames(await profile.getLlmistGadgets('implementation')); + const names = gadgetNames(profile.getLlmistGadgets()); // File editing gadgets (canEditFiles: true) expect(names).toContain('FileSearchAndReplace'); @@ -519,7 +533,7 @@ describe('AgentProfile.getLlmistGadgets', () => { it('planning excludes file editing, CreatePR, and checklist updates (read-only)', async () => { const profile = await getAgentProfile('planning'); - const names = gadgetNames(await profile.getLlmistGadgets('planning')); + const names = gadgetNames(profile.getLlmistGadgets()); // Read-only: no file editing expect(names).not.toContain('FileSearchAndReplace'); @@ -537,9 +551,9 @@ describe('AgentProfile.getLlmistGadgets', () => { expect(names).toContain('Finish'); }); - it('review includes CreatePRReview and excludes file editing and PostPRComment', async () => { + it('review includes CreatePRReview and excludes file editing', async () => { const profile = await getAgentProfile('review'); - const names = gadgetNames(await profile.getLlmistGadgets('review')); + const 
names = gadgetNames(profile.getLlmistGadgets()); // Core action: submit PR review expect(names).toContain('CreatePRReview'); @@ -547,19 +561,19 @@ describe('AgentProfile.getLlmistGadgets', () => { expect(names).toContain('GetPRDetails'); expect(names).toContain('GetPRDiff'); expect(names).toContain('GetPRChecks'); + // With scm:comment capability, review agent gets all comment tools + expect(names).toContain('PostPRComment'); expect(names).toContain('UpdatePRComment'); // Read-only: no file editing expect(names).not.toContain('FileSearchAndReplace'); expect(names).not.toContain('WriteFile'); expect(names).not.toContain('CreatePR'); - // Review agent doesn't use PostPRComment (posts via CreatePRReview) - expect(names).not.toContain('PostPRComment'); expect(names).toContain('Finish'); }); it('respond-to-review includes file editing and review comment tools', async () => { const profile = await getAgentProfile('respond-to-review'); - const names = gadgetNames(await profile.getLlmistGadgets('respond-to-review')); + const names = gadgetNames(profile.getLlmistGadgets()); // File editing (respond-to-review makes code changes) expect(names).toContain('FileSearchAndReplace'); @@ -576,9 +590,9 @@ describe('AgentProfile.getLlmistGadgets', () => { expect(names).toContain('Finish'); }); - it('respond-to-ci includes file editing but no review comment tools', async () => { + it('respond-to-ci includes file editing and comment tools', async () => { const profile = await getAgentProfile('respond-to-ci'); - const names = gadgetNames(await profile.getLlmistGadgets('respond-to-ci')); + const names = gadgetNames(profile.getLlmistGadgets()); // File editing expect(names).toContain('FileSearchAndReplace'); @@ -588,9 +602,9 @@ describe('AgentProfile.getLlmistGadgets', () => { expect(names).toContain('GetPRDetails'); expect(names).toContain('GetPRDiff'); expect(names).toContain('GetPRChecks'); - // No review comment tools (includeReviewComments: false) - 
expect(names).not.toContain('GetPRComments'); - expect(names).not.toContain('ReplyToReviewComment'); + // With scm:comment capability, gets all comment tools + expect(names).toContain('PostPRComment'); + expect(names).toContain('GetPRComments'); // No CreatePR (pushes to existing branch) expect(names).not.toContain('CreatePR'); expect(names).toContain('Finish'); @@ -598,7 +612,7 @@ describe('AgentProfile.getLlmistGadgets', () => { it('respond-to-pr-comment includes file editing and review comment tools', async () => { const profile = await getAgentProfile('respond-to-pr-comment'); - const names = gadgetNames(await profile.getLlmistGadgets('respond-to-pr-comment')); + const names = gadgetNames(profile.getLlmistGadgets()); // File editing expect(names).toContain('FileSearchAndReplace'); @@ -614,7 +628,7 @@ describe('AgentProfile.getLlmistGadgets', () => { it('splitting includes file editing but not CreatePR', async () => { const profile = await getAgentProfile('splitting'); - const names = gadgetNames(await profile.getLlmistGadgets('splitting')); + const names = gadgetNames(profile.getLlmistGadgets()); // File editing (canEditFiles: true) expect(names).toContain('FileSearchAndReplace'); diff --git a/tests/unit/backends/claude-code-contextFiles.test.ts b/tests/unit/backends/claude-code-contextFiles.test.ts new file mode 100644 index 00000000..b4cfb3cd --- /dev/null +++ b/tests/unit/backends/claude-code-contextFiles.test.ts @@ -0,0 +1,334 @@ +import { existsSync, mkdtempSync, readFileSync } from 'node:fs'; +import { mkdir, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('../../../src/utils/logging.js', () => ({ + logger: { warn: vi.fn(), info: vi.fn(), error: vi.fn(), debug: vi.fn() }, +})); + +// Mock config to control thresholds in tests +vi.mock('../../../src/config/claudeCodeConfig.js', () => ({ + 
CONTEXT_OFFLOAD_CONFIG: { + inlineThreshold: 8_000, + contextDir: '.cascade/context', + enabled: true, + }, +})); + +import { + buildInlineContextSection, + cleanupContextFiles, + offloadLargeContext, +} from '../../../src/backends/claude-code/contextFiles.js'; +import type { ContextInjection } from '../../../src/backends/types.js'; +import { CONTEXT_OFFLOAD_CONFIG } from '../../../src/config/claudeCodeConfig.js'; + +describe('offloadLargeContext', () => { + let tempDir: string; + + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), 'cascade-test-context-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('keeps small context inline', async () => { + const smallInjection: ContextInjection = { + toolName: 'ReadPR', + params: { prNumber: 42 }, + result: 'Small PR description', + description: 'PR Details', + }; + + const result = await offloadLargeContext(tempDir, [smallInjection]); + + expect(result.inlineInjections).toHaveLength(1); + expect(result.inlineInjections[0]).toBe(smallInjection); + expect(result.offloadedFiles).toHaveLength(0); + expect(result.instructions).toBe(''); + }); + + it('offloads large context to files', async () => { + // Create content larger than threshold (~8000 tokens = ~32000 chars) + const largeContent = 'A'.repeat(40_000); + const largeInjection: ContextInjection = { + toolName: 'GetPRDiff', + params: { prNumber: 42 }, + result: largeContent, + description: 'PR Diff', + }; + + const result = await offloadLargeContext(tempDir, [largeInjection]); + + expect(result.inlineInjections).toHaveLength(0); + expect(result.offloadedFiles).toHaveLength(1); + expect(result.offloadedFiles[0].description).toBe('PR Diff'); + expect(result.offloadedFiles[0].tokens).toBe(10_000); + expect(result.offloadedFiles[0].relativePath).toContain('.cascade/context/'); + + // Verify file was written + const filePath = join(tempDir, result.offloadedFiles[0].relativePath); + 
expect(existsSync(filePath)).toBe(true); + expect(readFileSync(filePath, 'utf-8')).toBe(largeContent); + + // Verify instructions are generated + expect(result.instructions).toContain('Context Files'); + expect(result.instructions).toContain('pr-diff-0.txt'); + expect(result.instructions).toContain('Read tool'); + }); + + it('handles mixed sizes correctly', async () => { + const smallInjection: ContextInjection = { + toolName: 'ReadPR', + params: { prNumber: 42 }, + result: 'Small content', + description: 'PR Details', + }; + + const largeInjection: ContextInjection = { + toolName: 'GetPRDiff', + params: { prNumber: 42 }, + result: 'B'.repeat(50_000), + description: 'PR Diff', + }; + + const mediumInjection: ContextInjection = { + toolName: 'GetFileContents', + params: { files: ['a.ts'] }, + result: 'Medium content that is still small', + description: 'File Contents', + }; + + const result = await offloadLargeContext(tempDir, [ + smallInjection, + largeInjection, + mediumInjection, + ]); + + expect(result.inlineInjections).toHaveLength(2); + expect(result.inlineInjections).toContain(smallInjection); + expect(result.inlineInjections).toContain(mediumInjection); + expect(result.offloadedFiles).toHaveLength(1); + expect(result.offloadedFiles[0].description).toBe('PR Diff'); + }); + + it('handles empty injections', async () => { + const result = await offloadLargeContext(tempDir, []); + + expect(result.inlineInjections).toHaveLength(0); + expect(result.offloadedFiles).toHaveLength(0); + expect(result.instructions).toBe(''); + }); + + it('generates unique filenames from descriptions with index suffix', async () => { + const injection1: ContextInjection = { + toolName: 'GetPRDiff', + params: {}, + result: 'C'.repeat(40_000), + description: 'PR Diff for Feature Branch', + }; + + const injection2: ContextInjection = { + toolName: 'GetFileContents', + params: {}, + result: 'D'.repeat(40_000), + description: 'File Contents: src/index.ts', + }; + + const result = await 
offloadLargeContext(tempDir, [injection1, injection2]); + + expect(result.offloadedFiles).toHaveLength(2); + // Filenames include index for uniqueness + expect(result.offloadedFiles[0].relativePath).toBe( + '.cascade/context/pr-diff-for-feature-branch-0.txt', + ); + expect(result.offloadedFiles[1].relativePath).toBe( + '.cascade/context/file-contents-src-index-ts-1.txt', + ); + }); + + it('handles duplicate descriptions without collision', async () => { + const injection1: ContextInjection = { + toolName: 'GetDiff', + params: {}, + result: 'A'.repeat(40_000), + description: 'PR Diff', // Same description + }; + const injection2: ContextInjection = { + toolName: 'GetDiff', + params: {}, + result: 'B'.repeat(40_000), + description: 'PR Diff', // Same description + }; + + const result = await offloadLargeContext(tempDir, [injection1, injection2]); + + expect(result.offloadedFiles).toHaveLength(2); + // Filenames should be different due to index + expect(result.offloadedFiles[0].relativePath).toBe('.cascade/context/pr-diff-0.txt'); + expect(result.offloadedFiles[1].relativePath).toBe('.cascade/context/pr-diff-1.txt'); + + // Verify both files exist with different content + const file1 = readFileSync(join(tempDir, result.offloadedFiles[0].relativePath), 'utf-8'); + const file2 = readFileSync(join(tempDir, result.offloadedFiles[1].relativePath), 'utf-8'); + expect(file1).toBe('A'.repeat(40_000)); + expect(file2).toBe('B'.repeat(40_000)); + }); + + it('handles empty description', async () => { + const injection: ContextInjection = { + toolName: 'GetDiff', + params: {}, + result: 'A'.repeat(40_000), + description: '', + }; + + const result = await offloadLargeContext(tempDir, [injection]); + + expect(result.offloadedFiles).toHaveLength(1); + // Empty description falls back to 'context' prefix + expect(result.offloadedFiles[0].relativePath).toBe('.cascade/context/context-0.txt'); + }); + + it('handles description with only special characters', async () => { + const 
injection: ContextInjection = { + toolName: 'GetDiff', + params: {}, + result: 'A'.repeat(40_000), + description: '!@#$%^&*()', + }; + + const result = await offloadLargeContext(tempDir, [injection]); + + expect(result.offloadedFiles).toHaveLength(1); + // Special chars stripped, falls back to 'context' prefix + expect(result.offloadedFiles[0].relativePath).toBe('.cascade/context/context-0.txt'); + }); + + it('truncates very long descriptions', async () => { + const longDescription = 'a'.repeat(100); // 100 chars + const injection: ContextInjection = { + toolName: 'GetDiff', + params: {}, + result: 'A'.repeat(40_000), + description: longDescription, + }; + + const result = await offloadLargeContext(tempDir, [injection]); + + expect(result.offloadedFiles).toHaveLength(1); + // Description truncated to 40 chars + index + const filename = result.offloadedFiles[0].relativePath.split('/').pop() ?? ''; + expect(filename).toBe('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-0.txt'); + expect(filename.length).toBeLessThan(50); // Reasonable filename length + }); +}); + +describe('cleanupContextFiles', () => { + let tempDir: string; + + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), 'cascade-test-cleanup-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('removes context directory and files', async () => { + const contextDir = join(tempDir, CONTEXT_OFFLOAD_CONFIG.contextDir); + await mkdir(contextDir, { recursive: true }); + await writeFile(join(contextDir, 'test.txt'), 'content'); + + expect(existsSync(contextDir)).toBe(true); + + await cleanupContextFiles(tempDir); + + expect(existsSync(contextDir)).toBe(false); + }); + + it('does not throw when directory does not exist', async () => { + await expect(cleanupContextFiles(tempDir)).resolves.not.toThrow(); + }); +}); + +describe('buildInlineContextSection', () => { + it('returns empty string for empty injections', () => { + 
expect(buildInlineContextSection([])).toBe(''); + }); + + it('formats injections correctly', () => { + const injections: ContextInjection[] = [ + { + toolName: 'ReadPR', + params: { prNumber: 42 }, + result: 'PR content here', + description: 'PR Details', + }, + ]; + + const section = buildInlineContextSection(injections); + + expect(section).toContain('## Pre-loaded Context'); + expect(section).toContain('### PR Details (ReadPR)'); + expect(section).toContain('"prNumber":42'); + expect(section).toContain('PR content here'); + }); + + it('formats multiple injections', () => { + const injections: ContextInjection[] = [ + { + toolName: 'ReadPR', + params: { prNumber: 1 }, + result: 'First content', + description: 'First Section', + }, + { + toolName: 'GetDiff', + params: { prNumber: 1 }, + result: 'Second content', + description: 'Second Section', + }, + ]; + + const section = buildInlineContextSection(injections); + + expect(section).toContain('### First Section (ReadPR)'); + expect(section).toContain('### Second Section (GetDiff)'); + expect(section).toContain('First content'); + expect(section).toContain('Second content'); + }); +}); + +describe('offloadLargeContext with disabled config', () => { + it('keeps all context inline when disabled', async () => { + // Override the mock for this specific test + const configModule = await import('../../../src/config/claudeCodeConfig.js'); + const originalEnabled = configModule.CONTEXT_OFFLOAD_CONFIG.enabled; + configModule.CONTEXT_OFFLOAD_CONFIG.enabled = false; + + const tempDir = mkdtempSync(join(tmpdir(), 'cascade-test-disabled-')); + + try { + const largeInjection: ContextInjection = { + toolName: 'GetPRDiff', + params: {}, + result: 'E'.repeat(50_000), + description: 'Large Content', + }; + + const result = await offloadLargeContext(tempDir, [largeInjection]); + + expect(result.inlineInjections).toHaveLength(1); + expect(result.offloadedFiles).toHaveLength(0); + expect(result.instructions).toBe(''); + } finally { + 
configModule.CONTEXT_OFFLOAD_CONFIG.enabled = originalEnabled; + await rm(tempDir, { recursive: true, force: true }); + } + }); +}); diff --git a/tests/unit/backends/claude-code.test.ts b/tests/unit/backends/claude-code.test.ts index b8a012d5..4690a3ec 100644 --- a/tests/unit/backends/claude-code.test.ts +++ b/tests/unit/backends/claude-code.test.ts @@ -205,23 +205,70 @@ describe('buildToolGuidance', () => { }); describe('buildTaskPrompt', () => { - it('returns task prompt without injections', () => { - expect(buildTaskPrompt('Do the thing.', [])).toBe('Do the thing.'); + let fakeRepoDir: string; + + beforeEach(() => { + fakeRepoDir = mkdtempSync(join(tmpdir(), 'cascade-test-repo-')); }); - it('appends context injections', () => { - const prompt = buildTaskPrompt('Do the thing.', [ - { - toolName: 'ReadWorkItem', - params: { workItemId: 'abc' }, - result: '{"title":"My card"}', - description: 'Pre-fetched work item data', - }, - ]); - expect(prompt).toContain('## Pre-loaded Context'); - expect(prompt).toContain('### Pre-fetched work item data (ReadWorkItem)'); - expect(prompt).toContain('"workItemId":"abc"'); - expect(prompt).toContain('{"title":"My card"}'); + afterEach(async () => { + await rm(fakeRepoDir, { recursive: true, force: true }); + }); + + it('returns task prompt without injections', async () => { + const result = await buildTaskPrompt('Do the thing.', [], fakeRepoDir); + expect(result.prompt).toBe('Do the thing.'); + expect(result.hasOffloadedContext).toBe(false); + }); + + it('appends context injections inline when small', async () => { + const result = await buildTaskPrompt( + 'Do the thing.', + [ + { + toolName: 'ReadWorkItem', + params: { workItemId: 'abc' }, + result: '{"title":"My card"}', + description: 'Pre-fetched work item data', + }, + ], + fakeRepoDir, + ); + expect(result.prompt).toContain('## Pre-loaded Context'); + expect(result.prompt).toContain('### Pre-fetched work item data (ReadWorkItem)'); + 
expect(result.prompt).toContain('"workItemId":"abc"'); + expect(result.prompt).toContain('{"title":"My card"}'); + expect(result.hasOffloadedContext).toBe(false); + }); + + it('offloads large context to files and generates instructions', async () => { + // Create content larger than 8000 token threshold (~32000 chars) + const largeContent = 'X'.repeat(40_000); + const result = await buildTaskPrompt( + 'Review the PR.', + [ + { + toolName: 'GetPRDiff', + params: { prNumber: 123 }, + result: largeContent, + description: 'PR Diff', + }, + ], + fakeRepoDir, + ); + // Should not have inline content for the large injection + expect(result.prompt).not.toContain('## Pre-loaded Context'); + expect(result.prompt).not.toContain(largeContent); + + // Should have instructions for reading offloaded files + expect(result.prompt).toContain('## Context Files'); + expect(result.prompt).toContain('.cascade/context/'); + expect(result.prompt).toContain('Read tool'); + expect(result.hasOffloadedContext).toBe(true); + + // Verify file was written + const contextDir = join(fakeRepoDir, '.cascade/context'); + expect(existsSync(contextDir)).toBe(true); }); }); diff --git a/tests/unit/backends/llmist.test.ts b/tests/unit/backends/llmist.test.ts index ea7ece72..5520e413 100644 --- a/tests/unit/backends/llmist.test.ts +++ b/tests/unit/backends/llmist.test.ts @@ -6,11 +6,17 @@ vi.mock('llmist', () => ({ createLogger: vi.fn(() => ({})), })); +// Mock capabilities module to avoid gadget imports +vi.mock('../../../src/agents/capabilities/index.js', () => ({ + createIntegrationChecker: vi.fn(async () => () => true), +})); + // Mock agents/definitions to break the circular dependency chain: // backends/llmist → definitions → strategies → gadgets → pm/ → webhook-handler // → triggers/agent-execution → agents/registry → new LlmistBackend() (still loading) vi.mock('../../../src/agents/definitions/index.js', () => ({ loadAgentDefinition: vi.fn(() => ({ backend: {} })), + resolveAgentDefinition: 
vi.fn(async () => ({ backend: {} })), })); vi.mock('../../../src/backends/agent-profiles.js', () => ({ @@ -281,7 +287,8 @@ describe('LlmistBackend.execute', () => { await backend.execute(makeInput('review')); expect(mockGetAgentProfile).toHaveBeenCalledWith('review'); - expect(mockGetLlmistGadgets).toHaveBeenCalledWith('review'); + // getLlmistGadgets no longer takes an argument - gadgets are pre-built in profile + expect(mockGetLlmistGadgets).toHaveBeenCalled(); }); it('sets LLMIST_LOG_FILE to the provided llmistLogPath', async () => { diff --git a/tests/unit/db/repositories/agentDefinitionsRepository.test.ts b/tests/unit/db/repositories/agentDefinitionsRepository.test.ts index db172c55..a325bc82 100644 --- a/tests/unit/db/repositories/agentDefinitionsRepository.test.ts +++ b/tests/unit/db/repositories/agentDefinitionsRepository.test.ts @@ -23,21 +23,12 @@ const mockDefinition: AgentDefinition = { initialMessage: 'Hello', }, capabilities: { - canEditFiles: true, - canCreatePR: true, - canUpdateChecklists: true, - isReadOnly: false, - canAccessEmail: false, - }, - tools: { - sets: ['pm'], - sdkTools: 'all', + required: ['fs:read', 'fs:write', 'shell:exec', 'session:ctrl', 'pm:read', 'pm:write'], + optional: [], }, strategies: { contextPipeline: ['workItem'], taskPromptBuilder: 'workItem', - gadgetBuilder: 'workItem', - gadgetBuilderOptions: undefined, }, backend: { enableStopHooks: false, @@ -47,10 +38,6 @@ const mockDefinition: AgentDefinition = { compaction: 'default', hint: 'test hint', trailingMessage: undefined, - integrations: { - required: ['pm'], - optional: [], - }, }; describe('agentDefinitionsRepository', () => { diff --git a/tests/unit/triggers/shared/integration-validation.test.ts b/tests/unit/triggers/shared/integration-validation.test.ts index 3393d681..a040cc1c 100644 --- a/tests/unit/triggers/shared/integration-validation.test.ts +++ b/tests/unit/triggers/shared/integration-validation.test.ts @@ -45,7 +45,8 @@ describe('integration-validation', () 
=> { describe('getIntegrationRequirements', () => { it('returns integration requirements for implementation agent', async () => { const reqs = await getIntegrationRequirements('implementation'); - expect(reqs.required).toEqual(['scm', 'pm']); + // Order may vary - check using set comparison + expect(new Set(reqs.required)).toEqual(new Set(['pm', 'scm'])); expect(reqs.optional).toEqual([]); }); diff --git a/web/package-lock.json b/web/package-lock.json index a0432380..e662aea2 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -65,6 +65,7 @@ "integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.29.0", "@babel/generator": "^7.29.0", @@ -3019,6 +3020,7 @@ "resolved": "https://registry.npmjs.org/@tanstack/react-query/-/react-query-5.90.21.tgz", "integrity": "sha512-0Lu6y5t+tvlTJMTO7oh5NSpJfpg/5D41LlThfepTixPYkJ0sE2Jj0m0f6yYqujBwIXlId87e234+MxG3D3g7kg==", "license": "MIT", + "peer": true, "dependencies": { "@tanstack/query-core": "5.90.20" }, @@ -3113,6 +3115,7 @@ "https://trpc.io/sponsor" ], "license": "MIT", + "peer": true, "peerDependencies": { "@trpc/server": "11.10.0", "typescript": ">=5.7.2" @@ -3205,6 +3208,7 @@ "integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.2.2" } @@ -3215,6 +3219,7 @@ "integrity": "sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ==", "devOptional": true, "license": "MIT", + "peer": true, "peerDependencies": { "@types/react": "^19.2.0" } @@ -3282,6 +3287,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.9.0", "caniuse-lite": "^1.0.30001759", @@ -3922,6 +3928,7 @@ "integrity": 
"sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -4040,6 +4047,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -4049,6 +4057,7 @@ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.4.tgz", "integrity": "sha512-AXJdLo8kgMbimY95O2aKQqsz2iWi9jMgKJhRBAxECE4IFxfcazB2LmzloIoibJI3C12IlY20+KFaLv+71bUJeQ==", "license": "MIT", + "peer": true, "dependencies": { "scheduler": "^0.27.0" }, @@ -4061,6 +4070,7 @@ "resolved": "https://registry.npmjs.org/react-hook-form/-/react-hook-form-7.71.1.tgz", "integrity": "sha512-9SUJKCGKo8HUSsCO+y0CtqkqI5nNuaDqTxyqPsZPqIwudpj4rCrAz/jZV+jn57bx5gtZKOh3neQu94DXMc+w5w==", "license": "MIT", + "peer": true, "engines": { "node": ">=18.0.0" }, @@ -4217,6 +4227,7 @@ "resolved": "https://registry.npmjs.org/seroval/-/seroval-1.5.0.tgz", "integrity": "sha512-OE4cvmJ1uSPrKorFIH9/w/Qwuvi/IMcGbv5RKgcJ/zjA/IohDLU6SVaxFN9FwajbP7nsX0dQqMDes1whk3y+yw==", "license": "MIT", + "peer": true, "engines": { "node": ">=10" } @@ -4333,6 +4344,7 @@ "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -4430,6 +4442,7 @@ "integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.4.4", diff --git a/web/src/components/settings/agent-definition-editor.tsx b/web/src/components/settings/agent-definition-editor.tsx index a1f12ac1..e2ba7b3f 100644 --- 
a/web/src/components/settings/agent-definition-editor.tsx +++ b/web/src/components/settings/agent-definition-editor.tsx @@ -24,6 +24,7 @@ import { ReferencePanel } from './prompt-editor.js'; type RouterOutput = inferRouterOutputs; type DefinitionRow = RouterOutput['agentDefinitions']['list'][number]; type AgentDefinition = DefinitionRow['definition']; +type Capability = AgentDefinition['capabilities']['required'][number]; export interface AgentDefinitionEditorProps { /** When provided, we are editing an existing definition. When undefined, we are creating a new one. */ @@ -32,14 +33,36 @@ export interface AgentDefinitionEditorProps { } interface SchemaData { - toolSetNames: readonly string[]; - sdkToolsNames: readonly string[]; + capabilities: readonly string[]; contextStepNames: readonly string[]; taskPromptBuilderNames: readonly string[]; - gadgetBuilderNames: readonly string[]; compactionNames: readonly string[]; } +// All available capabilities organized by integration +const CAPABILITY_GROUPS: Record = { + 'built-in': { + label: 'Built-in (always available)', + caps: ['fs:read', 'fs:write', 'shell:exec', 'session:ctrl'], + }, + pm: { + label: 'PM Integration (Trello/JIRA)', + caps: ['pm:read', 'pm:write', 'pm:checklist'], + }, + scm: { + label: 'SCM Integration (GitHub)', + caps: ['scm:read', 'scm:comment', 'scm:review', 'scm:pr'], + }, + email: { + label: 'Email Integration', + caps: ['email:read', 'email:write'], + }, + sms: { + label: 'SMS Integration (Twilio)', + caps: ['sms:send'], + }, +}; + // ───────────────────────────────────────────────────────────────────────────── // Helper components (shared with form dialog) // ───────────────────────────────────────────────────────────────────────────── @@ -175,48 +198,116 @@ function IdentitySection({ function CapabilitiesSection({ def, - setCap, + setDef, }: { def: AgentDefinition; - setCap: (k: keyof AgentDefinition['capabilities'], v: boolean) => void; + setDef: React.Dispatch>; }) { + const 
toggleCapability = (cap: Capability, inRequired: boolean) => { + setDef((d) => { + const required = [...d.capabilities.required]; + const optional = [...d.capabilities.optional]; + + // Remove from both arrays first + const reqIdx = required.indexOf(cap); + const optIdx = optional.indexOf(cap); + if (reqIdx !== -1) required.splice(reqIdx, 1); + if (optIdx !== -1) optional.splice(optIdx, 1); + + // Add to the appropriate array + if (inRequired) { + required.push(cap); + } else { + optional.push(cap); + } + + return { ...d, capabilities: { required, optional } }; + }); + }; + + const removeCapability = (cap: Capability) => { + setDef((d) => ({ + ...d, + capabilities: { + required: d.capabilities.required.filter((c) => c !== cap), + optional: d.capabilities.optional.filter((c) => c !== cap), + }, + })); + }; + + const isRequired = (cap: Capability) => def.capabilities.required.includes(cap); + const isOptional = (cap: Capability) => def.capabilities.optional.includes(cap); + const isEnabled = (cap: Capability) => isRequired(cap) || isOptional(cap); + return ( -
+

Capabilities

-
- setCap('canEditFiles', v)} - label="Can Edit Files" - /> - setCap('canCreatePR', v)} - label="Can Create PR" - /> - setCap('canUpdateChecklists', v)} - label="Can Update Checklists" - /> - setCap('isReadOnly', v)} - label="Is Read Only" - /> - setCap('canAccessEmail', v)} - label="Can Access Email" - /> +

+ Select capabilities this agent needs. Required capabilities must be available; optional + capabilities are used when their integration is configured. +

+ + {Object.entries(CAPABILITY_GROUPS).map(([groupKey, { label, caps }]) => ( +
+
{label}
+
+ {caps.map((cap) => ( +
+ { + if (e.target.checked) { + toggleCapability(cap, true); + } else { + removeCapability(cap); + } + }} + className="h-4 w-4 rounded border-input" + /> + + {isEnabled(cap) && ( + + )} +
+ ))} +
+
+ ))} + +
+
Derived Configuration
+
+ Required integrations:{' '} + {[ + ...new Set( + def.capabilities.required + .filter( + (c) => + !c.startsWith('fs:') && !c.startsWith('shell:') && !c.startsWith('session:'), + ) + .map((c) => c.split(':')[0]), + ), + ].join(', ') || 'none'} +
); } -function ToolsSection({ +function StrategiesSection({ def, setDef, schema, @@ -227,15 +318,20 @@ function ToolsSection({ }) { return (
-

Tools

+

+ Strategies +

- + {schema ? ( - setDef((d) => ({ ...d, tools: { ...d.tools, sets } }) as AgentDefinition) + available={schema.contextStepNames} + selected={def.strategies.contextPipeline} + onChange={(contextPipeline) => + setDef( + (d) => + ({ ...d, strategies: { ...d.strategies, contextPipeline } }) as AgentDefinition, + ) } /> ) : ( @@ -243,18 +339,24 @@ function ToolsSection({ )}
- +
-
- ); -} - -function StrategiesSection({ - def, - setDef, - schema, -}: { - def: AgentDefinition; - setDef: React.Dispatch>; - schema: SchemaData | undefined; -}) { - return ( -
-

- Strategies -

-
- - {schema ? ( - - setDef( - (d) => - ({ ...d, strategies: { ...d.strategies, contextPipeline } }) as AgentDefinition, - ) - } - /> - ) : ( -
Loading...
- )} -
-
-
- - -
-
- - + label="Include Review Comments" + />
-
+ )}
); } @@ -465,47 +502,6 @@ function TrailingMessageSection({ ); } -function IntegrationsSection({ - def, - setDef, -}: { - def: AgentDefinition; - setDef: React.Dispatch>; -}) { - const integrationOptions = ['pm', 'scm', 'email', 'sms'] as const; - return ( -
-

- Integrations -

-
- - - setDef( - (d) => ({ ...d, integrations: { ...d.integrations, required } }) as AgentDefinition, - ) - } - /> -
-
- - - setDef( - (d) => ({ ...d, integrations: { ...d.integrations, optional } }) as AgentDefinition, - ) - } - /> -
-
- ); -} - // ───────────────────────────────────────────────────────────────────────────── // System Prompt panel (edit mode only) // ───────────────────────────────────────────────────────────────────────────── @@ -671,23 +667,17 @@ function SystemPromptPanel({ agentType }: { agentType: string }) { const EMPTY_DEFINITION: AgentDefinition = { identity: { emoji: '🤖', label: '', roleHint: '', initialMessage: '' }, capabilities: { - canEditFiles: false, - canCreatePR: false, - canUpdateChecklists: false, - isReadOnly: true, - canAccessEmail: false, + required: ['fs:read', 'session:ctrl'], + optional: [], }, - tools: { sets: [], sdkTools: 'readOnly' }, strategies: { contextPipeline: [], taskPromptBuilder: 'workItem', - gadgetBuilder: 'workItem', }, backend: { enableStopHooks: false, needsGitHubToken: false }, compaction: 'default', hint: '', trailingMessage: undefined, - integrations: { required: ['pm'], optional: [] }, }; // ───────────────────────────────────────────────────────────────────────────── @@ -771,8 +761,6 @@ function useDefinitionEditor(existing: DefinitionRow | undefined, onClose: () => const setIdentity = (k: keyof AgentDefinition['identity'], v: string) => setDef((d) => ({ ...d, identity: { ...d.identity, [k]: v } })); - const setCap = (k: keyof AgentDefinition['capabilities'], v: boolean) => - setDef((d) => ({ ...d, capabilities: { ...d.capabilities, [k]: v } })); const setBackend = (k: keyof AgentDefinition['backend'], v: unknown) => setDef((d) => ({ ...d, backend: { ...d.backend, [k]: v } })); const setTrailing = (k: string, v: boolean) => @@ -801,7 +789,6 @@ function useDefinitionEditor(existing: DefinitionRow | undefined, onClose: () => handleTabChange, handleSave, setIdentity, - setCap, setBackend, setTrailing, }; @@ -831,7 +818,6 @@ export function AgentDefinitionEditor({ existing, onClose }: AgentDefinitionEdit handleTabChange, handleSave, setIdentity, - setCap, setBackend, setTrailing, } = useDefinitionEditor(existing, onClose); @@ -907,8 
+893,7 @@ export function AgentDefinitionEditor({ existing, onClose }: AgentDefinitionEdit - - + @@ -953,7 +938,6 @@ export function AgentDefinitionEditor({ existing, onClose }: AgentDefinitionEdit
- {isEdit && ( diff --git a/web/src/components/settings/agent-definition-table.tsx b/web/src/components/settings/agent-definition-table.tsx index d0413663..bcc8dcb3 100644 --- a/web/src/components/settings/agent-definition-table.tsx +++ b/web/src/components/settings/agent-definition-table.tsx @@ -18,6 +18,19 @@ type DefinitionRow = RouterOutput['agentDefinitions']['list'][number]; export type { DefinitionRow }; +// Helper to derive key capability indicators from the capability arrays +function getCapabilityIndicators(capabilities: DefinitionRow['definition']['capabilities']) { + const all = [...capabilities.required, ...capabilities.optional]; + return { + canEditFiles: all.includes('fs:write'), + canCreatePR: all.includes('scm:pr'), + hasChecklists: all.includes('pm:checklist'), + isReadOnly: !all.includes('fs:write'), + hasEmail: all.includes('email:read') || all.includes('email:write'), + hasSms: all.includes('sms:send'), + }; +} + export function AgentDefinitionsTable({ definitions, onEdit, @@ -64,91 +77,99 @@ export function AgentDefinitionsTable({ )} - {definitions.map((row) => ( - - {row.definition.identity.emoji} - {row.agentType} - {row.definition.identity.label} - -
- {row.definition.capabilities.canEditFiles && ( - - edit files - - )} - {row.definition.capabilities.canCreatePR && ( - - create PR - - )} - {row.definition.capabilities.canUpdateChecklists && ( - - checklists - + {definitions.map((row) => { + const caps = getCapabilityIndicators(row.definition.capabilities); + return ( + + {row.definition.identity.emoji} + {row.agentType} + {row.definition.identity.label} + +
+ {caps.canEditFiles && ( + + edit files + + )} + {caps.canCreatePR && ( + + create PR + + )} + {caps.hasChecklists && ( + + checklists + + )} + {caps.isReadOnly && ( + + read-only + + )} + {caps.hasEmail && ( + + email + + )} + {caps.hasSms && ( + + sms + + )} +
+
+ + {row.isBuiltin ? ( + Built-in + ) : ( + Custom )} - {row.definition.capabilities.isReadOnly && ( - - read-only - - )} - {row.definition.capabilities.canAccessEmail && ( - - email - - )} -
-
- - {row.isBuiltin ? ( - Built-in - ) : ( - Custom - )} - - -
- - {row.isBuiltin && ( + + +
- )} - {!row.isBuiltin && ( - - )} -
-
- - ))} + {row.isBuiltin && ( + + )} + {!row.isBuiltin && ( + + )} +
+
+
+ ); + })}