From d37412558fed12078949732f2bfd4df221a0823b Mon Sep 17 00:00:00 2001 From: imossaidqadri Date: Thu, 12 Mar 2026 21:58:03 +0500 Subject: [PATCH] feat: add clean context and token budget for subagents - Add useCleanContext flag to skip session history inheritance - Add maxContextTokens for context token budget enforcement - Add useStructuredOutput for structured summary format - Implement truncateContentToTokenBudget utility - Update Task tool to support runtime config overrides - Add comprehensive tests for new features - Document advanced subagent configuration options Fixes #2332 --- docs/users/features/sub-agents.md | 130 ++++++++++ .../core/src/subagents/subagent-manager.ts | 17 +- packages/core/src/subagents/subagent.test.ts | 29 ++- packages/core/src/subagents/subagent.ts | 31 ++- packages/core/src/subagents/types.ts | 39 ++- packages/core/src/tools/task.test.ts | 30 +++ packages/core/src/tools/task.ts | 35 +++ .../core/src/utils/environmentContext.test.ts | 245 +++++++++++++++++- packages/core/src/utils/environmentContext.ts | 239 ++++++++++++++++- 9 files changed, 761 insertions(+), 34 deletions(-) diff --git a/docs/users/features/sub-agents.md b/docs/users/features/sub-agents.md index 85ca4aff96..650bc701b5 100644 --- a/docs/users/features/sub-agents.md +++ b/docs/users/features/sub-agents.md @@ -150,6 +150,136 @@ Have the documentation-writer Subagents update the API reference Get the react-specialist Subagents to optimize this component's performance ``` +## Advanced Configuration + +### Runtime Configuration Options + +Subagents support advanced runtime configuration options that control context behavior and output formatting. These can be specified in the agent's configuration file or passed dynamically when delegating tasks. + +#### `useCleanContext` + +When enabled, the subagent starts with a fresh context window instead of inheriting the full main session history. This prevents context bloat during long sessions. + +```yaml +--- +name: focused-researcher +description: Researches topics without carrying main session context +runConfig: + useCleanContext: true +--- +``` + +**Benefits:** + +- Reduces token usage for focused tasks +- Prevents context pollution from unrelated conversations +- Improves performance for long-running sessions + +#### `maxContextTokens` + +Sets a maximum token budget for the subagent's context. When exceeded, older messages are truncated to fit within the budget. + +```yaml +--- +name: budget-conscious-agent +description: Works within strict token limits +runConfig: + maxContextTokens: 4000 +--- +``` + +**Benefits:** + +- Controls costs for expensive operations +- Ensures predictable token usage +- Automatically truncates old context when needed + +#### `useStructuredOutput` + +Instructs the subagent to format its output using a structured summary schema (findings, files changed, conclusion). This ensures only distilled summaries are injected back into the main context. + +```yaml +--- +name: concise-reporter +description: Provides structured, concise reports +runConfig: + useStructuredOutput: true +--- +``` + +**Output Format:** +When enabled, the subagent formats results as: + +```markdown +## Findings + +- Key discovery 1 +- Key discovery 2 + +## Files Changed + +- path/to/modified/file.ts +- path/to/new/file.test.ts + +## Conclusion + +Concise summary of what was accomplished. + +## Recommendations (Optional) + +- Suggested next steps +``` + +### Dynamic Configuration + +You can also pass runtime configuration overrides when delegating tasks: + +```typescript +// Example: Pass runConfig when using Task tool programmatically +{ + description: "Research with clean context", + prompt: "Find all usages of deprecated APIs", + subagent_type: "code-analyst", + runConfig: { + useCleanContext: true, + maxContextTokens: 2000, + useStructuredOutput: true + } +} +``` + +### Complete Example + +```yaml +--- +name: efficient-tester +description: Writes tests efficiently with controlled context and structured output +tools: + - read_file + - write_file + - read_many_files + - run_shell_command +modelConfig: + model: qwen3-coder-plus + temp: 0.7 +runConfig: + useCleanContext: true + maxContextTokens: 8000 + useStructuredOutput: true + max_turns: 10 + max_time_minutes: 5 +--- + +You are a testing specialist focused on efficient, targeted test creation. + +Your approach: +1. Quickly identify the code under test +2. Create focused tests for key functionality +3. Report results in a structured format + +Work efficiently within the token budget. +``` + ## Examples ### Development Workflow Agents diff --git a/packages/core/src/subagents/subagent-manager.ts b/packages/core/src/subagents/subagent-manager.ts index 0552fa60c4..7ae3290d40 100644 --- a/packages/core/src/subagents/subagent-manager.ts +++ b/packages/core/src/subagents/subagent-manager.ts @@ -583,6 +583,8 @@ export class SubagentManager { * * @param config - Subagent configuration * @param runtimeContext - Runtime context + * @param options - Optional event emitter and hooks + * @param runConfigOverrides - Optional runtime configuration overrides * @returns Promise resolving to SubAgentScope */ async createSubagentScope( @@ -592,9 +594,13 @@ export class SubagentManager { eventEmitter?: import('./subagent-events.js').SubAgentEventEmitter; hooks?: import('./subagent-hooks.js').SubagentHooks; }, + runConfigOverrides?: Partial, ): Promise { try { - const runtimeConfig = this.convertToRuntimeConfig(config); + const runtimeConfig = this.convertToRuntimeConfig( + config, + runConfigOverrides, + ); return await SubAgentScope.create( config.name, @@ -623,9 +629,13 @@ export class SubagentManager { * compatible with SubAgentScope.create(). * * @param config - File-based subagent configuration + * @param runConfigOverrides - Optional runtime configuration overrides * @returns Runtime configuration for SubAgentScope */ - convertToRuntimeConfig(config: SubagentConfig): SubagentRuntimeConfig { + convertToRuntimeConfig( + config: SubagentConfig, + runConfigOverrides?: Partial, + ): SubagentRuntimeConfig { // Build prompt configuration const promptConfig: PromptConfig = { systemPrompt: config.systemPrompt, @@ -636,9 +646,10 @@ export class SubagentManager { ...config.modelConfig, }; - // Build run configuration + // Build run configuration with overrides const runConfig: RunConfig = { ...config.runConfig, + ...runConfigOverrides, }; // Build tool configuration if tools are specified diff --git a/packages/core/src/subagents/subagent.test.ts b/packages/core/src/subagents/subagent.test.ts index 0286d11c85..9609c7947a 100644 --- a/packages/core/src/subagents/subagent.test.ts +++ b/packages/core/src/subagents/subagent.test.ts @@ -79,17 +79,24 @@ vi.mock('../core/contentGenerator.js', async (importOriginal) => { }); vi.mock('../utils/environmentContext.js', () => ({ getEnvironmentContext: vi.fn().mockResolvedValue([{ text: 'Env Context' }]), - getInitialChatHistory: vi.fn(async (_config, extraHistory) => [ - { - role: 'user', - parts: [{ text: 'Env Context' }], - }, - { - role: 'model', - parts: [{ text: 'Got it. Thanks for the context!' }], - }, - ...(extraHistory ?? []), - ]), + getInitialChatHistory: vi.fn( + async ( + _config, + _useCleanContext = false, + _maxContextTokens, + extraHistory, + ) => [ + { + role: 'user', + parts: [{ text: 'Env Context' }], + }, + { + role: 'model', + parts: [{ text: 'Got it. Thanks for the context!' }], + }, + ...(extraHistory ?? []), + ], + ), })); vi.mock('../core/nonInteractiveToolExecutor.js'); vi.mock('../ide/ide-client.js'); diff --git a/packages/core/src/subagents/subagent.ts b/packages/core/src/subagents/subagent.ts index 613bc80441..ee698db360 100644 --- a/packages/core/src/subagents/subagent.ts +++ b/packages/core/src/subagents/subagent.ts @@ -870,7 +870,12 @@ export class SubAgentScope { ); } - const envHistory = await getInitialChatHistory(this.runtimeContext); + const useCleanContext = this.runConfig.useCleanContext ?? false; + const maxContextTokens = this.runConfig.maxContextTokens; + const envHistory = await getInitialChatHistory(this.runtimeContext, { + useCleanContext, + maxContextTokens, + }); const start_history = [ ...envHistory, @@ -999,6 +1004,30 @@ Important Rules: - Use tools only when necessary to obtain facts or make changes. - When the task is complete, return the final result as a normal model response (not a tool call) and stop.`; + // Add structured output instructions if enabled + if (this.runConfig.useStructuredOutput) { + finalPrompt += ` + +Output Format: +When you complete the task, you MUST format your response using the following structured schema: + +## Findings +- List key discoveries, insights, or results here +- One finding per bullet point + +## Files Changed +- List any files you created or modified (full paths) +- If no files were changed, write "None" + +## Conclusion +Provide a concise summary of what was accomplished and the final result. + +## Recommendations (Optional) +- Suggest next steps or follow-up actions if applicable + +This structured format ensures your results can be efficiently summarized and injected back into the main conversation context.`; + } + // Append user memory (QWEN.md + output-language.md) to ensure subagent respects project conventions const userMemory = this.runtimeContext.getUserMemory(); if (userMemory && userMemory.trim().length > 0) { diff --git a/packages/core/src/subagents/types.ts b/packages/core/src/subagents/types.ts index efa73a7e4d..c1c5a153e1 100644 --- a/packages/core/src/subagents/types.ts +++ b/packages/core/src/subagents/types.ts @@ -240,8 +240,6 @@ export interface ToolConfig { export interface ModelConfig { /** * The name or identifier of the model to be used (e.g., 'qwen3-coder-plus'). - * - * TODO: In the future, this needs to support 'auto' or some other string to support routing use cases. */ model?: string; /** @@ -258,8 +256,6 @@ export interface ModelConfig { * Configures the execution environment and constraints for the subagent. * This interface defines parameters that control the subagent's runtime behavior, * such as maximum execution time, to prevent infinite loops or excessive resource consumption. - * - * TODO: Consider adding max_tokens as a form of budgeting. */ export interface RunConfig { /** The maximum execution time for the subagent in minutes. */ @@ -269,4 +265,39 @@ export interface RunConfig { * before the execution is terminated. Helps prevent infinite loops. */ max_turns?: number; + /** + * When true, the subagent starts with a clean context window, not inheriting + * the main session's conversation history via getInitialChatHistory(). + * Only environment context (working directory, date, OS) is provided. + * This prevents context bloat during long sessions. + */ + useCleanContext?: boolean; + /** + * Maximum number of tokens allowed for context injection. + * When exceeded, context is truncated to fit within this budget. + * If not specified, no token budget is enforced. + */ + maxContextTokens?: number; + /** + * When true, instructs the subagent to format its output using a structured + * summary schema (findings, files changed, conclusion). This ensures only + * distilled summaries are injected back into the main context. + */ + useStructuredOutput?: boolean; +} + +/** + * Structured summary of subagent output. + * When useStructuredOutput is enabled, the subagent should format its + * final output using this schema. + */ +export interface SubagentStructuredSummary { + /** Key findings discovered during subagent execution */ + findings: string[]; + /** List of files that were created or modified */ + filesChanged?: string[]; + /** Final conclusion or result of the subagent's work */ + conclusion: string; + /** Optional: recommendations for next steps */ + recommendations?: string[]; } diff --git a/packages/core/src/tools/task.test.ts b/packages/core/src/tools/task.test.ts index 458b026b69..ba79f48948 100644 --- a/packages/core/src/tools/task.test.ts +++ b/packages/core/src/tools/task.test.ts @@ -380,6 +380,7 @@ describe('TaskTool', () => { mockSubagents[0], config, expect.any(Object), // eventEmitter parameter + undefined, // runConfigOverrides (undefined when not provided in params) ); expect(mockSubagentScope.runNonInteractive).toHaveBeenCalledWith( mockContextState, @@ -534,5 +535,34 @@ describe('TaskTool', () => { expect(description).toBe('file-search subagent: "Search files"'); }); + + it('should pass runConfig overrides to createSubagentScope', async () => { + const params: TaskParams = { + description: 'Search files with clean context', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + runConfig: { + useCleanContext: true, + maxContextTokens: 1000, + useStructuredOutput: true, + }, + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + await invocation.execute(); + + expect(mockSubagentManager.createSubagentScope).toHaveBeenCalledWith( + mockSubagents[0], + config, + expect.any(Object), + expect.objectContaining({ + useCleanContext: true, + maxContextTokens: 1000, + useStructuredOutput: true, + }), + ); + }); }); }); diff --git a/packages/core/src/tools/task.ts b/packages/core/src/tools/task.ts index e811dde0df..ca63241761 100644 --- a/packages/core/src/tools/task.ts +++ b/packages/core/src/tools/task.ts @@ -21,6 +21,7 @@ import type { SubagentManager } from '../subagents/subagent-manager.js'; import { type SubagentConfig, SubagentTerminateMode, + type RunConfig, } from '../subagents/types.js'; import { ContextState } from '../subagents/subagent.js'; import { @@ -40,6 +41,11 @@ export interface TaskParams { description: string; prompt: string; subagent_type: string; + /** + * Optional runtime configuration overrides for the subagent. + * Allows customizing context behavior like useCleanContext, maxContextTokens, etc. + */ + runConfig?: Partial; } const debugLogger = createDebugLogger('TASK'); @@ -72,6 +78,34 @@ export class TaskTool extends BaseDeclarativeTool { type: 'string', description: 'The type of specialized agent to use for this task', }, + runConfig: { + type: 'object', + description: + 'Optional runtime configuration overrides for the subagent', + properties: { + useCleanContext: { + type: 'boolean', + description: 'Start with clean context, omitting session history', + }, + maxContextTokens: { + type: 'number', + description: 'Maximum token budget for context', + }, + useStructuredOutput: { + type: 'boolean', + description: 'Format output using structured summary schema', + }, + max_time_minutes: { + type: 'number', + description: 'Maximum execution time in minutes', + }, + max_turns: { + type: 'number', + description: 'Maximum number of conversational turns', + }, + }, + additionalProperties: false, + }, }, required: ['description', 'prompt', 'subagent_type'], additionalProperties: false, @@ -510,6 +544,7 @@ class TaskToolInvocation extends BaseToolInvocation { subagentConfig, this.config, { eventEmitter: this.eventEmitter }, + this.params.runConfig, // Pass runtime config overrides ); // Create context state with the task prompt diff --git a/packages/core/src/utils/environmentContext.test.ts b/packages/core/src/utils/environmentContext.test.ts index 0b24a9b018..5184a21d2b 100644 --- a/packages/core/src/utils/environmentContext.test.ts +++ b/packages/core/src/utils/environmentContext.test.ts @@ -18,6 +18,7 @@ import { getEnvironmentContext, getDirectoryContextString, getInitialChatHistory, + truncateContentToTokenBudget, } from './environmentContext.js'; import type { Config } from '../config/config.js'; import { getFolderStructure } from './getFolderStructure.js'; @@ -149,15 +150,20 @@ describe('getEnvironmentContext', () => { describe('getInitialChatHistory', () => { let mockConfig: Partial; + let mockGeminiClient: { getHistory: () => Content[] }; beforeEach(() => { vi.mocked(getFolderStructure).mockResolvedValue('Mock Folder Structure'); + mockGeminiClient = { + getHistory: vi.fn().mockReturnValue([]), + }; mockConfig = { getSkipStartupContext: vi.fn().mockReturnValue(false), getWorkspaceContext: vi.fn().mockReturnValue({ getDirectories: vi.fn().mockReturnValue(['/test/dir']), }), getFileService: vi.fn(), + getGeminiClient: vi.fn().mockReturnValue(mockGeminiClient), }; }); @@ -200,10 +206,9 @@ describe('getInitialChatHistory', () => { { role: 'user', parts: [{ text: 'custom context' }] }, ]; - const history = await getInitialChatHistory( - mockConfig as Config, + const history = await getInitialChatHistory(mockConfig as Config, { extraHistory, - ); + }); expect(mockConfig.getSkipStartupContext).toHaveBeenCalled(); expect(history).toEqual(extraHistory); @@ -222,4 +227,238 @@ describe('getInitialChatHistory', () => { expect(history).toEqual([]); }); + + it('returns clean context without session history when useCleanContext is true', async () => { + const mockGeminiClient = { + getHistory: vi.fn().mockReturnValue([ + { role: 'user', parts: [{ text: 'Previous conversation' }] }, + { role: 'model', parts: [{ text: 'Previous response' }] }, + ]), + }; + mockConfig.getGeminiClient = vi.fn().mockReturnValue(mockGeminiClient); + + const history = await getInitialChatHistory(mockConfig as Config, { + useCleanContext: true, + }); + + // Should only have environment context, not the session history + expect(history).toHaveLength(2); + expect(history[0]?.role).toBe('user'); + expect(history[0]?.parts?.[0]?.text).toContain( + "I'm currently working in the directory", + ); + expect(history[1]?.role).toBe('model'); + expect(history[1]?.parts?.[0]?.text).toBe( + 'Got it. Thanks for the context!', + ); + + // Session history should NOT be included + expect(history).not.toEqual( + expect.arrayContaining([ + expect.objectContaining({ + parts: [expect.objectContaining({ text: 'Previous conversation' })], + }), + ]), + ); + }); + + it('includes session history when useCleanContext is false (default)', async () => { + const mockGeminiClient = { + getHistory: vi.fn().mockReturnValue([ + { role: 'user', parts: [{ text: 'Previous conversation' }] }, + { role: 'model', parts: [{ text: 'Previous response' }] }, + ]), + }; + mockConfig.getGeminiClient = vi.fn().mockReturnValue(mockGeminiClient); + + const history = await getInitialChatHistory(mockConfig as Config, { + useCleanContext: false, + }); + + // Should have environment context + session history + expect(history.length).toBeGreaterThan(2); + expect(history).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + parts: [expect.objectContaining({ text: 'Previous conversation' })], + }), + ]), + ); + }); + + it('truncates content to fit within maxContextTokens budget', async () => { + const mockGeminiClient = { + getHistory: vi.fn().mockReturnValue([ + { role: 'user', parts: [{ text: 'A'.repeat(400) }] }, // ~100 tokens + { role: 'model', parts: [{ text: 'B'.repeat(400) }] }, // ~100 tokens + { role: 'user', parts: [{ text: 'C'.repeat(400) }] }, // ~100 tokens + ]), + }; + mockConfig.getGeminiClient = vi.fn().mockReturnValue(mockGeminiClient); + + // Set a very low token budget that should truncate + const history = await getInitialChatHistory(mockConfig as Config, { + useCleanContext: false, + maxContextTokens: 150, + }); + + // Should have truncated the content + expect(history.length).toBeLessThanOrEqual(3); + // First item (environment context) should always be preserved + expect(history[0]?.role).toBe('user'); + expect(history[0]?.parts?.[0]?.text).toContain( + "I'm currently working in the directory", + ); + }); + + it('does not truncate when content is already under token budget', async () => { + const mockGeminiClient = { + getHistory: vi + .fn() + .mockReturnValue([ + { role: 'user', parts: [{ text: 'Short message' }] }, + ]), + }; + mockConfig.getGeminiClient = vi.fn().mockReturnValue(mockGeminiClient); + + const history = await getInitialChatHistory(mockConfig as Config, { + useCleanContext: false, + maxContextTokens: 1000, + }); + + // Should not truncate - all content should be present + expect(history.length).toBeGreaterThan(2); + expect(history).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + parts: [expect.objectContaining({ text: 'Short message' })], + }), + ]), + ); + }); + + it('truncates to minimum when maxContextTokens is very low', async () => { + mockGeminiClient.getHistory = vi + .fn() + .mockReturnValue([ + { role: 'user', parts: [{ text: 'Previous'.repeat(100) }] }, + ]); + const history = await getInitialChatHistory(mockConfig as Config, { + useCleanContext: false, + maxContextTokens: 50, + }); + + // Very low token budget should truncate but preserve at least environment context + expect(history.length).toBeGreaterThan(0); + expect(history[0]?.role).toBe('user'); + // The text should be truncated but still start with the context preamble + expect(history[0]?.parts?.[0]?.text).toContain('This is the Qwen Code'); + expect(history[0]?.parts?.[0]?.text).toContain( + '[truncated due to token budget]', + ); + }); +}); + +describe('truncateContentToTokenBudget', () => { + it('returns content as-is when under token budget', () => { + const contents: Content[] = [ + { role: 'user', parts: [{ text: 'Short text' }] }, + { role: 'model', parts: [{ text: 'Response' }] }, + ]; + + const result = truncateContentToTokenBudget(contents, 1000); + + // Should return a clone with same structure (not mutate original) + expect(result).not.toBe(contents); + expect(result).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + role: 'user', + parts: expect.arrayContaining([ + expect.objectContaining({ text: 'Short text' }), + ]), + }), + expect.objectContaining({ + role: 'model', + parts: expect.arrayContaining([ + expect.objectContaining({ text: 'Response' }), + ]), + }), + ]), + ); + }); + + it('truncates oldest messages first when over budget', () => { + const contents: Content[] = [ + { role: 'user', parts: [{ text: 'Environment context' }] }, // Should be preserved + { role: 'user', parts: [{ text: 'A'.repeat(400) }] }, // ~100 tokens - should be removed + { role: 'model', parts: [{ text: 'B'.repeat(400) }] }, // ~100 tokens - should be removed + { role: 'user', parts: [{ text: 'Keep this' }] }, // Should be kept + ]; + + const result = truncateContentToTokenBudget(contents, 150); + + // First item and last item should be kept + expect(result.length).toBeLessThan(contents.length); + expect(result[0]?.parts?.[0]?.text).toContain('Environment context'); + }); + + it('preserves first item (environment context) even when over budget', () => { + const contents: Content[] = [ + { role: 'user', parts: [{ text: 'Important environment info' }] }, + { role: 'user', parts: [{ text: 'A'.repeat(1000) }] }, + ]; + + const result = truncateContentToTokenBudget(contents, 50); + + expect(result.length).toBeGreaterThan(0); + expect(result[0]?.parts?.[0]?.text).toContain('Important environment info'); + }); + + it('returns empty array when maxTokens is 0 or negative', () => { + const contents: Content[] = [ + { role: 'user', parts: [{ text: 'Some content' }] }, + ]; + + expect(truncateContentToTokenBudget(contents, 0)).toEqual([]); + expect(truncateContentToTokenBudget(contents, -10)).toEqual([]); + }); + + it('truncates text with ellipsis when still over budget after removing items', () => { + const longText = 'A'.repeat(1000); + const contents: Content[] = [ + { role: 'user', parts: [{ text: 'Env' }] }, + { role: 'user', parts: [{ text: longText }] }, + ]; + + const result = truncateContentToTokenBudget(contents, 50); + + expect(result.length).toBe(2); + expect(result[1]?.parts?.[0]?.text).toContain( + '... [truncated due to token budget]', + ); + }); + + it('handles empty content array', () => { + const result = truncateContentToTokenBudget([], 100); + expect(result).toEqual([]); + }); + + it('handles content without parts', () => { + const contents: Content[] = [ + { role: 'user', parts: undefined }, + { role: 'model', parts: [] }, + ]; + + const result = truncateContentToTokenBudget(contents, 100); + + // Should return a clone (not mutate original) + expect(result).not.toBe(contents); + expect(result).toEqual( + expect.arrayContaining([ + expect.objectContaining({ role: 'user', parts: undefined }), + expect.objectContaining({ role: 'model', parts: [] }), + ]), + ); + }); }); diff --git a/packages/core/src/utils/environmentContext.ts b/packages/core/src/utils/environmentContext.ts index 4f5c03209d..4693e03f35 100644 --- a/packages/core/src/utils/environmentContext.ts +++ b/packages/core/src/utils/environmentContext.ts @@ -8,6 +8,121 @@ import type { Content, Part } from '@google/genai'; import type { Config } from '../config/config.js'; import { getFolderStructure } from './getFolderStructure.js'; +/** + * Rough estimate: 1 token ≈ 4 characters for English text. + * This is a conservative estimate for quick truncation before API calls. + */ +const CHARS_PER_TOKEN_ESTIMATE = 4; + +/** + * Estimates the number of tokens in a string using character count. + * This is a fast approximation - use the API's countTokens for accuracy. + * @param text - The text to estimate tokens for. + * @returns Estimated token count. + */ +function estimateTokens(text: string): number { + return Math.ceil(text.length / CHARS_PER_TOKEN_ESTIMATE); +} + +/** + * Truncates content to fit within a token budget. + * Uses a greedy approach: removes oldest messages first until under budget. + * Clones content objects to avoid mutating the input. + * @param contents - Array of content items to truncate. + * @param maxTokens - Maximum token budget. + * @returns Truncated array of content items. + */ +export function truncateContentToTokenBudget( + contents: Content[], + maxTokens: number, +): Content[] { + if (maxTokens <= 0) { + return []; + } + + if (contents.length === 0) { + return []; + } + + // Calculate total estimated tokens + const totalTokens = contents.reduce((sum, content) => { + const text = content.parts?.map((p) => p.text || '').join('') || ''; + return sum + estimateTokens(text); + }, 0); + + // If already under budget, return a shallow clone to avoid mutation + if (totalTokens <= maxTokens) { + return contents.map((item) => ({ ...item })); + } + + // Deep clone the first item to avoid mutating the original + const firstItem = contents[0] + ? { + ...contents[0], + parts: contents[0].parts?.map((p) => ({ ...p })), + } + : undefined; + const restItems = contents.slice(1); + + // Calculate first item tokens + const firstItemText = + firstItem?.parts?.map((p) => p.text || '').join('') || ''; + const firstItemTokens = estimateTokens(firstItemText); + + // If first item alone exceeds budget, truncate it + if (firstItemTokens >= maxTokens && firstItem) { + const availableChars = maxTokens * CHARS_PER_TOKEN_ESTIMATE; + if (availableChars > 0 && availableChars < firstItemText.length) { + firstItem.parts = [ + { + text: + firstItemText.slice(0, availableChars) + + '... [truncated due to token budget]', + }, + ]; + } + return [firstItem]; + } + + // Remove items from the beginning of restItems (oldest messages first) + // But keep at least one item if possible for truncation + let currentTokens = totalTokens; + const truncatedItems = [...restItems]; + + while (currentTokens > maxTokens && truncatedItems.length > 1) { + const removedItem = truncatedItems.shift()!; + const removedText = + removedItem.parts?.map((p) => p.text || '').join('') || ''; + currentTokens -= estimateTokens(removedText); + } + + // If still over budget with one item remaining, truncate that item + if (currentTokens > maxTokens && truncatedItems.length === 1) { + // Clone the last item before mutating + const lastItem = { + ...truncatedItems[0], + parts: truncatedItems[0].parts?.map((p) => ({ ...p })), + }; + const lastText = lastItem.parts?.map((p) => p.text || '').join('') || ''; + + const availableTokensForLast = maxTokens - firstItemTokens; + const availableCharsForLast = + availableTokensForLast * CHARS_PER_TOKEN_ESTIMATE; + + if (availableCharsForLast > 0 && availableCharsForLast < lastText.length) { + const truncatedText = + lastText.slice(0, availableCharsForLast) + + '... [truncated due to token budget]'; + lastItem.parts = [{ text: truncatedText }]; + } + + // Replace the original with the cloned/truncated version + truncatedItems[0] = lastItem; + } + + return firstItem ? [firstItem, ...truncatedItems] : truncatedItems; +} + /** * Generates a string describing the current workspace directories and their structures. * @param {Config} config - The runtime configuration and services. @@ -69,10 +184,45 @@ ${directoryContext} return [{ text: context }]; } +/** + * Options for configuring initial chat history. + */ +export interface GetInitialChatHistoryOptions { + /** When true, omits accumulated session history. */ + useCleanContext?: boolean; + /** Optional maximum token budget for context. */ + maxContextTokens?: number; + /** Optional additional history to append. */ + extraHistory?: Content[]; +} + +/** + * Retrieves the initial chat history to seed a chat session. + * By default, includes environment context plus any accumulated session history. + * When useCleanContext is true, only provides fresh environment context without + * prior session history - useful for subagents to avoid context bloat. + * @param {Config} config - The runtime configuration and services. + * @param options - Options for configuring the initial history. + * @returns A promise that resolves to an array of `Content` objects for chat history. + */ export async function getInitialChatHistory( config: Config, - extraHistory?: Content[], + options?: GetInitialChatHistoryOptions | Content[], ): Promise { + // Backward compatibility: if options is an array, treat it as extraHistory + let useCleanContext = false; + let maxContextTokens: number | undefined; + let extraHistory: Content[] | undefined; + + if (Array.isArray(options)) { + // Legacy call pattern: getInitialChatHistory(config, extraHistory) + extraHistory = options; + } else if (options) { + // New call pattern with options object + useCleanContext = options.useCleanContext ?? false; + maxContextTokens = options.maxContextTokens; + extraHistory = options.extraHistory; + } if (config.getSkipStartupContext()) { return extraHistory ? [...extraHistory] : []; } @@ -80,15 +230,80 @@ export async function getInitialChatHistory( const envParts = await getEnvironmentContext(config); const envContextString = envParts.map((part) => part.text || '').join('\n\n'); - return [ - { - role: 'user', - parts: [{ text: envContextString }], - }, - { - role: 'model', - parts: [{ text: 'Got it. Thanks for the context!' }], - }, - ...(extraHistory ?? []), - ]; + let history: Content[]; + + // When using clean context, skip any accumulated session history + if (useCleanContext) { + history = [ + { + role: 'user', + parts: [{ text: envContextString }], + }, + { + role: 'model', + parts: [{ text: 'Got it. Thanks for the context!' }], + }, + ...(extraHistory ?? []), + ]; + } else { + // Default behavior: include accumulated session history + let sessionHistory: Content[] = []; + try { + sessionHistory = config.getGeminiClient()?.getHistory() ?? []; + } catch { + // Client not initialized yet - use empty history + sessionHistory = []; + } + + // Strip the initial environment context + ack from session history + // to avoid duplication when we prepend fresh context below. + // The session history typically starts with: + // [{role: 'user', parts: [{text: envContext}]}, {role: 'model', parts: [{text: ack}]}] + // We remove these and keep only the actual conversation. + let strippedSessionHistory: Content[] = sessionHistory; + if ( + sessionHistory.length >= 2 && + sessionHistory[0].role === 'user' && + sessionHistory[1].role === 'model' + ) { + const firstUserText = + sessionHistory[0].parts?.map((p) => p.text || '').join('') || ''; + const firstModelText = + sessionHistory[1].parts?.map((p) => p.text || '').join('') || ''; + + // Check if this looks like environment context (contains working directory info) + if ( + firstUserText.includes('working in the directory') || + firstUserText.includes('working in the following directories') + ) { + // Check if model response is the standard ack + if ( + firstModelText.includes('Got it') && + firstModelText.includes('context') + ) { + strippedSessionHistory = sessionHistory.slice(2); + } + } + } + + history = [ + { + role: 'user', + parts: [{ text: envContextString }], + }, + { + role: 'model', + parts: [{ text: 'Got it. Thanks for the context!' }], + }, + ...strippedSessionHistory, + ...(extraHistory ?? []), + ]; + } + + // Apply token budget truncation if specified + if (maxContextTokens && maxContextTokens > 0) { + history = truncateContentToTokenBudget(history, maxContextTokens); + } + + return history; }