From d37412558fed12078949732f2bfd4df221a0823b Mon Sep 17 00:00:00 2001
From: imossaidqadri <imossaidquadri@gmail.com>
Date: Thu, 12 Mar 2026 21:58:03 +0500
Subject: [PATCH] feat: add clean context and token budget for subagents

- Add useCleanContext flag to skip session history inheritance
- Add maxContextTokens for context token budget enforcement
- Add useStructuredOutput for structured summary format
- Implement truncateContentToTokenBudget utility
- Update Task tool to support runtime config overrides
- Add comprehensive tests for new features
- Document advanced subagent configuration options

Fixes #2332
---
 docs/users/features/sub-agents.md             | 130 ++++++++++
 .../core/src/subagents/subagent-manager.ts    |  17 +-
 packages/core/src/subagents/subagent.test.ts  |  29 ++-
 packages/core/src/subagents/subagent.ts       |  31 ++-
 packages/core/src/subagents/types.ts          |  39 ++-
 packages/core/src/tools/task.test.ts          |  30 +++
 packages/core/src/tools/task.ts               |  35 +++
 .../core/src/utils/environmentContext.test.ts | 245 +++++++++++++++++-
 packages/core/src/utils/environmentContext.ts | 239 ++++++++++++++++-
 9 files changed, 761 insertions(+), 34 deletions(-)

diff --git a/docs/users/features/sub-agents.md b/docs/users/features/sub-agents.md
index 85ca4aff96..650bc701b5 100644
--- a/docs/users/features/sub-agents.md
+++ b/docs/users/features/sub-agents.md
@@ -150,6 +150,136 @@ Have the documentation-writer Subagents update the API reference
 Get the react-specialist Subagents to optimize this component's performance
 ```
 
+## Advanced Configuration
+
+### Runtime Configuration Options
+
+Subagents support advanced runtime configuration options that control context behavior and output formatting. These can be specified in the agent's configuration file or passed dynamically when delegating tasks.
+
+#### `useCleanContext`
+
+When enabled, the subagent starts with a fresh context window instead of inheriting the full main session history. This prevents context bloat during long sessions.
+
+```yaml
+---
+name: focused-researcher
+description: Researches topics without carrying main session context
+runConfig:
+  useCleanContext: true
+---
+```
+
+**Benefits:**
+
+- Reduces token usage for focused tasks
+- Prevents context pollution from unrelated conversations
+- Improves performance for long-running sessions
+
+#### `maxContextTokens`
+
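+Sets a maximum token budget for the subagent's initial context. Token usage is estimated at roughly 4 characters per token rather than counted exactly. When the budget is exceeded, the oldest messages are dropped first (the environment context is kept) and the remaining content is truncated to fit.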
+
+```yaml
+---
+name: budget-conscious-agent
+description: Works within strict token limits
+runConfig:
+  maxContextTokens: 4000
+---
+```
+
+**Benefits:**
+
+- Controls costs for expensive operations
+- Ensures predictable token usage
+- Automatically truncates old context when needed
+
+#### `useStructuredOutput`
+
+Instructs the subagent to format its output using a structured summary schema (findings, files changed, conclusion). This ensures only distilled summaries are injected back into the main context.
+
+```yaml
+---
+name: concise-reporter
+description: Provides structured, concise reports
+runConfig:
+  useStructuredOutput: true
+---
+```
+
+**Output Format:**
+When enabled, the subagent formats results as:
+
+```markdown
+## Findings
+
+- Key discovery 1
+- Key discovery 2
+
+## Files Changed
+
+- path/to/modified/file.ts
+- path/to/new/file.test.ts
+
+## Conclusion
+
+Concise summary of what was accomplished.
+
+## Recommendations (Optional)
+
+- Suggested next steps
+```
+
+### Dynamic Configuration
+
+You can also pass runtime configuration overrides when delegating tasks. Any value given here takes precedence over the same key in the agent's configuration file:
+
+```typescript
+// Example: Pass runConfig when using Task tool programmatically
+{
+  description: "Research with clean context",
+  prompt: "Find all usages of deprecated APIs",
+  subagent_type: "code-analyst",
+  runConfig: {
+    useCleanContext: true,
+    maxContextTokens: 2000,
+    useStructuredOutput: true
+  }
+}
+```
+
+### Complete Example
+
+```yaml
+---
+name: efficient-tester
+description: Writes tests efficiently with controlled context and structured output
+tools:
+  - read_file
+  - write_file
+  - read_many_files
+  - run_shell_command
+modelConfig:
+  model: qwen3-coder-plus
+  temp: 0.7
+runConfig:
+  useCleanContext: true
+  maxContextTokens: 8000
+  useStructuredOutput: true
+  max_turns: 10
+  max_time_minutes: 5
+---
+
+You are a testing specialist focused on efficient, targeted test creation.
+
+Your approach:
+1. Quickly identify the code under test
+2. Create focused tests for key functionality
+3. Report results in a structured format
+
+Work efficiently within the token budget.
+```
+
 ## Examples
 
 ### Development Workflow Agents
diff --git a/packages/core/src/subagents/subagent-manager.ts b/packages/core/src/subagents/subagent-manager.ts
index 0552fa60c4..7ae3290d40 100644
--- a/packages/core/src/subagents/subagent-manager.ts
+++ b/packages/core/src/subagents/subagent-manager.ts
@@ -583,6 +583,8 @@ export class SubagentManager {
    *
    * @param config - Subagent configuration
    * @param runtimeContext - Runtime context
+   * @param options - Optional event emitter and hooks
+   * @param runConfigOverrides - Optional runtime configuration overrides
    * @returns Promise resolving to SubAgentScope
    */
   async createSubagentScope(
@@ -592,9 +594,13 @@ export class SubagentManager {
       eventEmitter?: import('./subagent-events.js').SubAgentEventEmitter;
       hooks?: import('./subagent-hooks.js').SubagentHooks;
     },
+    runConfigOverrides?: Partial<RunConfig>,
   ): Promise<SubAgentScope> {
     try {
-      const runtimeConfig = this.convertToRuntimeConfig(config);
+      const runtimeConfig = this.convertToRuntimeConfig(
+        config,
+        runConfigOverrides,
+      );
 
       return await SubAgentScope.create(
         config.name,
@@ -623,9 +629,13 @@ export class SubagentManager {
    * compatible with SubAgentScope.create().
    *
    * @param config - File-based subagent configuration
+   * @param runConfigOverrides - Optional runtime configuration overrides
    * @returns Runtime configuration for SubAgentScope
    */
-  convertToRuntimeConfig(config: SubagentConfig): SubagentRuntimeConfig {
+  convertToRuntimeConfig(
+    config: SubagentConfig,
+    runConfigOverrides?: Partial<RunConfig>,
+  ): SubagentRuntimeConfig {
     // Build prompt configuration
     const promptConfig: PromptConfig = {
       systemPrompt: config.systemPrompt,
@@ -636,9 +646,10 @@ export class SubagentManager {
       ...config.modelConfig,
     };
 
-    // Build run configuration
+    // Build run configuration with overrides
     const runConfig: RunConfig = {
       ...config.runConfig,
+      ...runConfigOverrides,
     };
 
     // Build tool configuration if tools are specified
diff --git a/packages/core/src/subagents/subagent.test.ts b/packages/core/src/subagents/subagent.test.ts
index 0286d11c85..9609c7947a 100644
--- a/packages/core/src/subagents/subagent.test.ts
+++ b/packages/core/src/subagents/subagent.test.ts
@@ -79,17 +79,24 @@ vi.mock('../core/contentGenerator.js', async (importOriginal) => {
 });
 vi.mock('../utils/environmentContext.js', () => ({
   getEnvironmentContext: vi.fn().mockResolvedValue([{ text: 'Env Context' }]),
-  getInitialChatHistory: vi.fn(async (_config, extraHistory) => [
-    {
-      role: 'user',
-      parts: [{ text: 'Env Context' }],
-    },
-    {
-      role: 'model',
-      parts: [{ text: 'Got it. Thanks for the context!' }],
-    },
-    ...(extraHistory ?? []),
-  ]),
+  getInitialChatHistory: vi.fn(
+    async (
+      _config,
+      _useCleanContext = false,
+      _maxContextTokens,
+      extraHistory,
+    ) => [
+      {
+        role: 'user',
+        parts: [{ text: 'Env Context' }],
+      },
+      {
+        role: 'model',
+        parts: [{ text: 'Got it. Thanks for the context!' }],
+      },
+      ...(extraHistory ?? []),
+    ],
+  ),
 }));
 vi.mock('../core/nonInteractiveToolExecutor.js');
 vi.mock('../ide/ide-client.js');
diff --git a/packages/core/src/subagents/subagent.ts b/packages/core/src/subagents/subagent.ts
index 613bc80441..ee698db360 100644
--- a/packages/core/src/subagents/subagent.ts
+++ b/packages/core/src/subagents/subagent.ts
@@ -870,7 +870,12 @@ export class SubAgentScope {
       );
     }
 
-    const envHistory = await getInitialChatHistory(this.runtimeContext);
+    const useCleanContext = this.runConfig.useCleanContext ?? false;
+    const maxContextTokens = this.runConfig.maxContextTokens;
+    const envHistory = await getInitialChatHistory(this.runtimeContext, {
+      useCleanContext,
+      maxContextTokens,
+    });
 
     const start_history = [
       ...envHistory,
@@ -999,6 +1004,30 @@ Important Rules:
  - Use tools only when necessary to obtain facts or make changes.
  - When the task is complete, return the final result as a normal model response (not a tool call) and stop.`;
 
+    // Add structured output instructions if enabled
+    if (this.runConfig.useStructuredOutput) {
+      finalPrompt += `
+
+Output Format:
+When you complete the task, you MUST format your response using the following structured schema:
+
+## Findings
+- List key discoveries, insights, or results here
+- One finding per bullet point
+
+## Files Changed
+- List any files you created or modified (full paths)
+- If no files were changed, write "None"
+
+## Conclusion
+Provide a concise summary of what was accomplished and the final result.
+
+## Recommendations (Optional)
+- Suggest next steps or follow-up actions if applicable
+
+This structured format ensures your results can be efficiently summarized and injected back into the main conversation context.`;
+    }
+
     // Append user memory (QWEN.md + output-language.md) to ensure subagent respects project conventions
     const userMemory = this.runtimeContext.getUserMemory();
     if (userMemory && userMemory.trim().length > 0) {
diff --git a/packages/core/src/subagents/types.ts b/packages/core/src/subagents/types.ts
index efa73a7e4d..c1c5a153e1 100644
--- a/packages/core/src/subagents/types.ts
+++ b/packages/core/src/subagents/types.ts
@@ -240,8 +240,6 @@ export interface ToolConfig {
 export interface ModelConfig {
   /**
    * The name or identifier of the model to be used (e.g., 'qwen3-coder-plus').
-   *
-   * TODO: In the future, this needs to support 'auto' or some other string to support routing use cases.
    */
   model?: string;
   /**
@@ -258,8 +256,6 @@ export interface ModelConfig {
  * Configures the execution environment and constraints for the subagent.
  * This interface defines parameters that control the subagent's runtime behavior,
  * such as maximum execution time, to prevent infinite loops or excessive resource consumption.
- *
- * TODO: Consider adding max_tokens as a form of budgeting.
  */
 export interface RunConfig {
   /** The maximum execution time for the subagent in minutes. */
@@ -269,4 +265,39 @@ export interface RunConfig {
    * before the execution is terminated. Helps prevent infinite loops.
    */
   max_turns?: number;
+  /**
+   * When true, the subagent starts with a clean context window, not inheriting
+   * the main session's conversation history via getInitialChatHistory().
+   * Only environment context (working directory, date, OS) is provided.
+   * This prevents context bloat during long sessions.
+   */
+  useCleanContext?: boolean;
+  /**
+   * Maximum estimated token count (~4 characters per token) for the initial
+   * context. When exceeded, the oldest messages are dropped or truncated.
+   * If not specified, or not a positive number, no budget is enforced.
+   */
+  maxContextTokens?: number;
+  /**
+   * When true, instructs the subagent to format its output using a structured
+   * summary schema (findings, files changed, conclusion). This ensures only
+   * distilled summaries are injected back into the main context.
+   */
+  useStructuredOutput?: boolean;
+}
+
+/**
+ * Structured summary of subagent output.
+ * When useStructuredOutput is enabled, the subagent should format its
+ * final output using this schema.
+ */
+export interface SubagentStructuredSummary {
+  /** Key findings discovered during subagent execution */
+  findings: string[];
+  /** List of files that were created or modified */
+  filesChanged?: string[];
+  /** Final conclusion or result of the subagent's work */
+  conclusion: string;
+  /** Optional: recommendations for next steps */
+  recommendations?: string[];
 }
diff --git a/packages/core/src/tools/task.test.ts b/packages/core/src/tools/task.test.ts
index 458b026b69..ba79f48948 100644
--- a/packages/core/src/tools/task.test.ts
+++ b/packages/core/src/tools/task.test.ts
@@ -380,6 +380,7 @@ describe('TaskTool', () => {
         mockSubagents[0],
         config,
         expect.any(Object), // eventEmitter parameter
+        undefined, // runConfigOverrides (undefined when not provided in params)
       );
       expect(mockSubagentScope.runNonInteractive).toHaveBeenCalledWith(
         mockContextState,
@@ -534,5 +535,34 @@ describe('TaskTool', () => {
 
       expect(description).toBe('file-search subagent: "Search files"');
     });
+
+    it('should pass runConfig overrides to createSubagentScope', async () => {
+      const params: TaskParams = {
+        description: 'Search files with clean context',
+        prompt: 'Find all TypeScript files',
+        subagent_type: 'file-search',
+        runConfig: {
+          useCleanContext: true,
+          maxContextTokens: 1000,
+          useStructuredOutput: true,
+        },
+      };
+
+      const invocation = (
+        taskTool as TaskToolWithProtectedMethods
+      ).createInvocation(params);
+      await invocation.execute();
+
+      expect(mockSubagentManager.createSubagentScope).toHaveBeenCalledWith(
+        mockSubagents[0],
+        config,
+        expect.any(Object),
+        expect.objectContaining({
+          useCleanContext: true,
+          maxContextTokens: 1000,
+          useStructuredOutput: true,
+        }),
+      );
+    });
   });
 });
diff --git a/packages/core/src/tools/task.ts b/packages/core/src/tools/task.ts
index e811dde0df..ca63241761 100644
--- a/packages/core/src/tools/task.ts
+++ b/packages/core/src/tools/task.ts
@@ -21,6 +21,7 @@ import type { SubagentManager } from '../subagents/subagent-manager.js';
 import {
   type SubagentConfig,
   SubagentTerminateMode,
+  type RunConfig,
 } from '../subagents/types.js';
 import { ContextState } from '../subagents/subagent.js';
 import {
@@ -40,6 +41,11 @@ export interface TaskParams {
   description: string;
   prompt: string;
   subagent_type: string;
+  /**
+   * Optional runtime configuration overrides for the subagent.
+   * Allows customizing context behavior like useCleanContext, maxContextTokens, etc.
+   */
+  runConfig?: Partial<RunConfig>;
 }
 
 const debugLogger = createDebugLogger('TASK');
@@ -72,6 +78,34 @@ export class TaskTool extends BaseDeclarativeTool<TaskParams, ToolResult> {
           type: 'string',
           description: 'The type of specialized agent to use for this task',
         },
+        runConfig: {
+          type: 'object',
+          description:
+            'Optional runtime configuration overrides for the subagent',
+          properties: {
+            useCleanContext: {
+              type: 'boolean',
+              description: 'Start with clean context, omitting session history',
+            },
+            maxContextTokens: {
+              type: 'number',
+              description: 'Maximum token budget for context',
+            },
+            useStructuredOutput: {
+              type: 'boolean',
+              description: 'Format output using structured summary schema',
+            },
+            max_time_minutes: {
+              type: 'number',
+              description: 'Maximum execution time in minutes',
+            },
+            max_turns: {
+              type: 'number',
+              description: 'Maximum number of conversational turns',
+            },
+          },
+          additionalProperties: false,
+        },
       },
       required: ['description', 'prompt', 'subagent_type'],
       additionalProperties: false,
@@ -510,6 +544,7 @@ class TaskToolInvocation extends BaseToolInvocation<TaskParams, ToolResult> {
         subagentConfig,
         this.config,
         { eventEmitter: this.eventEmitter },
+        this.params.runConfig, // Pass runtime config overrides
       );
 
       // Create context state with the task prompt
diff --git a/packages/core/src/utils/environmentContext.test.ts b/packages/core/src/utils/environmentContext.test.ts
index 0b24a9b018..5184a21d2b 100644
--- a/packages/core/src/utils/environmentContext.test.ts
+++ b/packages/core/src/utils/environmentContext.test.ts
@@ -18,6 +18,7 @@ import {
   getEnvironmentContext,
   getDirectoryContextString,
   getInitialChatHistory,
+  truncateContentToTokenBudget,
 } from './environmentContext.js';
 import type { Config } from '../config/config.js';
 import { getFolderStructure } from './getFolderStructure.js';
@@ -149,15 +150,20 @@ describe('getEnvironmentContext', () => {
 
 describe('getInitialChatHistory', () => {
   let mockConfig: Partial<Config>;
+  let mockGeminiClient: { getHistory: () => Content[] };
 
   beforeEach(() => {
     vi.mocked(getFolderStructure).mockResolvedValue('Mock Folder Structure');
+    mockGeminiClient = {
+      getHistory: vi.fn().mockReturnValue([]),
+    };
     mockConfig = {
       getSkipStartupContext: vi.fn().mockReturnValue(false),
       getWorkspaceContext: vi.fn().mockReturnValue({
         getDirectories: vi.fn().mockReturnValue(['/test/dir']),
       }),
       getFileService: vi.fn(),
+      getGeminiClient: vi.fn().mockReturnValue(mockGeminiClient),
     };
   });
 
@@ -200,10 +206,9 @@ describe('getInitialChatHistory', () => {
       { role: 'user', parts: [{ text: 'custom context' }] },
     ];
 
-    const history = await getInitialChatHistory(
-      mockConfig as Config,
+    const history = await getInitialChatHistory(mockConfig as Config, {
       extraHistory,
-    );
+    });
 
     expect(mockConfig.getSkipStartupContext).toHaveBeenCalled();
     expect(history).toEqual(extraHistory);
@@ -222,4 +227,238 @@ describe('getInitialChatHistory', () => {
 
     expect(history).toEqual([]);
   });
+
+  it('returns clean context without session history when useCleanContext is true', async () => {
+    const mockGeminiClient = {
+      getHistory: vi.fn().mockReturnValue([
+        { role: 'user', parts: [{ text: 'Previous conversation' }] },
+        { role: 'model', parts: [{ text: 'Previous response' }] },
+      ]),
+    };
+    mockConfig.getGeminiClient = vi.fn().mockReturnValue(mockGeminiClient);
+
+    const history = await getInitialChatHistory(mockConfig as Config, {
+      useCleanContext: true,
+    });
+
+    // Should only have environment context, not the session history
+    expect(history).toHaveLength(2);
+    expect(history[0]?.role).toBe('user');
+    expect(history[0]?.parts?.[0]?.text).toContain(
+      "I'm currently working in the directory",
+    );
+    expect(history[1]?.role).toBe('model');
+    expect(history[1]?.parts?.[0]?.text).toBe(
+      'Got it. Thanks for the context!',
+    );
+
+    // Session history should NOT be included
+    expect(history).not.toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({
+          parts: [expect.objectContaining({ text: 'Previous conversation' })],
+        }),
+      ]),
+    );
+  });
+
+  it('includes session history when useCleanContext is false (default)', async () => {
+    const mockGeminiClient = {
+      getHistory: vi.fn().mockReturnValue([
+        { role: 'user', parts: [{ text: 'Previous conversation' }] },
+        { role: 'model', parts: [{ text: 'Previous response' }] },
+      ]),
+    };
+    mockConfig.getGeminiClient = vi.fn().mockReturnValue(mockGeminiClient);
+
+    const history = await getInitialChatHistory(mockConfig as Config, {
+      useCleanContext: false,
+    });
+
+    // Should have environment context + session history
+    expect(history.length).toBeGreaterThan(2);
+    expect(history).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({
+          parts: [expect.objectContaining({ text: 'Previous conversation' })],
+        }),
+      ]),
+    );
+  });
+
+  it('truncates content to fit within maxContextTokens budget', async () => {
+    const mockGeminiClient = {
+      getHistory: vi.fn().mockReturnValue([
+        { role: 'user', parts: [{ text: 'A'.repeat(400) }] }, // ~100 tokens
+        { role: 'model', parts: [{ text: 'B'.repeat(400) }] }, // ~100 tokens
+        { role: 'user', parts: [{ text: 'C'.repeat(400) }] }, // ~100 tokens
+      ]),
+    };
+    mockConfig.getGeminiClient = vi.fn().mockReturnValue(mockGeminiClient);
+
+    // Set a very low token budget that should truncate
+    const history = await getInitialChatHistory(mockConfig as Config, {
+      useCleanContext: false,
+      maxContextTokens: 150,
+    });
+
+    // Should have truncated the content
+    expect(history.length).toBeLessThanOrEqual(3);
+    // First item (environment context) should always be preserved
+    expect(history[0]?.role).toBe('user');
+    expect(history[0]?.parts?.[0]?.text).toContain(
+      "I'm currently working in the directory",
+    );
+  });
+
+  it('does not truncate when content is already under token budget', async () => {
+    const mockGeminiClient = {
+      getHistory: vi
+        .fn()
+        .mockReturnValue([
+          { role: 'user', parts: [{ text: 'Short message' }] },
+        ]),
+    };
+    mockConfig.getGeminiClient = vi.fn().mockReturnValue(mockGeminiClient);
+
+    const history = await getInitialChatHistory(mockConfig as Config, {
+      useCleanContext: false,
+      maxContextTokens: 1000,
+    });
+
+    // Should not truncate - all content should be present
+    expect(history.length).toBeGreaterThan(2);
+    expect(history).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({
+          parts: [expect.objectContaining({ text: 'Short message' })],
+        }),
+      ]),
+    );
+  });
+
+  it('truncates to minimum when maxContextTokens is very low', async () => {
+    mockGeminiClient.getHistory = vi
+      .fn()
+      .mockReturnValue([
+        { role: 'user', parts: [{ text: 'Previous'.repeat(100) }] },
+      ]);
+    const history = await getInitialChatHistory(mockConfig as Config, {
+      useCleanContext: false,
+      maxContextTokens: 50,
+    });
+
+    // Very low token budget should truncate but preserve at least environment context
+    expect(history.length).toBeGreaterThan(0);
+    expect(history[0]?.role).toBe('user');
+    // The text should be truncated but still start with the context preamble
+    expect(history[0]?.parts?.[0]?.text).toContain('This is the Qwen Code');
+    expect(history[0]?.parts?.[0]?.text).toContain(
+      '[truncated due to token budget]',
+    );
+  });
+});
+
+describe('truncateContentToTokenBudget', () => {
+  it('returns content as-is when under token budget', () => {
+    const contents: Content[] = [
+      { role: 'user', parts: [{ text: 'Short text' }] },
+      { role: 'model', parts: [{ text: 'Response' }] },
+    ];
+
+    const result = truncateContentToTokenBudget(contents, 1000);
+
+    // Should return a clone with same structure (not mutate original)
+    expect(result).not.toBe(contents);
+    expect(result).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({
+          role: 'user',
+          parts: expect.arrayContaining([
+            expect.objectContaining({ text: 'Short text' }),
+          ]),
+        }),
+        expect.objectContaining({
+          role: 'model',
+          parts: expect.arrayContaining([
+            expect.objectContaining({ text: 'Response' }),
+          ]),
+        }),
+      ]),
+    );
+  });
+
+  it('truncates oldest messages first when over budget', () => {
+    const contents: Content[] = [
+      { role: 'user', parts: [{ text: 'Environment context' }] }, // Should be preserved
+      { role: 'user', parts: [{ text: 'A'.repeat(400) }] }, // ~100 tokens - should be removed
+      { role: 'model', parts: [{ text: 'B'.repeat(400) }] }, // ~100 tokens - should be removed
+      { role: 'user', parts: [{ text: 'Keep this' }] }, // Should be kept
+    ];
+
+    const result = truncateContentToTokenBudget(contents, 150);
+
+    // First item and last item should be kept
+    expect(result.length).toBeLessThan(contents.length);
+    expect(result[0]?.parts?.[0]?.text).toContain('Environment context');
+  });
+
+  it('preserves first item (environment context) even when over budget', () => {
+    const contents: Content[] = [
+      { role: 'user', parts: [{ text: 'Important environment info' }] },
+      { role: 'user', parts: [{ text: 'A'.repeat(1000) }] },
+    ];
+
+    const result = truncateContentToTokenBudget(contents, 50);
+
+    expect(result.length).toBeGreaterThan(0);
+    expect(result[0]?.parts?.[0]?.text).toContain('Important environment info');
+  });
+
+  it('returns empty array when maxTokens is 0 or negative', () => {
+    const contents: Content[] = [
+      { role: 'user', parts: [{ text: 'Some content' }] },
+    ];
+
+    expect(truncateContentToTokenBudget(contents, 0)).toEqual([]);
+    expect(truncateContentToTokenBudget(contents, -10)).toEqual([]);
+  });
+
+  it('truncates text with ellipsis when still over budget after removing items', () => {
+    const longText = 'A'.repeat(1000);
+    const contents: Content[] = [
+      { role: 'user', parts: [{ text: 'Env' }] },
+      { role: 'user', parts: [{ text: longText }] },
+    ];
+
+    const result = truncateContentToTokenBudget(contents, 50);
+
+    expect(result.length).toBe(2);
+    expect(result[1]?.parts?.[0]?.text).toContain(
+      '... [truncated due to token budget]',
+    );
+  });
+
+  it('handles empty content array', () => {
+    const result = truncateContentToTokenBudget([], 100);
+    expect(result).toEqual([]);
+  });
+
+  it('handles content without parts', () => {
+    const contents: Content[] = [
+      { role: 'user', parts: undefined },
+      { role: 'model', parts: [] },
+    ];
+
+    const result = truncateContentToTokenBudget(contents, 100);
+
+    // Should return a clone (not mutate original)
+    expect(result).not.toBe(contents);
+    expect(result).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({ role: 'user', parts: undefined }),
+        expect.objectContaining({ role: 'model', parts: [] }),
+      ]),
+    );
+  });
 });
diff --git a/packages/core/src/utils/environmentContext.ts b/packages/core/src/utils/environmentContext.ts
index 4f5c03209d..4693e03f35 100644
--- a/packages/core/src/utils/environmentContext.ts
+++ b/packages/core/src/utils/environmentContext.ts
@@ -8,6 +8,121 @@ import type { Content, Part } from '@google/genai';
 import type { Config } from '../config/config.js';
 import { getFolderStructure } from './getFolderStructure.js';
 
+/**
+ * Rough estimate: 1 token ≈ 4 characters for English text.
+ * This is a fast heuristic (not an upper bound) for quick truncation before API calls.
+ */
+const CHARS_PER_TOKEN_ESTIMATE = 4;
+
+/**
+ * Estimates the number of tokens in a string using character count.
+ * This is a fast approximation - use the API's countTokens for accuracy.
+ * @param text - The text to estimate tokens for.
+ * @returns Estimated token count.
+ */
+function estimateTokens(text: string): number {
+  return Math.ceil(text.length / CHARS_PER_TOKEN_ESTIMATE);
+}
+
+/**
+ * Truncates content to fit within an estimated token budget (text parts only).
+ * Greedy: keeps the first item, drops the oldest of the rest, then cuts text.
+ * Rewritten items are cloned first, so the input is never mutated.
+ * @param contents - Array of content items to truncate.
+ * @param maxTokens - Maximum token budget; zero or negative returns an empty array.
+ * @returns Truncated array of content items.
+ */
+export function truncateContentToTokenBudget(
+  contents: Content[],
+  maxTokens: number,
+): Content[] {
+  if (maxTokens <= 0) {
+    return [];
+  }
+
+  if (contents.length === 0) {
+    return [];
+  }
+
+  // Calculate total estimated tokens
+  const totalTokens = contents.reduce((sum, content) => {
+    const text = content.parts?.map((p) => p.text || '').join('') || '';
+    return sum + estimateTokens(text);
+  }, 0);
+
+  // If already under budget, return a shallow clone to avoid mutation
+  if (totalTokens <= maxTokens) {
+    return contents.map((item) => ({ ...item }));
+  }
+
+  // Deep clone the first item to avoid mutating the original
+  const firstItem = contents[0]
+    ? {
+        ...contents[0],
+        parts: contents[0].parts?.map((p) => ({ ...p })),
+      }
+    : undefined;
+  const restItems = contents.slice(1);
+
+  // Calculate first item tokens
+  const firstItemText =
+    firstItem?.parts?.map((p) => p.text || '').join('') || '';
+  const firstItemTokens = estimateTokens(firstItemText);
+
+  // If first item alone exceeds budget, truncate it
+  if (firstItemTokens >= maxTokens && firstItem) {
+    const availableChars = maxTokens * CHARS_PER_TOKEN_ESTIMATE;
+    if (availableChars > 0 && availableChars < firstItemText.length) {
+      firstItem.parts = [
+        {
+          text:
+            firstItemText.slice(0, availableChars) +
+            '... [truncated due to token budget]',
+        },
+      ];
+    }
+    return [firstItem];
+  }
+
+  // Remove items from the beginning of restItems (oldest messages first)
+  // But keep at least one item if possible for truncation
+  let currentTokens = totalTokens;
+  const truncatedItems = [...restItems];
+
+  while (currentTokens > maxTokens && truncatedItems.length > 1) {
+    const removedItem = truncatedItems.shift()!;
+    const removedText =
+      removedItem.parts?.map((p) => p.text || '').join('') || '';
+    currentTokens -= estimateTokens(removedText);
+  }
+
+  // If still over budget with one item remaining, truncate that item
+  if (currentTokens > maxTokens && truncatedItems.length === 1) {
+    // Clone the last item before mutating
+    const lastItem = {
+      ...truncatedItems[0],
+      parts: truncatedItems[0].parts?.map((p) => ({ ...p })),
+    };
+    const lastText = lastItem.parts?.map((p) => p.text || '').join('') || '';
+
+    const availableTokensForLast = maxTokens - firstItemTokens;
+    const availableCharsForLast =
+      availableTokensForLast * CHARS_PER_TOKEN_ESTIMATE;
+
+    if (availableCharsForLast > 0 && availableCharsForLast < lastText.length) {
+      const truncatedText =
+        lastText.slice(0, availableCharsForLast) +
+        '... [truncated due to token budget]';
+      lastItem.parts = [{ text: truncatedText }];
+    }
+
+    // Replace the original with the cloned/truncated version
+    truncatedItems[0] = lastItem;
+  }
+
+  return firstItem ? [firstItem, ...truncatedItems] : truncatedItems;
+}
+
 /**
  * Generates a string describing the current workspace directories and their structures.
  * @param {Config} config - The runtime configuration and services.
@@ -69,10 +184,45 @@ ${directoryContext}
   return [{ text: context }];
 }
 
+/**
+ * Options for configuring initial chat history.
+ */
+export interface GetInitialChatHistoryOptions {
+  /** When true, omits accumulated session history. */
+  useCleanContext?: boolean;
+  /** Optional maximum token budget for context. */
+  maxContextTokens?: number;
+  /** Optional additional history to append. */
+  extraHistory?: Content[];
+}
+
+/**
+ * Retrieves the initial chat history to seed a chat session.
+ * By default, includes environment context plus any accumulated session history.
+ * When useCleanContext is true, only provides fresh environment context without
+ * prior session history - useful for subagents to avoid context bloat.
+ * @param {Config} config - The runtime configuration and services.
+ * @param options - Options object, or (legacy) an array that is used as extraHistory.
+ * @returns A promise that resolves to an array of `Content` objects for chat history.
+ */
 export async function getInitialChatHistory(
   config: Config,
-  extraHistory?: Content[],
+  options?: GetInitialChatHistoryOptions | Content[],
 ): Promise<Content[]> {
+  // Backward compatibility: if options is an array, treat it as extraHistory
+  let useCleanContext = false;
+  let maxContextTokens: number | undefined;
+  let extraHistory: Content[] | undefined;
+
+  if (Array.isArray(options)) {
+    // Legacy call pattern: getInitialChatHistory(config, extraHistory)
+    extraHistory = options;
+  } else if (options) {
+    // New call pattern with options object
+    useCleanContext = options.useCleanContext ?? false;
+    maxContextTokens = options.maxContextTokens;
+    extraHistory = options.extraHistory;
+  }
   if (config.getSkipStartupContext()) {
     return extraHistory ? [...extraHistory] : [];
   }
@@ -80,15 +230,80 @@ export async function getInitialChatHistory(
   const envParts = await getEnvironmentContext(config);
   const envContextString = envParts.map((part) => part.text || '').join('\n\n');
 
-  return [
-    {
-      role: 'user',
-      parts: [{ text: envContextString }],
-    },
-    {
-      role: 'model',
-      parts: [{ text: 'Got it. Thanks for the context!' }],
-    },
-    ...(extraHistory ?? []),
-  ];
+  let history: Content[];
+
+  // When using clean context, skip any accumulated session history
+  if (useCleanContext) {
+    history = [
+      {
+        role: 'user',
+        parts: [{ text: envContextString }],
+      },
+      {
+        role: 'model',
+        parts: [{ text: 'Got it. Thanks for the context!' }],
+      },
+      ...(extraHistory ?? []),
+    ];
+  } else {
+    // Default behavior: include accumulated session history
+    let sessionHistory: Content[] = [];
+    try {
+      sessionHistory = config.getGeminiClient()?.getHistory() ?? [];
+    } catch {
+      // Client not initialized yet - use empty history
+      sessionHistory = [];
+    }
+
+    // Strip the initial environment context + ack from session history
+    // to avoid duplication when we prepend fresh context below.
+    // The session history typically starts with:
+    // [{role: 'user', parts: [{text: envContext}]}, {role: 'model', parts: [{text: ack}]}]
+    // We remove these and keep only the actual conversation.
+    let strippedSessionHistory: Content[] = sessionHistory;
+    if (
+      sessionHistory.length >= 2 &&
+      sessionHistory[0].role === 'user' &&
+      sessionHistory[1].role === 'model'
+    ) {
+      const firstUserText =
+        sessionHistory[0].parts?.map((p) => p.text || '').join('') || '';
+      const firstModelText =
+        sessionHistory[1].parts?.map((p) => p.text || '').join('') || '';
+
+      // Check if this looks like environment context (contains working directory info)
+      if (
+        firstUserText.includes('working in the directory') ||
+        firstUserText.includes('working in the following directories')
+      ) {
+        // Check if model response is the standard ack
+        if (
+          firstModelText.includes('Got it') &&
+          firstModelText.includes('context')
+        ) {
+          strippedSessionHistory = sessionHistory.slice(2);
+        }
+      }
+    }
+
+    history = [
+      {
+        role: 'user',
+        parts: [{ text: envContextString }],
+      },
+      {
+        role: 'model',
+        parts: [{ text: 'Got it. Thanks for the context!' }],
+      },
+      ...strippedSessionHistory,
+      ...(extraHistory ?? []),
+    ];
+  }
+
+  // Apply token budget truncation if specified
+  if (maxContextTokens && maxContextTokens > 0) {
+    history = truncateContentToTokenBudget(history, maxContextTokens);
+  }
+
+  return history;
 }