From 5746424b18dd00ae34edc97a8ed3877952df183d Mon Sep 17 00:00:00 2001
From: galz10 <galzahavi@google.com>
Date: Mon, 20 Apr 2026 16:44:22 -0700
Subject: [PATCH 01/12] feat(core): enhance shell command validation and add
 core tools allowlist

- Implements recursive validation for shell command substitutions ($(...), `...`) and subshells.
- Adds support for settings.tools.core to allow explicit, strict allowlisting of core tools.
- Refactors PolicyEngine.check to handle shell wrappers (e.g., bash -c) and complex chained commands.
- Introduces comprehensive regression tests for shell safety, pipes, and redirection.
- Updates shell-utils to better identify and extract shell structure details using Tree-sitter.
---
 packages/core/src/policy/config.ts            |  52 +++++--
 .../src/policy/core-tools-mapping.test.ts     |  76 +++++++++
 .../core/src/policy/policy-engine.test.ts     |  36 ++++-
 packages/core/src/policy/policy-engine.ts     | 147 ++++++++----------
 .../policy/shell-safety-regression.test.ts    | 134 ++++++++++++++++
 packages/core/src/policy/shell-safety.test.ts |  24 +++
 .../src/policy/shell-substitution.test.ts     |  67 ++++++++
 packages/core/src/policy/types.ts             |   1 +
 packages/core/src/tools/ripGrep.test.ts       |   6 +-
 packages/core/src/utils/shell-utils.ts        |   8 +-
 10 files changed, 458 insertions(+), 93 deletions(-)
 create mode 100644 packages/core/src/policy/core-tools-mapping.test.ts
 create mode 100644 packages/core/src/policy/shell-safety-regression.test.ts
 create mode 100644 packages/core/src/policy/shell-substitution.test.ts

diff --git a/packages/core/src/policy/config.ts b/packages/core/src/policy/config.ts
index 359054add33..d7b06024a69 100644
--- a/packages/core/src/policy/config.ts
+++ b/packages/core/src/policy/config.ts
@@ -75,6 +75,7 @@ export const ADMIN_POLICY_TIER = 5;
 export const MCP_EXCLUDED_PRIORITY = USER_POLICY_TIER + 0.9;
 export const EXCLUDE_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.4;
 export const ALLOWED_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.3;
+export const CORE_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.25;
 export const TRUSTED_MCP_SERVER_PRIORITY = USER_POLICY_TIER + 0.2;
 export const ALLOWED_MCP_SERVER_PRIORITY = USER_POLICY_TIER + 0.1;
 
@@ -434,10 +435,12 @@ export async function createPolicyEngineConfig(
     }
   }
 
-  // Tools that are explicitly allowed in the settings.
-  // Priority: ALLOWED_TOOLS_FLAG_PRIORITY (user tier - explicit temporary allows)
-  if (settings.tools?.allowed) {
-    for (const tool of settings.tools.allowed) {
+  const mapToolsToRules = (
+    tools: string[],
+    priority: number,
+    source: string,
+  ) => {
+    for (const tool of tools) {
       // Check for legacy format: toolName(args)
       const match = tool.match(/^([a-zA-Z0-9_-]+)\((.*)\)$/);
       if (match) {
@@ -455,9 +458,9 @@ export async function createPolicyEngineConfig(
               rules.push({
                 toolName,
                 decision: PolicyDecision.ALLOW,
-                priority: ALLOWED_TOOLS_FLAG_PRIORITY,
+                priority,
                 argsPattern: new RegExp(pattern),
-                source: 'Settings (Tools Allowed)',
+                source,
               });
             }
           }
@@ -467,8 +470,8 @@ export async function createPolicyEngineConfig(
           rules.push({
             toolName,
             decision: PolicyDecision.ALLOW,
-            priority: ALLOWED_TOOLS_FLAG_PRIORITY,
-            source: 'Settings (Tools Allowed)',
+            priority,
+            source,
           });
         }
       } else {
@@ -479,11 +482,40 @@ export async function createPolicyEngineConfig(
         rules.push({
           toolName,
           decision: PolicyDecision.ALLOW,
-          priority: ALLOWED_TOOLS_FLAG_PRIORITY,
-          source: 'Settings (Tools Allowed)',
+          priority,
+          source,
         });
       }
     }
+  };
+
+  // Tools that are explicitly allowed in the settings.
+  // Priority: ALLOWED_TOOLS_FLAG_PRIORITY (user tier - explicit temporary allows)
+  if (settings.tools?.allowed) {
+    mapToolsToRules(
+      settings.tools.allowed,
+      ALLOWED_TOOLS_FLAG_PRIORITY,
+      'Settings (Tools Allowed)',
+    );
+  }
+
+  // Core tools that are restricted in the settings.
+  // Priority: CORE_TOOLS_FLAG_PRIORITY (user tier - core tool allowlist)
+  if (settings.tools?.core) {
+    mapToolsToRules(
+      settings.tools.core,
+      CORE_TOOLS_FLAG_PRIORITY,
+      'Settings (Core Tools)',
+    );
+
+    // If core tools are restricted, we should add a default DENY rule for everything else
+    // at a slightly lower priority than the explicit allows.
+    rules.push({
+      toolName: '*',
+      decision: PolicyDecision.DENY,
+      priority: CORE_TOOLS_FLAG_PRIORITY - 0.01,
+      source: 'Settings (Core Tools Allowlist Enforcement)',
+    });
   }
 
   // MCP servers that are trusted in the settings.
diff --git a/packages/core/src/policy/core-tools-mapping.test.ts b/packages/core/src/policy/core-tools-mapping.test.ts
new file mode 100644
index 00000000000..95877c6ac44
--- /dev/null
+++ b/packages/core/src/policy/core-tools-mapping.test.ts
@@ -0,0 +1,76 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { createPolicyEngineConfig } from './config.js';
+import { PolicyEngine } from './policy-engine.js';
+import { PolicyDecision, ApprovalMode } from './types.js';
+
+describe('PolicyEngine - Core Tools Mapping', () => {
+  it('should allow tools explicitly listed in settings.tools.core', async () => {
+    const settings = {
+      tools: {
+        core: ['run_shell_command(ls)', 'run_shell_command(git status)'],
+      },
+    };
+
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+      undefined,
+      true, // interactive
+    );
+
+    const engine = new PolicyEngine(config);
+
+    // Test simple tool name
+    const result1 = await engine.check(
+      { name: 'run_shell_command', args: { command: 'ls' } },
+      undefined,
+    );
+    expect(result1.decision).toBe(PolicyDecision.ALLOW);
+
+    // Test tool name with args
+    const result2 = await engine.check(
+      { name: 'run_shell_command', args: { command: 'git status' } },
+      undefined,
+    );
+    expect(result2.decision).toBe(PolicyDecision.ALLOW);
+
+    // Test tool not in core list
+    const result3 = await engine.check(
+      { name: 'run_shell_command', args: { command: 'npm test' } },
+      undefined,
+    );
+    // Should be DENIED because of strict allowlist
+    expect(result3.decision).toBe(PolicyDecision.DENY);
+  });
+
+  it('should allow tools in tools.core even if they are restricted by default policies', async () => {
+    // By default run_shell_command is ASK_USER.
+    // Putting it in tools.core should make it ALLOW.
+    const settings = {
+      tools: {
+        core: ['run_shell_command'],
+      },
+    };
+
+    const config = await createPolicyEngineConfig(
+      settings,
+      ApprovalMode.DEFAULT,
+      undefined,
+      true,
+    );
+
+    const engine = new PolicyEngine(config);
+
+    const result = await engine.check(
+      { name: 'run_shell_command', args: { command: 'any command' } },
+      undefined,
+    );
+    expect(result.decision).toBe(PolicyDecision.ALLOW);
+  });
+});
diff --git a/packages/core/src/policy/policy-engine.test.ts b/packages/core/src/policy/policy-engine.test.ts
index 5606c49793f..8604a799616 100644
--- a/packages/core/src/policy/policy-engine.test.ts
+++ b/packages/core/src/policy/policy-engine.test.ts
@@ -43,6 +43,35 @@ vi.mock('../utils/shell-utils.js', async (importOriginal) => {
       }
       return [command];
     }),
+    parseCommandDetails: vi.fn().mockImplementation((command: string) => {
+      // Basic mock implementation for PolicyEngine test needs
+      const commands = command.includes('&&')
+        ? command.split('&&').map((c) => c.trim())
+        : [command.trim()];
+
+      // Detect $(...) or `...` and add as sub-commands for recursion tests
+      const subCommands = [...commands];
+      for (const cmd of commands) {
+        const subMatch = cmd.match(/\$\((.*)\)/) || cmd.match(/`(.*)`/);
+        if (subMatch?.[1]) {
+          subCommands.push(subMatch[1].trim());
+        }
+      }
+
+      return {
+        details: subCommands.map((c, i) => ({
+          name: c.split(' ')[0],
+          text: c,
+          startIndex: i === 0 ? 0 : -1, // Simple root indication
+        })),
+        hasError: false,
+      };
+    }),
+    stripShellWrapper: vi.fn().mockImplementation((command: string) => {
+      // Simple mock for stripping wrappers
+      const match = command.match(/^(?:bash|sh|zsh)\s+-c\s+["'](.*)["']$/i);
+      return match ? match[1] : command;
+    }),
     hasRedirection: vi.fn().mockImplementation(
       (command: string) =>
         // Simple mock: true if '>' is present, unless it looks like "-> arrow"
@@ -1862,7 +1891,6 @@ describe('PolicyEngine', () => {
     });
 
     it('should return ASK_USER in non-YOLO mode if shell command parsing fails', async () => {
-      const { splitCommands } = await import('../utils/shell-utils.js');
       const rules: PolicyRule[] = [
         {
           toolName: 'run_shell_command',
@@ -1877,7 +1905,11 @@ describe('PolicyEngine', () => {
       });
 
       // Simulate parsing failure
-      vi.mocked(splitCommands).mockReturnValueOnce([]);
+      const { parseCommandDetails } = await import('../utils/shell-utils.js');
+      vi.mocked(parseCommandDetails).mockReturnValueOnce({
+        details: [],
+        hasError: true,
+      });
 
       const result = await engine.check(
         { name: 'run_shell_command', args: { command: 'complex command' } },
diff --git a/packages/core/src/policy/policy-engine.ts b/packages/core/src/policy/policy-engine.ts
index a9e049c74d2..7ff27bd91ad 100644
--- a/packages/core/src/policy/policy-engine.ts
+++ b/packages/core/src/policy/policy-engine.ts
@@ -7,8 +7,10 @@
 import { type FunctionCall } from '@google/genai';
 import {
   SHELL_TOOL_NAMES,
+  REDIRECTION_NAMES,
   initializeShellParsers,
-  splitCommands,
+  parseCommandDetails,
+  stripShellWrapper,
   hasRedirection,
   extractStringFromParseEntry,
 } from '../utils/shell-utils.js';
@@ -359,7 +361,8 @@ export class PolicyEngine {
     }
 
     await initializeShellParsers();
-    const subCommands = splitCommands(command);
+    const parsed = parseCommandDetails(command);
+    const subCommands = parsed?.details ?? [];
 
     if (subCommands.length === 0) {
       // If the matched rule says DENY, we should respect it immediately even if parsing fails.
@@ -380,115 +383,103 @@ export class PolicyEngine {
       );
 
       // Parsing logic failed, we can't trust it. Use default decision ASK_USER (or DENY in non-interactive).
-      // We return the rule that matched so the evaluation loop terminates.
       return {
         decision: this.defaultDecision,
         rule,
       };
     }
 
-    // If there are multiple parts, or if we just want to validate the single part against DENY rules
-    if (subCommands.length > 0) {
+    debugLogger.debug(
+      `[PolicyEngine.check] Validating shell command: ${subCommands.length} parts`,
+    );
+
+    if (ruleDecision === PolicyDecision.DENY) {
+      return { decision: PolicyDecision.DENY, rule };
+    }
+
+    // Start optimistically. If all parts are ALLOW, the whole is ALLOW.
+    // We will downgrade if any part is ASK_USER or DENY.
+    let aggregateDecision = PolicyDecision.ALLOW;
+    let responsibleRule: PolicyRule | undefined;
+
+    // Check for redirection on the full command string
+    if (this.shouldDowngradeForRedirection(command, allowRedirection)) {
       debugLogger.debug(
-        `[PolicyEngine.check] Validating shell command: ${subCommands.length} parts`,
+        `[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${command}`,
       );
+      aggregateDecision = PolicyDecision.ASK_USER;
+      responsibleRule = undefined; // Inherent policy
+    }
 
-      if (ruleDecision === PolicyDecision.DENY) {
-        return { decision: PolicyDecision.DENY, rule };
+    for (const detail of subCommands) {
+      if (REDIRECTION_NAMES.has(detail.name)) {
+        continue;
       }
 
-      // Start optimistically. If all parts are ALLOW, the whole is ALLOW.
-      // We will downgrade if any part is ASK_USER or DENY.
-      let aggregateDecision = PolicyDecision.ALLOW;
-      let responsibleRule: PolicyRule | undefined;
+      const subCmd = detail.text.trim();
+      const isAtomic =
+        subCmd === command ||
+        (detail.startIndex === 0 && detail.text.length === command.length);
 
-      // Check for redirection on the full command string
-      if (this.shouldDowngradeForRedirection(command, allowRedirection)) {
-        debugLogger.debug(
-          `[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${command}`,
+      // Recursive check for shell wrappers (bash -c, etc.)
+      const stripped = stripShellWrapper(subCmd);
+      if (stripped !== subCmd) {
+        const wrapperResult = await this.check(
+          { name: toolName, args: { command: stripped, dir_path } },
+          serverName,
+          toolAnnotations,
+          subagent,
+          true,
         );
-        aggregateDecision = PolicyDecision.ASK_USER;
-        responsibleRule = undefined; // Inherent policy
-      }
 
-      for (const rawSubCmd of subCommands) {
-        const subCmd = rawSubCmd.trim();
-        // Prevent infinite recursion for the root command
-        if (subCmd === command) {
-          if (this.shouldDowngradeForRedirection(subCmd, allowRedirection)) {
-            debugLogger.debug(
-              `[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${subCmd}`,
-            );
-            // Redirection always downgrades ALLOW to ASK_USER
-            if (aggregateDecision === PolicyDecision.ALLOW) {
-              aggregateDecision = PolicyDecision.ASK_USER;
-              responsibleRule = undefined; // Inherent policy
-            }
-          } else {
-            // Atomic command matching the rule.
-            if (
-              ruleDecision === PolicyDecision.ASK_USER &&
-              aggregateDecision === PolicyDecision.ALLOW
-            ) {
-              aggregateDecision = PolicyDecision.ASK_USER;
-              responsibleRule = rule;
-            }
-          }
-          continue;
+        if (wrapperResult.decision === PolicyDecision.DENY)
+          return wrapperResult;
+        if (wrapperResult.decision === PolicyDecision.ASK_USER) {
+          aggregateDecision = PolicyDecision.ASK_USER;
+          responsibleRule ??= wrapperResult.rule;
         }
+      }
 
+      if (isAtomic) {
+        if (
+          ruleDecision === PolicyDecision.ASK_USER &&
+          aggregateDecision === PolicyDecision.ALLOW
+        ) {
+          aggregateDecision = PolicyDecision.ASK_USER;
+          responsibleRule = rule;
+        }
+      } else {
         const subResult = await this.check(
           { name: toolName, args: { command: subCmd, dir_path } },
           serverName,
           toolAnnotations,
           subagent,
+          true,
         );
 
-        // subResult.decision is already filtered through applyNonInteractiveMode by this.check()
-        const subDecision = subResult.decision;
-
-        // If any part is DENIED, the whole command is DENY
-        if (subDecision === PolicyDecision.DENY) {
-          return {
-            decision: PolicyDecision.DENY,
-            rule: subResult.rule,
-          };
-        }
+        if (subResult.decision === PolicyDecision.DENY) return subResult;
 
-        // If any part requires ASK_USER, the whole command requires ASK_USER
-        if (subDecision === PolicyDecision.ASK_USER) {
+        if (subResult.decision === PolicyDecision.ASK_USER) {
           aggregateDecision = PolicyDecision.ASK_USER;
-          if (!responsibleRule) {
-            responsibleRule = subResult.rule;
-          }
+          responsibleRule ??= subResult.rule;
         }
 
-        // Check for redirection in allowed sub-commands
+        // Downgrade if sub-command has redirection
         if (
-          subDecision === PolicyDecision.ALLOW &&
+          subResult.decision === PolicyDecision.ALLOW &&
           this.shouldDowngradeForRedirection(subCmd, allowRedirection)
         ) {
-          debugLogger.debug(
-            `[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${subCmd}`,
-          );
           if (aggregateDecision === PolicyDecision.ALLOW) {
             aggregateDecision = PolicyDecision.ASK_USER;
             responsibleRule = undefined;
           }
         }
       }
-
-      return {
-        decision: aggregateDecision,
-        // If we stayed at ALLOW, we return the original rule (if any).
-        // If we downgraded, we return the responsible rule (or undefined if implicit).
-        rule: aggregateDecision === ruleDecision ? rule : responsibleRule,
-      };
     }
 
     return {
-      decision: ruleDecision,
-      rule,
+      decision: aggregateDecision,
+      rule: aggregateDecision === ruleDecision ? rule : responsibleRule,
     };
   }
 
@@ -501,6 +492,7 @@ export class PolicyEngine {
     serverName: string | undefined,
     toolAnnotations?: Record<string, unknown>,
     subagent?: string,
+    skipHeuristics = false,
   ): Promise<CheckResult> {
     // Case 1: Metadata injection is the primary and safest way to identify an MCP server.
     // If we have explicit `_serverName` metadata (usually injected by tool-registry for active tools), use it.
@@ -594,6 +586,7 @@ export class PolicyEngine {
 
         let ruleDecision = rule.decision;
         if (
+          !skipHeuristics &&
           isShellCommand &&
           command &&
           !('commandPrefix' in rule) &&
@@ -615,12 +608,10 @@ export class PolicyEngine {
             subagent,
           );
           decision = shellResult.decision;
-          if (shellResult.rule) {
-            matchedRule = shellResult.rule;
-            break;
-          }
+          matchedRule = shellResult.rule;
+          break;
         } else {
-          decision = rule.decision;
+          decision = ruleDecision;
           matchedRule = rule;
           break;
         }
@@ -643,7 +634,7 @@ export class PolicyEngine {
       );
       if (toolName && SHELL_TOOL_NAMES.includes(toolName)) {
         let heuristicDecision = this.defaultDecision;
-        if (command) {
+        if (!skipHeuristics && command) {
           heuristicDecision = await this.applyShellHeuristics(
             command,
             heuristicDecision,
diff --git a/packages/core/src/policy/shell-safety-regression.test.ts b/packages/core/src/policy/shell-safety-regression.test.ts
new file mode 100644
index 00000000000..1a1d6089590
--- /dev/null
+++ b/packages/core/src/policy/shell-safety-regression.test.ts
@@ -0,0 +1,134 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, beforeAll } from 'vitest';
+import { PolicyEngine } from './policy-engine.js';
+import { PolicyDecision, ApprovalMode } from './types.js';
+import { initializeShellParsers } from '../utils/shell-utils.js';
+import { buildArgsPatterns } from './utils.js';
+
+describe('PolicyEngine - Shell Safety Regression Suite', () => {
+  let engine: PolicyEngine;
+
+  beforeAll(async () => {
+    await initializeShellParsers();
+  });
+
+  const setupEngine = (allowedCommands: string[]) => {
+    const rules = allowedCommands.map((cmd) => ({
+      toolName: 'run_shell_command',
+      decision: PolicyDecision.ALLOW,
+      argsPattern: new RegExp(buildArgsPatterns(undefined, cmd)[0]!),
+      priority: 10,
+    }));
+
+    return new PolicyEngine({
+      rules,
+      approvalMode: ApprovalMode.DEFAULT,
+      defaultDecision: PolicyDecision.ASK_USER,
+    });
+  };
+
+  it('should block unauthorized chained command with &&', async () => {
+    engine = setupEngine(['echo']);
+    const result = await engine.check(
+      { name: 'run_shell_command', args: { command: 'echo hi && ls' } },
+      undefined,
+    );
+    expect(result.decision).toBe(PolicyDecision.ASK_USER);
+  });
+
+  it('should allow authorized chained command with &&', async () => {
+    engine = setupEngine(['echo', 'ls']);
+    const result = await engine.check(
+      { name: 'run_shell_command', args: { command: 'echo hi && ls' } },
+      undefined,
+    );
+    expect(result.decision).toBe(PolicyDecision.ALLOW);
+  });
+
+  it('should block unauthorized chained command with ||', async () => {
+    engine = setupEngine(['false']);
+    const result = await engine.check(
+      { name: 'run_shell_command', args: { command: 'false || ls' } },
+      undefined,
+    );
+    expect(result.decision).toBe(PolicyDecision.ASK_USER);
+  });
+
+  it('should block unauthorized chained command with ;', async () => {
+    engine = setupEngine(['echo']);
+    const result = await engine.check(
+      { name: 'run_shell_command', args: { command: 'echo hi; ls' } },
+      undefined,
+    );
+    expect(result.decision).toBe(PolicyDecision.ASK_USER);
+  });
+
+  it('should block unauthorized command in pipe |', async () => {
+    engine = setupEngine(['echo']);
+    const result = await engine.check(
+      { name: 'run_shell_command', args: { command: 'echo hi | grep "hi"' } },
+      undefined,
+    );
+    expect(result.decision).toBe(PolicyDecision.ASK_USER);
+  });
+
+  it('should allow authorized command in pipe |', async () => {
+    engine = setupEngine(['echo', 'grep']);
+    const result = await engine.check(
+      { name: 'run_shell_command', args: { command: 'echo hi | grep "hi"' } },
+      undefined,
+    );
+    expect(result.decision).toBe(PolicyDecision.ALLOW);
+  });
+
+  it('should block unauthorized chained command with &', async () => {
+    engine = setupEngine(['echo']);
+    const result = await engine.check(
+      { name: 'run_shell_command', args: { command: 'echo hi & ls' } },
+      undefined,
+    );
+    expect(result.decision).toBe(PolicyDecision.ASK_USER);
+  });
+
+  it('should allow authorized chained command with &', async () => {
+    engine = setupEngine(['echo', 'ls']);
+    const result = await engine.check(
+      { name: 'run_shell_command', args: { command: 'echo hi & ls' } },
+      undefined,
+    );
+    expect(result.decision).toBe(PolicyDecision.ALLOW);
+  });
+
+  it('should block unauthorized command in nested substitution', async () => {
+    engine = setupEngine(['echo', 'cat']);
+    const result = await engine.check(
+      { name: 'run_shell_command', args: { command: 'echo $(cat $(ls))' } },
+      undefined,
+    );
+    expect(result.decision).toBe(PolicyDecision.ASK_USER);
+  });
+
+  it('should allow authorized command in nested substitution', async () => {
+    engine = setupEngine(['echo', 'cat', 'ls']);
+    const result = await engine.check(
+      { name: 'run_shell_command', args: { command: 'echo $(cat $(ls))' } },
+      undefined,
+    );
+    expect(result.decision).toBe(PolicyDecision.ALLOW);
+  });
+
+  it('should block command redirection if not explicitly allowed', async () => {
+    engine = setupEngine(['echo']);
+    const result = await engine.check(
+      { name: 'run_shell_command', args: { command: 'echo hi > /tmp/test' } },
+      undefined,
+    );
+    // Inherent policy: redirection downgrades to ASK_USER
+    expect(result.decision).toBe(PolicyDecision.ASK_USER);
+  });
+});
diff --git a/packages/core/src/policy/shell-safety.test.ts b/packages/core/src/policy/shell-safety.test.ts
index 340264485ec..51d3d26294a 100644
--- a/packages/core/src/policy/shell-safety.test.ts
+++ b/packages/core/src/policy/shell-safety.test.ts
@@ -59,6 +59,30 @@ vi.mock('../utils/shell-utils.js', async (importOriginal) => {
   return {
     ...actual,
     initializeShellParsers: vi.fn(),
+    parseCommandDetails: (command: string) => {
+      if (Object.prototype.hasOwnProperty.call(commandMap, command)) {
+        const subcommands = commandMap[command];
+        return {
+          details: subcommands.map((text) => ({
+            name: text.split(' ')[0],
+            text,
+            startIndex: command.indexOf(text),
+          })),
+          hasError: subcommands.length === 0 && command.includes('&&&'),
+        };
+      }
+      return {
+        details: [
+          {
+            name: command.split(' ')[0],
+            text: command,
+            startIndex: 0,
+          },
+        ],
+        hasError: false,
+      };
+    },
+    stripShellWrapper: (command: string) => command,
     splitCommands: (command: string) => {
       if (Object.prototype.hasOwnProperty.call(commandMap, command)) {
         return commandMap[command];
diff --git a/packages/core/src/policy/shell-substitution.test.ts b/packages/core/src/policy/shell-substitution.test.ts
new file mode 100644
index 00000000000..d858b517b78
--- /dev/null
+++ b/packages/core/src/policy/shell-substitution.test.ts
@@ -0,0 +1,67 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { expect, describe, it, beforeAll } from 'vitest';
+import { PolicyEngine } from './policy-engine.js';
+import { PolicyDecision } from './types.js';
+import { initializeShellParsers } from '../utils/shell-utils.js';
+
+describe('PolicyEngine Command Substitution Validation', () => {
+  beforeAll(async () => {
+    await initializeShellParsers();
+  });
+
+  const setupEngine = (blockedCmd: string) =>
+    new PolicyEngine({
+      defaultDecision: PolicyDecision.ALLOW,
+      rules: [
+        {
+          toolName: 'run_shell_command',
+          argsPattern: new RegExp(`"command":"${blockedCmd}"`),
+          decision: PolicyDecision.DENY,
+        },
+      ],
+    });
+
+  it('should block echo $(dangerous_cmd) when dangerous_cmd is explicitly blocked', async () => {
+    const engine = setupEngine('dangerous_cmd');
+    const result = await engine.check(
+      { name: 'run_shell_command', args: { command: 'echo $(dangerous_cmd)' } },
+      'test-server',
+    );
+    expect(result.decision).toBe(PolicyDecision.DENY);
+  });
+
+  it('should block backtick substitution `dangerous_cmd`', async () => {
+    const engine = setupEngine('dangerous_cmd');
+    const result = await engine.check(
+      { name: 'run_shell_command', args: { command: 'echo `dangerous_cmd`' } },
+      'test-server',
+    );
+    expect(result.decision).toBe(PolicyDecision.DENY);
+  });
+
+  it('should block commands inside subshells (dangerous_cmd)', async () => {
+    const engine = setupEngine('dangerous_cmd');
+    const result = await engine.check(
+      { name: 'run_shell_command', args: { command: '(dangerous_cmd)' } },
+      'test-server',
+    );
+    expect(result.decision).toBe(PolicyDecision.DENY);
+  });
+
+  it('should handle nested substitutions deeply', async () => {
+    const engine = setupEngine('deep_danger');
+    const result = await engine.check(
+      {
+        name: 'run_shell_command',
+        args: { command: 'echo $(ls $(deep_danger))' },
+      },
+      'test-server',
+    );
+    expect(result.decision).toBe(PolicyDecision.DENY);
+  });
+});
diff --git a/packages/core/src/policy/types.ts b/packages/core/src/policy/types.ts
index b843129c99a..aa5275865c9 100644
--- a/packages/core/src/policy/types.ts
+++ b/packages/core/src/policy/types.ts
@@ -335,6 +335,7 @@ export interface PolicySettings {
     allowed?: string[];
   };
   tools?: {
+    core?: string[];
     exclude?: string[];
     allowed?: string[];
   };
diff --git a/packages/core/src/tools/ripGrep.test.ts b/packages/core/src/tools/ripGrep.test.ts
index 9ad575833a7..bd3cd211899 100644
--- a/packages/core/src/tools/ripGrep.test.ts
+++ b/packages/core/src/tools/ripGrep.test.ts
@@ -1993,13 +1993,15 @@ describe('getRipgrepPath', () => {
       vi.mocked(fileExists).mockImplementation(
         async (checkPath) =>
           checkPath.includes(path.normalize('core/vendor/ripgrep')) &&
-          !checkPath.includes('tools'),
+          !checkPath.includes(path.join(path.sep, 'tools', path.sep)),
       );
 
       const resolvedPath = await getRipgrepPath();
       expect(resolvedPath).not.toBeNull();
       expect(resolvedPath).toContain(path.normalize('core/vendor/ripgrep'));
-      expect(resolvedPath).not.toContain('tools');
+      expect(resolvedPath).not.toContain(
+        path.join(path.sep, 'tools', path.sep),
+      );
     });
 
     it('should return null if binary is missing from both paths', async () => {
diff --git a/packages/core/src/utils/shell-utils.ts b/packages/core/src/utils/shell-utils.ts
index 46cffa1d35c..9aaa07ecd95 100644
--- a/packages/core/src/utils/shell-utils.ts
+++ b/packages/core/src/utils/shell-utils.ts
@@ -240,11 +240,13 @@ foreach ($commandAst in $commandAsts) {
   'utf16le',
 ).toString('base64');
 
-const REDIRECTION_NAMES = new Set([
+export const REDIRECTION_NAMES = new Set([
   'redirection (<)',
   'redirection (>)',
   'heredoc (<<)',
   'herestring (<<<)',
+  'command substitution',
+  'subshell',
 ]);
 
 function createParser(): Parser | null {
@@ -360,6 +362,10 @@ function extractNameFromNode(node: Node): string | null {
       return 'heredoc (<<)';
     case 'herestring_redirect':
       return 'herestring (<<<)';
+    case 'command_substitution':
+      return 'command substitution';
+    case 'subshell':
+      return 'subshell';
     default:
       return null;
   }

From 8c7b8373d4738765e0cf5c241aa2710d15919ef2 Mon Sep 17 00:00:00 2001
From: davidapierce <davidapierce@google.com>
Date: Tue, 21 Apr 2026 20:13:06 +0000
Subject: [PATCH 02/12] addressed the failure in shell-substitution.test.ts

---
 packages/core/src/utils/shell-utils.ts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/packages/core/src/utils/shell-utils.ts b/packages/core/src/utils/shell-utils.ts
index 9aaa07ecd95..0e3605e860e 100644
--- a/packages/core/src/utils/shell-utils.ts
+++ b/packages/core/src/utils/shell-utils.ts
@@ -246,6 +246,8 @@ export const REDIRECTION_NAMES = new Set([
   'heredoc (<<)',
   'herestring (<<<)',
   'command substitution',
+  'backtick substitution',
+  'process substitution',
   'subshell',
 ]);
 
@@ -363,7 +365,10 @@ function extractNameFromNode(node: Node): string | null {
     case 'herestring_redirect':
       return 'herestring (<<<)';
     case 'command_substitution':
+    case 'backtick_substitution':
       return 'command substitution';
+    case 'process_substitution':
+      return 'process substitution';
     case 'subshell':
       return 'subshell';
     default:

From ffd9f9fa15b0df65a48fa6e668a8ac132300b272 Mon Sep 17 00:00:00 2001
From: davidapierce <davidapierce@google.com>
Date: Tue, 21 Apr 2026 21:33:27 +0000
Subject: [PATCH 03/12] test(core): mock getShellConfiguration in shell
 substitution tests

---
 .../core/src/policy/shell-substitution.test.ts  | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/packages/core/src/policy/shell-substitution.test.ts b/packages/core/src/policy/shell-substitution.test.ts
index d858b517b78..f42e30d2079 100644
--- a/packages/core/src/policy/shell-substitution.test.ts
+++ b/packages/core/src/policy/shell-substitution.test.ts
@@ -4,11 +4,26 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { expect, describe, it, beforeAll } from 'vitest';
+import { expect, describe, it, beforeAll, vi } from 'vitest';
 import { PolicyEngine } from './policy-engine.js';
 import { PolicyDecision } from './types.js';
 import { initializeShellParsers } from '../utils/shell-utils.js';
 
+// Mock shell-utils to ensure consistent behavior across platforms (especially Windows CI)
+// We want to test PolicyEngine logic with Bash syntax rules.
+vi.mock('../utils/shell-utils.js', async (importOriginal) => {
+  const actual =
+    await importOriginal<typeof import('../utils/shell-utils.js')>();
+  return {
+    ...actual,
+    getShellConfiguration: () => ({
+      executable: 'bash',
+      argsPrefix: ['-c'],
+      shell: 'bash',
+    }),
+  };
+});
+
 describe('PolicyEngine Command Substitution Validation', () => {
   beforeAll(async () => {
     await initializeShellParsers();

From 579ebe08506ca64a29283299d9d87eb333237122 Mon Sep 17 00:00:00 2001
From: davidapierce <davidapierce@google.com>
Date: Wed, 22 Apr 2026 15:39:33 +0000
Subject: [PATCH 04/12] fix failing unit test for windows.

---
 .../core/src/policy/shell-substitution.test.ts    | 15 +++++++++++++++
 packages/core/src/utils/shell-utils.ts            |  3 ++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/packages/core/src/policy/shell-substitution.test.ts b/packages/core/src/policy/shell-substitution.test.ts
index f42e30d2079..cc28847233c 100644
--- a/packages/core/src/policy/shell-substitution.test.ts
+++ b/packages/core/src/policy/shell-substitution.test.ts
@@ -9,6 +9,21 @@ import { PolicyEngine } from './policy-engine.js';
 import { PolicyDecision } from './types.js';
 import { initializeShellParsers } from '../utils/shell-utils.js';
 
+// Mock node:os to ensure shell-utils logic always thinks it's on a POSIX-like system.
+// This ensures that internal calls to getShellConfiguration() and isWindows()
+// within the shell-utils module return 'bash' configuration, even on Windows CI.
+vi.mock('node:os', async (importOriginal) => {
+  const actual = await importOriginal<typeof import('node:os')>();
+  return {
+    ...actual,
+    default: {
+      ...actual,
+      platform: () => 'linux',
+    },
+    platform: () => 'linux',
+  };
+});
+
 // Mock shell-utils to ensure consistent behavior across platforms (especially Windows CI)
 // We want to test PolicyEngine logic with Bash syntax rules.
 vi.mock('../utils/shell-utils.js', async (importOriginal) => {
diff --git a/packages/core/src/utils/shell-utils.ts b/packages/core/src/utils/shell-utils.ts
index 0e3605e860e..d2b767788a5 100644
--- a/packages/core/src/utils/shell-utils.ts
+++ b/packages/core/src/utils/shell-utils.ts
@@ -365,8 +365,9 @@ function extractNameFromNode(node: Node): string | null {
     case 'herestring_redirect':
       return 'herestring (<<<)';
     case 'command_substitution':
-    case 'backtick_substitution':
       return 'command substitution';
+    case 'backtick_substitution':
+      return 'backtick substitution';
     case 'process_substitution':
       return 'process substitution';
     case 'subshell':

From add751ea25a247ca752551d8fe7b0a3ee74cb556 Mon Sep 17 00:00:00 2001
From: Keith Schaab <keithsc@google.com>
Date: Thu, 23 Apr 2026 01:55:22 +0000
Subject: [PATCH 05/12] Modifying strategy around planning modes and fixing ask
 handling

---
 .../__snapshots__/InputPrompt.test.tsx.snap   |  7 +++
 packages/core/src/policy/config.ts            | 15 ++++++
 packages/core/src/policy/policy-engine.ts     | 52 +++++++++++--------
 3 files changed, 51 insertions(+), 23 deletions(-)

diff --git a/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap
index ab6fe9b9282..26577749ac1 100644
--- a/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap
+++ b/packages/cli/src/ui/components/__snapshots__/InputPrompt.test.tsx.snap
@@ -168,6 +168,13 @@ exports[`InputPrompt > mouse interaction > should toggle paste expansion on doub
 "
 `;
 
+exports[`InputPrompt > mouse interaction > should toggle paste expansion on double-click 4`] = `
+"▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀
+ > [Pasted Text: 10 lines]                                                                          
+▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
+"
+`;
+
 exports[`InputPrompt > multiline rendering > should correctly render multiline input including blank lines 1`] = `
 "────────────────────────────────────────────────────────────────────────────────────────────────────
 │ > hello                                                                                          │
diff --git a/packages/core/src/policy/config.ts b/packages/core/src/policy/config.ts
index d7b06024a69..fe0184b298c 100644
--- a/packages/core/src/policy/config.ts
+++ b/packages/core/src/policy/config.ts
@@ -435,10 +435,17 @@ export async function createPolicyEngineConfig(
     }
   }
 
+  const nonPlanModes = [
+    ApprovalMode.DEFAULT,
+    ApprovalMode.AUTO_EDIT,
+    ApprovalMode.YOLO,
+  ];
+
   const mapToolsToRules = (
     tools: string[],
     priority: number,
     source: string,
+    modes?: ApprovalMode[],
   ) => {
     for (const tool of tools) {
       // Check for legacy format: toolName(args)
@@ -461,6 +468,7 @@ export async function createPolicyEngineConfig(
                 priority,
                 argsPattern: new RegExp(pattern),
                 source,
+                modes,
               });
             }
           }
@@ -472,6 +480,7 @@ export async function createPolicyEngineConfig(
             decision: PolicyDecision.ALLOW,
             priority,
             source,
+            modes,
           });
         }
       } else {
@@ -484,6 +493,7 @@ export async function createPolicyEngineConfig(
           decision: PolicyDecision.ALLOW,
           priority,
           source,
+          modes,
         });
       }
     }
@@ -496,6 +506,7 @@ export async function createPolicyEngineConfig(
       settings.tools.allowed,
       ALLOWED_TOOLS_FLAG_PRIORITY,
       'Settings (Tools Allowed)',
+      nonPlanModes,
     );
   }
 
@@ -506,6 +517,7 @@ export async function createPolicyEngineConfig(
       settings.tools.core,
       CORE_TOOLS_FLAG_PRIORITY,
       'Settings (Core Tools)',
+      nonPlanModes,
     );
 
     // If core tools are restricted, we should add a default DENY rule for everything else
@@ -515,6 +527,7 @@ export async function createPolicyEngineConfig(
       decision: PolicyDecision.DENY,
       priority: CORE_TOOLS_FLAG_PRIORITY - 0.01,
       source: 'Settings (Core Tools Allowlist Enforcement)',
+      modes: nonPlanModes,
     });
   }
 
@@ -533,6 +546,7 @@ export async function createPolicyEngineConfig(
           decision: PolicyDecision.ALLOW,
           priority: TRUSTED_MCP_SERVER_PRIORITY,
           source: 'Settings (MCP Trusted)',
+          modes: nonPlanModes,
         });
       }
     }
@@ -551,6 +565,7 @@ export async function createPolicyEngineConfig(
         decision: PolicyDecision.ALLOW,
         priority: ALLOWED_MCP_SERVER_PRIORITY,
         source: 'Settings (MCP Allowed)',
+        modes: nonPlanModes,
       });
     }
   }
diff --git a/packages/core/src/policy/policy-engine.ts b/packages/core/src/policy/policy-engine.ts
index 7ff27bd91ad..84109545025 100644
--- a/packages/core/src/policy/policy-engine.ts
+++ b/packages/core/src/policy/policy-engine.ts
@@ -397,18 +397,24 @@ export class PolicyEngine {
       return { decision: PolicyDecision.DENY, rule };
     }
 
-    // Start optimistically. If all parts are ALLOW, the whole is ALLOW.
-    // We will downgrade if any part is ASK_USER or DENY.
-    let aggregateDecision = PolicyDecision.ALLOW;
-    let responsibleRule: PolicyRule | undefined;
+    // Start with the decision from the rule or heuristics.
+    // If the tool call was already downgraded (e.g. by heuristics), we start there.
+    let aggregateDecision = ruleDecision;
 
-    // Check for redirection on the full command string
+    // If heuristics downgraded the decision, we don't blame the rule.
+    let responsibleRule: PolicyRule | undefined =
+      rule && ruleDecision === rule.decision ? rule : undefined;
+
+    // Check for redirection on the full command string.
+    // Redirection always downgrades ALLOW to ASK_USER (it never upgrades).
     if (this.shouldDowngradeForRedirection(command, allowRedirection)) {
-      debugLogger.debug(
-        `[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${command}`,
-      );
-      aggregateDecision = PolicyDecision.ASK_USER;
-      responsibleRule = undefined; // Inherent policy
+      if (aggregateDecision === PolicyDecision.ALLOW) {
+        debugLogger.debug(
+          `[PolicyEngine.check] Downgrading ALLOW to ASK_USER for redirected command: ${command}`,
+        );
+        aggregateDecision = PolicyDecision.ASK_USER;
+        responsibleRule = undefined; // Inherent policy
+      }
     }
 
     for (const detail of subCommands) {
@@ -435,20 +441,16 @@ export class PolicyEngine {
         if (wrapperResult.decision === PolicyDecision.DENY)
           return wrapperResult;
         if (wrapperResult.decision === PolicyDecision.ASK_USER) {
-          aggregateDecision = PolicyDecision.ASK_USER;
-          responsibleRule ??= wrapperResult.rule;
+          if (aggregateDecision === PolicyDecision.ALLOW) {
+            aggregateDecision = PolicyDecision.ASK_USER;
+            responsibleRule = wrapperResult.rule;
+          } else {
+            responsibleRule ??= wrapperResult.rule;
+          }
         }
       }
 
-      if (isAtomic) {
-        if (
-          ruleDecision === PolicyDecision.ASK_USER &&
-          aggregateDecision === PolicyDecision.ALLOW
-        ) {
-          aggregateDecision = PolicyDecision.ASK_USER;
-          responsibleRule = rule;
-        }
-      } else {
+      if (!isAtomic) {
         const subResult = await this.check(
           { name: toolName, args: { command: subCmd, dir_path } },
           serverName,
@@ -460,8 +462,12 @@ export class PolicyEngine {
         if (subResult.decision === PolicyDecision.DENY) return subResult;
 
         if (subResult.decision === PolicyDecision.ASK_USER) {
-          aggregateDecision = PolicyDecision.ASK_USER;
-          responsibleRule ??= subResult.rule;
+          if (aggregateDecision === PolicyDecision.ALLOW) {
+            aggregateDecision = PolicyDecision.ASK_USER;
+            responsibleRule = subResult.rule;
+          } else {
+            responsibleRule ??= subResult.rule;
+          }
         }
 
         // Downgrade if sub-command has redirection

From 5331853d36c587b6fd65501f121d17fe87aeff73 Mon Sep 17 00:00:00 2001
From: davidapierce <davidapierce@google.com>
Date: Thu, 23 Apr 2026 16:06:09 +0000
Subject: [PATCH 06/12] Add a deny policy for tool calls looser than explicetly
 allowed tool calls.

---
 packages/core/src/policy/config.ts | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/packages/core/src/policy/config.ts b/packages/core/src/policy/config.ts
index fe0184b298c..a980b250e48 100644
--- a/packages/core/src/policy/config.ts
+++ b/packages/core/src/policy/config.ts
@@ -446,7 +446,9 @@ export async function createPolicyEngineConfig(
     priority: number,
     source: string,
     modes?: ApprovalMode[],
+    addDefaultDenyForTools = false,
   ) => {
+    const toolsWithNarrowing = new Set<string>();
     for (const tool of tools) {
       // Check for legacy format: toolName(args)
       const match = tool.match(/^([a-zA-Z0-9_-]+)\((.*)\)$/);
@@ -459,6 +461,7 @@ export async function createPolicyEngineConfig(
 
         // Treat args as a command prefix for shell tool
         if (toolName === SHELL_TOOL_NAME) {
+          toolsWithNarrowing.add(toolName);
           const patterns = buildArgsPatterns(undefined, args);
           for (const pattern of patterns) {
             if (pattern) {
@@ -497,6 +500,18 @@ export async function createPolicyEngineConfig(
         });
       }
     }
+
+    if (addDefaultDenyForTools) {
+      for (const toolName of toolsWithNarrowing) {
+        rules.push({
+          toolName,
+          decision: PolicyDecision.DENY,
+          priority: priority - 0.01,
+          source: `${source} (Narrowing Enforcement)`,
+          modes,
+        });
+      }
+    }
   };
 
   // Tools that are explicitly allowed in the settings.
@@ -506,7 +521,8 @@ export async function createPolicyEngineConfig(
       settings.tools.allowed,
       ALLOWED_TOOLS_FLAG_PRIORITY,
       'Settings (Tools Allowed)',
-      nonPlanModes,
+      undefined,
+      true,
     );
   }
 

From 1734a03f2abbcc0520d96a32670317d3e70338c5 Mon Sep 17 00:00:00 2001
From: davidapierce <davidapierce@google.com>
Date: Thu, 23 Apr 2026 17:08:27 +0000
Subject: [PATCH 07/12] set aggregateDecision to ASK_USER if any result
 dictates asking the user.

---
 packages/core/src/policy/policy-engine.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/core/src/policy/policy-engine.ts b/packages/core/src/policy/policy-engine.ts
index 84109545025..e0e3c61215c 100644
--- a/packages/core/src/policy/policy-engine.ts
+++ b/packages/core/src/policy/policy-engine.ts
@@ -442,11 +442,11 @@ export class PolicyEngine {
           return wrapperResult;
         if (wrapperResult.decision === PolicyDecision.ASK_USER) {
           if (aggregateDecision === PolicyDecision.ALLOW) {
-            aggregateDecision = PolicyDecision.ASK_USER;
             responsibleRule = wrapperResult.rule;
           } else {
             responsibleRule ??= wrapperResult.rule;
           }
+          aggregateDecision = PolicyDecision.ASK_USER;
         }
       }
 
@@ -463,11 +463,11 @@ export class PolicyEngine {
 
         if (subResult.decision === PolicyDecision.ASK_USER) {
           if (aggregateDecision === PolicyDecision.ALLOW) {
-            aggregateDecision = PolicyDecision.ASK_USER;
             responsibleRule = subResult.rule;
           } else {
             responsibleRule ??= subResult.rule;
           }
+          aggregateDecision = PolicyDecision.ASK_USER;
         }
 
         // Downgrade if sub-command has redirection

From ad56ab5069c238d03f15745f22685c2c172bb785 Mon Sep 17 00:00:00 2001
From: davidapierce <davidapierce@google.com>
Date: Thu, 23 Apr 2026 17:46:29 +0000
Subject: [PATCH 08/12] respect confirmation required setting when set by users
 in updated policy config

---
 packages/cli/src/config/settingsSchema.ts | 13 +++++++++++++
 packages/core/src/policy/config.ts        | 14 ++++++++++++++
 packages/core/src/policy/types.ts         |  1 +
 3 files changed, 28 insertions(+)

diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts
index f5da86b60af..05d4cfae7f2 100644
--- a/packages/cli/src/config/settingsSchema.ts
+++ b/packages/cli/src/config/settingsSchema.ts
@@ -1667,6 +1667,19 @@ const SETTINGS_SCHEMA = {
         showInDialog: false,
         items: { type: 'string' },
       },
+      confirmationRequired: {
+        type: 'array',
+        label: 'Confirmation Required',
+        category: 'Advanced',
+        requiresRestart: true,
+        default: undefined as string[] | undefined,
+        description: oneLine`
+          Tool names that always require user confirmation.
+          Takes precedence over allowed tools and core tool allowlists.
+        `,
+        showInDialog: false,
+        items: { type: 'string' },
+      },
       exclude: {
         type: 'array',
         label: 'Exclude Tools',
diff --git a/packages/core/src/policy/config.ts b/packages/core/src/policy/config.ts
index a980b250e48..6d978479cb9 100644
--- a/packages/core/src/policy/config.ts
+++ b/packages/core/src/policy/config.ts
@@ -74,6 +74,7 @@ export const ADMIN_POLICY_TIER = 5;
 
 export const MCP_EXCLUDED_PRIORITY = USER_POLICY_TIER + 0.9;
 export const EXCLUDE_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.4;
+export const CONFIRMATION_REQUIRED_PRIORITY = USER_POLICY_TIER + 0.35;
 export const ALLOWED_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.3;
 export const CORE_TOOLS_FLAG_PRIORITY = USER_POLICY_TIER + 0.25;
 export const TRUSTED_MCP_SERVER_PRIORITY = USER_POLICY_TIER + 0.2;
@@ -526,6 +527,19 @@ export async function createPolicyEngineConfig(
     );
   }
 
+  // Tools that explicitly require confirmation in the settings.
+  // Priority: CONFIRMATION_REQUIRED_PRIORITY (overrides allowed and core)
+  if (settings.tools?.confirmationRequired) {
+    for (const tool of settings.tools.confirmationRequired) {
+      rules.push({
+        toolName: SHELL_TOOL_NAMES.includes(tool) ? SHELL_TOOL_NAME : tool,
+        decision: PolicyDecision.ASK_USER,
+        priority: CONFIRMATION_REQUIRED_PRIORITY,
+        source: 'Settings (Confirmation Required)',
+      });
+    }
+  }
+
   // Core tools that are restricted in the settings.
   // Priority: CORE_TOOLS_FLAG_PRIORITY (user tier - core tool allowlist)
   if (settings.tools?.core) {
diff --git a/packages/core/src/policy/types.ts b/packages/core/src/policy/types.ts
index aa5275865c9..672e3f84169 100644
--- a/packages/core/src/policy/types.ts
+++ b/packages/core/src/policy/types.ts
@@ -338,6 +338,7 @@ export interface PolicySettings {
     core?: string[];
     exclude?: string[];
     allowed?: string[];
+    confirmationRequired?: string[];
   };
   mcpServers?: Record<string, { trust?: boolean }>;
   // User provided policies that will replace the USER level policies in ~/.gemini/policies

From 4c6ec402e66d066151b8238be31eb4962c3b29a6 Mon Sep 17 00:00:00 2001
From: davidapierce <davidapierce@google.com>
Date: Thu, 23 Apr 2026 18:05:34 +0000
Subject: [PATCH 09/12] regenerate settings schema and update configuration.md
 to include tools confirmation setting

---
 docs/reference/configuration.md | 6 ++++++
 schemas/settings.schema.json    | 9 +++++++++
 2 files changed, 15 insertions(+)

diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md
index 56dd6b4b5d9..46b2ff980ec 100644
--- a/docs/reference/configuration.md
+++ b/docs/reference/configuration.md
@@ -1467,6 +1467,12 @@ their corresponding top-level category object in your `settings.json` file.
   - **Default:** `undefined`
   - **Requires restart:** Yes
 
+- **`tools.confirmationRequired`** (array):
+  - **Description:** Tool names that always require user confirmation. Takes
+    precedence over allowed tools and core tool allowlists.
+  - **Default:** `undefined`
+  - **Requires restart:** Yes
+
 - **`tools.exclude`** (array):
   - **Description:** Tool names to exclude from discovery.
   - **Default:** `undefined`
diff --git a/schemas/settings.schema.json b/schemas/settings.schema.json
index e7b362fc4e3..0e4d0050378 100644
--- a/schemas/settings.schema.json
+++ b/schemas/settings.schema.json
@@ -2531,6 +2531,15 @@
             "type": "string"
           }
         },
+        "confirmationRequired": {
+          "title": "Confirmation Required",
+          "description": "Tool names that always require user confirmation. Takes precedence over allowed tools and core tool allowlists.",
+          "markdownDescription": "Tool names that always require user confirmation. Takes precedence over allowed tools and core tool allowlists.\n\n- Category: `Advanced`\n- Requires restart: `yes`",
+          "type": "array",
+          "items": {
+            "type": "string"
+          }
+        },
         "exclude": {
           "title": "Exclude Tools",
           "description": "Tool names to exclude from discovery.",

From 2e21f857e3729a8014c58086697041802c2fb711 Mon Sep 17 00:00:00 2001
From: davidapierce <davidapierce@google.com>
Date: Thu, 23 Apr 2026 18:56:10 +0000
Subject: [PATCH 10/12] add trusted workspace setting to evals

---
 .github/workflows/eval-pr.yml       | 1 +
 .github/workflows/eval.yml          | 1 +
 .github/workflows/evals-nightly.yml | 1 +
 3 files changed, 3 insertions(+)

diff --git a/.github/workflows/eval-pr.yml b/.github/workflows/eval-pr.yml
index 3e6784960ca..84d2dca5945 100644
--- a/.github/workflows/eval-pr.yml
+++ b/.github/workflows/eval-pr.yml
@@ -153,6 +153,7 @@ jobs:
           GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
           GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
           MODEL_LIST: '${{ env.MODEL_LIST }}'
+          GEMINI_CLI_TRUST_WORKSPACE: true
         run: |
           # Run the regression check loop. The script saves the report to a file.
           node scripts/run_eval_regression.js
diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
index 23dc1cfdfbd..e2a83eac706 100644
--- a/.github/workflows/eval.yml
+++ b/.github/workflows/eval.yml
@@ -30,6 +30,7 @@ jobs:
         GEMINI_API_KEY: '${{ secrets.EVAL_GEMINI_API_KEY }}'
         GCLI_LOCAL_FILE_TELEMETRY: 'True'
         EVAL_GCS_BUCKET: '${{ vars.EVAL_GCS_ARTIFACTS_BUCKET }}'
+        GEMINI_CLI_TRUST_WORKSPACE: true
     steps:
       - name: 'Authenticate to Google Cloud'
         id: 'auth'
diff --git a/.github/workflows/evals-nightly.yml b/.github/workflows/evals-nightly.yml
index fbb770ac847..99f983b8a68 100644
--- a/.github/workflows/evals-nightly.yml
+++ b/.github/workflows/evals-nightly.yml
@@ -68,6 +68,7 @@ jobs:
           GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
           GEMINI_MODEL: '${{ matrix.model }}'
           RUN_EVALS: 'true'
+          GEMINI_CLI_TRUST_WORKSPACE: true
           EVAL_SUITE_TYPE: "${{ github.event.inputs.suite_type || 'behavioral' }}"
           EVAL_SUITE_NAME: '${{ github.event.inputs.suite_name }}'
           TEST_NAME_PATTERN: '${{ github.event.inputs.test_name_pattern }}'

From bf768cb7a69b970e6dd4ab9e65cb4fff1980e2b3 Mon Sep 17 00:00:00 2001
From: Emily Hedlund <ehedlund@google.com>
Date: Thu, 23 Apr 2026 15:25:37 -0400
Subject: [PATCH 11/12] set GEMINI_CLI_TRUST_WORKSPACE in evals test helper and
 revert yml changes

---
 .github/workflows/eval-pr.yml       | 1 -
 .github/workflows/eval.yml          | 1 -
 .github/workflows/evals-nightly.yml | 1 -
 evals/test-helper.ts                | 1 +
 4 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/eval-pr.yml b/.github/workflows/eval-pr.yml
index 84d2dca5945..3e6784960ca 100644
--- a/.github/workflows/eval-pr.yml
+++ b/.github/workflows/eval-pr.yml
@@ -153,7 +153,6 @@ jobs:
           GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
           GH_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
           MODEL_LIST: '${{ env.MODEL_LIST }}'
-          GEMINI_CLI_TRUST_WORKSPACE: true
         run: |
           # Run the regression check loop. The script saves the report to a file.
           node scripts/run_eval_regression.js
diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
index e2a83eac706..23dc1cfdfbd 100644
--- a/.github/workflows/eval.yml
+++ b/.github/workflows/eval.yml
@@ -30,7 +30,6 @@ jobs:
         GEMINI_API_KEY: '${{ secrets.EVAL_GEMINI_API_KEY }}'
         GCLI_LOCAL_FILE_TELEMETRY: 'True'
         EVAL_GCS_BUCKET: '${{ vars.EVAL_GCS_ARTIFACTS_BUCKET }}'
-        GEMINI_CLI_TRUST_WORKSPACE: true
     steps:
       - name: 'Authenticate to Google Cloud'
         id: 'auth'
diff --git a/.github/workflows/evals-nightly.yml b/.github/workflows/evals-nightly.yml
index 99f983b8a68..fbb770ac847 100644
--- a/.github/workflows/evals-nightly.yml
+++ b/.github/workflows/evals-nightly.yml
@@ -68,7 +68,6 @@ jobs:
           GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
           GEMINI_MODEL: '${{ matrix.model }}'
           RUN_EVALS: 'true'
-          GEMINI_CLI_TRUST_WORKSPACE: true
           EVAL_SUITE_TYPE: "${{ github.event.inputs.suite_type || 'behavioral' }}"
           EVAL_SUITE_NAME: '${{ github.event.inputs.suite_name }}'
           TEST_NAME_PATTERN: '${{ github.event.inputs.test_name_pattern }}'
diff --git a/evals/test-helper.ts b/evals/test-helper.ts
index 7369a6919c9..af6bade2015 100644
--- a/evals/test-helper.ts
+++ b/evals/test-helper.ts
@@ -172,6 +172,7 @@ export async function internalEvalTest(evalCase: EvalCase) {
         timeout: evalCase.timeout,
         env: {
           GEMINI_CLI_ACTIVITY_LOG_TARGET: activityLogFile,
+          GEMINI_CLI_TRUST_WORKSPACE: 'true',
         },
       });
 

From 173d4d6af040185756f4db722c1a88c5194285ab Mon Sep 17 00:00:00 2001
From: davidapierce <davidapierce@google.com>
Date: Thu, 23 Apr 2026 19:57:45 +0000
Subject: [PATCH 12/12] disable the memoryV2 experiment in eval tests since
 they don't play nice with default read write permissions.

---
 evals/save_memory.eval.ts | 55 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/evals/save_memory.eval.ts b/evals/save_memory.eval.ts
index 8680f8eba8c..b31167fb4a0 100644
--- a/evals/save_memory.eval.ts
+++ b/evals/save_memory.eval.ts
@@ -18,6 +18,11 @@ describe('save_memory', () => {
     suiteName: 'default',
     suiteType: 'behavioral',
     name: rememberingFavoriteColor,
+    params: {
+      settings: {
+        experimental: { memoryV2: false },
+      },
+    },
 
     prompt: `remember that my favorite color is  blue.
   
@@ -40,6 +45,11 @@ describe('save_memory', () => {
     suiteName: 'default',
     suiteType: 'behavioral',
     name: rememberingCommandRestrictions,
+    params: {
+      settings: {
+        experimental: { memoryV2: false },
+      },
+    },
 
     prompt: `I don't want you to ever run npm commands.`,
     assert: async (rig, result) => {
@@ -61,6 +71,11 @@ describe('save_memory', () => {
     suiteName: 'default',
     suiteType: 'behavioral',
     name: rememberingWorkflow,
+    params: {
+      settings: {
+        experimental: { memoryV2: false },
+      },
+    },
 
     prompt: `I want you to always lint after building.`,
     assert: async (rig, result) => {
@@ -83,6 +98,11 @@ describe('save_memory', () => {
     suiteName: 'default',
     suiteType: 'behavioral',
     name: ignoringTemporaryInformation,
+    params: {
+      settings: {
+        experimental: { memoryV2: false },
+      },
+    },
 
     prompt: `I'm going to get a coffee.`,
     assert: async (rig, result) => {
@@ -108,6 +128,11 @@ describe('save_memory', () => {
     suiteName: 'default',
     suiteType: 'behavioral',
     name: rememberingPetName,
+    params: {
+      settings: {
+        experimental: { memoryV2: false },
+      },
+    },
 
     prompt: `Please remember that my dog's name is Buddy.`,
     assert: async (rig, result) => {
@@ -129,6 +154,11 @@ describe('save_memory', () => {
     suiteName: 'default',
     suiteType: 'behavioral',
     name: rememberingCommandAlias,
+    params: {
+      settings: {
+        experimental: { memoryV2: false },
+      },
+    },
 
     prompt: `When I say 'start server', you should run 'npm run dev'.`,
     assert: async (rig, result) => {
@@ -151,6 +181,11 @@ describe('save_memory', () => {
     suiteName: 'default',
     suiteType: 'behavioral',
     name: savingDbSchemaLocationAsProjectMemory,
+    params: {
+      settings: {
+        experimental: { memoryV2: false },
+      },
+    },
     prompt: `The database schema for this workspace is located in \`db/schema.sql\`.`,
     assert: async (rig, result) => {
       const wasToolCalled = await rig.waitForToolCall(
@@ -180,6 +215,11 @@ describe('save_memory', () => {
     suiteName: 'default',
     suiteType: 'behavioral',
     name: rememberingCodingStyle,
+    params: {
+      settings: {
+        experimental: { memoryV2: false },
+      },
+    },
 
     prompt: `I prefer to use tabs instead of spaces for indentation.`,
     assert: async (rig, result) => {
@@ -202,6 +242,11 @@ describe('save_memory', () => {
     suiteName: 'default',
     suiteType: 'behavioral',
     name: savingBuildArtifactLocationAsProjectMemory,
+    params: {
+      settings: {
+        experimental: { memoryV2: false },
+      },
+    },
     prompt: `In this workspace, build artifacts are stored in the \`dist/artifacts\` directory.`,
     assert: async (rig, result) => {
       const wasToolCalled = await rig.waitForToolCall(
@@ -231,6 +276,11 @@ describe('save_memory', () => {
     suiteName: 'default',
     suiteType: 'behavioral',
     name: savingMainEntryPointAsProjectMemory,
+    params: {
+      settings: {
+        experimental: { memoryV2: false },
+      },
+    },
     prompt: `The main entry point for this workspace is \`src/index.js\`.`,
     assert: async (rig, result) => {
       const wasToolCalled = await rig.waitForToolCall(
@@ -259,6 +309,11 @@ describe('save_memory', () => {
     suiteName: 'default',
     suiteType: 'behavioral',
     name: rememberingBirthday,
+    params: {
+      settings: {
+        experimental: { memoryV2: false },
+      },
+    },
 
     prompt: `My birthday is on June 15th.`,
     assert: async (rig, result) => {