From a00c3c1ff3a84fcdc9bb41bad1bf475ebdb1cfc6 Mon Sep 17 00:00:00 2001
From: Amit Singh <amitksingh1490@gmail.com>
Date: Thu, 9 Apr 2026 22:29:48 +0530
Subject: [PATCH 1/3] fix(cli): add --yes flag to workspace init for
 non-interactive mode

---
 benchmarks/evals/sem_search/task.yml  | 12 ++++++++----
 crates/forge_main/src/cli.rs          |  4 ++++
 crates/forge_main/src/ui.rs           | 25 +++++++++++++++----------
 crates/forge_repo/src/agents/forge.md |  4 +---
 4 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/benchmarks/evals/sem_search/task.yml b/benchmarks/evals/sem_search/task.yml
index 8d4877daaa..e57ece2cd5 100644
--- a/benchmarks/evals/sem_search/task.yml
+++ b/benchmarks/evals/sem_search/task.yml
@@ -1,19 +1,23 @@
 run:
   # Clone into `tmp/task` dir
   - git clone --depth=1 --branch main https://github.com/antinomyhq/forge .
-  - forgee workspace sync
-  - FORGE_DEBUG_REQUESTS='{{dir}}/context.json' forgee --provider open_router --model {{model}} -p '{{task}}'
-parallelism: 50
+  - forge workspace init --yes
+  - forge workspace sync
+  - FORGE_DEBUG_REQUESTS='{{dir}}/context.json' FORGE_SESSION__PROVIDER_ID=open_router FORGE_SESSION__MODEL_ID={{model}} forge -p '{{task}}'
+parallelism: 8
 timeout: 120
 early_exit: true
 validations:
   - name: "Uses codebase search tool"
     type: shell
     command: cat '{{dir}}/context.json' | jq -e '[.messages[]?.tool_calls[]? | select(.function.name == "sem_search")] | any'
+  - name: "Does not call task tool before sem_search"
+    type: shell
+    command: cat '{{dir}}/context.json' | jq -e '[.messages[]?.tool_calls[]? | select(.function.name == "task" or .function.name == "sem_search") | .function.name] | index("task") as $t | index("sem_search") as $s | if $t == null then true elif $s == null then true else $s < $t end'
 sources:
   - value:
       # - model: "x-ai/grok-code-fast-1"
-      - model: "anthropic/claude-sonnet-4.5"
+      - model: "anthropic/claude-sonnet-4.6"
   - value:
       # # Location questions - "where is"
       # - task: "Where is the code that transforms messages between different AI provider formats?"
diff --git a/crates/forge_main/src/cli.rs b/crates/forge_main/src/cli.rs
index 01d5b56f77..b2c12a4e35 100644
--- a/crates/forge_main/src/cli.rs
+++ b/crates/forge_main/src/cli.rs
@@ -287,6 +287,10 @@ pub enum WorkspaceCommand {
         /// Path to the directory to initialize as a workspace
         #[arg(default_value = ".")]
         path: PathBuf,
+
+        /// Skip the consent prompt and agree to sync and share the directory contents with ForgeCode Services
+        #[arg(short = 'y', long)]
+        yes: bool,
     },
 }
 
diff --git a/crates/forge_main/src/ui.rs b/crates/forge_main/src/ui.rs
index 5ba5de69e4..ee4747fae6 100644
--- a/crates/forge_main/src/ui.rs
+++ b/crates/forge_main/src/ui.rs
@@ -653,8 +653,8 @@ impl<A: API + ConsoleWriter + 'static, F: Fn(ForgeConfig) -> A + Send + Sync> UI
                     crate::cli::WorkspaceCommand::Status { path, porcelain } => {
                         self.on_workspace_status(path, porcelain).await?;
                     }
-                    crate::cli::WorkspaceCommand::Init { path } => {
-                        self.on_workspace_init(path).await?;
+                    crate::cli::WorkspaceCommand::Init { path, yes } => {
+                        self.on_workspace_init(path, yes).await?;
                     }
                 }
                 return Ok(());
@@ -3850,7 +3850,7 @@ impl<A: API + ConsoleWriter + 'static, F: Fn(ForgeConfig) -> A + Send + Sync> UI
         if init {
             let workspace_info = self.api.get_workspace_info(path.clone()).await?;
             if workspace_info.is_none() {
-                self.on_workspace_init(path.clone()).await?;
+                self.on_workspace_init(path.clone(), false).await?;
                 // If the workspace still does not exist after init (e.g. user
                 // declined the consent prompt), abort the sync.
                 let workspace_info = self.api.get_workspace_info(path.clone()).await?;
@@ -4210,16 +4210,21 @@ impl<A: API + ConsoleWriter + 'static, F: Fn(ForgeConfig) -> A + Send + Sync> UI
     }
 
     /// Initialize workspace for a directory without syncing files
-    async fn on_workspace_init(&mut self, path: std::path::PathBuf) -> anyhow::Result<()> {
+    async fn on_workspace_init(&mut self, path: std::path::PathBuf, yes: bool) -> anyhow::Result<()> {
         // Ask for user consent before syncing and sharing directory contents
         // with the ForgeCode Service.
         let display_path = path.display().to_string();
-        let confirmed = ForgeWidget::confirm(format!(
-            "This will sync and share the contents of '{}' with ForgeCode Services. Do you wish to continue?",
-            display_path
-        ))
-        .with_default(true)
-        .prompt()?;
+        
+        let confirmed = if yes {
+            Some(true)
+        } else {
+            ForgeWidget::confirm(format!(
+                "This will sync and share the contents of '{}' with ForgeCode Services. Do you wish to continue?",
+                display_path
+            ))
+            .with_default(true)
+            .prompt()?
+        };
 
         if !confirmed.unwrap_or(false) {
             self.writeln_title(TitleFormat::info("Workspace initialization cancelled"))?;
diff --git a/crates/forge_repo/src/agents/forge.md b/crates/forge_repo/src/agents/forge.md
index 43c1f1a0bc..1c6ecdbd26 100644
--- a/crates/forge_repo/src/agents/forge.md
+++ b/crates/forge_repo/src/agents/forge.md
@@ -127,16 +127,14 @@ Choose tools based on the nature of the task:
 
 - **Read**: When you already know the file location and need to examine its contents.
 
-- When doing file search, prefer to use the {{tool_names.task}} tool in order to reduce context usage.
 - You should proactively use the {{tool_names.task}} tool with specialized agents when the task at hand matches the agent's description.
 - You can call multiple tools in a single response. If you intend to call multiple tools and there are no dependencies between them, make all independent tool calls in parallel. Maximize use of parallel tool calls where possible to increase efficiency. However, if some tool calls depend on previous calls to inform dependent values, do NOT call these tools in parallel and instead call them sequentially. Never use placeholders or guess missing parameters in tool calls.
 - If the user specifies that they want you to run tools "in parallel", you MUST send a single message with multiple tool use content blocks. For example, if you need to launch multiple agents in parallel, send a single message with multiple {{tool_names.task}} tool calls.
 - Use specialized tools instead of shell commands when possible. For file operations, use dedicated tools: {{tool_names.read}} for reading files instead of cat/head/tail, {{tool_names.patch}} for editing instead of sed/awk, and {{tool_names.write}} for creating files instead of echo redirection. Reserve {{tool_names.shell}} exclusively for actual system commands and terminal operations that require shell execution.
-- VERY IMPORTANT: When exploring the codebase to gather context or to answer a question that is not a needle query for a specific file/class/function, it is CRITICAL that you use the {{tool_names.task}} tool instead of running search commands directly.
 
 <example>
 user: Where are errors from the client handled?
-assistant: [Uses the {{tool_names.task}} tool to find the files that handle client errors instead of using {{tool_names.fs_search}} or {{tool_names.sem_search}} directly]
+assistant: [Uses {{tool_names.sem_search}} to find error handling code, then uses the {{tool_names.task}} tool if deeper analysis is needed]
 </example>
 <example>
 user: What is the codebase structure?

From 90b52aba42fc7615cc12c831d85f7168ea6ed9b7 Mon Sep 17 00:00:00 2001
From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com>
Date: Thu, 9 Apr 2026 17:03:57 +0000
Subject: [PATCH 2/3] [autofix.ci] apply automated fixes

---
 crates/forge_main/src/ui.rs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/crates/forge_main/src/ui.rs b/crates/forge_main/src/ui.rs
index 57190114a1..5f500d99fc 100644
--- a/crates/forge_main/src/ui.rs
+++ b/crates/forge_main/src/ui.rs
@@ -4210,11 +4210,15 @@ impl<A: API + ConsoleWriter + 'static, F: Fn(ForgeConfig) -> A + Send + Sync> UI
     }
 
     /// Initialize workspace for a directory without syncing files
-    async fn on_workspace_init(&mut self, path: std::path::PathBuf, yes: bool) -> anyhow::Result<()> {
+    async fn on_workspace_init(
+        &mut self,
+        path: std::path::PathBuf,
+        yes: bool,
+    ) -> anyhow::Result<()> {
         // Ask for user consent before syncing and sharing directory contents
         // with the ForgeCode Service.
         let display_path = path.display().to_string();
-        
+
         let confirmed = if yes {
             Some(true)
         } else {

From 2f30151d5d2b6aae29c66bc5d783439faffbd433 Mon Sep 17 00:00:00 2001
From: Amit Singh <amitksingh1490@gmail.com>
Date: Fri, 10 Apr 2026 00:23:33 +0530
Subject: [PATCH 3/3] fix(benchmarks): update task.yml for improved logging and
 validation checks

fix(agents): refine descriptions in forge.md and sage.md for clarity and usage guidance
---
 benchmarks/evals/sem_search/task.yml  |  8 ++++----
 crates/forge_repo/src/agents/forge.md | 14 ++------------
 crates/forge_repo/src/agents/sage.md  |  2 +-
 3 files changed, 7 insertions(+), 17 deletions(-)

diff --git a/benchmarks/evals/sem_search/task.yml b/benchmarks/evals/sem_search/task.yml
index e57ece2cd5..33a686885b 100644
--- a/benchmarks/evals/sem_search/task.yml
+++ b/benchmarks/evals/sem_search/task.yml
@@ -3,17 +3,17 @@ run:
   - git clone --depth=1 --branch main https://github.com/antinomyhq/forge .
   - forge workspace init --yes
   - forge workspace sync
-  - FORGE_DEBUG_REQUESTS='{{dir}}/context.json' FORGE_SESSION__PROVIDER_ID=open_router FORGE_SESSION__MODEL_ID={{model}} forge -p '{{task}}'
+  - FORGE_DEBUG_REQUESTS='{{dir}}/context.jsonl' FORGE_SESSION__PROVIDER_ID=open_router FORGE_SESSION__MODEL_ID={{model}} forge -p '{{task}}'
 parallelism: 8
-timeout: 120
+timeout: 60
 early_exit: true
 validations:
   - name: "Uses codebase search tool"
     type: shell
-    command: cat '{{dir}}/context.json' | jq -e '[.messages[]?.tool_calls[]? | select(.function.name == "sem_search")] | any'
+    command: grep -q "Codebase Search" '{{dir}}/task.log'
   - name: "Does not call task tool before sem_search"
     type: shell
-    command: cat '{{dir}}/context.json' | jq -e '[.messages[]?.tool_calls[]? | select(.function.name == "task" or .function.name == "sem_search") | .function.name] | index("task") as $t | index("sem_search") as $s | if $t == null then true elif $s == null then true else $s < $t end'
+    command: "python3 -c \"\nimport sys\nlog = open('{{dir}}/task.log').read().splitlines()\ntask_line = next((i for i, l in enumerate(log) if '[Agent]' in l), None)\nsearch_line = next((i for i, l in enumerate(log) if 'Codebase Search' in l), None)\nif task_line is None: sys.exit(0)\nif search_line is None: sys.exit(1)\nsys.exit(0 if search_line < task_line else 1)\n\""
 sources:
   - value:
       # - model: "x-ai/grok-code-fast-1"
diff --git a/crates/forge_repo/src/agents/forge.md b/crates/forge_repo/src/agents/forge.md
index 1c6ecdbd26..1bb59d4b4e 100644
--- a/crates/forge_repo/src/agents/forge.md
+++ b/crates/forge_repo/src/agents/forge.md
@@ -121,25 +121,15 @@ assistant: I've found some existing telemetry code. I'll start designing the met
 
 Choose tools based on the nature of the task:
 
-- **Semantic Search**: When you need to discover code locations or understand implementations. Particularly useful when you don't know exact file names or when exploring unfamiliar codebases. Understands concepts rather than requiring exact text matches.
+{{#if tool_names.sem_search}}- **Semantic Search**: YOUR DEFAULT TOOL for code discovery. Always use this first when you need to discover code locations or understand implementations. Particularly useful when you don't know exact file names or when exploring unfamiliar codebases. Understands concepts rather than requiring exact text matches.{{/if}}
 
 - **Regex Search**: For finding exact strings, patterns, or when you know precisely what text you're looking for (e.g., TODO comments, specific function names).
 
 - **Read**: When you already know the file location and need to examine its contents.
-
-- You should proactively use the {{tool_names.task}} tool with specialized agents when the task at hand matches the agent's description.
 - You can call multiple tools in a single response. If you intend to call multiple tools and there are no dependencies between them, make all independent tool calls in parallel. Maximize use of parallel tool calls where possible to increase efficiency. However, if some tool calls depend on previous calls to inform dependent values, do NOT call these tools in parallel and instead call them sequentially. Never use placeholders or guess missing parameters in tool calls.
 - If the user specifies that they want you to run tools "in parallel", you MUST send a single message with multiple tool use content blocks. For example, if you need to launch multiple agents in parallel, send a single message with multiple {{tool_names.task}} tool calls.
 - Use specialized tools instead of shell commands when possible. For file operations, use dedicated tools: {{tool_names.read}} for reading files instead of cat/head/tail, {{tool_names.patch}} for editing instead of sed/awk, and {{tool_names.write}} for creating files instead of echo redirection. Reserve {{tool_names.shell}} exclusively for actual system commands and terminal operations that require shell execution.
-
-<example>
-user: Where are errors from the client handled?
-assistant: [Uses {{tool_names.sem_search}} to find error handling code, then uses the {{tool_names.task}} tool if deeper analysis is needed]
-</example>
-<example>
-user: What is the codebase structure?
-assistant: [Uses the {{tool_names.task}} tool]
-</example>
+- When NOT to use the {{tool_names.task}} tool: Do NOT launch a sub-agent for initial codebase exploration or simple lookups. Always search the codebase directly first{{#if tool_names.sem_search}}, starting with semantic search{{/if}}.
 
 ## Code Output Guidelines:
 
diff --git a/crates/forge_repo/src/agents/sage.md b/crates/forge_repo/src/agents/sage.md
index b0dc94edfa..3e314f7044 100644
--- a/crates/forge_repo/src/agents/sage.md
+++ b/crates/forge_repo/src/agents/sage.md
@@ -1,7 +1,7 @@
 ---
 id: "sage"
 title: "Research and analyze codebases"
-description: "Research-only tool for systematic codebase exploration and analysis. Performs comprehensive, read-only investigation: maps project architecture and module relationships, traces data/logic flow across files, analyzes API usage patterns, examines test coverage and build configurations, identifies design patterns and technical debt. Accepts detailed research questions or investigation tasks as input parameters. IMPORTANT: Always specify the target directory or file path in your task description to narrow down the scope and improve efficiency. Use when you need to understand how systems work, why architectural decisions were made, or to investigate bugs, dependencies, complex behavior patterns, or code quality issues. Do NOT use for code modifications, running commands, or file operations—choose implementation or planning agents instead. Returns structured reports with research summaries, key findings, technical details, contextual insights, and actionable follow-up suggestions. Strictly read-only with no side effects or system modifications."
+description: "DEEP RESEARCH ONLY. Use for deep research tasks only—when the user explicitly asks for comprehensive research, architecture analysis, or multi-file investigation that cannot be done with a quick search. Do NOT use for simple lookups or finding where something is defined. Research-only tool for systematic codebase exploration and analysis. Performs comprehensive, read-only investigation: maps project architecture and module relationships, traces data/logic flow across files, analyzes API usage patterns, examines test coverage and build configurations, identifies design patterns and technical debt. Accepts detailed research questions or investigation tasks as input parameters. IMPORTANT: Always specify the target directory or file path in your task description to narrow down the scope and improve efficiency. Do NOT use for code modifications, running commands, or file operations—choose implementation or planning agents instead. Returns structured reports with research summaries, key findings, technical details, contextual insights, and actionable follow-up suggestions. Strictly read-only with no side effects or system modifications."
 reasoning:
   enabled: true
 tools: