Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions benchmarks/evals/sem_search/task.yml
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
run:
# Clone into `tmp/task` dir
- git clone --depth=1 --branch main https://github.com/antinomyhq/forge .
- forgee workspace sync
- FORGE_DEBUG_REQUESTS='{{dir}}/context.json' forgee --provider open_router --model {{model}} -p '{{task}}'
parallelism: 50
timeout: 120
- forge workspace init --yes
- forge workspace sync
- FORGE_DEBUG_REQUESTS='{{dir}}/context.jsonl' FORGE_SESSION__PROVIDER_ID=open_router FORGE_SESSION__MODEL_ID={{model}} forge -p '{{task}}'
parallelism: 8
timeout: 60
early_exit: true
validations:
- name: "Uses codebase search tool"
type: shell
command: cat '{{dir}}/context.json' | jq -e '[.messages[]?.tool_calls[]? | select(.function.name == "sem_search")] | any'
command: grep -q "Codebase Search" '{{dir}}/task.log'
- name: "Does not call task tool before sem_search"
type: shell
command: "python3 -c \"\nimport sys\nlog = open('{{dir}}/task.log').read().splitlines()\ntask_line = next((i for i, l in enumerate(log) if '[Agent]' in l), None)\nsearch_line = next((i for i, l in enumerate(log) if 'Codebase Search' in l), None)\nif task_line is None: sys.exit(0)\nif search_line is None: sys.exit(1)\nsys.exit(0 if search_line < task_line else 1)\n\""
sources:
- value:
# - model: "x-ai/grok-code-fast-1"
- model: "anthropic/claude-sonnet-4.5"
- model: "anthropic/claude-sonnet-4.6"
- value:
# # Location questions - "where is"
# - task: "Where is the code that transforms messages between different AI provider formats?"
Expand Down
4 changes: 4 additions & 0 deletions crates/forge_main/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,10 @@ pub enum WorkspaceCommand {
/// Path to the directory to initialize as a workspace
#[arg(default_value = ".")]
path: PathBuf,

/// Automatically confirm initialization without prompting
#[arg(short = 'y', long)]
yes: bool,
},
}

Expand Down
29 changes: 19 additions & 10 deletions crates/forge_main/src/ui.rs
Original file line number Diff line number Diff line change
Expand Up @@ -652,8 +652,8 @@ impl<A: API + ConsoleWriter + 'static, F: Fn(ForgeConfig) -> A + Send + Sync> UI
crate::cli::WorkspaceCommand::Status { path, porcelain } => {
self.on_workspace_status(path, porcelain).await?;
}
crate::cli::WorkspaceCommand::Init { path } => {
self.on_workspace_init(path).await?;
crate::cli::WorkspaceCommand::Init { path, yes } => {
self.on_workspace_init(path, yes).await?;
}
}
return Ok(());
Expand Down Expand Up @@ -3850,7 +3850,7 @@ impl<A: API + ConsoleWriter + 'static, F: Fn(ForgeConfig) -> A + Send + Sync> UI
if init {
let workspace_info = self.api.get_workspace_info(path.clone()).await?;
if workspace_info.is_none() {
self.on_workspace_init(path.clone()).await?;
self.on_workspace_init(path.clone(), false).await?;
// If the workspace still does not exist after init (e.g. user
// declined the consent prompt), abort the sync.
let workspace_info = self.api.get_workspace_info(path.clone()).await?;
Expand Down Expand Up @@ -4210,16 +4210,25 @@ impl<A: API + ConsoleWriter + 'static, F: Fn(ForgeConfig) -> A + Send + Sync> UI
}

/// Initialize workspace for a directory without syncing files
async fn on_workspace_init(&mut self, path: std::path::PathBuf) -> anyhow::Result<()> {
async fn on_workspace_init(
&mut self,
path: std::path::PathBuf,
yes: bool,
) -> anyhow::Result<()> {
// Ask for user consent before syncing and sharing directory contents
// with ForgeCode Services. The prompt is skipped when `yes` is set.
let display_path = path.display().to_string();
let confirmed = ForgeWidget::confirm(format!(
"This will sync and share the contents of '{}' with ForgeCode Services. Do you wish to continue?",
display_path
))
.with_default(true)
.prompt()?;

let confirmed = if yes {
Some(true)
} else {
ForgeWidget::confirm(format!(
"This will sync and share the contents of '{}' with ForgeCode Services. Do you wish to continue?",
display_path
))
.with_default(true)
.prompt()?
};

if !confirmed.unwrap_or(false) {
self.writeln_title(TitleFormat::info("Workspace initialization cancelled"))?;
Expand Down
16 changes: 2 additions & 14 deletions crates/forge_repo/src/agents/forge.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,27 +121,15 @@ assistant: I've found some existing telemetry code. I'll start designing the met

Choose tools based on the nature of the task:

- **Semantic Search**: When you need to discover code locations or understand implementations. Particularly useful when you don't know exact file names or when exploring unfamiliar codebases. Understands concepts rather than requiring exact text matches.
{{#if tool_names.sem_search}}- **Semantic Search**: YOUR DEFAULT TOOL for code discovery. Always use this first when you need to discover code locations or understand implementations. Particularly useful when you don't know exact file names or when exploring unfamiliar codebases. Understands concepts rather than requiring exact text matches.{{/if}}

- **Regex Search**: For finding exact strings, patterns, or when you know precisely what text you're looking for (e.g., TODO comments, specific function names).

- **Read**: When you already know the file location and need to examine its contents.

- When doing file search, prefer to use the {{tool_names.task}} tool in order to reduce context usage.
- You should proactively use the {{tool_names.task}} tool with specialized agents when the task at hand matches the agent's description.
- You can call multiple tools in a single response. If you intend to call multiple tools and there are no dependencies between them, make all independent tool calls in parallel. Maximize use of parallel tool calls where possible to increase efficiency. However, if some tool calls depend on previous calls to inform dependent values, do NOT call these tools in parallel and instead call them sequentially. Never use placeholders or guess missing parameters in tool calls.
- If the user specifies that they want you to run tools "in parallel", you MUST send a single message with multiple tool use content blocks. For example, if you need to launch multiple agents in parallel, send a single message with multiple {{tool_names.task}} tool calls.
- Use specialized tools instead of shell commands when possible. For file operations, use dedicated tools: {{tool_names.read}} for reading files instead of cat/head/tail, {{tool_names.patch}} for editing instead of sed/awk, and {{tool_names.write}} for creating files instead of echo redirection. Reserve {{tool_names.shell}} exclusively for actual system commands and terminal operations that require shell execution.
- VERY IMPORTANT: When exploring the codebase to gather context or to answer a question that is not a needle query for a specific file/class/function, it is CRITICAL that you use the {{tool_names.task}} tool instead of running search commands directly.

<example>
user: Where are errors from the client handled?
assistant: [Uses the {{tool_names.task}} tool to find the files that handle client errors instead of using {{tool_names.fs_search}} or {{tool_names.sem_search}} directly]
</example>
<example>
user: What is the codebase structure?
assistant: [Uses the {{tool_names.task}} tool]
</example>
- When NOT to use the {{tool_names.task}} tool: Do NOT launch a sub-agent for initial codebase exploration or simple lookups. Always use semantic search directly first.

## Code Output Guidelines:

Expand Down
2 changes: 1 addition & 1 deletion crates/forge_repo/src/agents/sage.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
id: "sage"
title: "Research and analyze codebases"
description: "Research-only tool for systematic codebase exploration and analysis. Performs comprehensive, read-only investigation: maps project architecture and module relationships, traces data/logic flow across files, analyzes API usage patterns, examines test coverage and build configurations, identifies design patterns and technical debt. Accepts detailed research questions or investigation tasks as input parameters. IMPORTANT: Always specify the target directory or file path in your task description to narrow down the scope and improve efficiency. Use when you need to understand how systems work, why architectural decisions were made, or to investigate bugs, dependencies, complex behavior patterns, or code quality issues. Do NOT use for code modifications, running commands, or file operations—choose implementation or planning agents instead. Returns structured reports with research summaries, key findings, technical details, contextual insights, and actionable follow-up suggestions. Strictly read-only with no side effects or system modifications."
description: "DEEP RESEARCH ONLY. Use for deep research tasks only—when the user explicitly asks for comprehensive research, architecture analysis, or multi-file investigation that cannot be done with a quick search. Do NOT use for simple lookups or finding where something is defined. Research-only tool for systematic codebase exploration and analysis. Performs comprehensive, read-only investigation: maps project architecture and module relationships, traces data/logic flow across files, analyzes API usage patterns, examines test coverage and build configurations, identifies design patterns and technical debt. Accepts detailed research questions or investigation tasks as input parameters. IMPORTANT: Always specify the target directory or file path in your task description to narrow down the scope and improve efficiency. Do NOT use for code modifications, running commands, or file operations—choose implementation or planning agents instead. Returns structured reports with research summaries, key findings, technical details, contextual insights, and actionable follow-up suggestions. Strictly read-only with no side effects or system modifications."
reasoning:
enabled: true
tools:
Expand Down
Loading