diff --git a/src/crates/core/src/agentic/agents/explore_agent.rs b/src/crates/core/src/agentic/agents/explore_agent.rs index 1da96625..bd1bc7c1 100644 --- a/src/crates/core/src/agentic/agents/explore_agent.rs +++ b/src/crates/core/src/agentic/agents/explore_agent.rs @@ -8,10 +8,10 @@ impl ExploreAgent { pub fn new() -> Self { Self { default_tools: vec![ - "LS".to_string(), - "Read".to_string(), "Grep".to_string(), "Glob".to_string(), + "Read".to_string(), + "LS".to_string(), ], } } @@ -32,7 +32,7 @@ impl Agent for ExploreAgent { } fn description(&self) -> &str { - r#"Subagent for **wide** codebase exploration only. Use when the main agent would need many sequential search/read rounds across multiple areas, or the user asks for an architectural survey. Do **not** use for narrow tasks: a known path, a single class/symbol lookup, one obvious Grep pattern, or reading a handful of files — the main agent should use Grep, Glob, and Read for those. When calling, set thoroughness in the prompt: \"quick\", \"medium\", or \"very thorough\"."# + r#"Read-only subagent for **wide** codebase exploration. Prefer search-first workflows: use Grep and Glob to narrow the space, then Read the small set of relevant files. Use LS only sparingly to confirm directory shape after search has narrowed the target. Do **not** use for narrow tasks: a known path, a single class/symbol lookup, one obvious Grep pattern, or reading a handful of files — the main agent should handle those directly. 
When calling, set thoroughness in the prompt: \"quick\", \"medium\", or \"very thorough\"."# } fn prompt_template_name(&self, _model_name: Option<&str>) -> &str { @@ -47,3 +47,33 @@ impl Agent for ExploreAgent { true } } + +#[cfg(test)] +mod tests { + use super::{Agent, ExploreAgent}; + + #[test] + fn uses_search_first_default_tool_order() { + let agent = ExploreAgent::new(); + assert_eq!( + agent.default_tools(), + vec![ + "Grep".to_string(), + "Glob".to_string(), + "Read".to_string(), + "LS".to_string(), + ] + ); + } + + #[test] + fn always_uses_default_prompt_template() { + let agent = ExploreAgent::new(); + assert_eq!(agent.prompt_template_name(Some("gpt-5.1")), "explore_agent"); + assert_eq!( + agent.prompt_template_name(Some("claude-sonnet-4")), + "explore_agent" + ); + assert_eq!(agent.prompt_template_name(None), "explore_agent"); + } +} diff --git a/src/crates/core/src/agentic/agents/prompts/explore_agent.md b/src/crates/core/src/agentic/agents/prompts/explore_agent.md index ce332874..10c44859 100644 --- a/src/crates/core/src/agentic/agents/prompts/explore_agent.md +++ b/src/crates/core/src/agentic/agents/prompts/explore_agent.md @@ -1,4 +1,4 @@ -You are an agent for BitFun (an AI IDE). Given the user's message, you should use the tools available to complete the task. Do what has been asked; nothing more, nothing less. When you complete the task simply respond with a detailed writeup. +You are a read-only codebase exploration agent for BitFun (an AI IDE). Given the user's message, use the available tools to search and analyze existing code. Do what has been asked; nothing more, nothing less. When you complete the task simply respond with a detailed writeup. Your strengths: - Searching for code, configurations, and patterns across large codebases @@ -7,15 +7,19 @@ Your strengths: - Performing multi-step research tasks Guidelines: -- For file searches: Use Grep or Glob when you need to search broadly. Use Read when you know the specific file path. 
-- For analysis: Start broad and narrow down. Use multiple search strategies if the first doesn't yield results. +- This is a read-only task. Never attempt to modify files, create files, delete files, or change workspace state. +- Search first. Use Grep or Glob to narrow the candidate set before reading files. +- Use Read only after search has identified a small set of relevant files or when the exact file path is already known. +- Use LS sparingly. It is only for confirming directory shape after Grep or Glob has already narrowed the target area. Do not recursively walk the tree directory-by-directory as a default strategy. +- Prefer multiple targeted searches over broad directory listing. If the first search does not answer the question, try a different pattern, symbol name, or naming convention. +- For analysis: start broad with search, then narrow to the minimum number of files needed to answer accurately. - Be thorough: Check multiple locations, consider different naming conventions, look for related files. - In your final response always share relevant file names and code snippets. Any file paths you return in your response MUST be absolute. Do NOT use relative paths. - When analyzing UI layout and styling, output related file paths (absolute) and original code snippets to avoid information loss. - For clear communication, avoid using emojis. Notes: -- The bash tool should only be used when other tools cannot meet your requirements. -- Agent threads always have their cwd reset between bash calls, as a result please only use absolute file paths. +- Prefer Grep, Glob, Read, and LS over Bash. The bash tool should only be used when the dedicated exploration tools cannot meet your requirements. +- Agent threads always have their cwd reset between bash calls, so only use absolute file paths if Bash is necessary. - In your final response always share relevant file names and code snippets. Any file paths you return in your response MUST be absolute. 
Do NOT use relative paths. - For clear communication with the user the assistant MUST avoid using emojis. diff --git a/src/crates/core/src/agentic/tools/implementations/glob_tool.rs b/src/crates/core/src/agentic/tools/implementations/glob_tool.rs index 0f4d6edc..081d9190 100644 --- a/src/crates/core/src/agentic/tools/implementations/glob_tool.rs +++ b/src/crates/core/src/agentic/tools/implementations/glob_tool.rs @@ -1,17 +1,120 @@ use crate::agentic::tools::framework::{Tool, ToolResult, ToolUseContext}; use crate::util::errors::{BitFunError, BitFunResult}; use async_trait::async_trait; -use globset::GlobBuilder; +use globset::{GlobBuilder, GlobMatcher}; use ignore::WalkBuilder; use log::warn; use serde_json::{json, Value}; -use std::path::Path; +use std::cmp::Ordering; +use std::collections::BinaryHeap; +use std::path::{Component, Path, PathBuf}; + +#[derive(Debug, Clone, Eq, PartialEq)] +struct GlobCandidate { + depth: usize, + path: String, +} + +impl Ord for GlobCandidate { + fn cmp(&self, other: &Self) -> Ordering { + self.depth + .cmp(&other.depth) + .then_with(|| self.path.cmp(&other.path)) + } +} + +impl PartialOrd for GlobCandidate { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +fn extract_glob_base_directory(pattern: &str) -> (String, String) { + let glob_start = pattern.find(['*', '?', '[', '{']); + + match glob_start { + Some(index) => { + let static_prefix = &pattern[..index]; + let last_separator = static_prefix + .char_indices() + .rev() + .find(|(_, ch)| *ch == '/' || *ch == '\\') + .map(|(idx, _)| idx); + + if let Some(separator_index) = last_separator { + ( + static_prefix[..separator_index].to_string(), + pattern[separator_index + 1..].to_string(), + ) + } else { + (String::new(), pattern.to_string()) + } + } + None => { + let trimmed = pattern.trim_end_matches(['/', '\\']); + let literal_path = Path::new(trimmed); + let base_dir = literal_path + .parent() + .filter(|parent| !parent.as_os_str().is_empty() && 
*parent != Path::new(".")) + .map(|parent| parent.to_string_lossy().to_string()) + .unwrap_or_default(); + let file_name = literal_path + .file_name() + .map(|name| name.to_string_lossy().to_string()) + .unwrap_or_else(|| trimmed.to_string()); + + let relative_pattern = if pattern.ends_with('/') || pattern.ends_with('\\') { + format!("{}/", file_name) + } else { + file_name + }; + + (base_dir, relative_pattern) + } + } +} + +fn normalize_path(path: &Path) -> String { + dunce::simplified(path).to_string_lossy().replace('\\', "/") +} + +fn is_safe_relative_subpath(path: &Path) -> bool { + !path.is_absolute() + && path + .components() + .all(|component| matches!(component, Component::Normal(_) | Component::CurDir)) +} + +fn derive_walk_root(search_path_abs: &Path, pattern: &str) -> (PathBuf, String) { + let (base_dir, relative_pattern) = extract_glob_base_directory(pattern); + let base_path = Path::new(&base_dir); + + if base_dir.is_empty() || !is_safe_relative_subpath(base_path) { + return (search_path_abs.to_path_buf(), pattern.to_string()); + } + + let walk_root = search_path_abs.join(base_path); + if walk_root.starts_with(search_path_abs) { + (walk_root, relative_pattern) + } else { + (search_path_abs.to_path_buf(), pattern.to_string()) + } +} + +fn match_relative_path(matcher: &GlobMatcher, relative_path: &str, is_dir: bool) -> bool { + if is_dir { + matcher.is_match(relative_path) || matcher.is_match(&format!("{}/", relative_path)) + } else { + matcher.is_match(relative_path) + } +} pub fn glob_with_ignore( search_path: &str, pattern: &str, ignore: bool, ignore_hidden: bool, + limit: usize, ) -> Result, Box> { let path = std::path::Path::new(search_path); if !path.exists() { @@ -22,21 +125,26 @@ pub fn glob_with_ignore( } let search_path_abs = dunce::canonicalize(Path::new(search_path))?; - let search_path_str = search_path_abs.to_string_lossy(); + let (walk_root, relative_pattern) = derive_walk_root(&search_path_abs, pattern); - let absolute_pattern = 
format!("{}/{}", search_path_str, pattern); + if !walk_root.exists() || !walk_root.is_dir() || limit == 0 { + return Ok(Vec::new()); + } - let glob = GlobBuilder::new(&absolute_pattern) + let glob = GlobBuilder::new(&relative_pattern) .literal_separator(true) .build()? .compile_matcher(); - let walker = WalkBuilder::new(&search_path_abs) + let walker = WalkBuilder::new(&walk_root) + .ignore(ignore) .git_ignore(ignore) + .git_global(ignore) + .git_exclude(ignore) .hidden(ignore_hidden) .build(); - let mut results = Vec::new(); + let mut best_matches = BinaryHeap::with_capacity(limit.saturating_add(1)); for entry in walker { let entry = match entry { @@ -47,13 +155,40 @@ pub fn glob_with_ignore( } }; let path = entry.path().to_path_buf(); + let relative_path = match path.strip_prefix(&walk_root) { + Ok(relative) => relative, + Err(_) => continue, + }; + let relative_path = normalize_path(relative_path); + + if match_relative_path( + &glob, + &relative_path, + entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false), + ) { + let normalized_path = normalize_path(&path); + let candidate = GlobCandidate { + depth: normalized_path.split('/').count(), + path: normalized_path, + }; - if glob.is_match(&path) { - let simplified_path = dunce::simplified(&path); - results.push(simplified_path.to_string_lossy().to_string()); + if best_matches.len() < limit { + best_matches.push(candidate); + } else if let Some(worst_match) = best_matches.peek() { + if candidate < *worst_match { + best_matches.pop(); + best_matches.push(candidate); + } + } } } + let mut results = best_matches + .into_sorted_vec() + .into_iter() + .map(|candidate| candidate.path) + .collect::>(); + results.sort(); Ok(results) } @@ -62,11 +197,12 @@ fn limit_paths(paths: &[String], limit: usize) -> Vec { .iter() .map(|path| { let normalized_path = path.replace('\\', "/"); - let n = normalized_path.split('/').count(); - (n, normalized_path) + let depth = normalized_path.split('/').count(); + (depth, normalized_path) 
}) .collect::>(); - depth_and_paths.sort_by_key(|(depth, _)| *depth); + depth_and_paths.sort_by(|left, right| left.0.cmp(&right.0).then_with(|| left.1.cmp(&right.1))); + let mut result = depth_and_paths .into_iter() .take(limit) @@ -84,20 +220,29 @@ fn call_glob(search_path: &str, pattern: &str, limit: usize) -> Result String { - let name_pattern = if pattern.contains("**/") { - pattern.replacen("**/", "", 1) + let search_dir_path = Path::new(search_dir); + let (remote_walk_root, remote_pattern) = derive_walk_root(search_dir_path, pattern); + + let name_pattern = if remote_pattern.contains("**/") { + remote_pattern.replacen("**/", "", 1) + } else if remote_pattern.contains('/') || remote_pattern.contains('\\') { + "*".to_string() } else { - pattern.to_string() + remote_pattern }; - let escaped_dir = search_dir.replace('\'', "'\\''"); + let escaped_dir = remote_walk_root.to_string_lossy().replace('\'', "'\\''"); let escaped_pattern = name_pattern.replace('\'', "'\\''"); format!( @@ -197,9 +342,9 @@ impl Tool for GlobTool { // Remote workspace: use `find` via the workspace shell if context.is_remote() { - let ws_shell = context.ws_shell().ok_or_else(|| { - BitFunError::tool("Workspace shell not available".to_string()) - })?; + let ws_shell = context + .ws_shell() + .ok_or_else(|| BitFunError::tool("Workspace shell not available".to_string()))?; let search_dir = resolved_str.clone(); let find_cmd = build_remote_find_command(&search_dir, pattern, limit); @@ -229,12 +374,11 @@ impl Tool for GlobTool { "match_count": limited.len() }), result_for_assistant: Some(result_text), - image_attachments: None, - }]); + image_attachments: None, + }]); } - let matches = call_glob(&resolved_str, pattern, limit) - .map_err(|e| BitFunError::tool(e))?; + let matches = call_glob(&resolved_str, pattern, limit).map_err(|e| BitFunError::tool(e))?; let result_text = if matches.is_empty() { format!("No files found matching pattern '{}'", pattern) @@ -256,3 +400,68 @@ impl Tool for GlobTool { 
Ok(vec![result]) } } + +#[cfg(test)] +mod tests { + use super::{call_glob, derive_walk_root, extract_glob_base_directory}; + use std::fs; + use std::path::PathBuf; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn make_temp_dir(name: &str) -> PathBuf { + let unique = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_nanos(); + let dir = std::env::temp_dir().join(format!("bitfun-glob-tool-{name}-{unique}")); + fs::create_dir_all(&dir).unwrap(); + dir + } + + #[test] + fn extracts_static_glob_prefix() { + assert_eq!( + extract_glob_base_directory("src/**/*.rs"), + ("src".to_string(), "**/*.rs".to_string()) + ); + assert_eq!( + extract_glob_base_directory("*.rs"), + (String::new(), "*.rs".to_string()) + ); + assert_eq!( + extract_glob_base_directory("src/lib.rs"), + ("src".to_string(), "lib.rs".to_string()) + ); + } + + #[test] + fn does_not_expand_walk_root_outside_search_path() { + let root = std::env::temp_dir().join("bitfun-glob-root"); + let (walk_root, relative_pattern) = derive_walk_root(&root, "../*.rs"); + + assert_eq!(walk_root, root); + assert_eq!(relative_pattern, "../*.rs".to_string()); + } + + #[test] + fn keeps_shallowest_matches_without_collecting_everything() { + let root = make_temp_dir("limit"); + fs::create_dir_all(root.join("src/deep")).unwrap(); + fs::create_dir_all(root.join("tests")).unwrap(); + fs::write(root.join("Cargo.toml"), "").unwrap(); + fs::write(root.join("src/lib.rs"), "").unwrap(); + fs::write(root.join("src/deep/mod.rs"), "").unwrap(); + fs::write(root.join("tests/mod.rs"), "").unwrap(); + + let matches = call_glob(root.to_string_lossy().as_ref(), "**/*.rs", 2).unwrap(); + + assert_eq!(matches.len(), 2); + assert!(matches.iter().any(|path| path.ends_with("/src/lib.rs"))); + assert!(matches.iter().any(|path| path.ends_with("/tests/mod.rs"))); + assert!(!matches + .iter() + .any(|path| path.ends_with("/src/deep/mod.rs"))); + + let _ = fs::remove_dir_all(root); + } +} diff --git 
a/src/crates/core/src/agentic/tools/implementations/grep_tool.rs b/src/crates/core/src/agentic/tools/implementations/grep_tool.rs index 2f801ba1..afa3ada7 100644 --- a/src/crates/core/src/agentic/tools/implementations/grep_tool.rs +++ b/src/crates/core/src/agentic/tools/implementations/grep_tool.rs @@ -3,7 +3,11 @@ use crate::util::errors::{BitFunError, BitFunResult}; use async_trait::async_trait; use serde_json::{json, Value}; use std::sync::Arc; -use tool_runtime::search::grep_search::{grep_search, GrepOptions, OutputMode, ProgressCallback}; +use tool_runtime::search::grep_search::{ + grep_search, GrepOptions, GrepSearchResult, OutputMode, ProgressCallback, +}; + +const DEFAULT_HEAD_LIMIT: usize = 250; pub struct GrepTool; @@ -12,6 +16,79 @@ impl GrepTool { Self } + fn resolve_head_limit(input: &Value) -> Option { + match input.get("head_limit").and_then(|v| v.as_u64()) { + Some(0) => None, + Some(value) => Some(value as usize), + None => Some(DEFAULT_HEAD_LIMIT), + } + } + + fn shell_escape(value: &str) -> String { + value.replace('\'', "'\\''") + } + + fn parse_glob_patterns(glob: Option<&str>) -> Vec { + let Some(glob) = glob else { + return Vec::new(); + }; + + let mut patterns = Vec::new(); + for raw_pattern in glob.split_whitespace() { + if raw_pattern.contains('{') && raw_pattern.contains('}') { + patterns.push(raw_pattern.to_string()); + } else { + patterns.extend( + raw_pattern + .split(',') + .filter(|pattern| !pattern.is_empty()) + .map(|pattern| pattern.to_string()), + ); + } + } + patterns + } + + fn resolve_offset(input: &Value) -> usize { + input + .get("offset") + .and_then(|v| v.as_u64()) + .map(|value| value as usize) + .unwrap_or(0) + } + + fn display_base(context: &ToolUseContext) -> Option { + context.current_working_directory.clone().or_else(|| { + context + .workspace + .as_ref() + .map(|workspace| workspace.root_path_string()) + }) + } + + fn relativize_result_text(result_text: &str, display_base: Option<&str>) -> String { + let Some(base) 
= display_base else { + return result_text.to_string(); + }; + + let normalized_base = base.replace('\\', "/").trim_end_matches('/').to_string(); + if normalized_base.is_empty() { + return result_text.to_string(); + } + + result_text + .lines() + .map(|line| { + if let Some(rest) = line.strip_prefix(&(normalized_base.clone() + "/")) { + rest.to_string() + } else { + line.to_string() + } + }) + .collect::>() + .join("\n") + } + async fn call_remote( &self, input: &Value, @@ -30,39 +107,85 @@ impl GrepTool { let resolved_path = context.resolve_workspace_tool_path(search_path)?; let case_insensitive = input.get("-i").and_then(|v| v.as_bool()).unwrap_or(false); - let head_limit = input - .get("head_limit") + let head_limit = Self::resolve_head_limit(input); + let offset = Self::resolve_offset(input); + let output_mode = input + .get("output_mode") + .and_then(|v| v.as_str()) + .unwrap_or("files_with_matches"); + let show_line_numbers = input + .get("-n") + .and_then(|v| v.as_bool()) + .unwrap_or(output_mode == "content"); + let context_c = input + .get("context") + .or_else(|| input.get("-C")) .and_then(|v| v.as_u64()) - .map(|v| v as usize) - .unwrap_or(200); - let glob_pattern = input.get("glob").and_then(|v| v.as_str()); + .map(|v| v.to_string()); + let before_context = input + .get("-B") + .and_then(|v| v.as_u64()) + .map(|v| v.to_string()); + let after_context = input + .get("-A") + .and_then(|v| v.as_u64()) + .map(|v| v.to_string()); + let glob_patterns = Self::parse_glob_patterns(input.get("glob").and_then(|v| v.as_str())); let file_type = input.get("type").and_then(|v| v.as_str()); - let escaped_path = resolved_path.replace('\'', "'\\''"); - let escaped_pattern = pattern.replace('\'', "'\\''"); + let escaped_path = Self::shell_escape(&resolved_path); + let escaped_pattern = Self::shell_escape(pattern); + let offset_cmd = if offset > 0 { + format!(" | tail -n +{}", offset + 1) + } else { + String::new() + }; + let limit_cmd = head_limit + .map(|limit| format!(" 
| head -n {}", limit)) + .unwrap_or_default(); - let mut cmd = "rg --no-heading --line-number".to_string(); + let mut cmd = "rg --no-heading --hidden --max-columns 500".to_string(); if case_insensitive { cmd.push_str(" -i"); } - if let Some(glob) = glob_pattern { - cmd.push_str(&format!(" --glob '{}'", glob.replace('\'', "'\\''"))); + if output_mode == "files_with_matches" { + cmd.push_str(" -l"); + } else if output_mode == "count" { + cmd.push_str(" -c"); + } else if show_line_numbers { + cmd.push_str(" --line-number"); + } + if output_mode == "content" { + if let Some(context) = context_c { + cmd.push_str(&format!(" -C {}", context)); + } else { + if let Some(before) = before_context { + cmd.push_str(&format!(" -B {}", before)); + } + if let Some(after) = after_context { + cmd.push_str(&format!(" -A {}", after)); + } + } + } + for glob_pattern in glob_patterns { + cmd.push_str(&format!(" --glob '{}'", Self::shell_escape(&glob_pattern))); } if let Some(ft) = file_type { - cmd.push_str(&format!(" --type {}", ft)); + cmd.push_str(&format!(" --type '{}'", Self::shell_escape(ft))); } cmd.push_str(&format!( - " '{}' '{}' 2>/dev/null | head -n {}", - escaped_pattern, escaped_path, head_limit + " -e '{}' '{}' 2>/dev/null{}{}", + escaped_pattern, escaped_path, offset_cmd, limit_cmd )); let full_cmd = format!( - "if command -v rg >/dev/null 2>&1; then {}; else grep -rn{} '{}' '{}' 2>/dev/null | head -n {}; fi", + "if command -v rg >/dev/null 2>&1; then {}; else grep -rn{} -e '{}' '{}' 2>/dev/null{}{}; fi", cmd, if case_insensitive { "i" } else { "" }, escaped_pattern, escaped_path, - head_limit, + offset_cmd, + limit_cmd, ); let (stdout, _stderr, _exit_code) = ws_shell @@ -72,18 +195,21 @@ impl GrepTool { let lines: Vec<&str> = stdout.lines().collect(); let total_matches = lines.len(); + let display_base = Self::display_base(context); let result_text = if lines.is_empty() { format!("No matches found for pattern '{}'", pattern) } else { - stdout.clone() + 
Self::relativize_result_text(&stdout, display_base.as_deref()) }; Ok(vec![ToolResult::Result { data: json!({ "pattern": pattern, "path": resolved_path, - "output_mode": "content", + "output_mode": output_mode, "total_matches": total_matches, + "applied_limit": head_limit, + "applied_offset": if offset > 0 { Some(offset) } else { None:: }, "result": result_text, }), result_for_assistant: Some(result_text), @@ -114,18 +240,20 @@ impl GrepTool { .and_then(|v| v.as_str()) .unwrap_or("files_with_matches"); let output_mode = OutputMode::from_str(output_mode_str); - let show_line_numbers = input.get("-n").and_then(|v| v.as_bool()).unwrap_or(false); - let context_c = input.get("-C").and_then(|v| v.as_u64()).map(|v| v as usize); - let before_context = input.get("-B").and_then(|v| v.as_u64()).map(|v| v as usize); - let after_context = input.get("-A").and_then(|v| v.as_u64()).map(|v| v as usize); - let head_limit = input - .get("head_limit") + let show_line_numbers = input + .get("-n") + .and_then(|v| v.as_bool()) + .unwrap_or(output_mode_str == "content"); + let context_c = input + .get("context") + .or_else(|| input.get("-C")) .and_then(|v| v.as_u64()) .map(|v| v as usize); - let glob_pattern = input - .get("glob") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); + let before_context = input.get("-B").and_then(|v| v.as_u64()).map(|v| v as usize); + let after_context = input.get("-A").and_then(|v| v.as_u64()).map(|v| v as usize); + let head_limit = Self::resolve_head_limit(input); + let offset = Self::resolve_offset(input); + let glob_patterns = Self::parse_glob_patterns(input.get("glob").and_then(|v| v.as_str())); let file_type = input .get("type") .and_then(|v| v.as_str()) @@ -137,6 +265,10 @@ impl GrepTool { .output_mode(output_mode) .show_line_numbers(show_line_numbers); + if let Some(display_base) = Self::display_base(context) { + options = options.display_base(display_base); + } + if let Some(c) = context_c { options = options.context(c); } @@ -149,8 +281,11 @@ 
impl GrepTool { if let Some(h) = head_limit { options = options.head_limit(h); } - if let Some(g) = glob_pattern { - options = options.glob(g); + if offset > 0 { + options = options.offset(offset); + } + if !glob_patterns.is_empty() { + options = options.globs(glob_patterns); } if let Some(t) = file_type { options = options.file_type(t); @@ -160,6 +295,39 @@ impl GrepTool { } } +#[cfg(test)] +mod tests { + use super::{GrepTool, DEFAULT_HEAD_LIMIT}; + use serde_json::json; + + #[test] + fn head_limit_defaults_and_zero_escape_hatch() { + assert_eq!( + GrepTool::resolve_head_limit(&json!({})), + Some(DEFAULT_HEAD_LIMIT) + ); + assert_eq!( + GrepTool::resolve_head_limit(&json!({ "head_limit": 25 })), + Some(25) + ); + assert_eq!( + GrepTool::resolve_head_limit(&json!({ "head_limit": 0 })), + None + ); + } + + #[test] + fn relativizes_prefixed_result_lines() { + let text = "/repo/src/main.rs:12:fn main()\n/repo/src/lib.rs:3:pub fn lib()"; + let relativized = GrepTool::relativize_result_text(text, Some("/repo")); + + assert_eq!( + relativized, + "src/main.rs:12:fn main()\nsrc/lib.rs:3:pub fn lib()" + ); + } +} + #[async_trait] impl Tool for GrepTool { fn name(&self) -> &str { @@ -203,10 +371,12 @@ Usage: "-B": { "type": "number", "description": "Number of lines to show before each match (rg -B). Requires output_mode: \"content\", ignored otherwise." }, "-A": { "type": "number", "description": "Number of lines to show after each match (rg -A). Requires output_mode: \"content\", ignored otherwise." }, "-C": { "type": "number", "description": "Number of lines to show before and after each match (rg -C). Requires output_mode: \"content\", ignored otherwise." }, + "context": { "type": "number", "description": "Alias for -C. Number of lines to show before and after each match." }, "-n": { "type": "boolean", "description": "Show line numbers in output (rg -n). Requires output_mode: \"content\", ignored otherwise." 
}, "-i": { "type": "boolean", "description": "Case insensitive search (rg -i)" }, "type": { "type": "string", "description": "File type to search (rg --type). Common types: js, py, rust, go, java, etc." }, "head_limit": { "type": "number", "description": "Limit output to first N lines/entries." }, + "offset": { "type": "number", "description": "Skip the first N lines/entries before applying head_limit." }, "multiline": { "type": "boolean", "description": "Enable multiline mode where . matches newlines and patterns can span lines (rg -U --multiline-dotall). Default: false." } }, "required": ["pattern"], @@ -221,7 +391,7 @@ Usage: fn is_concurrency_safe(&self, _input: Option<&Value>) -> bool { true } - + fn needs_permissions(&self, _input: Option<&Value>) -> bool { false } @@ -321,7 +491,13 @@ Usage: }) .await; - let (file_count, total_matches, result_text) = match search_result { + let GrepSearchResult { + file_count, + total_matches, + result_text, + applied_limit, + applied_offset, + } = match search_result { Ok(Ok(result)) => result, Ok(Err(e)) => return Err(BitFunError::tool(e)), Err(e) => return Err(BitFunError::tool(format!("grep search failed: {}", e))), @@ -334,6 +510,8 @@ Usage: "output_mode": output_mode, "file_count": file_count, "total_matches": total_matches, + "applied_limit": applied_limit, + "applied_offset": applied_offset, "result": result_text, }), result_for_assistant: Some(result_text), diff --git a/src/crates/core/src/agentic/tools/implementations/tool-runtime/src/search/grep_search.rs b/src/crates/core/src/agentic/tools/implementations/tool-runtime/src/search/grep_search.rs index 62e822aa..694ffb0d 100644 --- a/src/crates/core/src/agentic/tools/implementations/tool-runtime/src/search/grep_search.rs +++ b/src/crates/core/src/agentic/tools/implementations/tool-runtime/src/search/grep_search.rs @@ -1,14 +1,18 @@ use log::{debug, info, warn}; use std::io; -use std::path::PathBuf; +use std::path::{Component, Path, PathBuf}; use std::sync::{Arc, 
Mutex}; +use std::time::SystemTime; -use globset::GlobBuilder; +use globset::{GlobBuilder, GlobMatcher}; use grep_regex::RegexMatcherBuilder; use grep_searcher::{Searcher, SearcherBuilder, Sink, SinkContext, SinkMatch}; use ignore::types::TypesBuilder; use ignore::WalkBuilder; +const MAX_DISPLAY_COLUMNS: usize = 500; +const VCS_DIRECTORIES_TO_EXCLUDE: &[&str] = &[".git", ".svn", ".hg", ".bzr", ".jj", ".sl"]; + /// Output mode enumeration #[derive(Debug, Clone, Copy)] pub enum OutputMode { @@ -45,6 +49,7 @@ struct GrepSink { after_context: usize, head_limit: Option, current_file: PathBuf, + display_base: Option, output: Arc>>, line_count: Arc>, match_count: Arc>, @@ -70,6 +75,7 @@ impl GrepSink { after_context: usize, head_limit: Option, current_file: PathBuf, + display_base: Option, ) -> Self { Self { output_mode, @@ -78,6 +84,7 @@ impl GrepSink { after_context, head_limit, current_file, + display_base, output: Arc::new(Mutex::new(Vec::new())), line_count: Arc::new(Mutex::new(0)), match_count: Arc::new(Mutex::new(0)), @@ -90,10 +97,6 @@ impl GrepSink { String::from_utf8_lossy(&output).to_string() } - fn get_line_count(&self) -> usize { - *lock_recover(&self.line_count, "line_count") - } - fn get_match_count(&self) -> usize { *lock_recover(&self.match_count, "match_count") } @@ -146,13 +149,23 @@ impl GrepSink { /// Format output line (rg style: only show line number and content, no path) fn format_line(&self, line_number: u64, line: &[u8], is_match: bool) -> Vec { - let line_str = String::from_utf8_lossy(line).trim_end().to_string(); + let mut line_str = String::from_utf8_lossy(line).trim_end().to_string(); + if line_str.chars().count() > MAX_DISPLAY_COLUMNS { + line_str = format!( + "{} [truncated]", + line_str + .chars() + .take(MAX_DISPLAY_COLUMNS) + .collect::() + ); + } let separator = if is_match { ":" } else { "-" }; + let path_prefix = relativize_display_path(&self.current_file, self.display_base.as_deref()); if self.show_line_numbers { - format!("{}{}{}", 
line_number, separator, line_str).into_bytes() + format!("{}{}{}:{}", path_prefix, separator, line_number, line_str).into_bytes() } else { - line_str.into_bytes() + format!("{}{}{}", path_prefix, separator, line_str).into_bytes() } } } @@ -176,8 +189,6 @@ impl Sink for GrepSink { self.write_line(&formatted); } OutputMode::FilesWithMatches => { - let path_str = self.current_file.display().to_string(); - self.write_line(path_str.as_bytes()); return Ok(false); // Only need first match, then stop } OutputMode::Count => { @@ -250,10 +261,14 @@ pub struct GrepOptions { pub after_context: Option, /// Limit output lines/files pub head_limit: Option, - /// Glob pattern filter - pub glob: Option, + /// Number of lines/files to skip before limiting output + pub offset: usize, + /// Glob pattern filters + pub globs: Vec, /// File type filter pub file_type: Option, + /// Prefer displaying paths relative to this base when possible + pub display_base: Option, } impl Default for GrepOptions { @@ -269,8 +284,10 @@ impl Default for GrepOptions { before_context: None, after_context: None, head_limit: None, - glob: None, + offset: 0, + globs: Vec::new(), file_type: None, + display_base: None, } } } @@ -334,8 +351,13 @@ impl GrepOptions { } /// Set glob pattern filter - pub fn glob(mut self, pattern: impl Into) -> Self { - self.glob = Some(pattern.into()); + pub fn offset(mut self, offset: usize) -> Self { + self.offset = offset; + self + } + + pub fn globs(mut self, patterns: Vec) -> Self { + self.globs = patterns; self } @@ -344,6 +366,11 @@ impl GrepOptions { self.file_type = Some(ftype.into()); self } + + pub fn display_base(mut self, base: impl Into) -> Self { + self.display_base = Some(base.into()); + self + } } /// Execute grep search @@ -367,11 +394,86 @@ impl GrepOptions { /// /// let result = grep_search(options, None, None); /// ``` +pub struct GrepSearchResult { + pub file_count: usize, + pub total_matches: usize, + pub result_text: String, + pub applied_limit: Option, + 
pub applied_offset: Option, +} + +fn is_vcs_path(path: &Path) -> bool { + path.components().any(|component| { + matches!( + component, + Component::Normal(name) + if VCS_DIRECTORIES_TO_EXCLUDE + .iter() + .any(|excluded| name.to_string_lossy() == *excluded) + ) + }) +} + +fn modified_time(path: &Path) -> SystemTime { + std::fs::metadata(path) + .and_then(|metadata| metadata.modified()) + .unwrap_or(SystemTime::UNIX_EPOCH) +} + +fn normalize_display_base(base: &str) -> String { + base.replace('\\', "/").trim_end_matches('/').to_string() +} + +fn relativize_display_path(path: &Path, display_base: Option<&str>) -> String { + let normalized = path.display().to_string().replace('\\', "/"); + let Some(base) = display_base else { + return normalized; + }; + + let normalized_base = normalize_display_base(base); + if normalized == normalized_base { + return ".".to_string(); + } + + if let Some(rest) = normalized.strip_prefix(&(normalized_base + "/")) { + return rest.to_string(); + } + + normalized +} + +fn apply_offset_limit( + items: Vec, + limit: Option, + offset: usize, +) -> (Vec, Option, Option) +where + T: Clone, +{ + let total_len = items.len(); + let sliced = match limit { + Some(limit) => items + .into_iter() + .skip(offset) + .take(limit) + .collect::>(), + None => items.into_iter().skip(offset).collect::>(), + }; + + let applied_limit = match limit { + Some(limit) if total_len.saturating_sub(offset) > limit => Some(limit), + _ => None, + }; + let applied_offset = if offset > 0 { Some(offset) } else { None }; + + (sliced, applied_limit, applied_offset) +} + pub fn grep_search( options: GrepOptions, progress_callback: Option, progress_interval_millis: Option, -) -> Result<(usize, usize, String), String> { +) -> Result { let search_path = &options.path; // Validate that search path exists @@ -392,8 +494,9 @@ pub fn grep_search( let output_mode = options.output_mode; let show_line_numbers = options.show_line_numbers; let head_limit = options.head_limit; - let 
glob_pattern = options.glob.as_deref(); + let offset = options.offset; let file_type = options.file_type.as_deref(); + let display_base = options.display_base.clone(); // Build regex matcher let matcher = RegexMatcherBuilder::new() @@ -419,17 +522,11 @@ pub fn grep_search( // Build walker let mut walk_builder = WalkBuilder::new(search_path); walk_builder - .hidden(true) // Ignore hidden files + .hidden(false) // Include hidden files, closer to Claude's rg --hidden .ignore(true) // Use .gitignore .git_ignore(true) - .git_global(false) - .git_exclude(false); - - // Add glob filter - if glob_pattern.is_some() { - walk_builder.add_custom_ignore_filename(".gitignore"); - // Glob filter needs to be handled manually during traversal - } + .git_global(true) + .git_exclude(true); // Add file type filter let mut types_builder = TypesBuilder::new(); @@ -479,23 +576,23 @@ pub fn grep_search( let walker = walk_builder.build(); // Pre-build glob matcher - let glob_matcher = if let Some(glob) = glob_pattern { - Some( + let glob_matchers = options + .globs + .iter() + .map(|glob| { GlobBuilder::new(glob) .build() - .map_err(|e| format!("Invalid glob pattern: {}", e))? 
- .compile_matcher(), - ) - } else { - None - }; + .map(|compiled| compiled.compile_matcher()) + .map_err(|e| format!("Invalid glob pattern: {}", e)) + }) + .collect::<Result<Vec<_>, String>>()?; // Collect all results - let mut all_output = Vec::new(); + let mut content_lines = Vec::new(); let mut total_matches = 0; - let mut total_lines = 0; let mut file_count = 0; let mut file_match_counts: Vec<(String, usize)> = Vec::new(); + let mut matched_files_with_mtime: Vec<(String, SystemTime)> = Vec::new(); // Progress tracking let mut files_processed = 0; @@ -528,40 +625,24 @@ pub fn grep_search( continue; } - // Filter using pre-built glob matcher - if let Some(ref matcher) = glob_matcher { - if !matcher.is_match(path) { - continue; - } + if is_vcs_path(path) { + continue; } - // Check head_limit - if let Some(limit) = head_limit { - if matches!(output_mode, OutputMode::FilesWithMatches) { - if file_count >= limit { - break; - } - } else if total_lines >= limit { - break; - } + if !glob_matchers.is_empty() + && !glob_matchers.iter().any(|matcher| matcher.is_match(path)) + { + continue; } - // Create sink - let remaining_limit = head_limit.map(|limit| { - if total_lines < limit { - limit - total_lines - } else { - 0 - } - }); - let sink = GrepSink::new( output_mode, show_line_numbers, before_context, after_context, - remaining_limit, + None, path.to_path_buf(), + display_base.clone(), ); // Execute search @@ -578,27 +659,25 @@ pub fn grep_search( OutputMode::Content => { let output = sink.get_output(); if !output.is_empty() { - // rg style: files separated by blank lines, file path on separate line at top - let mut file_output = String::new(); - if !all_output.is_empty() { - file_output.push('\n'); // Separate files with blank lines - } - // File path at top - file_output.push_str(&path.display().to_string()); - file_output.push('\n'); - file_output.push_str(&output); - all_output.push(file_output); + content_lines.extend( + output + .lines() + .filter(|line| !line.is_empty()) + 
.map(|line| line.to_string()), + ); } - total_lines += sink.get_line_count(); } OutputMode::FilesWithMatches => { - let output = sink.get_output(); - if !output.is_empty() { - all_output.push(output); - } + matched_files_with_mtime.push(( + relativize_display_path(path, display_base.as_deref()), + modified_time(path), + )); } OutputMode::Count => { - file_match_counts.push((path.display().to_string(), file_matches)); + file_match_counts.push(( + relativize_display_path(path, display_base.as_deref()), + file_matches, + )); } } } @@ -612,71 +691,117 @@ // Build result let result_text = match output_mode { OutputMode::Content => { - if all_output.is_empty() { + let (lines, applied_limit, applied_offset) = + apply_offset_limit(content_lines, head_limit, offset); + if lines.is_empty() { format!("No matches found for pattern '{}'", pattern) } else { - let limited = if let Some(limit) = head_limit { - format!(" (limited to {} lines)", limit) - } else { - String::new() - }; - format!( - "Found {} matches{}:\n{}", + return Ok(GrepSearchResult { + file_count, total_matches, - limited, - all_output.join("") - ) + result_text: lines.join("\n").trim_end_matches('\n').to_string(), + applied_limit, + applied_offset, + }); } } OutputMode::FilesWithMatches => { - if all_output.is_empty() { + matched_files_with_mtime + .sort_by(|left, right| right.1.cmp(&left.1).then_with(|| left.0.cmp(&right.0))); + let sorted_matches = matched_files_with_mtime + .into_iter() + .map(|(path, _)| path) + .collect::<Vec<_>>(); + let (matches, applied_limit, applied_offset) = + apply_offset_limit(sorted_matches, head_limit, offset); + + if matches.is_empty() { format!("No files found matching pattern '{}'", pattern) } else { - let limited = if let Some(limit) = head_limit { - format!(" (limited to {} files)", limit) - } else { - String::new() - }; - format!( - "Found {} file(s){}:\n{}", + return Ok(GrepSearchResult { file_count, - limited, - all_output.join("") - ) + total_matches, + 
result_text: matches.join("\n").trim_end_matches('\n').to_string(), + applied_limit, + applied_offset, + }); } } OutputMode::Count => { if file_match_counts.is_empty() { format!("No matches found for pattern '{}'", pattern) } else { - let count_list: Vec<(String, usize)> = if let Some(limit) = head_limit { - file_match_counts.into_iter().take(limit).collect() - } else { - file_match_counts - }; - - let limited = if head_limit.is_some() { - format!(" (limited to {} files)", count_list.len()) - } else { - String::new() - }; + let (count_list, applied_limit, applied_offset) = + apply_offset_limit(file_match_counts, head_limit, offset); let count_lines: Vec<String> = count_list .iter() .map(|(file, count)| format!("{}:{}", file, count)) .collect(); - format!( - "Total {} matches in {} files{}:\n{}", + return Ok(GrepSearchResult { + file_count, total_matches, - count_list.len(), - limited, - count_lines.join("\n") - ) + result_text: format!( + "Total {} matches in {} files:\n{}", + total_matches, + count_list.len(), + count_lines.join("\n") + ) + .trim_end_matches('\n') + .to_string(), + applied_limit, + applied_offset, + }); } } }; - let result_text = result_text.trim_end_matches("\n").to_string(); - Ok((file_count, total_matches, result_text)) + Ok(GrepSearchResult { + file_count, + total_matches, + result_text: result_text.trim_end_matches('\n').to_string(), + applied_limit: None, + applied_offset: if offset > 0 { Some(offset) } else { None }, + }) +} + +#[cfg(test)] +mod tests { + use super::{grep_search, GrepOptions, OutputMode}; + use std::fs; + use std::path::PathBuf; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn make_temp_dir(name: &str) -> PathBuf { + let unique = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_nanos(); + let dir = std::env::temp_dir().join(format!("bitfun-grep-search-{name}-{unique}")); + fs::create_dir_all(&dir).unwrap(); + dir + } + + #[test] + fn truncates_very_long_output_lines() { + let root = 
make_temp_dir("truncate"); + let file_path = root.join("sample.txt"); + let long_line = "a".repeat(600); + fs::write(&file_path, format!("{long_line}\n")).unwrap(); + + let result = grep_search( + GrepOptions::new("a+", root.to_string_lossy().to_string()) + .output_mode(OutputMode::Content) + .show_line_numbers(true) + .head_limit(10), + None, + None, + ) + .unwrap(); + + assert!(result.result_text.contains("[truncated]")); + + let _ = fs::remove_dir_all(root); + } }