From f49731e872851538c7894e2954daf00a83cff47b Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 05:54:36 +0000 Subject: [PATCH 1/2] perf: batch git exec calls for notes, rev-parse, status, and checkpoints - Fix #1: get_commits_with_notes_from_list uses batch cat-file instead of N individual git notes show calls - Fix #2: populate_ai_human_authors and overlay_ai_authorship in blame batch-fetch all authorship notes upfront - Fix #4: resolve_commit + resolve_parent combined into single git rev-parse call - Fix #5: status() skips separate get_staged_filenames() call when no pathspecs provided - Fix #7: read_all_checkpoints() called once instead of twice in get_all_tracked_files - Add batch_get_authorship_logs() helper to refs.rs for reuse across codebase - Make batch_read_blobs_with_oids() public for cross-module access Co-Authored-By: Sasha Varlamov --- src/commands/blame.rs | 78 ++++++++++++++++++++++--------- src/commands/checkpoint.rs | 53 ++++++++++----------- src/commands/diff.rs | 82 ++++++++++++++++++--------------- src/git/authorship_traversal.rs | 2 +- src/git/refs.rs | 78 ++++++++++++++++++++++++++++++- src/git/status.rs | 19 ++++++-- 6 files changed, 218 insertions(+), 94 deletions(-) diff --git a/src/commands/blame.rs b/src/commands/blame.rs index 41b4c641b..86125003f 100644 --- a/src/commands/blame.rs +++ b/src/commands/blame.rs @@ -4,7 +4,7 @@ use crate::authorship::authorship_log_serialization::AuthorshipLog; use crate::authorship::prompt_utils::enrich_prompt_messages; use crate::authorship::working_log::CheckpointKind; use crate::error::GitAiError; -use crate::git::refs::get_reference_as_authorship_log_v3; +use crate::git::refs::batch_get_authorship_logs; use crate::git::repository::Repository; use crate::git::repository::{exec_git, exec_git_stdin}; #[cfg(windows)] @@ -714,23 +714,39 @@ impl Repository { file_path: &str, options: &GitAiBlameOptions, ) -> Result, GitAiError> { - // Cache authorship logs by commit SHA to avoid repeated lookups - let mut commit_authorship_cache: HashMap> = HashMap::new(); + // Batch-fetch all authorship logs upfront using 2 git calls instead of N + let unique_shas: Vec = { + let mut seen = std::collections::HashSet::new(); + hunks + .iter() + .filter_map(|h| { + if seen.insert(h.commit_sha.clone()) { + Some(h.commit_sha.clone()) + } else { + None + } + }) + .collect() + }; + let batch_logs = batch_get_authorship_logs(self, &unique_shas)?; + let commit_authorship_cache: HashMap> = unique_shas + .into_iter() + .map(|sha| { + let log = batch_logs.get(&sha).cloned(); + (sha, log) + }) + .collect(); + // Cache for foreign prompts to avoid repeated grepping let mut foreign_prompts_cache: HashMap> = HashMap::new(); let mut result_hunks: Vec = Vec::new(); for hunk in hunks { - // Get or fetch the authorship log for this commit - let authorship_log = if let Some(cached) = commit_authorship_cache.get(&hunk.commit_sha) - { - cached.clone() - } else { - let authorship = get_reference_as_authorship_log_v3(self, &hunk.commit_sha).ok(); - commit_authorship_cache.insert(hunk.commit_sha.clone(), authorship.clone()); - authorship - }; + // Get the authorship log from the pre-populated batch cache + let authorship_log = commit_authorship_cache + .get(&hunk.commit_sha) + .and_then(|opt| opt.clone()); // If we have an authorship log, look up human_author for each line if let Some(ref authorship_log) = authorship_log { @@ -828,21 +844,37 @@ fn overlay_ai_authorship( // Track which commits contain each prompt hash let mut prompt_commits: HashMap> = HashMap::new(); - // Group hunks by commit SHA to avoid repeated lookups - let mut commit_authorship_cache: HashMap> = HashMap::new(); + // Batch-fetch all authorship logs upfront using 2 git calls instead of N + let unique_shas: Vec = { + let mut seen = std::collections::HashSet::new(); + blame_hunks + .iter() + .filter_map(|h| { + if seen.insert(h.commit_sha.clone()) { + Some(h.commit_sha.clone()) + } else { + None + } + }) + .collect() + }; + let batch_logs = batch_get_authorship_logs(repo, &unique_shas)?; + let commit_authorship_cache: HashMap> = unique_shas + .into_iter() + .map(|sha| { + let log = batch_logs.get(&sha).cloned(); + (sha, log) + }) + .collect(); + // Cache for foreign prompts to avoid repeated grepping let mut foreign_prompts_cache: HashMap> = HashMap::new(); for hunk in blame_hunks { - // Check if we've already looked up this commit's authorship - let authorship_log = if let Some(cached) = commit_authorship_cache.get(&hunk.commit_sha) { - cached.clone() - } else { - // Try to get authorship log for this commit - let authorship = get_reference_as_authorship_log_v3(repo, &hunk.commit_sha).ok(); - commit_authorship_cache.insert(hunk.commit_sha.clone(), authorship.clone()); - authorship - }; + // Get the authorship log from the pre-populated batch cache + let authorship_log = commit_authorship_cache + .get(&hunk.commit_sha) + .and_then(|opt| opt.clone()); // If we have AI authorship data, look up the author for lines in this hunk if let Some(authorship_log) = authorship_log { diff --git a/src/commands/checkpoint.rs b/src/commands/checkpoint.rs index 9cf73cff9..bb24dfade 100644 --- a/src/commands/checkpoint.rs +++ b/src/commands/checkpoint.rs @@ -660,27 +660,28 @@ fn get_all_tracked_files( )); let checkpoints_read_start = Instant::now(); - if let Ok(working_log_data) = working_log.read_all_checkpoints() { - for checkpoint in &working_log_data { - for entry in &checkpoint.entries { - // Normalize path separators to forward slashes - let normalized_path = normalize_to_posix(&entry.file); - // Filter out paths outside the repository to prevent git command failures - if !is_path_in_repo(&normalized_path) { - debug_log(&format!( - "Skipping checkpoint file outside repository: {}", - normalized_path - )); - continue; - } - if should_ignore_file_with_matcher(&normalized_path, ignore_matcher) { - continue; - } - if !files.contains(&normalized_path) { - // Check if it's a text file before adding - if is_text_file(working_log, &normalized_path) { - files.insert(normalized_path); - } + // Read checkpoints once and reuse the result for both file discovery and + // AI-checkpoint detection, avoiding a redundant second read_all_checkpoints() call. + let working_log_data = working_log.read_all_checkpoints().unwrap_or_default(); + for checkpoint in &working_log_data { + for entry in &checkpoint.entries { + // Normalize path separators to forward slashes + let normalized_path = normalize_to_posix(&entry.file); + // Filter out paths outside the repository to prevent git command failures + if !is_path_in_repo(&normalized_path) { + debug_log(&format!( + "Skipping checkpoint file outside repository: {}", + normalized_path + )); + continue; + } + if should_ignore_file_with_matcher(&normalized_path, ignore_matcher) { + continue; + } + if !files.contains(&normalized_path) { + // Check if it's a text file before adding + if is_text_file(working_log, &normalized_path) { + files.insert(normalized_path); } } } @@ -690,13 +691,9 @@ fn get_all_tracked_files( checkpoints_read_start.elapsed() )); - let has_ai_checkpoints = if let Ok(working_log_data) = working_log.read_all_checkpoints() { - working_log_data.iter().any(|checkpoint| { - checkpoint.kind == CheckpointKind::AiAgent || checkpoint.kind == CheckpointKind::AiTab - }) - } else { - false - }; + let has_ai_checkpoints = working_log_data.iter().any(|checkpoint| { + checkpoint.kind == CheckpointKind::AiAgent || checkpoint.kind == CheckpointKind::AiTab + }); let status_files_start = Instant::now(); let mut results_for_tracked_files = if is_pre_commit && !has_ai_checkpoints { diff --git a/src/commands/diff.rs b/src/commands/diff.rs index 346801c38..b31937956 100644 --- a/src/commands/diff.rs +++ b/src/commands/diff.rs @@ -138,16 +138,23 @@ pub fn execute_diff( // Resolve commits to get from/to SHAs let (from_commit, to_commit) = match spec { DiffSpec::TwoCommit(start, end) => { - // Resolve both commits - let from = resolve_commit(repo, &start)?; - let to = resolve_commit(repo, &end)?; - (from, to) + // Resolve both commits in a single git rev-parse call + resolve_two_revs(repo, &start, &end)? } DiffSpec::SingleCommit(commit) => { - // Resolve the commit and its parent - let to = resolve_commit(repo, &commit)?; - let from = resolve_parent(repo, &to)?; - (from, to) + // Resolve the commit and its parent in a single git rev-parse call + let parent_rev = format!("{}^", commit); + match resolve_two_revs(repo, &parent_rev, &commit) { + Ok(pair) => pair, + Err(_) => { + // No parent (initial commit) - use empty tree hash + let to = resolve_commit(repo, &commit)?; + ( + "4b825dc642cb6eb9a060e54bf8d69288fbee4904".to_string(), + to, + ) + } + } } }; @@ -197,35 +204,31 @@ fn resolve_commit(repo: &Repository, rev: &str) -> Result { Ok(sha) } -fn resolve_parent(repo: &Repository, commit: &str) -> Result { - let parent_rev = format!("{}^", commit); - - // Try to resolve parent +/// Resolve two revisions in a single `git rev-parse` call instead of two +/// separate invocations, saving one subprocess spawn. +fn resolve_two_revs( + repo: &Repository, + rev_a: &str, + rev_b: &str, +) -> Result<(String, String), GitAiError> { let mut args = repo.global_args_for_exec(); args.push("rev-parse".to_string()); - args.push(parent_rev); - - let output = exec_git(&args); - - match output { - Ok(out) => { - let sha = String::from_utf8(out.stdout) - .map_err(|e| GitAiError::Generic(format!("Failed to parse parent SHA: {}", e)))? - .trim() - .to_string(); - - if sha.is_empty() { - // No parent, this is initial commit - use empty tree - Ok("4b825dc642cb6eb9a060e54bf8d69288fbee4904".to_string()) - } else { - Ok(sha) - } - } - Err(_) => { - // No parent, this is initial commit - use empty tree hash - Ok("4b825dc642cb6eb9a060e54bf8d69288fbee4904".to_string()) - } + args.push(rev_a.to_string()); + args.push(rev_b.to_string()); + + let output = exec_git(&args)?; + let stdout = String::from_utf8(output.stdout) + .map_err(|e| GitAiError::Generic(format!("Failed to parse rev-parse output: {}", e)))?; + let shas: Vec<&str> = stdout.trim().lines().collect(); + + if shas.len() < 2 || shas[0].is_empty() || shas[1].is_empty() { + return Err(GitAiError::Generic(format!( + "Could not resolve revisions: {} and {}", + rev_a, rev_b + ))); } + + Ok((shas[0].to_string(), shas[1].to_string())) } // ============================================================================ @@ -948,9 +951,16 @@ pub fn get_diff_json_filtered( commit_sha: &str, options: DiffOptions, ) -> Result { - // Resolve the commit to get from/to SHAs (parent -> commit) + // Resolve the commit and its parent in a single git rev-parse call let to_commit = resolve_commit(repo, commit_sha)?; - let from_commit = resolve_parent(repo, &to_commit)?; + let parent_rev = format!("{}^", to_commit); + let from_commit = match resolve_two_revs(repo, &parent_rev, &to_commit) { + Ok((from, _)) => from, + Err(_) => { + // No parent (initial commit) - use empty tree hash + "4b825dc642cb6eb9a060e54bf8d69288fbee4904".to_string() + } + }; // Get diff hunks with line numbers let hunks = get_diff_with_line_numbers(repo, &from_commit, &to_commit)?; diff --git a/src/git/authorship_traversal.rs b/src/git/authorship_traversal.rs index 9d1187fa7..7be68eb97 100644 --- a/src/git/authorship_traversal.rs +++ b/src/git/authorship_traversal.rs @@ -90,7 +90,7 @@ fn get_notes_list(global_args: &[String]) -> Result, GitAi Ok(mappings) } -fn batch_read_blobs_with_oids( +pub fn batch_read_blobs_with_oids( global_args: &[String], blob_oids: &[String], ) -> Result, GitAiError> { diff --git a/src/git/refs.rs b/src/git/refs.rs index 263d4571a..055f65b66 100644 --- a/src/git/refs.rs +++ b/src/git/refs.rs @@ -301,6 +301,21 @@ pub fn get_commits_with_notes_from_list( } } + // Batch-fetch all authorship notes in 2 git calls instead of N individual + // `git notes show` calls. Uses note_blob_oids_for_commits (1 batched + // cat-file --batch-check) + batch_read_blobs_with_oids (1 batched + // cat-file --batch) to read all note contents at once. + let note_blob_map = note_blob_oids_for_commits(repo, commit_shas)?; + let unique_blob_oids: Vec = { + let mut oids: Vec = note_blob_map.values().cloned().collect::>().into_iter().collect(); + oids.sort(); + oids + }; + let blob_contents = crate::git::authorship_traversal::batch_read_blobs_with_oids( + &repo.global_args_for_exec(), + &unique_blob_oids, + )?; + // Build the result Vec let mut result = Vec::new(); for sha in commit_shas { @@ -309,8 +324,17 @@ pub fn get_commits_with_notes_from_list( .cloned() .unwrap_or_else(|| "Unknown".to_string()); - // Check if this commit has a note by trying to show it - if let Some(authorship_log) = get_authorship(repo, sha) { + // Look up the note content from the batch-fetched data + let authorship_log = note_blob_map + .get(sha) + .and_then(|blob_oid| blob_contents.get(blob_oid)) + .and_then(|content| { + let mut log = AuthorshipLog::deserialize_from_string(content).ok()?; + log.metadata.base_commit_sha = sha.clone(); + Some(log) + }); + + if let Some(authorship_log) = authorship_log { result.push(CommitAuthorship::Log { sha: sha.clone(), git_author, @@ -378,6 +402,56 @@ pub fn get_reference_as_working_log( Ok(working_log) } +/// Batch-fetch authorship logs for multiple commits using 2 git calls +/// instead of N individual `git notes show` calls. +/// +/// Returns a map of commit SHA -> AuthorshipLog for commits that have valid v3 logs. +pub fn batch_get_authorship_logs( + repo: &Repository, + commit_shas: &[String], +) -> Result, GitAiError> { + if commit_shas.is_empty() { + return Ok(HashMap::new()); + } + + // 1 batched cat-file --batch-check to resolve note blob OIDs + let note_blob_map = note_blob_oids_for_commits(repo, commit_shas)?; + if note_blob_map.is_empty() { + return Ok(HashMap::new()); + } + + let unique_blob_oids: Vec = { + let mut oids: Vec = note_blob_map + .values() + .cloned() + .collect::>() + .into_iter() + .collect(); + oids.sort(); + oids + }; + + // 1 batched cat-file --batch to read all note contents + let blob_contents = crate::git::authorship_traversal::batch_read_blobs_with_oids( + &repo.global_args_for_exec(), + &unique_blob_oids, + )?; + + let mut result = HashMap::new(); + for sha in commit_shas { + if let Some(blob_oid) = note_blob_map.get(sha) + && let Some(content) = blob_contents.get(blob_oid) + && let Ok(mut log) = AuthorshipLog::deserialize_from_string(content) + && log.metadata.schema_version == AUTHORSHIP_LOG_VERSION + { + log.metadata.base_commit_sha = sha.clone(); + result.insert(sha.clone(), log); + } + } + + Ok(result) +} + pub fn get_reference_as_authorship_log_v3( repo: &Repository, commit_sha: &str, diff --git a/src/git/status.rs b/src/git/status.rs index 3ee2c0160..ee8473ea4 100644 --- a/src/git/status.rs +++ b/src/git/status.rs @@ -119,17 +119,28 @@ impl Repository { pathspecs: Option<&HashSet>, skip_untracked: bool, ) -> Result, GitAiError> { - let staged_filenames = self.get_staged_filenames()?; + // When no pathspecs are provided, run a single full `git status + // --porcelain=v2` scan. Extract staged filenames from the porcelain v2 + // output (the XY field already encodes staging state) instead of making + // a separate `git diff --cached` call. This saves 1 git exec call. + // + // When pathspecs ARE provided, we still need a separate staged-filenames + // call because staged files outside the pathspec set must be included. + let staged_filenames = if pathspecs.is_some() { + self.get_staged_filenames()? + } else { + HashSet::new() + }; let combined_pathspecs: HashSet = if let Some(paths) = pathspecs { staged_filenames.union(paths).cloned().collect() } else { + // No pathspecs — full scan will capture everything including staged. staged_filenames }; - // When no explicit pathspecs are provided and nothing is staged, - // we still need a full status scan to capture unstaged changes. - let should_full_scan = pathspecs.is_none() && combined_pathspecs.is_empty(); + // When no explicit pathspecs are provided we always do a full scan. + let should_full_scan = pathspecs.is_none(); if combined_pathspecs.is_empty() && !should_full_scan { return Ok(Vec::new()); } From 87cd07357c429075d88dddfee5055ae7837aa777 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 05:56:53 +0000 Subject: [PATCH 2/2] style: fix cargo fmt formatting issues Co-Authored-By: Sasha Varlamov --- src/commands/diff.rs | 5 +---- src/git/refs.rs | 7 ++++++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/commands/diff.rs b/src/commands/diff.rs index b31937956..1a2889d51 100644 --- a/src/commands/diff.rs +++ b/src/commands/diff.rs @@ -149,10 +149,7 @@ pub fn execute_diff( Err(_) => { // No parent (initial commit) - use empty tree hash let to = resolve_commit(repo, &commit)?; - ( - "4b825dc642cb6eb9a060e54bf8d69288fbee4904".to_string(), - to, - ) + ("4b825dc642cb6eb9a060e54bf8d69288fbee4904".to_string(), to) } } } diff --git a/src/git/refs.rs b/src/git/refs.rs index 055f65b66..ced001130 100644 --- a/src/git/refs.rs +++ b/src/git/refs.rs @@ -307,7 +307,12 @@ pub fn get_commits_with_notes_from_list( // cat-file --batch) to read all note contents at once. let note_blob_map = note_blob_oids_for_commits(repo, commit_shas)?; let unique_blob_oids: Vec = { - let mut oids: Vec = note_blob_map.values().cloned().collect::>().into_iter().collect(); + let mut oids: Vec = note_blob_map + .values() + .cloned() + .collect::>() + .into_iter() + .collect(); oids.sort(); oids };