diff --git a/src/commands/blame.rs b/src/commands/blame.rs index ed16713ac..f74804a8a 100644 --- a/src/commands/blame.rs +++ b/src/commands/blame.rs @@ -4,7 +4,7 @@ use crate::authorship::authorship_log_serialization::AuthorshipLog; use crate::authorship::prompt_utils::enrich_prompt_messages; use crate::authorship::working_log::CheckpointKind; use crate::error::GitAiError; -use crate::git::refs::get_reference_as_authorship_log_v3; +use crate::git::refs::batch_get_authorship_logs; use crate::git::repository::Repository; use crate::git::repository::{exec_git, exec_git_stdin}; #[cfg(windows)] @@ -799,23 +799,39 @@ impl Repository { file_path: &str, options: &GitAiBlameOptions, ) -> Result, GitAiError> { - // Cache authorship logs by commit SHA to avoid repeated lookups - let mut commit_authorship_cache: HashMap> = HashMap::new(); + // Batch-fetch all authorship logs upfront using 2 git calls instead of N + let unique_shas: Vec = { + let mut seen = std::collections::HashSet::new(); + hunks + .iter() + .filter_map(|h| { + if seen.insert(h.commit_sha.clone()) { + Some(h.commit_sha.clone()) + } else { + None + } + }) + .collect() + }; + let batch_logs = batch_get_authorship_logs(self, &unique_shas)?; + let commit_authorship_cache: HashMap> = unique_shas + .into_iter() + .map(|sha| { + let log = batch_logs.get(&sha).cloned(); + (sha, log) + }) + .collect(); + // Cache for foreign prompts to avoid repeated grepping let mut foreign_prompts_cache: HashMap> = HashMap::new(); let mut result_hunks: Vec = Vec::new(); for hunk in hunks { - // Get or fetch the authorship log for this commit - let authorship_log = if let Some(cached) = commit_authorship_cache.get(&hunk.commit_sha) - { - cached.clone() - } else { - let authorship = get_reference_as_authorship_log_v3(self, &hunk.commit_sha).ok(); - commit_authorship_cache.insert(hunk.commit_sha.clone(), authorship.clone()); - authorship - }; + // Get the authorship log from the pre-populated batch cache + let authorship_log = commit_authorship_cache + .get(&hunk.commit_sha) + .and_then(|opt| opt.clone()); // If we have an authorship log, look up human_author for each line if let Some(ref authorship_log) = authorship_log { @@ -913,21 +929,37 @@ fn overlay_ai_authorship( // Track which commits contain each prompt hash let mut prompt_commits: HashMap> = HashMap::new(); - // Group hunks by commit SHA to avoid repeated lookups - let mut commit_authorship_cache: HashMap> = HashMap::new(); + // Batch-fetch all authorship logs upfront using 2 git calls instead of N + let unique_shas: Vec = { + let mut seen = std::collections::HashSet::new(); + blame_hunks + .iter() + .filter_map(|h| { + if seen.insert(h.commit_sha.clone()) { + Some(h.commit_sha.clone()) + } else { + None + } + }) + .collect() + }; + let batch_logs = batch_get_authorship_logs(repo, &unique_shas)?; + let commit_authorship_cache: HashMap> = unique_shas + .into_iter() + .map(|sha| { + let log = batch_logs.get(&sha).cloned(); + (sha, log) + }) + .collect(); + // Cache for foreign prompts to avoid repeated grepping let mut foreign_prompts_cache: HashMap> = HashMap::new(); for hunk in blame_hunks { - // Check if we've already looked up this commit's authorship - let authorship_log = if let Some(cached) = commit_authorship_cache.get(&hunk.commit_sha) { - cached.clone() - } else { - // Try to get authorship log for this commit - let authorship = get_reference_as_authorship_log_v3(repo, &hunk.commit_sha).ok(); - commit_authorship_cache.insert(hunk.commit_sha.clone(), authorship.clone()); - authorship - }; + // Get the authorship log from the pre-populated batch cache + let authorship_log = commit_authorship_cache + .get(&hunk.commit_sha) + .and_then(|opt| opt.clone()); // If we have AI authorship data, look up the author for lines in this hunk if let Some(authorship_log) = authorship_log { diff --git a/src/commands/checkpoint.rs b/src/commands/checkpoint.rs index 943c150ae..ed682d8ff 100644 --- a/src/commands/checkpoint.rs +++ b/src/commands/checkpoint.rs @@ -660,27 +660,28 @@ fn get_all_tracked_files( )); let checkpoints_read_start = Instant::now(); - if let Ok(working_log_data) = working_log.read_all_checkpoints() { - for checkpoint in &working_log_data { - for entry in &checkpoint.entries { - // Normalize path separators to forward slashes - let normalized_path = normalize_to_posix(&entry.file); - // Filter out paths outside the repository to prevent git command failures - if !is_path_in_repo(&normalized_path) { - debug_log(&format!( - "Skipping checkpoint file outside repository: {}", - normalized_path - )); - continue; - } - if should_ignore_file_with_matcher(&normalized_path, ignore_matcher) { - continue; - } - if !files.contains(&normalized_path) { - // Check if it's a text file before adding - if is_text_file(working_log, &normalized_path) { - files.insert(normalized_path); - } + // Read checkpoints once and reuse the result for both file discovery and + // AI-checkpoint detection, avoiding a redundant second read_all_checkpoints() call. + let working_log_data = working_log.read_all_checkpoints().unwrap_or_default(); + for checkpoint in &working_log_data { + for entry in &checkpoint.entries { + // Normalize path separators to forward slashes + let normalized_path = normalize_to_posix(&entry.file); + // Filter out paths outside the repository to prevent git command failures + if !is_path_in_repo(&normalized_path) { + debug_log(&format!( + "Skipping checkpoint file outside repository: {}", + normalized_path + )); + continue; + } + if should_ignore_file_with_matcher(&normalized_path, ignore_matcher) { + continue; + } + if !files.contains(&normalized_path) { + // Check if it's a text file before adding + if is_text_file(working_log, &normalized_path) { + files.insert(normalized_path); } } } @@ -690,13 +691,9 @@ fn get_all_tracked_files( checkpoints_read_start.elapsed() )); - let has_ai_checkpoints = if let Ok(working_log_data) = working_log.read_all_checkpoints() { - working_log_data.iter().any(|checkpoint| { - checkpoint.kind == CheckpointKind::AiAgent || checkpoint.kind == CheckpointKind::AiTab - }) - } else { - false - }; + let has_ai_checkpoints = working_log_data.iter().any(|checkpoint| { + checkpoint.kind == CheckpointKind::AiAgent || checkpoint.kind == CheckpointKind::AiTab + }); let status_files_start = Instant::now(); let mut results_for_tracked_files = if is_pre_commit && !has_ai_checkpoints { diff --git a/src/commands/diff.rs b/src/commands/diff.rs index f66f8eceb..c72b72bdf 100644 --- a/src/commands/diff.rs +++ b/src/commands/diff.rs @@ -138,16 +138,20 @@ pub fn execute_diff( // Resolve commits to get from/to SHAs let (from_commit, to_commit) = match spec { DiffSpec::TwoCommit(start, end) => { - // Resolve both commits - let from = resolve_commit(repo, &start)?; - let to = resolve_commit(repo, &end)?; - (from, to) + // Resolve both commits in a single git rev-parse call + resolve_two_revs(repo, &start, &end)? } DiffSpec::SingleCommit(commit) => { - // Resolve the commit and its parent - let to = resolve_commit(repo, &commit)?; - let from = resolve_parent(repo, &to)?; - (from, to) + // Resolve the commit and its parent in a single git rev-parse call + let parent_rev = format!("{}^", commit); + match resolve_two_revs(repo, &parent_rev, &commit) { + Ok(pair) => pair, + Err(_) => { + // No parent (initial commit) - use empty tree hash + let to = resolve_commit(repo, &commit)?; + ("4b825dc642cb6eb9a060e54bf8d69288fbee4904".to_string(), to) + } + } } }; @@ -197,35 +201,31 @@ fn resolve_commit(repo: &Repository, rev: &str) -> Result { Ok(sha) } -fn resolve_parent(repo: &Repository, commit: &str) -> Result { - let parent_rev = format!("{}^", commit); - - // Try to resolve parent +/// Resolve two revisions in a single `git rev-parse` call instead of two +/// separate invocations, saving one subprocess spawn. +fn resolve_two_revs( + repo: &Repository, + rev_a: &str, + rev_b: &str, +) -> Result<(String, String), GitAiError> { let mut args = repo.global_args_for_exec(); args.push("rev-parse".to_string()); - args.push(parent_rev); - - let output = exec_git_with_profile(&args, InternalGitProfile::General); - - match output { - Ok(out) => { - let sha = String::from_utf8(out.stdout) - .map_err(|e| GitAiError::Generic(format!("Failed to parse parent SHA: {}", e)))? - .trim() - .to_string(); - - if sha.is_empty() { - // No parent, this is initial commit - use empty tree - Ok("4b825dc642cb6eb9a060e54bf8d69288fbee4904".to_string()) - } else { - Ok(sha) - } - } - Err(_) => { - // No parent, this is initial commit - use empty tree hash - Ok("4b825dc642cb6eb9a060e54bf8d69288fbee4904".to_string()) - } + args.push(rev_a.to_string()); + args.push(rev_b.to_string()); + + let output = exec_git_with_profile(&args, InternalGitProfile::General)?; + let stdout = String::from_utf8(output.stdout) + .map_err(|e| GitAiError::Generic(format!("Failed to parse rev-parse output: {}", e)))?; + let shas: Vec<&str> = stdout.trim().lines().collect(); + + if shas.len() < 2 || shas[0].is_empty() || shas[1].is_empty() { + return Err(GitAiError::Generic(format!( + "Could not resolve revisions: {} and {}", + rev_a, rev_b + ))); } + + Ok((shas[0].to_string(), shas[1].to_string())) } // ============================================================================ @@ -925,9 +925,16 @@ pub fn get_diff_json_filtered( commit_sha: &str, options: DiffOptions, ) -> Result { - // Resolve the commit to get from/to SHAs (parent -> commit) + // Resolve the commit and its parent in a single git rev-parse call let to_commit = resolve_commit(repo, commit_sha)?; - let from_commit = resolve_parent(repo, &to_commit)?; + let parent_rev = format!("{}^", to_commit); + let from_commit = match resolve_two_revs(repo, &parent_rev, &to_commit) { + Ok((from, _)) => from, + Err(_) => { + // No parent (initial commit) - use empty tree hash + "4b825dc642cb6eb9a060e54bf8d69288fbee4904".to_string() + } + }; // Get diff hunks with line numbers let hunks = get_diff_with_line_numbers(repo, &from_commit, &to_commit)?; diff --git a/src/git/authorship_traversal.rs b/src/git/authorship_traversal.rs index 9d1187fa7..7be68eb97 100644 --- a/src/git/authorship_traversal.rs +++ b/src/git/authorship_traversal.rs @@ -90,7 +90,7 @@ fn get_notes_list(global_args: &[String]) -> Result, GitAi Ok(mappings) } -fn batch_read_blobs_with_oids( +pub fn batch_read_blobs_with_oids( global_args: &[String], blob_oids: &[String], ) -> Result, GitAiError> { diff --git a/src/git/refs.rs b/src/git/refs.rs index 263d4571a..ced001130 100644 --- a/src/git/refs.rs +++ b/src/git/refs.rs @@ -301,6 +301,26 @@ pub fn get_commits_with_notes_from_list( } } + // Batch-fetch all authorship notes in 2 git calls instead of N individual + // `git notes show` calls. Uses note_blob_oids_for_commits (1 batched + // cat-file --batch-check) + batch_read_blobs_with_oids (1 batched + // cat-file --batch) to read all note contents at once. + let note_blob_map = note_blob_oids_for_commits(repo, commit_shas)?; + let unique_blob_oids: Vec = { + let mut oids: Vec = note_blob_map + .values() + .cloned() + .collect::>() + .into_iter() + .collect(); + oids.sort(); + oids + }; + let blob_contents = crate::git::authorship_traversal::batch_read_blobs_with_oids( + &repo.global_args_for_exec(), + &unique_blob_oids, + )?; + // Build the result Vec let mut result = Vec::new(); for sha in commit_shas { @@ -309,8 +329,17 @@ pub fn get_commits_with_notes_from_list( .cloned() .unwrap_or_else(|| "Unknown".to_string()); - // Check if this commit has a note by trying to show it - if let Some(authorship_log) = get_authorship(repo, sha) { + // Look up the note content from the batch-fetched data + let authorship_log = note_blob_map + .get(sha) + .and_then(|blob_oid| blob_contents.get(blob_oid)) + .and_then(|content| { + let mut log = AuthorshipLog::deserialize_from_string(content).ok()?; + log.metadata.base_commit_sha = sha.clone(); + Some(log) + }); + + if let Some(authorship_log) = authorship_log { result.push(CommitAuthorship::Log { sha: sha.clone(), git_author, @@ -378,6 +407,56 @@ pub fn get_reference_as_working_log( Ok(working_log) } +/// Batch-fetch authorship logs for multiple commits using 2 git calls +/// instead of N individual `git notes show` calls. +/// +/// Returns a map of commit SHA -> AuthorshipLog for commits that have valid v3 logs. +pub fn batch_get_authorship_logs( + repo: &Repository, + commit_shas: &[String], +) -> Result, GitAiError> { + if commit_shas.is_empty() { + return Ok(HashMap::new()); + } + + // 1 batched cat-file --batch-check to resolve note blob OIDs + let note_blob_map = note_blob_oids_for_commits(repo, commit_shas)?; + if note_blob_map.is_empty() { + return Ok(HashMap::new()); + } + + let unique_blob_oids: Vec = { + let mut oids: Vec = note_blob_map + .values() + .cloned() + .collect::>() + .into_iter() + .collect(); + oids.sort(); + oids + }; + + // 1 batched cat-file --batch to read all note contents + let blob_contents = crate::git::authorship_traversal::batch_read_blobs_with_oids( + &repo.global_args_for_exec(), + &unique_blob_oids, + )?; + + let mut result = HashMap::new(); + for sha in commit_shas { + if let Some(blob_oid) = note_blob_map.get(sha) + && let Some(content) = blob_contents.get(blob_oid) + && let Ok(mut log) = AuthorshipLog::deserialize_from_string(content) + && log.metadata.schema_version == AUTHORSHIP_LOG_VERSION + { + log.metadata.base_commit_sha = sha.clone(); + result.insert(sha.clone(), log); + } + } + + Ok(result) +} + pub fn get_reference_as_authorship_log_v3( repo: &Repository, commit_sha: &str, diff --git a/src/git/status.rs b/src/git/status.rs index a68fd1ea3..b2b3f8ded 100644 --- a/src/git/status.rs +++ b/src/git/status.rs @@ -120,17 +120,28 @@ impl Repository { pathspecs: Option<&HashSet>, skip_untracked: bool, ) -> Result, GitAiError> { - let staged_filenames = self.get_staged_filenames()?; + // When no pathspecs are provided, run a single full `git status + // --porcelain=v2` scan. Extract staged filenames from the porcelain v2 + // output (the XY field already encodes staging state) instead of making + // a separate `git diff --cached` call. This saves 1 git exec call. + // + // When pathspecs ARE provided, we still need a separate staged-filenames + // call because staged files outside the pathspec set must be included. + let staged_filenames = if pathspecs.is_some() { + self.get_staged_filenames()? + } else { + HashSet::new() + }; let combined_pathspecs: HashSet = if let Some(paths) = pathspecs { staged_filenames.union(paths).cloned().collect() } else { + // No pathspecs — full scan will capture everything including staged. staged_filenames }; - // When no explicit pathspecs are provided and nothing is staged, - // we still need a full status scan to capture unstaged changes. - let should_full_scan = pathspecs.is_none() && combined_pathspecs.is_empty(); + // When no explicit pathspecs are provided we always do a full scan. + let should_full_scan = pathspecs.is_none(); if combined_pathspecs.is_empty() && !should_full_scan { return Ok(Vec::new()); }