Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 55 additions & 23 deletions src/commands/blame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::authorship::authorship_log_serialization::AuthorshipLog;
use crate::authorship::prompt_utils::enrich_prompt_messages;
use crate::authorship::working_log::CheckpointKind;
use crate::error::GitAiError;
use crate::git::refs::get_reference_as_authorship_log_v3;
use crate::git::refs::batch_get_authorship_logs;
use crate::git::repository::Repository;
use crate::git::repository::{exec_git, exec_git_stdin};
#[cfg(windows)]
Expand Down Expand Up @@ -799,23 +799,39 @@ impl Repository {
file_path: &str,
options: &GitAiBlameOptions,
) -> Result<Vec<BlameHunk>, GitAiError> {
// Cache authorship logs by commit SHA to avoid repeated lookups
let mut commit_authorship_cache: HashMap<String, Option<AuthorshipLog>> = HashMap::new();
// Batch-fetch all authorship logs upfront using 2 git calls instead of N
let unique_shas: Vec<String> = {
let mut seen = std::collections::HashSet::new();
hunks
.iter()
.filter_map(|h| {
if seen.insert(h.commit_sha.clone()) {
Some(h.commit_sha.clone())
} else {
None
}
})
.collect()
};
let batch_logs = batch_get_authorship_logs(self, &unique_shas)?;
let commit_authorship_cache: HashMap<String, Option<AuthorshipLog>> = unique_shas
.into_iter()
.map(|sha| {
let log = batch_logs.get(&sha).cloned();
(sha, log)
})
.collect();

// Cache for foreign prompts to avoid repeated grepping
let mut foreign_prompts_cache: HashMap<String, Option<PromptRecord>> = HashMap::new();

let mut result_hunks: Vec<BlameHunk> = Vec::new();

for hunk in hunks {
// Get or fetch the authorship log for this commit
let authorship_log = if let Some(cached) = commit_authorship_cache.get(&hunk.commit_sha)
{
cached.clone()
} else {
let authorship = get_reference_as_authorship_log_v3(self, &hunk.commit_sha).ok();
commit_authorship_cache.insert(hunk.commit_sha.clone(), authorship.clone());
authorship
};
// Get the authorship log from the pre-populated batch cache
let authorship_log = commit_authorship_cache
.get(&hunk.commit_sha)
.and_then(|opt| opt.clone());

// If we have an authorship log, look up human_author for each line
if let Some(ref authorship_log) = authorship_log {
Expand Down Expand Up @@ -913,21 +929,37 @@ fn overlay_ai_authorship(
// Track which commits contain each prompt hash
let mut prompt_commits: HashMap<String, std::collections::HashSet<String>> = HashMap::new();

// Group hunks by commit SHA to avoid repeated lookups
let mut commit_authorship_cache: HashMap<String, Option<AuthorshipLog>> = HashMap::new();
// Batch-fetch all authorship logs upfront using 2 git calls instead of N
let unique_shas: Vec<String> = {
let mut seen = std::collections::HashSet::new();
blame_hunks
.iter()
.filter_map(|h| {
if seen.insert(h.commit_sha.clone()) {
Some(h.commit_sha.clone())
} else {
None
}
})
.collect()
};
let batch_logs = batch_get_authorship_logs(repo, &unique_shas)?;
let commit_authorship_cache: HashMap<String, Option<AuthorshipLog>> = unique_shas
.into_iter()
.map(|sha| {
let log = batch_logs.get(&sha).cloned();
(sha, log)
})
.collect();

// Cache for foreign prompts to avoid repeated grepping
let mut foreign_prompts_cache: HashMap<String, Option<PromptRecord>> = HashMap::new();

for hunk in blame_hunks {
// Check if we've already looked up this commit's authorship
let authorship_log = if let Some(cached) = commit_authorship_cache.get(&hunk.commit_sha) {
cached.clone()
} else {
// Try to get authorship log for this commit
let authorship = get_reference_as_authorship_log_v3(repo, &hunk.commit_sha).ok();
commit_authorship_cache.insert(hunk.commit_sha.clone(), authorship.clone());
authorship
};
// Get the authorship log from the pre-populated batch cache
let authorship_log = commit_authorship_cache
.get(&hunk.commit_sha)
.and_then(|opt| opt.clone());

// If we have AI authorship data, look up the author for lines in this hunk
if let Some(authorship_log) = authorship_log {
Expand Down
53 changes: 25 additions & 28 deletions src/commands/checkpoint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -660,27 +660,28 @@ fn get_all_tracked_files(
));

let checkpoints_read_start = Instant::now();
if let Ok(working_log_data) = working_log.read_all_checkpoints() {
for checkpoint in &working_log_data {
for entry in &checkpoint.entries {
// Normalize path separators to forward slashes
let normalized_path = normalize_to_posix(&entry.file);
// Filter out paths outside the repository to prevent git command failures
if !is_path_in_repo(&normalized_path) {
debug_log(&format!(
"Skipping checkpoint file outside repository: {}",
normalized_path
));
continue;
}
if should_ignore_file_with_matcher(&normalized_path, ignore_matcher) {
continue;
}
if !files.contains(&normalized_path) {
// Check if it's a text file before adding
if is_text_file(working_log, &normalized_path) {
files.insert(normalized_path);
}
// Read checkpoints once and reuse the result for both file discovery and
// AI-checkpoint detection, avoiding a redundant second read_all_checkpoints() call.
let working_log_data = working_log.read_all_checkpoints().unwrap_or_default();
for checkpoint in &working_log_data {
for entry in &checkpoint.entries {
// Normalize path separators to forward slashes
let normalized_path = normalize_to_posix(&entry.file);
// Filter out paths outside the repository to prevent git command failures
if !is_path_in_repo(&normalized_path) {
debug_log(&format!(
"Skipping checkpoint file outside repository: {}",
normalized_path
));
continue;
}
if should_ignore_file_with_matcher(&normalized_path, ignore_matcher) {
continue;
}
if !files.contains(&normalized_path) {
// Check if it's a text file before adding
if is_text_file(working_log, &normalized_path) {
files.insert(normalized_path);
}
}
}
Expand All @@ -690,13 +691,9 @@ fn get_all_tracked_files(
checkpoints_read_start.elapsed()
));

let has_ai_checkpoints = if let Ok(working_log_data) = working_log.read_all_checkpoints() {
working_log_data.iter().any(|checkpoint| {
checkpoint.kind == CheckpointKind::AiAgent || checkpoint.kind == CheckpointKind::AiTab
})
} else {
false
};
let has_ai_checkpoints = working_log_data.iter().any(|checkpoint| {
checkpoint.kind == CheckpointKind::AiAgent || checkpoint.kind == CheckpointKind::AiTab
});

let status_files_start = Instant::now();
let mut results_for_tracked_files = if is_pre_commit && !has_ai_checkpoints {
Expand Down
79 changes: 43 additions & 36 deletions src/commands/diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,16 +138,20 @@ pub fn execute_diff(
// Resolve commits to get from/to SHAs
let (from_commit, to_commit) = match spec {
DiffSpec::TwoCommit(start, end) => {
// Resolve both commits
let from = resolve_commit(repo, &start)?;
let to = resolve_commit(repo, &end)?;
(from, to)
// Resolve both commits in a single git rev-parse call
resolve_two_revs(repo, &start, &end)?
}
DiffSpec::SingleCommit(commit) => {
// Resolve the commit and its parent
let to = resolve_commit(repo, &commit)?;
let from = resolve_parent(repo, &to)?;
(from, to)
// Resolve the commit and its parent in a single git rev-parse call
let parent_rev = format!("{}^", commit);
match resolve_two_revs(repo, &parent_rev, &commit) {
Ok(pair) => pair,
Err(_) => {
// No parent (initial commit) - use empty tree hash
let to = resolve_commit(repo, &commit)?;
("4b825dc642cb6eb9a060e54bf8d69288fbee4904".to_string(), to)
}
}
}
};

Expand Down Expand Up @@ -197,35 +201,31 @@ fn resolve_commit(repo: &Repository, rev: &str) -> Result<String, GitAiError> {
Ok(sha)
}

fn resolve_parent(repo: &Repository, commit: &str) -> Result<String, GitAiError> {
let parent_rev = format!("{}^", commit);

// Try to resolve parent
/// Resolve two revisions in a single `git rev-parse` call instead of two
/// separate invocations, saving one subprocess spawn.
fn resolve_two_revs(
repo: &Repository,
rev_a: &str,
rev_b: &str,
) -> Result<(String, String), GitAiError> {
let mut args = repo.global_args_for_exec();
args.push("rev-parse".to_string());
args.push(parent_rev);

let output = exec_git_with_profile(&args, InternalGitProfile::General);

match output {
Ok(out) => {
let sha = String::from_utf8(out.stdout)
.map_err(|e| GitAiError::Generic(format!("Failed to parse parent SHA: {}", e)))?
.trim()
.to_string();

if sha.is_empty() {
// No parent, this is initial commit - use empty tree
Ok("4b825dc642cb6eb9a060e54bf8d69288fbee4904".to_string())
} else {
Ok(sha)
}
}
Err(_) => {
// No parent, this is initial commit - use empty tree hash
Ok("4b825dc642cb6eb9a060e54bf8d69288fbee4904".to_string())
}
args.push(rev_a.to_string());
args.push(rev_b.to_string());

let output = exec_git_with_profile(&args, InternalGitProfile::General)?;
let stdout = String::from_utf8(output.stdout)
.map_err(|e| GitAiError::Generic(format!("Failed to parse rev-parse output: {}", e)))?;
let shas: Vec<&str> = stdout.trim().lines().collect();

if shas.len() < 2 || shas[0].is_empty() || shas[1].is_empty() {
return Err(GitAiError::Generic(format!(
"Could not resolve revisions: {} and {}",
rev_a, rev_b
)));
}

Ok((shas[0].to_string(), shas[1].to_string()))
}

// ============================================================================
Expand Down Expand Up @@ -925,9 +925,16 @@ pub fn get_diff_json_filtered(
commit_sha: &str,
options: DiffOptions,
) -> Result<DiffJson, GitAiError> {
// Resolve the commit to get from/to SHAs (parent -> commit)
// Resolve the commit and its parent in a single git rev-parse call
let to_commit = resolve_commit(repo, commit_sha)?;
let from_commit = resolve_parent(repo, &to_commit)?;
let parent_rev = format!("{}^", to_commit);
let from_commit = match resolve_two_revs(repo, &parent_rev, &to_commit) {
Ok((from, _)) => from,
Err(_) => {
// No parent (initial commit) - use empty tree hash
"4b825dc642cb6eb9a060e54bf8d69288fbee4904".to_string()
}
};

// Get diff hunks with line numbers
let hunks = get_diff_with_line_numbers(repo, &from_commit, &to_commit)?;
Expand Down
2 changes: 1 addition & 1 deletion src/git/authorship_traversal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ fn get_notes_list(global_args: &[String]) -> Result<Vec<(String, String)>, GitAi
Ok(mappings)
}

fn batch_read_blobs_with_oids(
pub fn batch_read_blobs_with_oids(
global_args: &[String],
blob_oids: &[String],
) -> Result<std::collections::HashMap<String, String>, GitAiError> {
Expand Down
83 changes: 81 additions & 2 deletions src/git/refs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,26 @@ pub fn get_commits_with_notes_from_list(
}
}

// Batch-fetch all authorship notes in 2 git calls instead of N individual
// `git notes show` calls. Uses note_blob_oids_for_commits (1 batched
// cat-file --batch-check) + batch_read_blobs_with_oids (1 batched
// cat-file --batch) to read all note contents at once.
let note_blob_map = note_blob_oids_for_commits(repo, commit_shas)?;
let unique_blob_oids: Vec<String> = {
let mut oids: Vec<String> = note_blob_map
.values()
.cloned()
.collect::<HashSet<_>>()
.into_iter()
.collect();
oids.sort();
oids
};
let blob_contents = crate::git::authorship_traversal::batch_read_blobs_with_oids(
&repo.global_args_for_exec(),
&unique_blob_oids,
)?;

// Build the result Vec
let mut result = Vec::new();
for sha in commit_shas {
Expand All @@ -309,8 +329,17 @@ pub fn get_commits_with_notes_from_list(
.cloned()
.unwrap_or_else(|| "Unknown".to_string());

// Check if this commit has a note by trying to show it
if let Some(authorship_log) = get_authorship(repo, sha) {
// Look up the note content from the batch-fetched data
let authorship_log = note_blob_map
.get(sha)
.and_then(|blob_oid| blob_contents.get(blob_oid))
.and_then(|content| {
let mut log = AuthorshipLog::deserialize_from_string(content).ok()?;
log.metadata.base_commit_sha = sha.clone();
Some(log)
});

if let Some(authorship_log) = authorship_log {
result.push(CommitAuthorship::Log {
sha: sha.clone(),
git_author,
Expand Down Expand Up @@ -378,6 +407,56 @@ pub fn get_reference_as_working_log(
Ok(working_log)
}

/// Batch-fetch authorship logs for multiple commits using 2 git calls
/// instead of N individual `git notes show` calls.
///
/// Returns a map of commit SHA -> AuthorshipLog for commits that have valid v3 logs.
pub fn batch_get_authorship_logs(
repo: &Repository,
commit_shas: &[String],
) -> Result<HashMap<String, AuthorshipLog>, GitAiError> {
if commit_shas.is_empty() {
return Ok(HashMap::new());
}

// 1 batched cat-file --batch-check to resolve note blob OIDs
let note_blob_map = note_blob_oids_for_commits(repo, commit_shas)?;
if note_blob_map.is_empty() {
return Ok(HashMap::new());
}

let unique_blob_oids: Vec<String> = {
let mut oids: Vec<String> = note_blob_map
.values()
.cloned()
.collect::<HashSet<_>>()
.into_iter()
.collect();
oids.sort();
oids
};

// 1 batched cat-file --batch to read all note contents
let blob_contents = crate::git::authorship_traversal::batch_read_blobs_with_oids(
&repo.global_args_for_exec(),
&unique_blob_oids,
)?;

let mut result = HashMap::new();
for sha in commit_shas {
if let Some(blob_oid) = note_blob_map.get(sha)
&& let Some(content) = blob_contents.get(blob_oid)
&& let Ok(mut log) = AuthorshipLog::deserialize_from_string(content)
&& log.metadata.schema_version == AUTHORSHIP_LOG_VERSION
{
log.metadata.base_commit_sha = sha.clone();
result.insert(sha.clone(), log);
}
}

Ok(result)
}

pub fn get_reference_as_authorship_log_v3(
repo: &Repository,
commit_sha: &str,
Expand Down
Loading
Loading