Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions src/repo_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -936,17 +936,20 @@ impl RepoContextExtractor {
file_content: &str,
keywords: &[String],
) -> Vec<FileContentMatch> {
let lines: Vec<String> = file_content.lines().map(|s| s.to_string()).collect();
let lines: Vec<&str> = file_content.lines().collect();
if lines.is_empty() {
return Vec::new();
}

// Pre-calculate lowercased keywords to avoid repeated allocation
let keywords_lower: Vec<String> = keywords.iter().map(|k| k.to_lowercase()).collect();

// Find line numbers that contain any of the keywords (case-insensitive)
let mut matching_lines = HashSet::new();
for (line_idx, line) in lines.iter().enumerate() {
let line_lower = line.to_lowercase();
for keyword in keywords {
if line_lower.contains(&keyword.to_lowercase()) {
for keyword in &keywords_lower {
if line_lower.contains(keyword) {
matching_lines.insert(line_idx);
break; // Found a match, no need to check other keywords for this line
}
Expand Down Expand Up @@ -985,7 +988,8 @@ impl RepoContextExtractor {
// Convert ranges to FileContentMatch structs
let mut matches = Vec::new();
for (start, end) in merged_ranges {
let range_lines = lines[start..=end].to_vec();
let range_lines: Vec<String> =
lines[start..=end].iter().map(|&s| s.to_string()).collect();
matches.push(FileContentMatch {
start_line: start + 1, // Convert to 1-based line numbering
end_line: end + 1, // Convert to 1-based line numbering
Expand Down
2 changes: 2 additions & 0 deletions src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,6 @@ pub mod polling_test;
#[cfg(test)]
pub mod polling_tests;
#[cfg(test)]
pub mod repo_context_perf_test;
#[cfg(test)]
pub mod repo_context_tests;
64 changes: 64 additions & 0 deletions src/tests/repo_context_perf_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#[cfg(test)]
mod tests {
    use crate::config::AppSettings;
    use crate::file_indexer::FileIndexManager;
    use crate::gitlab::GitlabApiClient;
    use crate::repo_context::RepoContextExtractor;
    use std::sync::Arc;
    use std::time::Instant;

    /// Smoke benchmark for `RepoContextExtractor::extract_relevant_file_sections`.
    ///
    /// Builds a large synthetic file in memory, runs the extraction once with a
    /// single keyword, and prints the elapsed time. The duration is only
    /// reported, never asserted, so the test cannot fail on a slow machine; the
    /// only hard check is that at least one match is returned.
    ///
    /// Run with `cargo test -- --nocapture` to see the printed timing.
    #[test]
    fn test_extract_relevant_file_sections_perf() {
        /// Number of lines in the synthetic file.
        const LINE_COUNT: usize = 100_000;
        /// Every `KEYWORD_EVERY`-th line (starting with line 0) contains the keyword.
        const KEYWORD_EVERY: usize = 1000;
        const KEYWORD_LINE: &str = "This line contains the magic keyword TARGET.\n";
        const REGULAR_LINE: &str =
            "This is a regular line of code with some content that is not relevant.\n";

        // Settings handed to the extractor; this is the instance whose
        // `context_lines` value the extraction reads.
        let extractor_settings = Arc::new(AppSettings {
            context_lines: 5,
            ..Default::default()
        });

        // Separate settings for the GitLab client. The client is only built to
        // satisfy the extractor's constructor; per the original author's note no
        // network calls are expected, so placeholder credentials are used.
        let client_settings = Arc::new(AppSettings {
            gitlab_url: "https://example.com".to_string(),
            gitlab_token: "dummy".to_string(),
            openai_api_key: "dummy".to_string(),
            ..Default::default()
        });

        let gitlab_client = Arc::new(
            GitlabApiClient::new(client_settings)
                .expect("placeholder settings should be sufficient to construct the client"),
        );
        // NOTE(review): 3600 is presumably a refresh/TTL interval in seconds — confirm
        // against `FileIndexManager::new`.
        let file_index_manager =
            Arc::new(FileIndexManager::new(Arc::clone(&gitlab_client), 3600));

        let extractor = RepoContextExtractor::new_with_file_indexer(
            gitlab_client,
            extractor_settings,
            file_index_manager,
        );

        // Generate large content: 100,000 lines at up to 71 bytes each (~7 MB).
        // Reserving for the longer line variant avoids any reallocation while
        // the buffer is filled.
        let mut content = String::with_capacity(LINE_COUNT * REGULAR_LINE.len());
        for i in 0..LINE_COUNT {
            let line = if i % KEYWORD_EVERY == 0 {
                KEYWORD_LINE
            } else {
                REGULAR_LINE
            };
            content.push_str(line);
        }

        let keywords = vec!["TARGET".to_string()];

        println!("Starting benchmark with {} lines...", LINE_COUNT);
        let start = Instant::now();

        // A single run over 100k lines is large enough to make a difference in
        // the implementation visible without averaging over repetitions.
        let matches = extractor.extract_relevant_file_sections(&content, &keywords);

        let duration = start.elapsed();
        println!("Extraction took: {:?}", duration);

        // Sanity check: the keyword is present, so extraction must find something.
        assert!(
            !matches.is_empty(),
            "expected at least one match for the TARGET keyword"
        );
    }
}