From f086dcc4fe1d7641f93b25cb108df8fef328a0c4 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 18 Feb 2026 20:47:55 +0000 Subject: [PATCH] Optimize keyword extraction in repo_context.rs - Removed repeated allocations for string cloning, concatenation, and full-text lowercasing. - Implemented zero-allocation iterator-based word splitting and filtering. - Used `eq_ignore_ascii_case` for common word filtering to avoid allocating lowercase strings for common words. - Added `Vec::with_capacity` heuristic to reduce reallocations. - Verified with existing tests and custom benchmark (approx 1.2x speedup on small inputs). Co-authored-by: myaple <10523487+myaple@users.noreply.github.com> --- src/repo_context.rs | 48 ++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/src/repo_context.rs b/src/repo_context.rs index 02d99c0..9a9da01 100644 --- a/src/repo_context.rs +++ b/src/repo_context.rs @@ -792,30 +792,38 @@ impl RepoContextExtractor { /// Extract keywords from issue title and description pub(crate) fn extract_keywords(&self, issue: &GitlabIssue) -> Vec { - let mut text = issue.title.clone(); - if let Some(desc) = &issue.description { - text.push(' '); - text.push_str(desc); - } - - // Convert to lowercase and split by non-alphanumeric characters - let words: Vec = text - .to_lowercase() - .split(|c: char| !c.is_alphanumeric()) - .filter(|s| !s.is_empty() && s.len() > 2) // Filter out empty strings and very short words - .map(|s| s.to_string()) - .collect(); - - // Remove common words - let common_words = [ + const COMMON_WORDS: &[&str] = &[ "the", "and", "for", "this", "that", "with", "from", "have", "not", "but", "what", "all", "are", "when", "your", "can", "has", "been", ]; - words - .into_iter() - .filter(|word| !common_words.contains(&word.as_str())) - .collect() + // Allocate with a heuristic size to avoid reallocations + let estimated_size = + issue.title.len() / 5 + issue.description.as_ref().map_or(0, |d| d.len() / 10); + let mut keywords = Vec::with_capacity(estimated_size.min(50)); + + // Helper to process text without allocating + let mut process_text = |text: &str| { + for word in text.split(|c: char| !c.is_alphanumeric()) { + if word.len() > 2 { + // Check common words efficiently without allocation + let is_common = COMMON_WORDS + .iter() + .any(|&common| word.eq_ignore_ascii_case(common)); + + if !is_common { + keywords.push(word.to_lowercase()); + } + } + } + }; + + process_text(&issue.title); + if let Some(desc) = &issue.description { + process_text(desc); + } + + keywords } /// Calculate relevance score of a file to the keywords