From 0d9511009b1ecc0956ed95d8de30013e399cb792 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Thu, 15 Jan 2026 18:31:09 +0800 Subject: [PATCH] Fix block max score allocation --- rust/lance-index/src/scalar/inverted/index.rs | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/rust/lance-index/src/scalar/inverted/index.rs b/rust/lance-index/src/scalar/inverted/index.rs index 1862750cbfe..3605364715e 100644 --- a/rust/lance-index/src/scalar/inverted/index.rs +++ b/rust/lance-index/src/scalar/inverted/index.rs @@ -2192,7 +2192,9 @@ impl DocSet { ) -> Vec { let avgdl = self.average_length(); let length = doc_ids.size_hint().0; - let mut block_max_scores = Vec::with_capacity(length); + let num_blocks = length.div_ceil(BLOCK_SIZE); + let mut block_max_scores = Vec::with_capacity(num_blocks); + let idf_scale = idf(length, self.len()) * (K1 + 1.0); let mut max_score = f32::MIN; for (i, (doc_id, freq)) in doc_ids.zip(freqs).enumerate() { let doc_norm = K1 * (1.0 - B + B * self.num_tokens(*doc_id) as f32 / avgdl); @@ -2202,13 +2204,13 @@ impl DocSet { max_score = score; } if (i + 1) % BLOCK_SIZE == 0 { - max_score *= idf(length, self.len()) * (K1 + 1.0); + max_score *= idf_scale; block_max_scores.push(max_score); max_score = f32::MIN; } } if length % BLOCK_SIZE > 0 { - max_score *= idf(length, self.len()) * (K1 + 1.0); + max_score *= idf_scale; block_max_scores.push(max_score); } block_max_scores @@ -2771,4 +2773,21 @@ mod tests { "Should contain row_id from partition 1" ); } + + #[test] + fn test_block_max_scores_capacity_matches_block_count() { + let mut docs = DocSet::default(); + let num_docs = BLOCK_SIZE * 3 + 7; + let doc_ids = (0..num_docs as u32).collect::>(); + for doc_id in &doc_ids { + docs.append(*doc_id as u64, 1); + } + + let freqs = vec![1_u32; doc_ids.len()]; + let block_max_scores = docs.calculate_block_max_scores(doc_ids.iter(), freqs.iter()); + let expected_blocks = doc_ids.len().div_ceil(BLOCK_SIZE); + + assert_eq!(block_max_scores.len(), expected_blocks); + assert_eq!(block_max_scores.capacity(), expected_blocks); + } }