From 3272b1039d7899b684c2acdfb6d41fdf17db8701 Mon Sep 17 00:00:00 2001 From: Gabriel Gordon-Hall Date: Mon, 4 Dec 2023 16:19:23 +0000 Subject: [PATCH 1/2] deduplicate after merging lexical results --- server/bleep/src/semantic.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/server/bleep/src/semantic.rs b/server/bleep/src/semantic.rs index 4f066c158f..ca84384688 100644 --- a/server/bleep/src/semantic.rs +++ b/server/bleep/src/semantic.rs @@ -544,7 +544,7 @@ impl Semantic { .map(Payload::from_qdrant) .collect::>() })?; - let dedup_results = deduplicate_snippets(results, vector.clone(), limit); + let results_lexical = self .search_lexical(parsed_query, vector.clone(), limit, offset, 0.0) .await @@ -555,9 +555,10 @@ impl Semantic { })?; let results_lexical = Self::rank_lexical(results_lexical, &query); - let merged_results = Self::merge_rrf(results_lexical, dedup_results); + let merged_results = Self::merge_rrf(results_lexical, results); + let dedup_results = deduplicate_snippets(merged_results, vector.clone(), limit); - Ok(merged_results + Ok(dedup_results .iter() .take(limit.try_into().unwrap()) .cloned() From a1fe3ab338c1d5acd49d4e557b2d977856ceec97 Mon Sep 17 00:00:00 2001 From: Gabriel Gordon-Hall Date: Fri, 8 Dec 2023 14:09:05 +0000 Subject: [PATCH 2/2] rework deduplication order --- server/bleep/src/semantic.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/server/bleep/src/semantic.rs b/server/bleep/src/semantic.rs index ca84384688..c97e2eecee 100644 --- a/server/bleep/src/semantic.rs +++ b/server/bleep/src/semantic.rs @@ -544,21 +544,28 @@ impl Semantic { .map(Payload::from_qdrant) .collect::>() })?; + let results = deduplicate_snippets(results, vector.clone(), limit); let results_lexical = self - .search_lexical(parsed_query, vector.clone(), limit, offset, 0.0) + .search_lexical( + parsed_query, + vector.clone(), + if retrieve_more { limit * 2 } else { limit }, + offset, + 0.0, + ) .await .map(|raw| { raw.into_iter() .map(Payload::from_qdrant) .collect::>() })?; + let results_lexical = deduplicate_snippets(results_lexical, vector.clone(), limit); let results_lexical = Self::rank_lexical(results_lexical, &query); let merged_results = Self::merge_rrf(results_lexical, results); - let dedup_results = deduplicate_snippets(merged_results, vector.clone(), limit); - Ok(dedup_results + Ok(merged_results .iter() .take(limit.try_into().unwrap()) .cloned()