Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 60 additions & 1 deletion rust/lance/src/dataset/scanner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3341,6 +3341,9 @@ impl Scanner {

// If all target fragments are unindexed, skip index entirely
if unindexed_fragments.len() == target_fragments.len() {
if self.fast_search {
return Ok(Arc::new(EmptyExec::new(FTS_SCHEMA.clone())));
}
let flat_match_plan = self
.plan_flat_match_query(unindexed_fragments, query, params, filter_plan)
.await?;
Expand All @@ -3355,7 +3358,7 @@ impl Scanner {
prefilter_source.clone(),
));

if unindexed_fragments.is_empty() {
if self.fast_search || unindexed_fragments.is_empty() {
(Some(match_plan), None)
} else {
let flat_match_plan = self
Expand All @@ -3365,6 +3368,9 @@ impl Scanner {
}
}
None => {
if self.fast_search {
return Ok(Arc::new(EmptyExec::new(FTS_SCHEMA.clone())));
}
Comment on lines +3371 to +3373
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is slightly different from vector search. In vector search, if there is no index, then fast_search is just ignored (it still does an exhaustive KNN). However, I kind of like this behavior better. So I think I'm just pointing it out. We can update vector search later.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see — let me update the vector search behavior in the next PR.

// No index: flat search all target fragments
let flat_match_plan = self
.plan_flat_match_query(target_fragments.to_vec(), query, params, filter_plan)
Expand Down Expand Up @@ -8484,6 +8490,25 @@ mod test {
)
.await?;

log::info!("Test case: Full text search with unindexed rows and fast_search");
let expected = r#"ProjectionExec: expr=[s@2 as s, _score@1 as _score, _rowid@0 as _rowid]
Take: columns="_rowid, _score, (s)"
CoalesceBatchesExec: target_batch_size=8192
MatchQuery: column=s, query=hello"#;
assert_plan_equals(
&dataset.dataset,
|scan| {
let scan = scan
.project(&["s"])?
.with_row_id()
.full_text_search(FullTextSearchQuery::new("hello".to_owned()))?;
scan.fast_search();
Ok(scan)
},
expected,
)
.await?;

log::info!("Test case: Full text search with unindexed rows and prefilter");
let expected = if data_storage_version == LanceFileVersion::Legacy {
r#"ProjectionExec: expr=[s@2 as s, _score@1 as _score, _rowid@0 as _rowid]
Expand Down Expand Up @@ -8939,6 +8964,40 @@ mod test {
limit_offset_equivalency_test(&scanner).await;
}

#[tokio::test]
async fn test_fts_fast_search_excludes_unindexed_rows() {
let mut test_ds = TestVectorDataset::new(LanceFileVersion::Stable, false)
.await
.unwrap();
test_ds.make_fts_index().await.unwrap();
// Append rows after index build so they stay unindexed.
test_ds.append_data_with_range(10, 20).await.unwrap();

let mut scanner = test_ds.dataset.scan();
scanner
.full_text_search(FullTextSearchQuery::new_query(
MatchQuery::new("15".to_owned())
.with_column(Some("s".to_owned()))
.into(),
))
.unwrap();
let normal_rows = scanner.try_into_batch().await.unwrap().num_rows();

let mut scanner = test_ds.dataset.scan();
scanner
.full_text_search(FullTextSearchQuery::new_query(
MatchQuery::new("15".to_owned())
.with_column(Some("s".to_owned()))
.into(),
))
.unwrap()
.fast_search();
let fast_rows = scanner.try_into_batch().await.unwrap().num_rows();

assert_eq!(normal_rows, 2);
assert_eq!(fast_rows, 1);
}

async fn test_row_offset_read_helper(
ds: &Dataset,
scan_builder: impl FnOnce(&mut Scanner) -> &mut Scanner,
Expand Down
Loading