From 8e81cf09d776cd5a8fbd6ae4f42555f5e0ff288d Mon Sep 17 00:00:00 2001 From: Wyatt Alt Date: Mon, 27 Oct 2025 08:18:30 -0700 Subject: [PATCH 1/3] feat: add public accessors for count plan construction This patch changes the count plan accessor to public, changes the get_deletion_vector method on fragments to public, and adds a couple of public accessors on EvaluatedIndex. The purpose of this patch is to enable systems to construct and optimize the count plan using DataFusion optimizer rules. --- rust/lance/src/dataset/fragment.rs | 2 +- rust/lance/src/dataset/scanner.rs | 2 +- rust/lance/src/io/exec/filtered_read.rs | 10 ++++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/rust/lance/src/dataset/fragment.rs b/rust/lance/src/dataset/fragment.rs index 8a52c006d8c..28502752b8f 100644 --- a/rust/lance/src/dataset/fragment.rs +++ b/rust/lance/src/dataset/fragment.rs @@ -1347,7 +1347,7 @@ impl FileFragment { } /// Get the deletion vector for this fragment, using the cache if available. - pub(crate) async fn get_deletion_vector(&self) -> Result<Option<Arc<DeletionVector>>> { + pub async fn get_deletion_vector(&self) -> Result<Option<Arc<DeletionVector>>> { let Some(deletion_file) = self.metadata.deletion_file.as_ref() else { return Ok(None); }; diff --git a/rust/lance/src/dataset/scanner.rs b/rust/lance/src/dataset/scanner.rs index b1f2d075401..3e70b51016c 100644 --- a/rust/lance/src/dataset/scanner.rs +++ b/rust/lance/src/dataset/scanner.rs @@ -1473,7 +1473,7 @@ impl Scanner { Ok(concat_batches(&schema, &batches)?) 
} - fn create_count_plan(&self) -> BoxFuture<'_, Result<Arc<dyn ExecutionPlan>>> { + pub fn create_count_plan(&self) -> BoxFuture<'_, Result<Arc<dyn ExecutionPlan>>> { // Future intentionally boxed here to avoid large futures on the stack async move { if self.projection_plan.physical_projection.is_empty() { diff --git a/rust/lance/src/io/exec/filtered_read.rs b/rust/lance/src/io/exec/filtered_read.rs index e1dd87195a4..f98b5f79b94 100644 --- a/rust/lance/src/io/exec/filtered_read.rs +++ b/rust/lance/src/io/exec/filtered_read.rs @@ -66,6 +66,16 @@ pub struct EvaluatedIndex { } impl EvaluatedIndex { + /// Get a reference to the index result + pub fn index_result(&self) -> &IndexExprResult { + &self.index_result + } + + /// Get a reference to the applicable fragments bitmap + pub fn applicable_fragments(&self) -> &RoaringBitmap { + &self.applicable_fragments + } + pub fn try_from_arrow(batch: &RecordBatch) -> Result<Self> { if batch.num_rows() != 2 { return Err(Error::InvalidInput { From 8a53ebefe604a1e2bb73898b85a7b3f1631e048c Mon Sep 17 00:00:00 2001 From: Wyatt Alt Date: Wed, 12 Nov 2025 07:29:54 -0800 Subject: [PATCH 2/3] Update rust/lance/src/io/exec/filtered_read.rs Co-authored-by: Will Jones --- rust/lance/src/io/exec/filtered_read.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/lance/src/io/exec/filtered_read.rs b/rust/lance/src/io/exec/filtered_read.rs index f98b5f79b94..b338cf4dbe9 100644 --- a/rust/lance/src/io/exec/filtered_read.rs +++ b/rust/lance/src/io/exec/filtered_read.rs @@ -66,7 +66,7 @@ pub struct EvaluatedIndex { } impl EvaluatedIndex { - /// Get a reference to the index result + /// Get the row id mask representing which rows matched the index filter. 
pub fn index_result(&self) -> &IndexExprResult { &self.index_result } From c3d4b29cab59b4d5e5eaf280371132cfe012b273 Mon Sep 17 00:00:00 2001 From: Wyatt Alt Date: Wed, 12 Nov 2025 07:31:57 -0800 Subject: [PATCH 3/3] clarify comment --- rust/lance/src/io/exec/filtered_read.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rust/lance/src/io/exec/filtered_read.rs b/rust/lance/src/io/exec/filtered_read.rs index b338cf4dbe9..f97ebcdadbf 100644 --- a/rust/lance/src/io/exec/filtered_read.rs +++ b/rust/lance/src/io/exec/filtered_read.rs @@ -71,7 +71,8 @@ impl EvaluatedIndex { &self.index_result } - /// Get a reference to the applicable fragments bitmap + /// Get a reference to the applicable fragments bitmap, containing the set of fragment IDs + /// implicated by the filter. pub fn applicable_fragments(&self) -> &RoaringBitmap { &self.applicable_fragments }