From 1c870aaa3a857c0bdaaf65b67e68e7862ed92a4c Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 24 Mar 2026 19:01:22 +0800 Subject: [PATCH 1/5] Clarify logical indices and physical index segments --- java/src/main/java/org/lance/Dataset.java | 20 ++++++++++- .../org/lance/index/IndexDescription.java | 9 +++++ java/src/test/java/org/lance/DatasetTest.java | 10 ++++++ python/python/lance/dataset.py | 27 ++++++++++---- python/python/lance/indices/__init__.py | 2 ++ python/python/lance/lance/__init__.pyi | 4 +++ python/python/tests/test_vector_index.py | 22 ++++++++++++ python/src/dataset.rs | 21 ++++++++++- python/src/indices.rs | 35 ++++++++++--------- rust/lance-index/src/traits.rs | 24 +++++++++++++ rust/lance/src/index.rs | 15 +++++++- 11 files changed, 164 insertions(+), 25 deletions(-) diff --git a/java/src/main/java/org/lance/Dataset.java b/java/src/main/java/org/lance/Dataset.java index 166feebba20..c1ca6080e4c 100644 --- a/java/src/main/java/org/lance/Dataset.java +++ b/java/src/main/java/org/lance/Dataset.java @@ -1207,7 +1207,11 @@ public List listIndexes() { /** * Get all indexes with full metadata. * - * @return list of Index objects with complete metadata including index type and fragment coverage + *

Each returned {@link Index} is a physical index segment from the manifest. Use {@link + * #describeIndices()} for the logical-index view. + * + * @return list of Index objects with complete segment metadata, including index type and fragment + * coverage */ public List getIndexes() { try (LockManager.ReadLock readLock = lockManager.acquireReadLock()) { @@ -1218,6 +1222,20 @@ public List getIndexes() { private native List nativeGetIndexes(); + /** + * Get physical index segments for a specific logical index name. + * + * @param indexName logical index name + * @return list of physical index segments belonging to the logical index + */ + public List getIndexSegments(String indexName) { + Preconditions.checkArgument( + indexName != null && !indexName.isEmpty(), "indexName cannot be null or empty"); + return getIndexes().stream() + .filter(index -> indexName.equals(index.name())) + .collect(Collectors.toList()); + } + /** * Get statistics for a specific index in JSON form. * diff --git a/java/src/main/java/org/lance/index/IndexDescription.java b/java/src/main/java/org/lance/index/IndexDescription.java index d17782eb531..1b5e5a3a8f8 100755 --- a/java/src/main/java/org/lance/index/IndexDescription.java +++ b/java/src/main/java/org/lance/index/IndexDescription.java @@ -83,6 +83,15 @@ public List getMetadata() { return metadata; } + /** + * Physical index segments for this logical index. + * + *

This is an alias for {@link #getMetadata()} with a less ambiguous name. + */ + public List getSegments() { + return metadata; + } + /** * JSON representation of index-specific details. * diff --git a/java/src/test/java/org/lance/DatasetTest.java b/java/src/test/java/org/lance/DatasetTest.java index 59e0ee80e7b..cf788ab25e3 100644 --- a/java/src/test/java/org/lance/DatasetTest.java +++ b/java/src/test/java/org/lance/DatasetTest.java @@ -1914,14 +1914,24 @@ public void testDescribeIndicesByName(@TempDir Path tempDir) throws Exception { assertTrue(desc.getRowsIndexed() > 0, "rowsIndexed should be positive"); assertNotNull(desc.getMetadata(), "Metadata list should not be null"); assertFalse(desc.getMetadata().isEmpty(), "Metadata list should not be empty"); + assertEquals( + desc.getMetadata(), desc.getSegments(), "segments alias should match metadata"); assertNotNull(desc.getDetailsJson(), "Details JSON should not be null"); + List physicalSegments = dataset.getIndexSegments("index1"); + assertEquals(1, physicalSegments.size(), "Expected exactly one physical segment"); + assertEquals("index1", physicalSegments.get(0).name()); + descriptions = dataset.describeIndices(); assertEquals(2, descriptions.size(), "Expected exactly one matching index"); for (IndexDescription indexDesc : descriptions) { assertTrue(indexDesc.getRowsIndexed() > 0, "rowsIndexed should be positive"); assertNotNull(indexDesc.getMetadata(), "Metadata list should not be null"); assertFalse(indexDesc.getMetadata().isEmpty(), "Metadata list should not be empty"); + assertEquals( + indexDesc.getMetadata(), + indexDesc.getSegments(), + "segments alias should match metadata"); assertNotNull(indexDesc.getDetailsJson(), "Details JSON should not be null"); } } diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index e8d632b0ecb..9ad7d06ce46 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -79,7 +79,7 @@ from .commit import CommitLock from .io import StorageOptionsProvider - from .lance.indices import IndexDescription + from .lance.indices import IndexDescription, IndexSegmentDescription from .progress import FragmentWriteProgress from .types import ReaderLike @@ -641,12 +641,14 @@ def checkout_latest(self): def list_indices(self) -> List[Index]: """ - Returns index information for all indices in the dataset. + Returns physical index segment information for all indices in the dataset. This method is deprecated as it requires loading the statistics for each index - which can be a very expensive operation. Instead use describe_indices() to - list index information and index_statistics() to get the statistics for - individual indexes of interest. + which can be a very expensive operation. It also exposes physical index + segments directly. Instead use describe_indices() for logical index + descriptions, describe_index_segments() for explicit segment inspection, + and index_statistics() to get the statistics for individual indexes of + interest. """ warnings.warn( "The 'list_indices' method is deprecated. It may be removed in a future " @@ -657,9 +659,22 @@ def list_indices(self) -> List[Index]: return self._ds.load_indices() def describe_indices(self) -> List[IndexDescription]: - """Returns index information for all indices in the dataset.""" + """Returns logical index information aggregated across all segments.""" return self._ds.describe_indices() + def describe_index_segments( + self, index_name: Optional[str] = None + ) -> List[IndexSegmentDescription]: + """ + Returns physical index segment information. + + Parameters + ---------- + index_name: Optional[str] + If provided, only return segments belonging to the named logical index. + """ + return self._ds.describe_index_segments(index_name) + def index_statistics(self, index_name: str) -> Dict[str, Any]: warnings.warn( "LanceDataset.index_statistics() is deprecated, " diff --git a/python/python/lance/indices/__init__.py b/python/python/lance/indices/__init__.py index 085ff66e252..9dcfa457fd6 100644 --- a/python/python/lance/indices/__init__.py +++ b/python/python/lance/indices/__init__.py @@ -9,6 +9,7 @@ from .pq import PqModel IndexSegment = _lance.indices.IndexSegment +IndexSegmentDescription = _lance.indices.IndexSegmentDescription IndexSegmentPlan = _lance.indices.IndexSegmentPlan __all__ = [ @@ -18,6 +19,7 @@ "IvfModel", "IndexFileVersion", "IndexSegment", + "IndexSegmentDescription", "IndexSegmentPlan", ] diff --git a/python/python/lance/lance/__init__.pyi b/python/python/lance/lance/__init__.pyi index e2f70a853a1..1505534954b 100644 --- a/python/python/lance/lance/__init__.pyi +++ b/python/python/lance/lance/__init__.pyi @@ -62,6 +62,7 @@ from .fragment import ( ) from .indices import IndexDescription as IndexDescription from .indices import IndexSegment as IndexSegment +from .indices import IndexSegmentDescription as IndexSegmentDescription from .indices import IndexSegmentPlan as IndexSegmentPlan from .lance import PySearchFilter from .optimize import ( @@ -234,6 +235,9 @@ class _Dataset: def serialized_manifest(self) -> bytes: ... def load_indices(self) -> List[Index]: ... def describe_indices(self) -> List[IndexDescription]: ... + def describe_index_segments( + self, index_name: Optional[str] = None + ) -> List[IndexSegmentDescription]: ... def scanner( self, columns: Optional[List[str]] = None, diff --git a/python/python/tests/test_vector_index.py b/python/python/tests/test_vector_index.py index 973264c475c..a1456ea63b5 100644 --- a/python/python/tests/test_vector_index.py +++ b/python/python/tests/test_vector_index.py @@ -1644,6 +1644,28 @@ def test_optimize_indices(indexed_dataset): assert stats["num_indices"] == 2 +def test_logical_and_physical_index_views(indexed_dataset): + data = create_table() + indexed_dataset = lance.write_dataset(data, indexed_dataset.uri, mode="append") + indexed_dataset.optimize.optimize_indices(num_indices_to_merge=0) + + logical_indices = indexed_dataset.describe_indices() + assert len(logical_indices) == 1 + assert logical_indices[0].name == "vector_idx" + assert len(logical_indices[0].segments) == 2 + + physical_segments = indexed_dataset.describe_index_segments("vector_idx") + assert len(physical_segments) == 2 + assert all(segment.fragment_ids for segment in physical_segments) + + all_segments = indexed_dataset.describe_index_segments() + assert len(all_segments) == 2 + + stats = indexed_dataset.stats.index_stats("vector_idx") + assert stats["num_segments"] == stats["num_indices"] == 2 + assert stats["segments"] == stats["indices"] + + @pytest.mark.skip(reason="retrain is deprecated") def test_retrain_indices(indexed_dataset): data = create_table() diff --git a/python/src/dataset.rs b/python/src/dataset.rs index 3b76f3ce043..50612a9fc65 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -87,7 +87,10 @@ use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler; use crate::error::PythonErrorExt; use crate::file::object_store_from_uri_or_path; use crate::fragment::FileFragment; -use crate::indices::{PyIndexConfig, PyIndexDescription, PyIndexSegment, PyIndexSegmentPlan}; +use crate::indices::{ + PyIndexConfig, PyIndexDescription, PyIndexSegment, PyIndexSegmentDescription, + PyIndexSegmentPlan, +}; use crate::namespace::extract_namespace_arc; use crate::rt; use crate::scanner::ScanStatistics; @@ -2798,6 +2801,22 @@ impl Dataset { .collect()) } + #[pyo3(signature=(index_name=None))] + fn describe_index_segments( + &self, + py: Python<'_>, + index_name: Option<&str>, + ) -> PyResult> { + let new_self = self.ds.as_ref().clone(); + let indices = rt() + .block_on(Some(py), new_self.describe_index_segments(index_name))? + .infer_error()?; + Ok(indices + .iter() + .map(PyIndexSegmentDescription::from_metadata) + .collect()) + } + /// Create a delta builder to explore changes between dataset versions. #[pyo3(signature=())] fn delta(&self) -> PyResult { diff --git a/python/src/indices.rs b/python/src/indices.rs index 9589b78ff36..97572297945 100644 --- a/python/src/indices.rs +++ b/python/src/indices.rs @@ -590,6 +590,24 @@ pub struct PyIndexSegmentDescription { } impl PyIndexSegmentDescription { + pub fn from_metadata(segment: &lance_table::format::IndexMetadata) -> Self { + let fragment_ids = segment + .fragment_bitmap + .as_ref() + .map(|bitmap| bitmap.iter().collect::>()) + .unwrap_or_default(); + let size_bytes = segment.total_size_bytes(); + + Self { + uuid: segment.uuid.to_string(), + dataset_version_at_last_update: segment.dataset_version, + fragment_ids, + index_version: segment.index_version, + created_at: segment.created_at, + size_bytes, + } + } + pub fn __repr__(&self) -> String { format!( "IndexSegmentDescription(uuid={}, dataset_version_at_last_update={}, fragment_ids={:?}, index_version={}, created_at={:?}, size_bytes={:?})", @@ -643,22 +661,7 @@ impl PyIndexDescription { let segments = index .metadata() .iter() - .map(|segment| { - let fragment_ids = segment - .fragment_bitmap - .as_ref() - .map(|bitmap| bitmap.iter().collect::>()) - .unwrap_or_default(); - let size_bytes = segment.total_size_bytes(); - PyIndexSegmentDescription { - uuid: segment.uuid.to_string(), - dataset_version_at_last_update: segment.dataset_version, - fragment_ids, - index_version: segment.index_version, - created_at: segment.created_at, - size_bytes, - } - }) + .map(PyIndexSegmentDescription::from_metadata) .collect(); let details = index.details().unwrap_or_else(|_| "{}".to_string()); diff --git a/rust/lance-index/src/traits.rs b/rust/lance-index/src/traits.rs index 1c5923e4050..e15ce490029 100644 --- a/rust/lance-index/src/traits.rs +++ b/rust/lance-index/src/traits.rs @@ -77,6 +77,13 @@ pub trait IndexDescription: Send + Sync { /// IndexMetadata for each segment of the index. fn metadata(&self) -> &[IndexMetadata]; + /// Returns the physical index segments that make up this logical index. + /// + /// This is an alias for [`Self::metadata`] with a less ambiguous name. + fn segments(&self) -> &[IndexMetadata] { + self.metadata() + } + /// Returns the index type URL /// /// This is extracted from the type url of the index details @@ -210,6 +217,8 @@ pub trait DatasetIndexExt { /// /// The indices are lazy loaded and cached in memory within the `Dataset` instance. /// The cache is invalidated when the dataset version (Manifest) is changed. + /// + /// Each returned entry represents a physical index segment from the manifest. async fn load_indices(&self) -> Result>>; /// Loads all the indies of a given UUID. @@ -243,6 +252,21 @@ pub trait DatasetIndexExt { }) } + /// Describe physical index segments. + /// + /// When `name` is provided, only segments belonging to the named logical + /// index are returned. Otherwise, all index segments in the current dataset + /// version are returned. + async fn describe_index_segments(&self, name: Option<&str>) -> Result> { + match name { + Some(name) => self.load_indices_by_name(name).await, + None => self + .load_indices() + .await + .map(|indices| indices.as_ref().clone()), + } + } + /// Loads a specific index with the given index name. /// This function only works for indices that are unique. /// If there are multiple indices sharing the same name, please use [`Self::load_indices_by_name`] diff --git a/rust/lance/src/index.rs b/rust/lance/src/index.rs index 5ae8ee91b05..59d31a70fe9 100644 --- a/rust/lance/src/index.rs +++ b/rust/lance/src/index.rs @@ -1134,7 +1134,9 @@ async fn index_statistics_scalar( "index_type": index_type, "name": index_name, "num_indices": num_indices, - "indices": indices_stats, + "num_segments": num_indices, + "indices": indices_stats.clone(), + "segments": indices_stats, "num_indexed_fragments": num_indexed_fragments, "num_indexed_rows": num_indexed_rows, "num_unindexed_fragments": num_unindexed_fragments, @@ -2382,8 +2384,13 @@ mod tests { fn get_bitmap(meta: &IndexMetadata) -> Vec { meta.fragment_bitmap.as_ref().unwrap().iter().collect() } + fn assert_segment_aliases(stats: &serde_json::Value) { + assert_eq!(stats["num_segments"], stats["num_indices"]); + assert_eq!(stats["segments"], stats["indices"]); + } let stats = get_stats(&dataset, "vec_idx").await; + assert_segment_aliases(&stats); assert_eq!(stats["num_unindexed_rows"], 0); assert_eq!(stats["num_indexed_rows"], 512); assert_eq!(stats["num_indexed_fragments"], 1); @@ -2396,6 +2403,7 @@ mod tests { RecordBatchIterator::new(vec![record_batch].into_iter().map(Ok), schema.clone()); dataset.append(reader, None).await.unwrap(); let stats = get_stats(&dataset, "vec_idx").await; + assert_segment_aliases(&stats); assert_eq!(stats["num_unindexed_rows"], 512); assert_eq!(stats["num_indexed_rows"], 512); assert_eq!(stats["num_indexed_fragments"], 1); @@ -2410,6 +2418,7 @@ mod tests { .await .unwrap(); let stats = get_stats(&dataset, "vec_idx").await; + assert_segment_aliases(&stats); assert_eq!(stats["num_unindexed_rows"], 512); assert_eq!(stats["num_indexed_rows"], 512); assert_eq!(stats["num_indexed_fragments"], 1); @@ -2427,6 +2436,7 @@ mod tests { .await .unwrap(); let stats = get_stats(&dataset, "vec_idx").await; + assert_segment_aliases(&stats); assert_eq!(stats["num_unindexed_rows"], 512); assert_eq!(stats["num_indexed_rows"], 512); assert_eq!(stats["num_indexed_fragments"], 1); @@ -2437,6 +2447,7 @@ mod tests { assert_eq!(get_bitmap(&meta[0]), vec![0]); let stats = get_stats(&dataset, "other_vec_idx").await; + assert_segment_aliases(&stats); assert_eq!(stats["num_unindexed_rows"], 0); assert_eq!(stats["num_indexed_rows"], 1024); assert_eq!(stats["num_indexed_fragments"], 2); @@ -2453,6 +2464,7 @@ mod tests { .unwrap(); let stats = get_stats(&dataset, "vec_idx").await; + assert_segment_aliases(&stats); assert_eq!(stats["num_unindexed_rows"], 0); assert_eq!(stats["num_indexed_rows"], 1024); assert_eq!(stats["num_indexed_fragments"], 2); @@ -2467,6 +2479,7 @@ mod tests { .await .unwrap(); let stats = get_stats(&dataset, "other_vec_idx").await; + assert_segment_aliases(&stats); assert_eq!(stats["num_unindexed_rows"], 0); assert_eq!(stats["num_indexed_rows"], 1024); assert_eq!(stats["num_indexed_fragments"], 2); From fa8dccb4415074dcd5b76e404eeb718d52270f6a Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 26 Mar 2026 02:38:37 +0800 Subject: [PATCH 2/5] refactor: drop public describe_index_segments API --- python/src/dataset.rs | 7 +++++-- rust/lance-index/src/traits.rs | 15 --------------- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/python/src/dataset.rs b/python/src/dataset.rs index 50612a9fc65..679adb88ff9 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -2808,9 +2808,12 @@ impl Dataset { index_name: Option<&str>, ) -> PyResult> { let new_self = self.ds.as_ref().clone(); - let indices = rt() - .block_on(Some(py), new_self.describe_index_segments(index_name))? + let mut indices = rt() + .block_on(Some(py), new_self.load_indices())? .infer_error()?; + if let Some(index_name) = index_name { + indices.retain(|segment| segment.name == index_name); + } Ok(indices .iter() .map(PyIndexSegmentDescription::from_metadata) diff --git a/rust/lance-index/src/traits.rs b/rust/lance-index/src/traits.rs index e15ce490029..223a617f7be 100644 --- a/rust/lance-index/src/traits.rs +++ b/rust/lance-index/src/traits.rs @@ -252,21 +252,6 @@ pub trait DatasetIndexExt { }) } - /// Describe physical index segments. - /// - /// When `name` is provided, only segments belonging to the named logical - /// index are returned. Otherwise, all index segments in the current dataset - /// version are returned. - async fn describe_index_segments(&self, name: Option<&str>) -> Result> { - match name { - Some(name) => self.load_indices_by_name(name).await, - None => self - .load_indices() - .await - .map(|indices| indices.as_ref().clone()), - } - } - /// Loads a specific index with the given index name. /// This function only works for indices that are unique. /// If there are multiple indices sharing the same name, please use [`Self::load_indices_by_name`] From e166f8d8e6a99f26cd07db031a74d33c0c7d88f4 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 26 Mar 2026 02:59:59 +0800 Subject: [PATCH 3/5] fix: avoid mutating shared index metadata in python binding --- python/src/dataset.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/src/dataset.rs b/python/src/dataset.rs index 897c1501bc6..75d79ba67a4 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -2795,14 +2795,16 @@ impl Dataset { index_name: Option<&str>, ) -> PyResult> { let new_self = self.ds.as_ref().clone(); - let mut indices = rt() + let indices = rt() .block_on(Some(py), new_self.load_indices())? .infer_error()?; - if let Some(index_name) = index_name { - indices.retain(|segment| segment.name == index_name); - } Ok(indices .iter() + .filter(|segment| { + index_name + .map(|index_name| segment.name == index_name) + .unwrap_or(true) + }) .map(PyIndexSegmentDescription::from_metadata) .collect()) } From 682a8e4f2c718694e330e440b78672449669b464 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 26 Mar 2026 14:44:26 +0800 Subject: [PATCH 4/5] refactor: remove segment convenience APIs from bindings --- java/src/main/java/org/lance/Dataset.java | 14 ----------- java/src/test/java/org/lance/DatasetTest.java | 5 ++-- python/python/lance/dataset.py | 20 +++------------- python/python/lance/lance/__init__.pyi | 3 --- python/python/tests/test_vector_index.py | 8 +------ python/src/dataset.rs | 24 +------------------ 6 files changed, 7 insertions(+), 67 deletions(-) diff --git a/java/src/main/java/org/lance/Dataset.java b/java/src/main/java/org/lance/Dataset.java index e178429476b..008fb116a3c 100644 --- a/java/src/main/java/org/lance/Dataset.java +++ b/java/src/main/java/org/lance/Dataset.java @@ -1267,20 +1267,6 @@ public List getIndexes() { private native List nativeGetIndexes(); - /** - * Get physical index segments for a specific logical index name. - * - * @param indexName logical index name - * @return list of physical index segments belonging to the logical index - */ - public List getIndexSegments(String indexName) { - Preconditions.checkArgument( - indexName != null && !indexName.isEmpty(), "indexName cannot be null or empty"); - return getIndexes().stream() - .filter(index -> indexName.equals(index.name())) - .collect(Collectors.toList()); - } - /** * Get statistics for a specific index in JSON form. * diff --git a/java/src/test/java/org/lance/DatasetTest.java b/java/src/test/java/org/lance/DatasetTest.java index cf788ab25e3..7444afcfe76 100644 --- a/java/src/test/java/org/lance/DatasetTest.java +++ b/java/src/test/java/org/lance/DatasetTest.java @@ -1918,9 +1918,8 @@ public void testDescribeIndicesByName(@TempDir Path tempDir) throws Exception { desc.getMetadata(), desc.getSegments(), "segments alias should match metadata"); assertNotNull(desc.getDetailsJson(), "Details JSON should not be null"); - List physicalSegments = dataset.getIndexSegments("index1"); - assertEquals(1, physicalSegments.size(), "Expected exactly one physical segment"); - assertEquals("index1", physicalSegments.get(0).name()); + assertEquals(1, desc.getSegments().size(), "Expected exactly one physical segment"); + assertEquals("index1", desc.getSegments().get(0).name()); descriptions = dataset.describeIndices(); assertEquals(2, descriptions.size(), "Expected exactly one matching index"); diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 02a49544c39..7496746285a 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -79,7 +79,7 @@ from .commit import CommitLock from .io import StorageOptionsProvider - from .lance.indices import IndexDescription, IndexSegmentDescription + from .lance.indices import IndexDescription from .progress import FragmentWriteProgress from .types import ReaderLike @@ -646,9 +646,8 @@ def list_indices(self) -> List[Index]: This method is deprecated as it requires loading the statistics for each index which can be a very expensive operation. It also exposes physical index segments directly. Instead use describe_indices() for logical index - descriptions, describe_index_segments() for explicit segment inspection, - and index_statistics() to get the statistics for individual indexes of - interest. + descriptions and index_statistics() to get the statistics for individual + indexes of interest. """ warnings.warn( "The 'list_indices' method is deprecated. It may be removed in a future " @@ -662,19 +661,6 @@ def describe_indices(self) -> List[IndexDescription]: """Returns logical index information aggregated across all segments.""" return self._ds.describe_indices() - def describe_index_segments( - self, index_name: Optional[str] = None - ) -> List[IndexSegmentDescription]: - """ - Returns physical index segment information. - - Parameters - ---------- - index_name: Optional[str] - If provided, only return segments belonging to the named logical index. - """ - return self._ds.describe_index_segments(index_name) - def index_statistics(self, index_name: str) -> Dict[str, Any]: warnings.warn( "LanceDataset.index_statistics() is deprecated, " diff --git a/python/python/lance/lance/__init__.pyi b/python/python/lance/lance/__init__.pyi index 1505534954b..f0be29f39ca 100644 --- a/python/python/lance/lance/__init__.pyi +++ b/python/python/lance/lance/__init__.pyi @@ -235,9 +235,6 @@ class _Dataset: def serialized_manifest(self) -> bytes: ... def load_indices(self) -> List[Index]: ... def describe_indices(self) -> List[IndexDescription]: ... - def describe_index_segments( - self, index_name: Optional[str] = None - ) -> List[IndexSegmentDescription]: ... def scanner( self, columns: Optional[List[str]] = None, diff --git a/python/python/tests/test_vector_index.py b/python/python/tests/test_vector_index.py index 271e91cd11f..0c1d5ab9ed4 100644 --- a/python/python/tests/test_vector_index.py +++ b/python/python/tests/test_vector_index.py @@ -1652,13 +1652,7 @@ def test_logical_and_physical_index_views(indexed_dataset): assert len(logical_indices) == 1 assert logical_indices[0].name == "vector_idx" assert len(logical_indices[0].segments) == 2 - - physical_segments = indexed_dataset.describe_index_segments("vector_idx") - assert len(physical_segments) == 2 - assert all(segment.fragment_ids for segment in physical_segments) - - all_segments = indexed_dataset.describe_index_segments() - assert len(all_segments) == 2 + assert all(segment.fragment_ids for segment in logical_indices[0].segments) stats = indexed_dataset.stats.index_stats("vector_idx") assert stats["num_segments"] == stats["num_indices"] == 2 diff --git a/python/src/dataset.rs b/python/src/dataset.rs index 75d79ba67a4..7c89c945e0f 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -88,8 +88,7 @@ use crate::error::PythonErrorExt; use crate::file::object_store_from_uri_or_path; use crate::fragment::FileFragment; use crate::indices::{ - PyIndexConfig, PyIndexDescription, PyIndexSegment, PyIndexSegmentDescription, - PyIndexSegmentPlan, + PyIndexConfig, PyIndexDescription, PyIndexSegment, PyIndexSegmentPlan, }; use crate::namespace::extract_namespace_arc; use crate::rt; @@ -2788,27 +2787,6 @@ impl Dataset { .collect()) } - #[pyo3(signature=(index_name=None))] - fn describe_index_segments( - &self, - py: Python<'_>, - index_name: Option<&str>, - ) -> PyResult> { - let new_self = self.ds.as_ref().clone(); - let indices = rt() - .block_on(Some(py), new_self.load_indices())? - .infer_error()?; - Ok(indices - .iter() - .filter(|segment| { - index_name - .map(|index_name| segment.name == index_name) - .unwrap_or(true) - }) - .map(PyIndexSegmentDescription::from_metadata) - .collect()) - } - /// Create a delta builder to explore changes between dataset versions. #[pyo3(signature=())] fn delta(&self) -> PyResult { From 0962af09fa4b470181073f8dab73d26cac600c4b Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 26 Mar 2026 15:58:58 +0800 Subject: [PATCH 5/5] fix: satisfy rustfmt in python binding --- python/src/dataset.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/src/dataset.rs b/python/src/dataset.rs index 7c89c945e0f..695d8b317c5 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -87,9 +87,7 @@ use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler; use crate::error::PythonErrorExt; use crate::file::object_store_from_uri_or_path; use crate::fragment::FileFragment; -use crate::indices::{ - PyIndexConfig, PyIndexDescription, PyIndexSegment, PyIndexSegmentPlan, -}; +use crate::indices::{PyIndexConfig, PyIndexDescription, PyIndexSegment, PyIndexSegmentPlan}; use crate::namespace::extract_namespace_arc; use crate::rt; use crate::scanner::ScanStatistics;