Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions java/lance-jni/src/blocking_dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,18 @@ use lance::dataset::{
ColumnAlteration, CommitBuilder, Dataset, NewColumnTransform, ProjectionRequest, ReadParams,
Version, WriteParams,
};
use lance::index::{DatasetIndexExt, IndexSegment};
use lance::io::commit::namespace_manifest::LanceNamespaceExternalManifestStore;
use lance::io::{ObjectStore, ObjectStoreParams};
use lance::session::Session as LanceSession;
use lance::table::format::IndexMetadata;
use lance::table::format::{BasePath, Fragment};
use lance_core::datatypes::Schema as LanceSchema;
use lance_file::version::LanceFileVersion;
use lance_index::DatasetIndexExt;
use lance_index::IndexCriteria as RustIndexCriteria;
use lance_index::optimize::OptimizeOptions;
use lance_index::scalar::btree::BTreeParameters;
use lance_index::{IndexParams, IndexSegment, IndexType};
use lance_index::{IndexParams, IndexType};
use lance_io::object_store::ObjectStoreRegistry;
use lance_io::object_store::StorageOptionsProvider;
use lance_namespace::LanceNamespace;
Expand Down
4 changes: 2 additions & 2 deletions python/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ use lance::dataset::{
transaction::{Operation, Transaction},
};
use lance::index::vector::utils::get_vector_type;
use lance::index::{DatasetIndexInternalExt, vector::VectorIndexParams};
use lance::index::{DatasetIndexExt, DatasetIndexInternalExt, vector::VectorIndexParams};
use lance::{dataset::builder::DatasetBuilder, index::vector::IndexFileVersion};
use lance_arrow::as_fixed_size_list_array;
use lance_core::Error;
Expand All @@ -67,7 +67,7 @@ use lance_index::scalar::inverted::query::{
BooleanQuery, BoostQuery, FtsQuery, MatchQuery, MultiMatchQuery, Operator, PhraseQuery,
};
use lance_index::{
DatasetIndexExt, IndexParams, IndexType,
IndexParams, IndexType,
optimize::OptimizeOptions,
scalar::{FullTextSearchQuery, InvertedIndexParams, ScalarIndexParams},
vector::{
Expand Down
3 changes: 2 additions & 1 deletion python/src/indices.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use arrow_data::ArrayData;
use chrono::{DateTime, Utc};
use lance::dataset::Dataset as LanceDataset;
use lance::index::vector::ivf::builder::write_vector_storage;
use lance::index::{DatasetIndexExt, IndexSegment, IndexSegmentPlan};
use lance::io::ObjectStore;
use lance_index::progress::NoopIndexBuildProgress;
use lance_index::vector::ivf::shuffler::{IvfShuffler, shuffle_vectors};
Expand All @@ -37,7 +38,7 @@ use crate::{
dataset::Dataset, error::PythonErrorExt, file::object_store_from_uri_or_path_no_options, rt,
};
use lance::index::vector::ivf::write_ivf_pq_file_from_existing_index;
use lance_index::{DatasetIndexExt, IndexDescription, IndexSegment, IndexSegmentPlan, IndexType};
use lance_index::{IndexDescription, IndexType};
use uuid::Uuid;

#[pyclass(name = "IndexConfig", module = "lance.indices", get_all)]
Expand Down
2 changes: 1 addition & 1 deletion python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ use ::arrow::pyarrow::PyArrowType;
use ::arrow_schema::Schema as ArrowSchema;
use ::lance::arrow::json::ArrowJsonExt;
use ::lance::datafusion::LanceTableProvider;
use ::lance::index::DatasetIndexExt;
use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec;
use datafusion_ffi::table_provider::FFI_TableProvider;
#[cfg(feature = "datagen")]
Expand All @@ -52,7 +53,6 @@ use file::{
LanceBufferDescriptor, LanceColumnMetadata, LanceFileMetadata, LanceFileReader,
LanceFileStatistics, LanceFileWriter, LancePageMetadata, stable_version,
};
use lance_index::DatasetIndexExt;
use log::Level;
use pyo3::exceptions::PyIOError;
use pyo3::prelude::*;
Expand Down
2 changes: 1 addition & 1 deletion rust/examples/src/full_text_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ use arrow::datatypes::UInt64Type;
use arrow_schema::{DataType, Field, Schema};
use itertools::Itertools;
use lance::Dataset;
use lance::index::DatasetIndexExt;
use lance_datagen::{RowCount, array};
use lance_index::DatasetIndexExt;
use lance_index::scalar::inverted::flat_full_text_search;
use lance_index::scalar::{FullTextSearchQuery, InvertedIndexParams};
use object_store::path::Path;
Expand Down
3 changes: 2 additions & 1 deletion rust/examples/src/ivf_hnsw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ use clap::Parser;
use futures::TryStreamExt;
use lance::Dataset;
use lance::dataset::ProjectionRequest;
use lance::index::DatasetIndexExt;
use lance::index::vector::VectorIndexParams;
use lance_index::IndexType;
use lance_index::vector::hnsw::builder::HnswBuildParams;
use lance_index::vector::ivf::IvfBuildParams;
use lance_index::vector::sq::builder::SQBuildParams;
use lance_index::{DatasetIndexExt, IndexType};
use lance_linalg::distance::MetricType;

#[derive(Parser, Debug)]
Expand Down
2 changes: 0 additions & 2 deletions rust/lance-index/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,9 @@ pub mod progress;
pub mod registry;
pub mod scalar;
pub mod traits;
pub mod types;
pub mod vector;

pub use crate::traits::*;
pub use crate::types::{IndexSegment, IndexSegmentPlan};

pub const INDEX_FILE_NAME: &str = "index.idx";
/// The name of the auxiliary index file.
Expand Down
195 changes: 1 addition & 194 deletions rust/lance-index/src/traits.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

use std::sync::Arc;
use lance_core::Result;

use async_trait::async_trait;
use datafusion::execution::SendableRecordBatchStream;
use lance_core::{Error, Result};

use crate::{IndexParams, IndexType, optimize::OptimizeOptions, types::IndexSegment};
use lance_table::format::IndexMetadata;

/// A set of criteria used to filter potential indices to use for a query
Expand Down Expand Up @@ -121,191 +116,3 @@ pub trait IndexDescription: Send + Sync {
/// (for backward compatibility with indices created before file tracking was added).
fn total_size_bytes(&self) -> Option<u64>;
}

/// Extends a Lance `Dataset` with secondary-index operations.
#[async_trait]
pub trait DatasetIndexExt {
    /// Builder type returned by [`Self::create_index_builder`].
    type IndexBuilder<'a>
    where
        Self: 'a;
    /// Builder type returned by [`Self::create_index_segment_builder`].
    type IndexSegmentBuilder<'a>
    where
        Self: 'a;

    /// Create a builder for creating an index on columns.
    ///
    /// This returns a builder that can be configured with additional options
    /// like `name()`, `replace()`, and `train()` before awaiting to execute.
    ///
    /// # Parameters
    /// - `columns`: the columns to build the indices on.
    /// - `index_type`: specify [`IndexType`].
    /// - `params`: index parameters.
    fn create_index_builder<'a>(
        &'a mut self,
        columns: &'a [&'a str],
        index_type: IndexType,
        params: &'a dyn IndexParams,
    ) -> Self::IndexBuilder<'a>;

    /// Create a builder for building physical index segments from uncommitted
    /// vector index outputs.
    ///
    /// The caller supplies the uncommitted index metadata returned by
    /// `execute_uncommitted()` so the builder can plan segment grouping without
    /// rediscovering fragment coverage.
    ///
    /// This is the canonical entry point for distributed vector segment build.
    /// After building the physical segments, publish them as a
    /// logical index with [`Self::commit_existing_index_segments`].
    fn create_index_segment_builder<'a>(&'a self) -> Self::IndexSegmentBuilder<'a>;

    /// Create indices on columns.
    ///
    /// Upon finish, a new dataset version is generated.
    ///
    /// Parameters:
    ///
    /// - `columns`: the columns to build the indices on.
    /// - `index_type`: specify [`IndexType`].
    /// - `name`: optional index name. Must be unique in the dataset.
    ///   If not provided, it will auto-generate one.
    /// - `params`: index parameters.
    /// - `replace`: replace the existing index if it exists.
    ///
    /// Returns the metadata of the created index.
    async fn create_index(
        &mut self,
        columns: &[&str],
        index_type: IndexType,
        name: Option<String>,
        params: &dyn IndexParams,
        replace: bool,
    ) -> Result<IndexMetadata>;

    /// Drop indices by name.
    ///
    /// Upon finish, a new dataset version is generated.
    ///
    /// Parameters:
    ///
    /// - `name`: the name of the index to drop.
    async fn drop_index(&mut self, name: &str) -> Result<()>;

    /// Prewarm an index by name.
    ///
    /// This will load the index into memory and cache it.
    ///
    /// Generally, this should only be called when it is known the entire index will
    /// fit into the index cache.
    ///
    /// This is a hint that is not enforced by all indices today. Some indices may choose
    /// to ignore this hint.
    async fn prewarm_index(&self, name: &str) -> Result<()>;

    /// Read all indices of this Dataset version.
    ///
    /// The indices are lazy loaded and cached in memory within the `Dataset` instance.
    /// The cache is invalidated when the dataset version (Manifest) is changed.
    async fn load_indices(&self) -> Result<Arc<Vec<IndexMetadata>>>;

    /// Loads all the indices of a given UUID.
    ///
    /// Note that it is possible to have multiple indices with the same UUID,
    /// as they are the deltas of the same index.
    async fn load_index(&self, uuid: &str) -> Result<Option<IndexMetadata>> {
        // Linear scan over the (cached) metadata list; only the first match is
        // returned even though multiple deltas may share a UUID.
        self.load_indices().await.map(|indices| {
            indices
                .iter()
                .find(|idx| idx.uuid.to_string() == uuid)
                .cloned()
        })
    }

    /// Loads a specific index with the given index name
    ///
    /// Returns
    /// -------
    /// - `Ok(indices)`: if the index exists, returns the index.
    /// - `Ok(vec![])`: if the index does not exist.
    /// - `Err(e)`: if there is an error loading indices.
    ///
    async fn load_indices_by_name(&self, name: &str) -> Result<Vec<IndexMetadata>> {
        // Name is not unique: delta indices share a name, so this collects all matches.
        self.load_indices().await.map(|indices| {
            indices
                .iter()
                .filter(|idx| idx.name == name)
                .cloned()
                .collect()
        })
    }

    /// Loads a specific index with the given index name.
    /// This function only works for indices that are unique.
    /// If there are multiple indices sharing the same name, please use [`Self::load_indices_by_name`]
    ///
    /// Returns
    /// -------
    /// - `Ok(Some(index))`: if the index exists, returns the index.
    /// - `Ok(None)`: if the index does not exist.
    /// - `Err(e)`: Index error if there are multiple indexes sharing the same name.
    ///
    async fn load_index_by_name(&self, name: &str) -> Result<Option<IndexMetadata>> {
        let indices = self.load_indices_by_name(name).await?;
        if indices.is_empty() {
            Ok(None)
        } else if indices.len() == 1 {
            Ok(Some(indices[0].clone()))
        } else {
            // Ambiguous lookup is an error rather than a silent pick of one delta.
            Err(Error::index(format!(
                "Found multiple indices of the same name: {:?}, please use load_indices_by_name",
                indices.iter().map(|idx| &idx.name).collect::<Vec<_>>()
            )))
        }
    }

    /// Describes indexes in a dataset
    ///
    /// This method should only access the index metadata and should not load the index into memory.
    ///
    /// More detailed information may be available from `index_statistics` but that will require
    /// loading the index into memory.
    ///
    /// # Parameters
    /// - `criteria`: optional filter restricting which indices are described;
    ///   `None` describes all indices.
    async fn describe_indices<'a, 'b>(
        &'a self,
        criteria: Option<IndexCriteria<'b>>,
    ) -> Result<Vec<Arc<dyn IndexDescription>>>;

    /// Loads a specific index with the given index name.
    ///
    /// Returns `Ok(None)` when no scalar index matches `criteria`.
    async fn load_scalar_index<'a, 'b>(
        &'a self,
        criteria: IndexCriteria<'b>,
    ) -> Result<Option<IndexMetadata>>;

    /// Optimize indices.
    async fn optimize_indices(&mut self, options: &OptimizeOptions) -> Result<()>;

    /// Find index with a given index_name and return its serialized statistics.
    ///
    /// If the index does not exist, return Error.
    async fn index_statistics(&self, index_name: &str) -> Result<String>;

    /// Commit one or more existing physical index segments as a logical index.
    ///
    /// This publishes already-built physical segments. It does not build
    /// or merge index data; callers should first build segments with
    /// [`Self::create_index_segment_builder`] or another index-specific build
    /// path and then pass the resulting segments here.
    ///
    /// # Parameters
    /// - `index_name`: name of the logical index to publish.
    /// - `column`: the column the segments index.
    /// - `segments`: the already-built physical segments to commit.
    async fn commit_existing_index_segments(
        &mut self,
        index_name: &str,
        column: &str,
        segments: Vec<IndexSegment>,
    ) -> Result<()>;

    /// Read a single partition of the named index as a stream of record batches.
    ///
    /// NOTE(review): `with_vector` presumably controls whether vector data is
    /// included in the returned stream — confirm against the implementation,
    /// which is not visible here.
    async fn read_index_partition(
        &self,
        index_name: &str,
        partition_id: usize,
        with_vector: bool,
    ) -> Result<SendableRecordBatchStream>;
}
Loading
Loading