lance-format · wjones127 · Mar 4, 2026 · Mar 4, 2026 · Mar 4, 2026 · Mar 4, 2026
diff --git a/java/lance-jni/Cargo.lock b/java/lance-jni/Cargo.lock
diff --git a/java/lance-jni/src/utils.rs b/java/lance-jni/src/utils.rs
@@ -426,6 +426,7 @@ pub fn get_vector_index_params(
                 stages,
                 version: IndexFileVersion::V3,
                 skip_transpose: false,
+                runtime_hints: Default::default(),
             })
         },
     )?;

diff --git a/protos/index.proto b/protos/index.proto
@@ -184,6 +184,67 @@ message VectorIndex {
   VectorMetricType metric_type = 4;
 }
 
+// Details for vector indexes, stored in the manifest's index_details field.
+message VectorIndexDetails {
+  VectorMetricType metric_type = 1;
+
+  // The target number of vectors per partition.
+  // 0 means unset.
+  uint64 target_partition_size = 2;
+
+  // Optional HNSW index configuration. If set, the index has an HNSW layer.
+  optional HnswParameters hnsw_index_config = 3;
+
+  message ProductQuantization {
+    uint32 num_bits = 1;
+    uint32 num_sub_vectors = 2;
+  }
+  message ScalarQuantization {
+    uint32 num_bits = 1;
+  }
+  message RabitQuantization {
+    enum RotationType {
+      FAST = 0;
+      MATRIX = 1;
+    }
+    uint32 num_bits = 1;
+    RotationType rotation_type = 2;
+  }
+
+  // No quantization; vectors are stored as-is.
+  message FlatCompression {}
+
+  oneof compression {
+    ProductQuantization pq = 4;
+    ScalarQuantization sq = 5;
+    RabitQuantization rq = 6;
+    FlatCompression flat = 8;
+  }
+
+  // The version of the index file format. Useful for maintaining backwards
+  // compatibility when introducing breaking changes to the index format.
+  // 0 means unset (legacy index).
+  uint32 index_version = 7;
+
+  // Runtime hints: optional build preferences that don't affect index structure.
+  // Keys use reverse-DNS namespacing (e.g., "lance.ivf.max_iters", "lancedb.accelerator").
+  // Unrecognized keys must be silently ignored by all runtimes.
+  map<string, string> runtime_hints = 9;
+}
+
+// Hierarchical Navigable Small World (HNSW) parameters, used as an optional configuration for IVF indexes.
+message HnswParameters {
+  // The maximum number of outgoing edges per node in the HNSW graph. Higher values
+  // means more connections, better recall, but more memory and slower builds.
+  // Referred to as "M" in the HNSW literature.
+  uint32 max_connections = 1;
+  // "construction exploration factor": The size of the dynamic list used during
+  // index construction.
+  uint32 construction_ef = 2;
+  // The maximum number of levels in the HNSW graph.
+  uint32 max_level = 3;
+}
+
 message JsonIndexDetails {
   string path = 1;
   google.protobuf.Any target_details = 2;

diff --git a/protos/table.proto b/protos/table.proto
@@ -474,8 +474,7 @@ message ExternalFile {
   uint64 size = 3;
 }
 
-// Empty details messages for older indexes that don't take advantage of the details field.
-message VectorIndexDetails {}
+// VectorIndexDetails and HnswParameters (formerly HnswIndexDetails) moved to index.proto
 
 message FragmentReuseIndexDetails {
 

diff --git a/python/python/tests/compat/test_vector_indices.py b/python/python/tests/compat/test_vector_indices.py
@@ -71,6 +71,17 @@ def check_read(self):
         )
         assert result.num_rows == 4
 
+        if hasattr(ds, "describe_indices"):
+            indices = ds.describe_indices()
+            assert len(indices) >= 1
+            name = indices[0].name
+        elif self.compat_version >= "0.39.0":
+            indices = ds.list_indices()
+            assert len(indices) >= 1
+            name = indices[0]["name"]
+            stats = ds.stats.index_stats(name)
+            assert stats["num_indexed_rows"] > 0
+
     def check_write(self):
         """Verify can insert vectors and rebuild index."""
         ds = lance.dataset(self.path)
@@ -140,6 +151,18 @@ def check_read(self):
         )
         assert result.num_rows == 4
 
+        if hasattr(ds, "describe_indices"):
+            indices = ds.describe_indices()
+            assert len(indices) >= 1
+            name = indices[0].name
+        else:
+            indices = ds.list_indices()
+            assert len(indices) >= 1
+            name = indices[0]["name"]
+
+        stats = ds.stats.index_stats(name)
+        assert stats["num_indexed_rows"] > 0
+
     def check_write(self):
         """Verify can insert vectors and rebuild index."""
         ds = lance.dataset(self.path)
@@ -209,6 +232,18 @@ def check_read(self):
         )
         assert result.num_rows == 4
 
+        if hasattr(ds, "describe_indices"):
+            indices = ds.describe_indices()
+            assert len(indices) >= 1
+            name = indices[0].name
+        else:
+            indices = ds.list_indices()
+            assert len(indices) >= 1
+            name = indices[0]["name"]
+
+        stats = ds.stats.index_stats(name)
+        assert stats["num_indexed_rows"] > 0
+
     def check_write(self):
         """Verify can insert vectors and rebuild index."""
         ds = lance.dataset(self.path)
@@ -226,9 +261,9 @@ def check_write(self):
         ds.optimize.compact_files()
 
 
-@compat_test(min_version="0.39.0")
+@compat_test(min_version="4.0.0-beta.8")
 class IvfRqVectorIndex(UpgradeDowngradeTest):
-    """Test IVF_RQ vector index compatibility."""
+    """Test IVF_RQ vector index compatibility. V2 was introduced in v4.0.0-beta.8"""
 
     def __init__(self, path: Path):
         self.path = path
@@ -273,6 +308,18 @@ def check_read(self):
         )
         assert result.num_rows == 4
 
+        if hasattr(ds, "describe_indices"):
+            indices = ds.describe_indices()
+            assert len(indices) >= 1
+            name = indices[0].name
+        else:
+            indices = ds.list_indices()
+            assert len(indices) >= 1
+            name = indices[0].name
+
+        stats = ds.stats.index_stats(name)
+        assert stats["num_indexed_rows"] > 0
+
     def check_write(self):
         """Verify can insert vectors and run optimize workflows."""
         ds = lance.dataset(self.path)

diff --git a/python/python/tests/test_vector_index.py b/python/python/tests/test_vector_index.py
@@ -1677,7 +1677,7 @@ def test_describe_vector_index(indexed_dataset: LanceDataset):
     info = indexed_dataset.describe_indices()[0]
 
     assert info.name == "vector_idx"
-    assert info.type_url == "/lance.table.VectorIndexDetails"
+    assert info.type_url == "/lance.index.pb.VectorIndexDetails"
     assert info.index_type == "IVF_PQ"
     assert info.num_rows_indexed == 1000
     assert info.fields == [0]
@@ -1688,6 +1688,44 @@ def test_describe_vector_index(indexed_dataset: LanceDataset):
     assert info.segments[0].index_version == 1
     assert info.segments[0].created_at is not None
 
+    details = info.details
+    assert details["metric_type"] == "L2"
+    assert details["compression"]["type"] == "pq"
+    assert details["compression"]["num_bits"] == 8
+    assert details["compression"]["num_sub_vectors"] == 16
+
+
+def test_describe_index_runtime_hints_stored(tmp_path):
+    tbl = create_table(nvec=300, ndim=16)
+    dataset = lance.write_dataset(tbl, tmp_path)
+    dataset = dataset.create_index(
+        "vector",
+        index_type="IVF_PQ",
+        num_partitions=4,
+        num_sub_vectors=4,
+        max_iters=100,
+        sample_rate=512,
+    )
+    details = dataset.describe_indices()[0].details
+    hints = details.get("runtime_hints", {})
+    assert hints.get("lance.ivf.max_iters") == "100"
+    assert hints.get("lance.ivf.sample_rate") == "512"
+    assert hints.get("lance.pq.max_iters") == "100"
+    assert hints.get("lance.pq.sample_rate") == "512"
+
+
+def test_describe_index_runtime_hints_defaults_omitted(tmp_path):
+    tbl = create_table(nvec=300, ndim=16)
+    dataset = lance.write_dataset(tbl, tmp_path)
+    dataset = dataset.create_index(
+        "vector",
+        index_type="IVF_PQ",
+        num_partitions=4,
+        num_sub_vectors=4,
+    )
+    details = dataset.describe_indices()[0].details
+    assert "runtime_hints" not in details
+
 
 def test_optimize_indices(indexed_dataset):
     data = create_table()

diff --git a/python/src/dataset.rs b/python/src/dataset.rs
@@ -3569,6 +3569,12 @@ fn prepare_vector_index_params(
             sq_params.sample_rate = sample_rate;
         }
 
+        if let Some(max_iters) = kwargs.get_item("max_iters")? {
+            let max_iters: usize = max_iters.extract()?;
+            ivf_params.max_iters = max_iters;
+            pq_params.max_iters = max_iters;
+        }
+
         // Parse IVF params
         if let Some(n) = kwargs.get_item("num_partitions")? {
             ivf_params.num_partitions = Some(n.extract()?)
@@ -3731,6 +3737,13 @@ fn prepare_vector_index_params(
     }?;
     params.version(index_file_version);
     params.skip_transpose(skip_transpose);
+    if let Some(kwargs) = kwargs
+        && let Some(acc) = kwargs.get_item("accelerator")?
+    {
+        params
+            .runtime_hints
+            .insert("lancedb.accelerator".to_string(), acc.to_string());
+    }
     Ok(params)
 }
 

diff --git a/python/src/indices.rs b/python/src/indices.rs
@@ -441,8 +441,7 @@ async fn do_load_shuffled_vectors(
         dataset_version: ds.manifest.version,
         fragment_bitmap: Some(ds.fragments().iter().map(|f| f.id as u32).collect()),
         index_details: Some(Arc::new(
-            prost_types::Any::from_msg(&lance_table::format::pb::VectorIndexDetails::default())
-                .unwrap(),
+            prost_types::Any::from_msg(&lance_index::pb::VectorIndexDetails::default()).unwrap(),
         )),
         index_version: IndexType::IvfPq.version(),
         created_at: Some(Utc::now()),

diff --git a/rust/lance-index/src/vector/bq.rs b/rust/lance-index/src/vector/bq.rs
@@ -7,6 +7,7 @@ use std::iter::once;
 use std::str::FromStr;
 use std::sync::Arc;
 
+use crate::pb::vector_index_details::RabitQuantization;
 use arrow_array::types::Float32Type;
 use arrow_array::{Array, ArrayRef, UInt8Array, cast::AsArray};
 use lance_core::{Error, Result};
@@ -121,6 +122,19 @@ impl RQBuildParams {
     }
 }
 
+impl From<&RQBuildParams> for RabitQuantization {
+    fn from(value: &RQBuildParams) -> Self {
+        use crate::pb::vector_index_details::rabit_quantization::RotationType;
+        Self {
+            num_bits: value.num_bits as u32,
+            rotation_type: match value.rotation_type {
+                RQRotationType::Fast => RotationType::Fast as i32,
+                RQRotationType::Matrix => RotationType::Matrix as i32,
+            },
+        }
+    }
+}
+
 impl QuantizerBuildParams for RQBuildParams {
     fn sample_size(&self) -> usize {
         0

diff --git a/rust/lance-index/src/vector/hnsw/builder.rs b/rust/lance-index/src/vector/hnsw/builder.rs
@@ -60,6 +60,16 @@ pub struct HnswBuildParams {
     pub prefetch_distance: Option<usize>,
 }
 
+impl From<&HnswBuildParams> for crate::pb::HnswParameters {
+    fn from(params: &HnswBuildParams) -> Self {
+        Self {
+            max_connections: params.m as u32,
+            construction_ef: params.ef_construction as u32,
+            max_level: params.max_level as u32,
+        }
+    }
+}
+
 impl Default for HnswBuildParams {
     fn default() -> Self {
         Self {

diff --git a/rust/lance-index/src/vector/pq/builder.rs b/rust/lance-index/src/vector/pq/builder.rs
@@ -44,6 +44,15 @@ pub struct PQBuildParams {
     pub sample_rate: usize,
 }
 
+impl From<&PQBuildParams> for crate::pb::vector_index_details::ProductQuantization {
+    fn from(params: &PQBuildParams) -> Self {
+        Self {
+            num_bits: params.num_bits as u32,
+            num_sub_vectors: params.num_sub_vectors as u32,
+        }
+    }
+}
+
 impl Default for PQBuildParams {
     fn default() -> Self {
         Self {

diff --git a/rust/lance-index/src/vector/sq/builder.rs b/rust/lance-index/src/vector/sq/builder.rs
@@ -12,6 +12,14 @@ pub struct SQBuildParams {
     pub sample_rate: usize,
 }
 
+impl From<&SQBuildParams> for crate::pb::vector_index_details::ScalarQuantization {
+    fn from(params: &SQBuildParams) -> Self {
+        Self {
+            num_bits: params.num_bits as u32,
+        }
+    }
+}
+
 impl Default for SQBuildParams {
     fn default() -> Self {
         Self {

diff --git a/rust/lance/src/dataset/index.rs b/rust/lance/src/dataset/index.rs
@@ -17,9 +17,9 @@ use async_trait::async_trait;
 use lance_core::{Error, Result};
 use lance_encoding::version::LanceFileVersion;
 use lance_index::frag_reuse::FRAG_REUSE_INDEX_NAME;
+use lance_index::pb::VectorIndexDetails;
 use lance_index::scalar::lance_format::LanceIndexStore;
 use lance_table::format::IndexMetadata;
-use lance_table::format::pb::VectorIndexDetails;
 use serde::{Deserialize, Serialize};
 
 use super::optimize::{IndexRemapper, IndexRemapperOptions};

diff --git a/rust/lance/src/dataset/optimize.rs b/rust/lance/src/dataset/optimize.rs
@@ -4091,6 +4091,7 @@ mod tests {
                     ],
                     version: crate::index::vector::IndexFileVersion::V3,
                     skip_transpose: false,
+                    runtime_hints: Default::default(),
                 },
                 false,
             )
-Original file line number
+Diff line change
@@ Expand Up / @@ -426,6 +426,7 @@ pub fn get_vector_index_params( @@
                     stages,
                     version: IndexFileVersion::V3,
                     skip_transpose: false,
+                    runtime_hints: Default::default(),
                 })
             },
         )?;
@@ Expand Down @@