From 604debcbe99be323b88ea253aaaa26e1bf17e4af Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Tue, 9 Dec 2025 15:59:04 -0800 Subject: [PATCH 1/3] feat: add additional index APIs to support split plan --- java/lance-jni/src/blocking_dataset.rs | 283 ++++++++++++++++++ java/src/main/java/org/lance/Dataset.java | 39 +++ java/src/main/java/org/lance/index/Index.java | 66 +++- 3 files changed, 384 insertions(+), 4 deletions(-) diff --git a/java/lance-jni/src/blocking_dataset.rs b/java/lance-jni/src/blocking_dataset.rs index b15132ad00b..08146b4d617 100644 --- a/java/lance-jni/src/blocking_dataset.rs +++ b/java/lance-jni/src/blocking_dataset.rs @@ -2445,3 +2445,286 @@ fn inner_cleanup_with_policy<'local>( Ok(jstats) } + +////////////////////////////// +// Index operation Methods // +////////////////////////////// + +#[no_mangle] +pub extern "system" fn Java_org_lance_Dataset_nativeGetIndexes<'local>( + mut env: JNIEnv<'local>, + java_dataset: JObject, +) -> JObject<'local> { + ok_or_throw!(env, inner_get_indexes(&mut env, java_dataset)) +} + +fn inner_get_indexes<'local>( + env: &mut JNIEnv<'local>, + java_dataset: JObject, +) -> Result> { + let indexes = { + let dataset_guard = + unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?; + dataset_guard.list_indexes()? + }; + + let array_list = env.new_object("java/util/ArrayList", "()V", &[])?; + + for index_meta in indexes.iter() { + let java_index = create_java_index(env, index_meta)?; + env.call_method( + &array_list, + "add", + "(Ljava/lang/Object;)Z", + &[JValue::Object(&java_index)], + )?; + } + + Ok(array_list) +} + +fn create_java_index<'local>( + env: &mut JNIEnv<'local>, + index_meta: &IndexMetadata, +) -> Result> { + // Create UUID + let uuid_str = index_meta.uuid.to_string(); + let uuid_obj = env.call_static_method( + "java/util/UUID", + "fromString", + "(Ljava/lang/String;)Ljava/util/UUID;", + &[JValue::Object(&env.new_string(&uuid_str)?.into())], + )?.l()?; + + // Create fields list (List) + let fields_list = env.new_object("java/util/ArrayList", "()V", &[])?; + for field in &index_meta.fields { + let int_obj = env.call_static_method( + "java/lang/Integer", + "valueOf", + "(I)Ljava/lang/Integer;", + &[JValue::Int(*field)], + )?.l()?; + env.call_method(&fields_list, "add", "(Ljava/lang/Object;)Z", &[JValue::Object(&int_obj)])?; + } + + // Create name + let name = env.new_string(&index_meta.name)?; + + // Create fragments list (List) - can be null + let fragments_list = if let Some(bitmap) = &index_meta.fragment_bitmap { + let list = env.new_object("java/util/ArrayList", "()V", &[])?; + for frag_id in bitmap.iter() { + let int_obj = env.call_static_method( + "java/lang/Integer", + "valueOf", + "(I)Ljava/lang/Integer;", + &[JValue::Int(frag_id as i32)], + )?.l()?; + env.call_method(&list, "add", "(Ljava/lang/Object;)Z", &[JValue::Object(&int_obj)])?; + } + list + } else { + JObject::null() + }; + + // Create indexDetails byte array - can be null + let index_details_bytes = if let Some(details) = &index_meta.index_details { + let bytes = prost::Message::encode_to_vec(details.as_ref()); + let byte_array = env.byte_array_from_slice(&bytes)?; + byte_array.into() + } else { + JObject::null() + }; + + // Create createdAt Instant - can be null + let created_at = if let Some(dt) = index_meta.created_at { + let millis = dt.timestamp_millis(); + env.call_static_method( + "java/time/Instant", + "ofEpochMilli", + "(J)Ljava/time/Instant;", + &[JValue::Long(millis)], + )?.l()? + } else { + JObject::null() + }; + + // Create baseId Integer - can be null + let base_id = if let Some(id) = index_meta.base_id { + env.call_static_method( + "java/lang/Integer", + "valueOf", + "(I)Ljava/lang/Integer;", + &[JValue::Int(id as i32)], + )?.l()? + } else { + JObject::null() + }; + + // Determine index type from index_details type_url + let index_type = determine_index_type(env, &index_meta.index_details)?; + + // Call Index.create() static method + let index_obj = env.call_static_method( + "org/lance/index/Index", + "create", + "(Ljava/util/UUID;Ljava/util/List;Ljava/lang/String;JLjava/util/List;[BILjava/time/Instant;Ljava/lang/Integer;Lorg/lance/index/IndexType;)Lorg/lance/index/Index;", + &[ + JValue::Object(&uuid_obj), + JValue::Object(&fields_list), + JValue::Object(&name.into()), + JValue::Long(index_meta.dataset_version as i64), + JValue::Object(&fragments_list), + JValue::Object(&index_details_bytes), + JValue::Int(index_meta.index_version), + JValue::Object(&created_at), + JValue::Object(&base_id), + JValue::Object(&index_type), + ], + )?.l()?; + + Ok(index_obj) +} + +fn determine_index_type<'local>( + env: &mut JNIEnv<'local>, + index_details: &Option>, +) -> Result> { + let type_name = if let Some(details) = index_details { + // Extract type name from type_url (e.g., ".lance.index.BTreeIndexDetails" -> "BTREE") + let type_url = &details.type_url; + let type_part = type_url.split('.').next_back().unwrap_or(""); + let lower = type_part.to_lowercase(); + + if lower.contains("btree") { + Some("BTREE") + } else if lower.contains("bitmap") { + Some("BITMAP") + } else if lower.contains("labellist") { + Some("LABEL_LIST") + } else if lower.contains("inverted") { + Some("INVERTED") + } else if lower.contains("ngram") { + Some("NGRAM") + } else if lower.contains("zonemap") { + Some("ZONEMAP") + } else if lower.contains("bloomfilter") { + Some("BLOOM_FILTER") + } else if lower.contains("ivfhnsw") { + if lower.contains("sq") { + Some("IVF_HNSW_SQ") + } else if lower.contains("pq") { + Some("IVF_HNSW_PQ") + } else { + Some("IVF_HNSW_FLAT") + } + } else if lower.contains("ivf") { + if lower.contains("sq") { + Some("IVF_SQ") + } else if lower.contains("pq") { + Some("IVF_PQ") + } else { + Some("IVF_FLAT") + } + } else if lower.contains("vector") { + Some("VECTOR") + } else { + None + } + } else { + None + }; + + match type_name { + Some(name) => { + let index_type = env.get_static_field( + "org/lance/index/IndexType", + name, + "Lorg/lance/index/IndexType;", + )?.l()?; + Ok(index_type) + } + None => Ok(JObject::null()), + } +} + +#[no_mangle] +pub extern "system" fn Java_org_lance_Dataset_nativeCountIndexedRows( + mut env: JNIEnv, + java_dataset: JObject, + jindex_name: JString, + jfilter: JString, + jfragment_ids: JObject, // Optional> +) -> jlong { + ok_or_throw_with_return!( + env, + inner_count_indexed_rows(&mut env, java_dataset, jindex_name, jfilter, jfragment_ids), + -1 + ) +} + +fn inner_count_indexed_rows( + env: &mut JNIEnv, + java_dataset: JObject, + _jindex_name: JString, + jfilter: JString, + jfragment_ids: JObject, // Optional> +) -> Result { + let filter: String = jfilter.extract(env)?; + + // Extract optional fragment IDs + let fragment_ids: Option> = if env.call_method(&jfragment_ids, "isPresent", "()Z", &[])?.z()? { + let list_obj = env.call_method(&jfragment_ids, "get", "()Ljava/lang/Object;", &[])?.l()?; + let list = env.get_list(&list_obj)?; + let mut ids = Vec::new(); + let mut iter = list.iter(env)?; + while let Some(elem) = iter.next(env)? { + let int_val = env.call_method(&elem, "intValue", "()I", &[])?.i()?; + ids.push(int_val as u32); + } + Some(ids) + } else { + None + }; + + let count = { + let dataset_guard = + unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?; + + // Use a scanner with fragment filtering to count rows + // This ensures we only count rows in the specified fragments + let inner = dataset_guard.inner.clone(); + + RT.block_on(async { + let mut scanner = inner.scan(); + + // Apply filter + if !filter.is_empty() { + scanner.filter(&filter)?; + } + + // Empty projection and enable row_id for count_rows to work + // count_rows() requires metadata-only projection + scanner.project::(&[])?; + scanner.with_row_id(); + + // Apply fragment filter if specified + if let Some(frag_ids) = fragment_ids { + // Convert FileFragment to Fragment by extracting metadata + let filtered_fragments: Vec<_> = inner.get_fragments().into_iter() + .filter(|f| frag_ids.contains(&(f.id() as u32))) + .map(|f| f.metadata().clone()) + .collect(); + scanner.with_fragments(filtered_fragments); + } + + // Use the scanner's count_rows method + let count = scanner.count_rows().await?; + + Ok::(count as i64) + })? + }; + + Ok(count) +} diff --git a/java/src/main/java/org/lance/Dataset.java b/java/src/main/java/org/lance/Dataset.java index 21572214eda..0eedc5bad50 100644 --- a/java/src/main/java/org/lance/Dataset.java +++ b/java/src/main/java/org/lance/Dataset.java @@ -834,6 +834,31 @@ public long countRows(String filter) { private native long nativeCountRows(Optional filter); + /** + * Count rows matching a filter using a specific scalar index. This directly queries the index and + * counts matching row addresses, which is more efficient than scanning when the index covers the + * filter column. + * + * @param indexName the name of the scalar index to use + * @param filter the filter expression (e.g., "column = 5") + * @param fragmentIds optional list of fragment IDs to restrict the count to + * @return count of matching rows + */ + public long countIndexedRows( + String indexName, String filter, Optional> fragmentIds) { + try (LockManager.ReadLock readLock = lockManager.acquireReadLock()) { + Preconditions.checkArgument(nativeDatasetHandle != 0, "Dataset is closed"); + Preconditions.checkArgument( + indexName != null && !indexName.isEmpty(), "indexName cannot be null or empty"); + Preconditions.checkArgument( + filter != null && !filter.isEmpty(), "filter cannot be null or empty"); + return nativeCountIndexedRows(indexName, filter, fragmentIds); + } + } + + private native long nativeCountIndexedRows( + String indexName, String filter, Optional> fragmentIds); + /** * Calculate the size of the dataset. * @@ -928,6 +953,20 @@ public List listIndexes() { private native List nativeListIndexes(); + /** + * Get all indexes with full metadata. + * + * @return list of Index objects with complete metadata including index type and fragment coverage + */ + public List getIndexes() { + try (LockManager.ReadLock readLock = lockManager.acquireReadLock()) { + Preconditions.checkArgument(nativeDatasetHandle != 0, "Dataset is closed"); + return nativeGetIndexes(); + } + } + + private native List nativeGetIndexes(); + /** * Get the table config of the dataset. * diff --git a/java/src/main/java/org/lance/index/Index.java b/java/src/main/java/org/lance/index/Index.java index 86ff8c6007b..8b134304dae 100644 --- a/java/src/main/java/org/lance/index/Index.java +++ b/java/src/main/java/org/lance/index/Index.java @@ -36,6 +36,7 @@ public class Index { private final int indexVersion; private final Instant createdAt; private final Integer baseId; + private final IndexType indexType; private Index( UUID uuid, @@ -46,7 +47,8 @@ private Index( byte[] indexDetails, int indexVersion, Instant createdAt, - Integer baseId) { + Integer baseId, + IndexType indexType) { this.uuid = uuid; this.fields = fields; this.name = name; @@ -56,6 +58,7 @@ private Index( this.indexVersion = indexVersion; this.createdAt = createdAt; this.baseId = baseId; + this.indexType = indexType; } public UUID uuid() { @@ -119,6 +122,15 @@ public Optional createdAt() { return Optional.ofNullable(createdAt); } + /** + * Get the type of the index (e.g., BTREE, BITMAP, VECTOR). + * + * @return the index type, or null if unknown + */ + public IndexType indexType() { + return indexType; + } + @Override public boolean equals(Object o) { if (this == o) return true; @@ -132,14 +144,23 @@ public boolean equals(Object o) { && Objects.equals(fragments, index.fragments) && Arrays.equals(indexDetails, index.indexDetails) && Objects.equals(createdAt, index.createdAt) - && Objects.equals(baseId, index.baseId); + && Objects.equals(baseId, index.baseId) + && indexType == index.indexType; } @Override public int hashCode() { int result = Objects.hash( - uuid, fields, name, datasetVersion, indexVersion, createdAt, baseId, fragments); + uuid, + fields, + name, + datasetVersion, + indexVersion, + createdAt, + baseId, + fragments, + indexType); result = 31 * result + Arrays.hashCode(indexDetails); return result; } @@ -152,6 +173,7 @@ public String toString() { .add("name", name) .add("datasetVersion", datasetVersion) .add("indexVersion", indexVersion) + .add("indexType", indexType) .add("createdAt", createdAt) .add("baseId", baseId) .toString(); @@ -166,6 +188,35 @@ public static Builder builder() { return new Builder(); } + /** + * Create an Index instance directly. This is primarily for JNI use. + * + * @return a new Index instance + */ + public static Index create( + UUID uuid, + List fields, + String name, + long datasetVersion, + List fragments, + byte[] indexDetails, + int indexVersion, + Instant createdAt, + Integer baseId, + IndexType indexType) { + return new Index( + uuid, + fields, + name, + datasetVersion, + fragments, + indexDetails, + indexVersion, + createdAt, + baseId, + indexType); + } + public static class Builder { private UUID uuid; @@ -177,6 +228,7 @@ public static class Builder { private int indexVersion; private Instant createdAt; private Integer baseId; + private IndexType indexType; private Builder() {} @@ -225,6 +277,11 @@ public Builder baseId(Integer baseId) { return this; } + public Builder indexType(IndexType indexType) { + this.indexType = indexType; + return this; + } + public Index build() { return new Index( uuid, @@ -235,7 +292,8 @@ public Index build() { indexDetails, indexVersion, createdAt, - baseId); + baseId, + indexType); } } } From fade3f13f1fcdad9f08d46068c04a72cc8f2c2f6 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Tue, 9 Dec 2025 16:08:27 -0800 Subject: [PATCH 2/3] clippy --- java/lance-jni/src/blocking_dataset.rs | 87 +++++++++++++++++--------- 1 file changed, 57 insertions(+), 30 deletions(-) diff --git a/java/lance-jni/src/blocking_dataset.rs b/java/lance-jni/src/blocking_dataset.rs index 08146b4d617..4d4e31e0dd7 100644 --- a/java/lance-jni/src/blocking_dataset.rs +++ b/java/lance-jni/src/blocking_dataset.rs @@ -2489,23 +2489,32 @@ fn create_java_index<'local>( ) -> Result> { // Create UUID let uuid_str = index_meta.uuid.to_string(); - let uuid_obj = env.call_static_method( - "java/util/UUID", - "fromString", - "(Ljava/lang/String;)Ljava/util/UUID;", - &[JValue::Object(&env.new_string(&uuid_str)?.into())], - )?.l()?; + let uuid_obj = env + .call_static_method( + "java/util/UUID", + "fromString", + "(Ljava/lang/String;)Ljava/util/UUID;", + &[JValue::Object(&env.new_string(&uuid_str)?.into())], + )? + .l()?; // Create fields list (List) let fields_list = env.new_object("java/util/ArrayList", "()V", &[])?; for field in &index_meta.fields { - let int_obj = env.call_static_method( - "java/lang/Integer", - "valueOf", - "(I)Ljava/lang/Integer;", - &[JValue::Int(*field)], - )?.l()?; - env.call_method(&fields_list, "add", "(Ljava/lang/Object;)Z", &[JValue::Object(&int_obj)])?; + let int_obj = env + .call_static_method( + "java/lang/Integer", + "valueOf", + "(I)Ljava/lang/Integer;", + &[JValue::Int(*field)], + )? + .l()?; + env.call_method( + &fields_list, + "add", + "(Ljava/lang/Object;)Z", + &[JValue::Object(&int_obj)], + )?; } // Create name @@ -2515,13 +2524,20 @@ fn create_java_index<'local>( let fragments_list = if let Some(bitmap) = &index_meta.fragment_bitmap { let list = env.new_object("java/util/ArrayList", "()V", &[])?; for frag_id in bitmap.iter() { - let int_obj = env.call_static_method( - "java/lang/Integer", - "valueOf", - "(I)Ljava/lang/Integer;", - &[JValue::Int(frag_id as i32)], - )?.l()?; - env.call_method(&list, "add", "(Ljava/lang/Object;)Z", &[JValue::Object(&int_obj)])?; + let int_obj = env + .call_static_method( + "java/lang/Integer", + "valueOf", + "(I)Ljava/lang/Integer;", + &[JValue::Int(frag_id as i32)], + )? + .l()?; + env.call_method( + &list, + "add", + "(Ljava/lang/Object;)Z", + &[JValue::Object(&int_obj)], + )?; } list } else { @@ -2545,7 +2561,8 @@ fn create_java_index<'local>( "ofEpochMilli", "(J)Ljava/time/Instant;", &[JValue::Long(millis)], - )?.l()? + )? + .l()? } else { JObject::null() }; @@ -2557,7 +2574,8 @@ fn create_java_index<'local>( "valueOf", "(I)Ljava/lang/Integer;", &[JValue::Int(id as i32)], - )?.l()? + )? + .l()? } else { JObject::null() }; @@ -2638,11 +2656,13 @@ fn determine_index_type<'local>( match type_name { Some(name) => { - let index_type = env.get_static_field( - "org/lance/index/IndexType", - name, - "Lorg/lance/index/IndexType;", - )?.l()?; + let index_type = env + .get_static_field( + "org/lance/index/IndexType", + name, + "Lorg/lance/index/IndexType;", + )? + .l()?; Ok(index_type) } None => Ok(JObject::null()), @@ -2674,8 +2694,13 @@ fn inner_count_indexed_rows( let filter: String = jfilter.extract(env)?; // Extract optional fragment IDs - let fragment_ids: Option> = if env.call_method(&jfragment_ids, "isPresent", "()Z", &[])?.z()? { - let list_obj = env.call_method(&jfragment_ids, "get", "()Ljava/lang/Object;", &[])?.l()?; + let fragment_ids: Option> = if env + .call_method(&jfragment_ids, "isPresent", "()Z", &[])? + .z()? + { + let list_obj = env + .call_method(&jfragment_ids, "get", "()Ljava/lang/Object;", &[])? + .l()?; let list = env.get_list(&list_obj)?; let mut ids = Vec::new(); let mut iter = list.iter(env)?; @@ -2712,7 +2737,9 @@ fn inner_count_indexed_rows( // Apply fragment filter if specified if let Some(frag_ids) = fragment_ids { // Convert FileFragment to Fragment by extracting metadata - let filtered_fragments: Vec<_> = inner.get_fragments().into_iter() + let filtered_fragments: Vec<_> = inner + .get_fragments() + .into_iter() .filter(|f| frag_ids.contains(&(f.id() as u32))) .map(|f| f.metadata().clone()) .collect(); From 75ed4cd2dabbb29029d54ad93e6df539a6d07149 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Wed, 10 Dec 2025 11:04:56 -0800 Subject: [PATCH 3/3] address comments --- java/lance-jni/src/blocking_dataset.rs | 188 +----------------- java/lance-jni/src/transaction.rs | 73 ++++++- java/src/main/java/org/lance/Dataset.java | 5 +- java/src/main/java/org/lance/index/Index.java | 29 --- 4 files changed, 75 insertions(+), 220 deletions(-) diff --git a/java/lance-jni/src/blocking_dataset.rs b/java/lance-jni/src/blocking_dataset.rs index 4d4e31e0dd7..cd10e27a085 100644 --- a/java/lance-jni/src/blocking_dataset.rs +++ b/java/lance-jni/src/blocking_dataset.rs @@ -2471,7 +2471,7 @@ fn inner_get_indexes<'local>( let array_list = env.new_object("java/util/ArrayList", "()V", &[])?; for index_meta in indexes.iter() { - let java_index = create_java_index(env, index_meta)?; + let java_index = index_meta.into_java(env)?; env.call_method( &array_list, "add", @@ -2483,192 +2483,6 @@ fn inner_get_indexes<'local>( Ok(array_list) } -fn create_java_index<'local>( - env: &mut JNIEnv<'local>, - index_meta: &IndexMetadata, -) -> Result> { - // Create UUID - let uuid_str = index_meta.uuid.to_string(); - let uuid_obj = env - .call_static_method( - "java/util/UUID", - "fromString", - "(Ljava/lang/String;)Ljava/util/UUID;", - &[JValue::Object(&env.new_string(&uuid_str)?.into())], - )? - .l()?; - - // Create fields list (List) - let fields_list = env.new_object("java/util/ArrayList", "()V", &[])?; - for field in &index_meta.fields { - let int_obj = env - .call_static_method( - "java/lang/Integer", - "valueOf", - "(I)Ljava/lang/Integer;", - &[JValue::Int(*field)], - )? - .l()?; - env.call_method( - &fields_list, - "add", - "(Ljava/lang/Object;)Z", - &[JValue::Object(&int_obj)], - )?; - } - - // Create name - let name = env.new_string(&index_meta.name)?; - - // Create fragments list (List) - can be null - let fragments_list = if let Some(bitmap) = &index_meta.fragment_bitmap { - let list = env.new_object("java/util/ArrayList", "()V", &[])?; - for frag_id in bitmap.iter() { - let int_obj = env - .call_static_method( - "java/lang/Integer", - "valueOf", - "(I)Ljava/lang/Integer;", - &[JValue::Int(frag_id as i32)], - )? - .l()?; - env.call_method( - &list, - "add", - "(Ljava/lang/Object;)Z", - &[JValue::Object(&int_obj)], - )?; - } - list - } else { - JObject::null() - }; - - // Create indexDetails byte array - can be null - let index_details_bytes = if let Some(details) = &index_meta.index_details { - let bytes = prost::Message::encode_to_vec(details.as_ref()); - let byte_array = env.byte_array_from_slice(&bytes)?; - byte_array.into() - } else { - JObject::null() - }; - - // Create createdAt Instant - can be null - let created_at = if let Some(dt) = index_meta.created_at { - let millis = dt.timestamp_millis(); - env.call_static_method( - "java/time/Instant", - "ofEpochMilli", - "(J)Ljava/time/Instant;", - &[JValue::Long(millis)], - )? - .l()? - } else { - JObject::null() - }; - - // Create baseId Integer - can be null - let base_id = if let Some(id) = index_meta.base_id { - env.call_static_method( - "java/lang/Integer", - "valueOf", - "(I)Ljava/lang/Integer;", - &[JValue::Int(id as i32)], - )? - .l()? - } else { - JObject::null() - }; - - // Determine index type from index_details type_url - let index_type = determine_index_type(env, &index_meta.index_details)?; - - // Call Index.create() static method - let index_obj = env.call_static_method( - "org/lance/index/Index", - "create", - "(Ljava/util/UUID;Ljava/util/List;Ljava/lang/String;JLjava/util/List;[BILjava/time/Instant;Ljava/lang/Integer;Lorg/lance/index/IndexType;)Lorg/lance/index/Index;", - &[ - JValue::Object(&uuid_obj), - JValue::Object(&fields_list), - JValue::Object(&name.into()), - JValue::Long(index_meta.dataset_version as i64), - JValue::Object(&fragments_list), - JValue::Object(&index_details_bytes), - JValue::Int(index_meta.index_version), - JValue::Object(&created_at), - JValue::Object(&base_id), - JValue::Object(&index_type), - ], - )?.l()?; - - Ok(index_obj) -} - -fn determine_index_type<'local>( - env: &mut JNIEnv<'local>, - index_details: &Option>, -) -> Result> { - let type_name = if let Some(details) = index_details { - // Extract type name from type_url (e.g., ".lance.index.BTreeIndexDetails" -> "BTREE") - let type_url = &details.type_url; - let type_part = type_url.split('.').next_back().unwrap_or(""); - let lower = type_part.to_lowercase(); - - if lower.contains("btree") { - Some("BTREE") - } else if lower.contains("bitmap") { - Some("BITMAP") - } else if lower.contains("labellist") { - Some("LABEL_LIST") - } else if lower.contains("inverted") { - Some("INVERTED") - } else if lower.contains("ngram") { - Some("NGRAM") - } else if lower.contains("zonemap") { - Some("ZONEMAP") - } else if lower.contains("bloomfilter") { - Some("BLOOM_FILTER") - } else if lower.contains("ivfhnsw") { - if lower.contains("sq") { - Some("IVF_HNSW_SQ") - } else if lower.contains("pq") { - Some("IVF_HNSW_PQ") - } else { - Some("IVF_HNSW_FLAT") - } - } else if lower.contains("ivf") { - if lower.contains("sq") { - Some("IVF_SQ") - } else if lower.contains("pq") { - Some("IVF_PQ") - } else { - Some("IVF_FLAT") - } - } else if lower.contains("vector") { - Some("VECTOR") - } else { - None - } - } else { - None - }; - - match type_name { - Some(name) => { - let index_type = env - .get_static_field( - "org/lance/index/IndexType", - name, - "Lorg/lance/index/IndexType;", - )? - .l()?; - Ok(index_type) - } - None => Ok(JObject::null()), - } -} - #[no_mangle] pub extern "system" fn Java_org_lance_Dataset_nativeCountIndexedRows( mut env: JNIEnv, diff --git a/java/lance-jni/src/transaction.rs b/java/lance-jni/src/transaction.rs index 32ffe3c99e0..9b80a741d6f 100644 --- a/java/lance-jni/src/transaction.rs +++ b/java/lance-jni/src/transaction.rs @@ -150,10 +150,13 @@ impl IntoJava for &IndexMetadata { JObject::null() }; - // Create IndexMetadata object + // Determine index type from index_details type_url + let index_type = determine_index_type(env, &self.index_details)?; + + // Create Index object Ok(env.new_object( "org/lance/index/Index", - "(Ljava/util/UUID;Ljava/util/List;Ljava/lang/String;JLjava/util/List;[BILjava/time/Instant;Ljava/lang/Integer;)V", + "(Ljava/util/UUID;Ljava/util/List;Ljava/lang/String;JLjava/util/List;[BILjava/time/Instant;Ljava/lang/Integer;Lorg/lance/index/IndexType;)V", &[ JValue::Object(&uuid), JValue::Object(&fields), @@ -164,11 +167,77 @@ impl IntoJava for &IndexMetadata { JValue::Int(self.index_version), JValue::Object(&created_at), JValue::Object(&base_id), + JValue::Object(&index_type), ], )?) } } +/// Determine the IndexType enum value from index_details protobuf +fn determine_index_type<'local>( + env: &mut JNIEnv<'local>, + index_details: &Option>, +) -> Result> { + let type_name = if let Some(details) = index_details { + // Extract type name from type_url (e.g., ".lance.index.BTreeIndexDetails" -> "BTREE") + let type_url = &details.type_url; + let type_part = type_url.split('.').next_back().unwrap_or(""); + let lower = type_part.to_lowercase(); + + if lower.contains("btree") { + Some("BTREE") + } else if lower.contains("bitmap") { + Some("BITMAP") + } else if lower.contains("labellist") { + Some("LABEL_LIST") + } else if lower.contains("inverted") { + Some("INVERTED") + } else if lower.contains("ngram") { + Some("NGRAM") + } else if lower.contains("zonemap") { + Some("ZONEMAP") + } else if lower.contains("bloomfilter") { + Some("BLOOM_FILTER") + } else if lower.contains("ivfhnsw") { + if lower.contains("sq") { + Some("IVF_HNSW_SQ") + } else if lower.contains("pq") { + Some("IVF_HNSW_PQ") + } else { + Some("IVF_HNSW_FLAT") + } + } else if lower.contains("ivf") { + if lower.contains("sq") { + Some("IVF_SQ") + } else if lower.contains("pq") { + Some("IVF_PQ") + } else { + Some("IVF_FLAT") + } + } else if lower.contains("vector") { + Some("VECTOR") + } else { + None + } + } else { + None + }; + + match type_name { + Some(name) => { + let index_type = env + .get_static_field( + "org/lance/index/IndexType", + name, + "Lorg/lance/index/IndexType;", + )? + .l()?; + Ok(index_type) + } + None => Ok(JObject::null()), + } +} + impl IntoJava for &UpdateMode { fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> Result> { let name = match self { diff --git a/java/src/main/java/org/lance/Dataset.java b/java/src/main/java/org/lance/Dataset.java index 0eedc5bad50..107294bdf7d 100644 --- a/java/src/main/java/org/lance/Dataset.java +++ b/java/src/main/java/org/lance/Dataset.java @@ -16,6 +16,7 @@ import org.lance.cleanup.CleanupPolicy; import org.lance.cleanup.RemovalStats; import org.lance.compaction.CompactionOptions; +import org.lance.index.Index; import org.lance.index.IndexOptions; import org.lance.index.IndexParams; import org.lance.index.IndexType; @@ -958,14 +959,14 @@ public List listIndexes() { * * @return list of Index objects with complete metadata including index type and fragment coverage */ - public List getIndexes() { + public List getIndexes() { try (LockManager.ReadLock readLock = lockManager.acquireReadLock()) { Preconditions.checkArgument(nativeDatasetHandle != 0, "Dataset is closed"); return nativeGetIndexes(); } } - private native List nativeGetIndexes(); + private native List nativeGetIndexes(); /** * Get the table config of the dataset. diff --git a/java/src/main/java/org/lance/index/Index.java b/java/src/main/java/org/lance/index/Index.java index 8b134304dae..955835496ed 100644 --- a/java/src/main/java/org/lance/index/Index.java +++ b/java/src/main/java/org/lance/index/Index.java @@ -188,35 +188,6 @@ public static Builder builder() { return new Builder(); } - /** - * Create an Index instance directly. This is primarily for JNI use. - * - * @return a new Index instance - */ - public static Index create( - UUID uuid, - List fields, - String name, - long datasetVersion, - List fragments, - byte[] indexDetails, - int indexVersion, - Instant createdAt, - Integer baseId, - IndexType indexType) { - return new Index( - uuid, - fields, - name, - datasetVersion, - fragments, - indexDetails, - indexVersion, - createdAt, - baseId, - indexType); - } - public static class Builder { private UUID uuid;