diff --git a/java/lance-jni/src/blocking_dataset.rs b/java/lance-jni/src/blocking_dataset.rs index dde2b81879b..60bf01961a4 100644 --- a/java/lance-jni/src/blocking_dataset.rs +++ b/java/lance-jni/src/blocking_dataset.rs @@ -43,6 +43,7 @@ use lance_index::optimize::OptimizeOptions; use lance_index::scalar::btree::BTreeParameters; use lance_index::scalar::lance_format::LanceIndexStore; use lance_index::DatasetIndexExt; +use lance_index::IndexCriteria as RustIndexCriteria; use lance_index::{IndexParams, IndexType}; use lance_io::object_store::ObjectStoreRegistry; use lance_io::object_store::StorageOptionsProvider; @@ -2496,6 +2497,76 @@ fn inner_get_indexes<'local>( Ok(array_list) } +#[no_mangle] +pub extern "system" fn Java_org_lance_Dataset_nativeGetIndexStatistics<'local>( + mut env: JNIEnv<'local>, + java_dataset: JObject, + jindex_name: JString, +) -> JString<'local> { + ok_or_throw_with_return!( + env, + inner_get_index_statistics(&mut env, java_dataset, jindex_name), + JString::from(JObject::null()) + ) +} + +fn inner_get_index_statistics<'local>( + env: &mut JNIEnv<'local>, + java_dataset: JObject, + jindex_name: JString, +) -> Result> { + let index_name: String = jindex_name.extract(env)?; + let stats_json = { + let dataset_guard = + unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?; + RT.block_on(dataset_guard.inner.index_statistics(&index_name))? 
+ }; + let jstats = env.new_string(stats_json)?; + Ok(jstats) +} + +#[no_mangle] +pub extern "system" fn Java_org_lance_Dataset_nativeDescribeIndices<'local>( + mut env: JNIEnv<'local>, + java_dataset: JObject, + criteria_obj: JObject, +) -> JObject<'local> { + ok_or_throw!( + env, + inner_describe_indices(&mut env, java_dataset, criteria_obj) + ) +} + +fn inner_describe_indices<'local>( + env: &mut JNIEnv<'local>, + java_dataset: JObject, + java_index_criteria: JObject, +) -> Result> { + let mut for_column = None; + let mut has_name = None; + let index_criteria = env.get_optional(&java_index_criteria, |env, obj| { + for_column = env.get_optional_string_from_method(&obj, "getForColumn")?; + has_name = env.get_optional_string_from_method(&obj, "getHasName")?; + let must_support_fts = env.get_boolean_from_method(&obj, "mustSupportFts")?; + let must_support_exact_equality = + env.get_boolean_from_method(&obj, "mustSupportExactEquality")?; + Ok(RustIndexCriteria { + for_column: for_column.as_deref(), + has_name: has_name.as_deref(), + must_support_fts, + must_support_exact_equality, + }) + })?; + + let descriptions = { + let dataset_guard = + unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?; + RT.block_on(dataset_guard.inner.describe_indices(index_criteria))? 
+ }; + + export_vec(env, &descriptions) +} + #[no_mangle] pub extern "system" fn Java_org_lance_Dataset_nativeCountIndexedRows( mut env: JNIEnv, diff --git a/java/lance-jni/src/index.rs b/java/lance-jni/src/index.rs new file mode 100644 index 00000000000..6360627cb11 --- /dev/null +++ b/java/lance-jni/src/index.rs @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +use crate::error::Result; +use crate::traits::{export_vec, IntoJava}; +use jni::objects::{JObject, JValue}; +use jni::sys::jbyte; +use jni::JNIEnv; +use lance::table::format::IndexMetadata; +use lance_index::IndexDescription; +use prost::Message; +use prost_types::Any; +use std::sync::Arc; + +impl IntoJava for &Arc { + fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> Result> { + let field_ids_list = { + let array_list = env.new_object("java/util/ArrayList", "()V", &[])?; + for id in self.field_ids() { + let int_obj = + env.new_object("java/lang/Integer", "(I)V", &[JValue::Int(*id as i32)])?; + env.call_method( + &array_list, + "add", + "(Ljava/lang/Object;)Z", + &[JValue::Object(&int_obj)], + )?; + } + array_list + }; + let name = env.new_string(self.name())?; + let type_url = env.new_string(self.type_url())?; + let index_type = env.new_string(self.index_type())?; + let rows_indexed = self.rows_indexed() as i64; + let metadata_list = export_vec(env, self.metadata())?; + let details_json = self.details()?; + let details = env.new_string(details_json)?; + + let j_index_desc = env.new_object( + "org/lance/index/IndexDescription", + "(Ljava/lang/String;Ljava/util/List;Ljava/lang/String;Ljava/lang/String;JLjava/util/List;Ljava/lang/String;)V", + &[ + JValue::Object(&name), + JValue::Object(&field_ids_list), + JValue::Object(&type_url), + JValue::Object(&index_type), + JValue::Long(rows_indexed), + JValue::Object(&metadata_list), + JValue::Object(&details), + ], + )?; + Ok(j_index_desc) + } +} + +impl IntoJava for &IndexMetadata { + fn 
into_java<'a>(self, env: &mut JNIEnv<'a>) -> Result> { + let uuid = self.uuid.into_java(env)?; + + let fields = { + let array_list = env.new_object("java/util/ArrayList", "()V", &[])?; + for field in &self.fields { + let field_obj = + env.new_object("java/lang/Integer", "(I)V", &[JValue::Int(*field)])?; + env.call_method( + &array_list, + "add", + "(Ljava/lang/Object;)Z", + &[JValue::Object(&field_obj)], + )?; + } + array_list + }; + let name = env.new_string(&self.name)?; + + let fragments = if let Some(bitmap) = &self.fragment_bitmap { + let array_list = env.new_object("java/util/ArrayList", "()V", &[])?; + for frag_id in bitmap.iter() { + let id_obj = + env.new_object("java/lang/Integer", "(I)V", &[JValue::Int(frag_id as i32)])?; + env.call_method( + &array_list, + "add", + "(Ljava/lang/Object;)Z", + &[JValue::Object(&id_obj)], + )?; + } + array_list + } else { + JObject::null() + }; + + // Convert index_details to byte array + let index_details = if let Some(details) = &self.index_details { + let bytes = details.encode_to_vec(); + let jbytes: &[jbyte] = + unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const jbyte, bytes.len()) }; + + let byte_array = env.new_byte_array(bytes.len() as i32)?; + env.set_byte_array_region(&byte_array, 0, jbytes)?; + byte_array.into() + } else { + JObject::null() + }; + + // Convert created_at to Instant + let created_at = if let Some(dt) = &self.created_at { + let seconds = dt.timestamp(); + let nanos = dt.timestamp_subsec_nanos() as i64; + env.call_static_method( + "java/time/Instant", + "ofEpochSecond", + "(JJ)Ljava/time/Instant;", + &[JValue::Long(seconds), JValue::Long(nanos)], + )? + .l()? + } else { + JObject::null() + }; + + // Convert base_id from Option to Integer for Java + let base_id = if let Some(id) = self.base_id { + env.new_object("java/lang/Integer", "(I)V", &[JValue::Int(id as i32)])? 
+ } else { + JObject::null() + }; + + // Determine index type from index_details type_url + let index_type = determine_index_type(env, &self.index_details)?; + + // Create Index object + Ok(env.new_object( + "org/lance/index/Index", + "(Ljava/util/UUID;Ljava/util/List;Ljava/lang/String;JLjava/util/List;[BILjava/time/Instant;Ljava/lang/Integer;Lorg/lance/index/IndexType;)V", + &[ + JValue::Object(&uuid), + JValue::Object(&fields), + JValue::Object(&name), + JValue::Long(self.dataset_version as i64), + JValue::Object(&fragments), + JValue::Object(&index_details), + JValue::Int(self.index_version), + JValue::Object(&created_at), + JValue::Object(&base_id), + JValue::Object(&index_type), + ], + )?) + } +} + +/// Determine the IndexType enum value from index_details protobuf +fn determine_index_type<'local>( + env: &mut JNIEnv<'local>, + index_details: &Option>, +) -> Result> { + let type_name = if let Some(details) = index_details { + // Extract type name from type_url (e.g., ".lance.index.BTreeIndexDetails" -> "BTREE") + let type_url = &details.type_url; + let type_part = type_url.split('.').next_back().unwrap_or(""); + let lower = type_part.to_lowercase(); + + if lower.contains("btree") { + Some("BTREE") + } else if lower.contains("bitmap") { + Some("BITMAP") + } else if lower.contains("labellist") { + Some("LABEL_LIST") + } else if lower.contains("inverted") { + Some("INVERTED") + } else if lower.contains("ngram") { + Some("NGRAM") + } else if lower.contains("zonemap") { + Some("ZONEMAP") + } else if lower.contains("bloomfilter") { + Some("BLOOM_FILTER") + } else if lower.contains("ivfhnsw") { + if lower.contains("sq") { + Some("IVF_HNSW_SQ") + } else if lower.contains("pq") { + Some("IVF_HNSW_PQ") + } else { + Some("IVF_HNSW_FLAT") + } + } else if lower.contains("ivf") { + if lower.contains("sq") { + Some("IVF_SQ") + } else if lower.contains("pq") { + Some("IVF_PQ") + } else { + Some("IVF_FLAT") + } + } else if lower.contains("vector") { + Some("VECTOR") + } else { 
+ None + } + } else { + None + }; + + match type_name { + Some(name) => { + let index_type = env + .get_static_field( + "org/lance/index/IndexType", + name, + "Lorg/lance/index/IndexType;", + )? + .l()?; + Ok(index_type) + } + None => Ok(JObject::null()), + } +} diff --git a/java/lance-jni/src/lib.rs b/java/lance-jni/src/lib.rs index 566f77dd110..f141a393fb7 100644 --- a/java/lance-jni/src/lib.rs +++ b/java/lance-jni/src/lib.rs @@ -48,6 +48,7 @@ pub mod ffi; mod file_reader; mod file_writer; mod fragment; +mod index; mod merge_insert; mod namespace; mod optimize; diff --git a/java/lance-jni/src/traits.rs b/java/lance-jni/src/traits.rs index 7da64d453c2..3279562db43 100644 --- a/java/lance-jni/src/traits.rs +++ b/java/lance-jni/src/traits.rs @@ -218,6 +218,12 @@ impl IntoJava for &JLance { } } +impl IntoJava for &JLance { + fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> Result> { + Ok(env.new_object("java/lang/Integer", "(I)V", &[JValue::Int(self.0)])?) + } +} + impl IntoJava for &String { fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> Result> { Ok(env.new_string(self)?.into()) diff --git a/java/lance-jni/src/transaction.rs b/java/lance-jni/src/transaction.rs index 5f43f454154..1d54cd42573 100644 --- a/java/lance-jni/src/transaction.rs +++ b/java/lance-jni/src/transaction.rs @@ -11,7 +11,7 @@ use arrow::datatypes::Schema; use arrow_schema::ffi::FFI_ArrowSchema; use chrono::DateTime; use jni::objects::{JByteArray, JLongArray, JMap, JObject, JString, JValue, JValueGen}; -use jni::sys::{jboolean, jbyte}; +use jni::sys::jboolean; use jni::JNIEnv; use lance::dataset::transaction::{ DataReplacementGroup, Operation, RewriteGroup, RewrittenIndex, Transaction, TransactionBuilder, @@ -78,166 +78,6 @@ impl IntoJava for &DataReplacementGroup { } } -impl IntoJava for &IndexMetadata { - fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> Result> { - let uuid = self.uuid.into_java(env)?; - - let fields = { - let array_list = env.new_object("java/util/ArrayList", "()V", &[])?; - for 
field in &self.fields { - let field_obj = - env.new_object("java/lang/Integer", "(I)V", &[JValue::Int(*field)])?; - env.call_method( - &array_list, - "add", - "(Ljava/lang/Object;)Z", - &[JValue::Object(&field_obj)], - )?; - } - array_list - }; - let name = env.new_string(&self.name)?; - - let fragments = if let Some(bitmap) = &self.fragment_bitmap { - let array_list = env.new_object("java/util/ArrayList", "()V", &[])?; - for frag_id in bitmap.iter() { - let id_obj = - env.new_object("java/lang/Integer", "(I)V", &[JValue::Int(frag_id as i32)])?; - env.call_method( - &array_list, - "add", - "(Ljava/lang/Object;)Z", - &[JValue::Object(&id_obj)], - )?; - } - array_list - } else { - JObject::null() - }; - - // Convert index_details to byte array - let index_details = if let Some(details) = &self.index_details { - let bytes = details.encode_to_vec(); - let jbytes: &[jbyte] = - unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const jbyte, bytes.len()) }; - - let byte_array = env.new_byte_array(bytes.len() as i32)?; - env.set_byte_array_region(&byte_array, 0, jbytes)?; - byte_array.into() - } else { - JObject::null() - }; - - // Convert created_at to Instant - let created_at = if let Some(dt) = &self.created_at { - let seconds = dt.timestamp(); - let nanos = dt.timestamp_subsec_nanos() as i64; - env.call_static_method( - "java/time/Instant", - "ofEpochSecond", - "(JJ)Ljava/time/Instant;", - &[JValue::Long(seconds), JValue::Long(nanos)], - )? - .l()? - } else { - JObject::null() - }; - - // Convert base_id from Option to Integer for Java - let base_id = if let Some(id) = self.base_id { - env.new_object("java/lang/Integer", "(I)V", &[JValue::Int(id as i32)])? 
- } else { - JObject::null() - }; - - // Determine index type from index_details type_url - let index_type = determine_index_type(env, &self.index_details)?; - - // Create Index object - Ok(env.new_object( - "org/lance/index/Index", - "(Ljava/util/UUID;Ljava/util/List;Ljava/lang/String;JLjava/util/List;[BILjava/time/Instant;Ljava/lang/Integer;Lorg/lance/index/IndexType;)V", - &[ - JValue::Object(&uuid), - JValue::Object(&fields), - JValue::Object(&name), - JValue::Long(self.dataset_version as i64), - JValue::Object(&fragments), - JValue::Object(&index_details), - JValue::Int(self.index_version), - JValue::Object(&created_at), - JValue::Object(&base_id), - JValue::Object(&index_type), - ], - )?) - } -} - -/// Determine the IndexType enum value from index_details protobuf -fn determine_index_type<'local>( - env: &mut JNIEnv<'local>, - index_details: &Option>, -) -> Result> { - let type_name = if let Some(details) = index_details { - // Extract type name from type_url (e.g., ".lance.index.BTreeIndexDetails" -> "BTREE") - let type_url = &details.type_url; - let type_part = type_url.split('.').next_back().unwrap_or(""); - let lower = type_part.to_lowercase(); - - if lower.contains("btree") { - Some("BTREE") - } else if lower.contains("bitmap") { - Some("BITMAP") - } else if lower.contains("labellist") { - Some("LABEL_LIST") - } else if lower.contains("inverted") { - Some("INVERTED") - } else if lower.contains("ngram") { - Some("NGRAM") - } else if lower.contains("zonemap") { - Some("ZONEMAP") - } else if lower.contains("bloomfilter") { - Some("BLOOM_FILTER") - } else if lower.contains("ivfhnsw") { - if lower.contains("sq") { - Some("IVF_HNSW_SQ") - } else if lower.contains("pq") { - Some("IVF_HNSW_PQ") - } else { - Some("IVF_HNSW_FLAT") - } - } else if lower.contains("ivf") { - if lower.contains("sq") { - Some("IVF_SQ") - } else if lower.contains("pq") { - Some("IVF_PQ") - } else { - Some("IVF_FLAT") - } - } else if lower.contains("vector") { - Some("VECTOR") - } else { 
- None - } - } else { - None - }; - - match type_name { - Some(name) => { - let index_type = env - .get_static_field( - "org/lance/index/IndexType", - name, - "Lorg/lance/index/IndexType;", - )? - .l()?; - Ok(index_type) - } - None => Ok(JObject::null()), - } -} - impl IntoJava for &UpdateMode { fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> Result> { let name = match self { diff --git a/java/src/main/java/org/lance/Dataset.java b/java/src/main/java/org/lance/Dataset.java index cc4e0d03dbb..3b383a5e2e0 100644 --- a/java/src/main/java/org/lance/Dataset.java +++ b/java/src/main/java/org/lance/Dataset.java @@ -18,6 +18,8 @@ import org.lance.compaction.CompactionOptions; import org.lance.delta.DatasetDelta; import org.lance.index.Index; +import org.lance.index.IndexCriteria; +import org.lance.index.IndexDescription; import org.lance.index.IndexOptions; import org.lance.index.IndexParams; import org.lance.index.IndexType; @@ -33,6 +35,7 @@ import org.lance.schema.ColumnAlteration; import org.lance.schema.LanceSchema; import org.lance.schema.SqlExpressions; +import org.lance.util.JsonUtils; import org.apache.arrow.c.ArrowArrayStream; import org.apache.arrow.c.ArrowSchema; @@ -1058,6 +1061,54 @@ public List getIndexes() { private native List nativeGetIndexes(); + /** + * Get statistics for a specific index in JSON form. + * + *

<p>The JSON structure matches the Rust/Python index_statistics API. + * + * @param indexName the name of the index + * @return the index statistics, parsed from the native JSON payload into a map + */ + public Map getIndexStatistics(String indexName) { + Preconditions.checkArgument( + indexName != null && !indexName.isEmpty(), "indexName cannot be null or empty"); + try (LockManager.ReadLock readLock = lockManager.acquireReadLock()) { + Preconditions.checkArgument(nativeDatasetHandle != 0, "Dataset is closed"); + String jsonDesc = nativeGetIndexStatistics(indexName); + return JsonUtils.fromJson(jsonDesc); + } + } + + private native String nativeGetIndexStatistics(String indexName); + + /** + * Describe indices on this dataset filtered by criteria. + * + * @param criteria filter options such as column, name or index capabilities + * @return list of index descriptions + */ + public List describeIndices(IndexCriteria criteria) { + Preconditions.checkNotNull(criteria, "criteria cannot be null"); + try (LockManager.ReadLock readLock = lockManager.acquireReadLock()) { + Preconditions.checkArgument(nativeDatasetHandle != 0, "Dataset is closed"); + return nativeDescribeIndices(Optional.of(criteria)); + } + } + + /** + * Describe all indices on this dataset. + * + * @return list of index descriptions + */ + public List describeIndices() { + try (LockManager.ReadLock readLock = lockManager.acquireReadLock()) { + Preconditions.checkArgument(nativeDatasetHandle != 0, "Dataset is closed"); + return nativeDescribeIndices(Optional.empty()); + } + } + + private native List nativeDescribeIndices(Optional criteria); + /** * Get the table config of the dataset. 
* diff --git a/java/src/main/java/org/lance/index/IndexCriteria.java b/java/src/main/java/org/lance/index/IndexCriteria.java new file mode 100755 index 00000000000..f00e8c5fca6 --- /dev/null +++ b/java/src/main/java/org/lance/index/IndexCriteria.java @@ -0,0 +1,98 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.index; + +import java.util.Optional; + +/** + * Criteria for describing or selecting indices on a dataset. + * + *

<p>This mirrors the semantics of the Rust {@code IndexCriteria} struct used by {@code + * Dataset::describe_indices} and related APIs. + */ +public final class IndexCriteria { + + private final Optional forColumn; + private final Optional hasName; + private final boolean mustSupportFts; + private final boolean mustSupportExactEquality; + + private IndexCriteria(Builder builder) { + this.forColumn = Optional.ofNullable(builder.forColumn); + this.hasName = Optional.ofNullable(builder.hasName); + this.mustSupportFts = builder.mustSupportFts; + this.mustSupportExactEquality = builder.mustSupportExactEquality; + } + + /** + * Optional column name to restrict indices to. + * + *

If present, only indices built on this column (and only this column) will be considered. + */ + public Optional getForColumn() { + return forColumn; + } + + /** Optional index name to restrict indices to. */ + public Optional getHasName() { + return hasName; + } + + /** If true, only indices that support full-text search will be considered. */ + public boolean mustSupportFts() { + return mustSupportFts; + } + + /** If true, only indices that support exact equality predicates will be considered. */ + public boolean mustSupportExactEquality() { + return mustSupportExactEquality; + } + + /** Builder for {@link IndexCriteria}. */ + public static final class Builder { + + private String forColumn; + private String hasName; + private boolean mustSupportFts; + private boolean mustSupportExactEquality; + + /** Restrict indices to those built on the given column. */ + public Builder forColumn(String forColumn) { + this.forColumn = forColumn; + return this; + } + + /** Restrict indices to those with the given name. */ + public Builder hasName(String name) { + this.hasName = name; + return this; + } + + /** Require indices to support full-text search. */ + public Builder mustSupportFts(boolean mustSupportFts) { + this.mustSupportFts = mustSupportFts; + return this; + } + + /** Require indices to support exact equality predicates. */ + public Builder mustSupportExactEquality(boolean mustSupportExactEquality) { + this.mustSupportExactEquality = mustSupportExactEquality; + return this; + } + + public IndexCriteria build() { + return new IndexCriteria(this); + } + } +} diff --git a/java/src/main/java/org/lance/index/IndexDescription.java b/java/src/main/java/org/lance/index/IndexDescription.java new file mode 100755 index 00000000000..d17782eb531 --- /dev/null +++ b/java/src/main/java/org/lance/index/IndexDescription.java @@ -0,0 +1,94 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.index; + +import java.util.List; +import java.util.Objects; + +/** + * High-level description of an index, aggregating metadata across all segments. + * + *

This mirrors the Rust {@code IndexDescription} trait and is returned from {@code + * Dataset.describeIndices}. + */ +public final class IndexDescription { + + private final String name; + private final List fieldIds; + private final String typeUrl; + private final String indexType; + private final long rowsIndexed; + private final List metadata; + private final String detailsJson; + + public IndexDescription( + String name, + List fieldIds, + String typeUrl, + String indexType, + long rowsIndexed, + List metadata, + String detailsJson) { + this.name = Objects.requireNonNull(name, "name must not be null"); + this.fieldIds = Objects.requireNonNull(fieldIds, "fieldIds must not be null"); + this.typeUrl = Objects.requireNonNull(typeUrl, "typeUrl must not be null"); + this.indexType = Objects.requireNonNull(indexType, "indexType must not be null"); + this.rowsIndexed = rowsIndexed; + this.metadata = Objects.requireNonNull(metadata, "metadata must not be null"); + this.detailsJson = detailsJson; + } + + /** The logical name of the index. */ + public String getName() { + return name; + } + + /** Field ids that this index is built on. */ + public List getFieldIds() { + return fieldIds; + } + + /** Underlying protobuf type URL for the index details. */ + public String getTypeUrl() { + return typeUrl; + } + + /** Human-readable index type identifier (e.g. BTREE, INVERTED, IVF_PQ). */ + public String getIndexType() { + return indexType; + } + + /** Approximate number of rows covered by this index. */ + public long getRowsIndexed() { + return rowsIndexed; + } + + /** + * Per-segment metadata objects for this index. + * + *

<p>Each entry corresponds to a single {@link Index} segment in the manifest. + */ + public List getMetadata() { + return metadata; + } + + /** + * JSON representation of index-specific details. + * + *

The exact structure depends on the index implementation. + */ + public String getDetailsJson() { + return detailsJson; + } +} diff --git a/java/src/main/java/org/lance/util/JsonUtils.java b/java/src/main/java/org/lance/util/JsonUtils.java index f76a31feb72..705504e36f3 100755 --- a/java/src/main/java/org/lance/util/JsonUtils.java +++ b/java/src/main/java/org/lance/util/JsonUtils.java @@ -14,6 +14,7 @@ package org.lance.util; import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import java.util.Map; @@ -25,9 +26,19 @@ private JsonUtils() {} public static String toJson(Map params) { try { - return OBJECT_MAPPER.writeValueAsString(params); + return params == null ? null : OBJECT_MAPPER.writeValueAsString(params); } catch (JsonProcessingException e) { throw new IllegalStateException("Failed to serialize to JSON", e); } } + + public static Map fromJson(String json) { + try { + return json == null + ? 
null + : OBJECT_MAPPER.readValue(json, new TypeReference>() {}); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to deserialize from JSON", e); + } + } } diff --git a/java/src/test/java/org/lance/DatasetTest.java b/java/src/test/java/org/lance/DatasetTest.java index 49956f37fa5..156be38ad69 100644 --- a/java/src/test/java/org/lance/DatasetTest.java +++ b/java/src/test/java/org/lance/DatasetTest.java @@ -15,9 +15,13 @@ import org.lance.compaction.CompactionOptions; import org.lance.index.Index; +import org.lance.index.IndexCriteria; +import org.lance.index.IndexDescription; import org.lance.index.IndexParams; import org.lance.index.IndexType; import org.lance.index.OptimizeOptions; +import org.lance.index.scalar.BTreeIndexParams; +import org.lance.index.scalar.NGramIndexParams; import org.lance.index.scalar.ScalarIndexParams; import org.lance.ipc.LanceScanner; import org.lance.ipc.ScanOptions; @@ -1829,4 +1833,79 @@ void testReadSmallBlobSequentialIntegrity(@TempDir Path tempDir) throws Exceptio blobFile.close(); } } + + @Test + public void testIndexStatistics(@TempDir Path tempDir) throws Exception { + Path datasetPath = tempDir.resolve("testIndexStatistics"); + + try (TestVectorDataset vectorDataset = new TestVectorDataset(datasetPath)) { + try (Dataset dataset = vectorDataset.create()) { + ScalarIndexParams scalarParams = ScalarIndexParams.create("btree"); + IndexParams indexParams = IndexParams.builder().setScalarIndexParams(scalarParams).build(); + dataset.createIndex( + Collections.singletonList("i"), + IndexType.BTREE, + Optional.of(TestVectorDataset.indexName), + indexParams, + true); + + Map stats = dataset.getIndexStatistics(TestVectorDataset.indexName); + assertNotNull(stats, "Index statistics JSON should not be null"); + assertFalse(stats.isEmpty(), "Index statistics JSON should not be empty"); + + assertEquals( + TestVectorDataset.indexName, + stats.get("name"), + "Index statistics should contain the index name"); + 
assertEquals( + "BTree", + stats.get("index_type"), + "Index statistics should contain index_type information"); + } + } + } + + @Test + public void testDescribeIndicesByName(@TempDir Path tempDir) throws Exception { + Path datasetPath = tempDir.resolve("testDescribeIndicesByName"); + + try (TestVectorDataset vectorDataset = new TestVectorDataset(datasetPath)) { + try (Dataset dataset = vectorDataset.create()) { + dataset.createIndex( + Collections.singletonList("i"), + IndexType.BTREE, + Optional.of("index1"), + IndexParams.builder().setScalarIndexParams(BTreeIndexParams.builder().build()).build(), + true); + + dataset.createIndex( + Collections.singletonList("s"), + IndexType.NGRAM, + Optional.of("index2"), + IndexParams.builder().setScalarIndexParams(NGramIndexParams.builder().build()).build(), + true); + + IndexCriteria criteria = new IndexCriteria.Builder().hasName("index1").build(); + + List descriptions = dataset.describeIndices(criteria); + assertEquals(1, descriptions.size(), "Expected exactly one matching index"); + + IndexDescription desc = descriptions.get(0); + assertEquals("index1", desc.getName()); + assertTrue(desc.getRowsIndexed() > 0, "rowsIndexed should be positive"); + assertNotNull(desc.getMetadata(), "Metadata list should not be null"); + assertFalse(desc.getMetadata().isEmpty(), "Metadata list should not be empty"); + assertNotNull(desc.getDetailsJson(), "Details JSON should not be null"); + + descriptions = dataset.describeIndices(); + assertEquals(2, descriptions.size(), "Expected exactly one matching index"); + for (IndexDescription indexDesc : descriptions) { + assertTrue(indexDesc.getRowsIndexed() > 0, "rowsIndexed should be positive"); + assertNotNull(indexDesc.getMetadata(), "Metadata list should not be null"); + assertFalse(indexDesc.getMetadata().isEmpty(), "Metadata list should not be empty"); + assertNotNull(indexDesc.getDetailsJson(), "Details JSON should not be null"); + } + } + } + } }