Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions java/lance-jni/src/blocking_dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2445,3 +2445,127 @@ fn inner_cleanup_with_policy<'local>(

Ok(jstats)
}

//////////////////////////////
// Index operation Methods //
//////////////////////////////

#[no_mangle]
pub extern "system" fn Java_org_lance_Dataset_nativeGetIndexes<'local>(
mut env: JNIEnv<'local>,
java_dataset: JObject,
) -> JObject<'local> {
ok_or_throw!(env, inner_get_indexes(&mut env, java_dataset))
}

fn inner_get_indexes<'local>(
env: &mut JNIEnv<'local>,
java_dataset: JObject,
) -> Result<JObject<'local>> {
let indexes = {
let dataset_guard =
unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?;
dataset_guard.list_indexes()?
};

let array_list = env.new_object("java/util/ArrayList", "()V", &[])?;

for index_meta in indexes.iter() {
let java_index = index_meta.into_java(env)?;
env.call_method(
&array_list,
"add",
"(Ljava/lang/Object;)Z",
&[JValue::Object(&java_index)],
)?;
}

Ok(array_list)
}

#[no_mangle]
pub extern "system" fn Java_org_lance_Dataset_nativeCountIndexedRows(
mut env: JNIEnv,
java_dataset: JObject,
jindex_name: JString,
jfilter: JString,
jfragment_ids: JObject, // Optional<List<Integer>>
) -> jlong {
ok_or_throw_with_return!(
env,
inner_count_indexed_rows(&mut env, java_dataset, jindex_name, jfilter, jfragment_ids),
-1
)
}

fn inner_count_indexed_rows(
env: &mut JNIEnv,
java_dataset: JObject,
_jindex_name: JString,
jfilter: JString,
jfragment_ids: JObject, // Optional<List<Integer>>
) -> Result<i64> {
let filter: String = jfilter.extract(env)?;

// Extract optional fragment IDs
let fragment_ids: Option<Vec<u32>> = if env
.call_method(&jfragment_ids, "isPresent", "()Z", &[])?
.z()?
{
let list_obj = env
.call_method(&jfragment_ids, "get", "()Ljava/lang/Object;", &[])?
.l()?;
let list = env.get_list(&list_obj)?;
let mut ids = Vec::new();
let mut iter = list.iter(env)?;
while let Some(elem) = iter.next(env)? {
let int_val = env.call_method(&elem, "intValue", "()I", &[])?.i()?;
ids.push(int_val as u32);
}
Some(ids)
} else {
None
};

let count = {
let dataset_guard =
unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?;

// Use a scanner with fragment filtering to count rows
// This ensures we only count rows in the specified fragments
let inner = dataset_guard.inner.clone();

RT.block_on(async {
let mut scanner = inner.scan();

// Apply filter
if !filter.is_empty() {
scanner.filter(&filter)?;
}

// Empty projection and enable row_id for count_rows to work
// count_rows() requires metadata-only projection
scanner.project::<String>(&[])?;
scanner.with_row_id();

// Apply fragment filter if specified
if let Some(frag_ids) = fragment_ids {
// Convert FileFragment to Fragment by extracting metadata
let filtered_fragments: Vec<_> = inner
.get_fragments()
.into_iter()
.filter(|f| frag_ids.contains(&(f.id() as u32)))
.map(|f| f.metadata().clone())
.collect();
scanner.with_fragments(filtered_fragments);
}

// Use the scanner's count_rows method
let count = scanner.count_rows().await?;

Ok::<i64, lance::Error>(count as i64)
})?
};

Ok(count)
}
73 changes: 71 additions & 2 deletions java/lance-jni/src/transaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,13 @@ impl IntoJava for &IndexMetadata {
JObject::null()
};

// Create IndexMetadata object
// Determine index type from index_details type_url
let index_type = determine_index_type(env, &self.index_details)?;

// Create Index object
Ok(env.new_object(
"org/lance/index/Index",
"(Ljava/util/UUID;Ljava/util/List;Ljava/lang/String;JLjava/util/List;[BILjava/time/Instant;Ljava/lang/Integer;)V",
"(Ljava/util/UUID;Ljava/util/List;Ljava/lang/String;JLjava/util/List;[BILjava/time/Instant;Ljava/lang/Integer;Lorg/lance/index/IndexType;)V",
&[
JValue::Object(&uuid),
JValue::Object(&fields),
Expand All @@ -164,11 +167,77 @@ impl IntoJava for &IndexMetadata {
JValue::Int(self.index_version),
JValue::Object(&created_at),
JValue::Object(&base_id),
JValue::Object(&index_type),
],
)?)
}
}

/// Determine the IndexType enum value from index_details protobuf
fn determine_index_type<'local>(
env: &mut JNIEnv<'local>,
index_details: &Option<Arc<Any>>,
) -> Result<JObject<'local>> {
let type_name = if let Some(details) = index_details {
// Extract type name from type_url (e.g., ".lance.index.BTreeIndexDetails" -> "BTREE")
let type_url = &details.type_url;
let type_part = type_url.split('.').next_back().unwrap_or("");
let lower = type_part.to_lowercase();

if lower.contains("btree") {
Some("BTREE")
} else if lower.contains("bitmap") {
Some("BITMAP")
} else if lower.contains("labellist") {
Some("LABEL_LIST")
} else if lower.contains("inverted") {
Some("INVERTED")
} else if lower.contains("ngram") {
Some("NGRAM")
} else if lower.contains("zonemap") {
Some("ZONEMAP")
} else if lower.contains("bloomfilter") {
Some("BLOOM_FILTER")
} else if lower.contains("ivfhnsw") {
if lower.contains("sq") {
Some("IVF_HNSW_SQ")
} else if lower.contains("pq") {
Some("IVF_HNSW_PQ")
} else {
Some("IVF_HNSW_FLAT")
}
} else if lower.contains("ivf") {
if lower.contains("sq") {
Some("IVF_SQ")
} else if lower.contains("pq") {
Some("IVF_PQ")
} else {
Some("IVF_FLAT")
}
} else if lower.contains("vector") {
Some("VECTOR")
} else {
None
}
} else {
None
};

match type_name {
Some(name) => {
let index_type = env
.get_static_field(
"org/lance/index/IndexType",
name,
"Lorg/lance/index/IndexType;",
)?
.l()?;
Ok(index_type)
}
None => Ok(JObject::null()),
}
}

impl IntoJava for &UpdateMode {
fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> Result<JObject<'a>> {
let name = match self {
Expand Down
40 changes: 40 additions & 0 deletions java/src/main/java/org/lance/Dataset.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import org.lance.cleanup.CleanupPolicy;
import org.lance.cleanup.RemovalStats;
import org.lance.compaction.CompactionOptions;
import org.lance.index.Index;
import org.lance.index.IndexOptions;
import org.lance.index.IndexParams;
import org.lance.index.IndexType;
Expand Down Expand Up @@ -834,6 +835,31 @@ public long countRows(String filter) {

private native long nativeCountRows(Optional<String> filter);

/**
* Count rows matching a filter using a specific scalar index. This directly queries the index and
* counts matching row addresses, which is more efficient than scanning when the index covers the
* filter column.
*
* @param indexName the name of the scalar index to use
* @param filter the filter expression (e.g., "column = 5")
* @param fragmentIds optional list of fragment IDs to restrict the count to
* @return count of matching rows
*/
public long countIndexedRows(
String indexName, String filter, Optional<List<Integer>> fragmentIds) {
try (LockManager.ReadLock readLock = lockManager.acquireReadLock()) {
Preconditions.checkArgument(nativeDatasetHandle != 0, "Dataset is closed");
Preconditions.checkArgument(
indexName != null && !indexName.isEmpty(), "indexName cannot be null or empty");
Preconditions.checkArgument(
filter != null && !filter.isEmpty(), "filter cannot be null or empty");
return nativeCountIndexedRows(indexName, filter, fragmentIds);
}
}

private native long nativeCountIndexedRows(
String indexName, String filter, Optional<List<Integer>> fragmentIds);

/**
* Calculate the size of the dataset.
*
Expand Down Expand Up @@ -928,6 +954,20 @@ public List<String> listIndexes() {

private native List<String> nativeListIndexes();

/**
* Get all indexes with full metadata.
*
* @return list of Index objects with complete metadata including index type and fragment coverage
*/
public List<Index> getIndexes() {
try (LockManager.ReadLock readLock = lockManager.acquireReadLock()) {
Preconditions.checkArgument(nativeDatasetHandle != 0, "Dataset is closed");
return nativeGetIndexes();
}
}

private native List<Index> nativeGetIndexes();

/**
* Get the table config of the dataset.
*
Expand Down
37 changes: 33 additions & 4 deletions java/src/main/java/org/lance/index/Index.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ public class Index {
private final int indexVersion;
private final Instant createdAt;
private final Integer baseId;
private final IndexType indexType;

private Index(
UUID uuid,
Expand All @@ -46,7 +47,8 @@ private Index(
byte[] indexDetails,
int indexVersion,
Instant createdAt,
Integer baseId) {
Integer baseId,
IndexType indexType) {
this.uuid = uuid;
this.fields = fields;
this.name = name;
Expand All @@ -56,6 +58,7 @@ private Index(
this.indexVersion = indexVersion;
this.createdAt = createdAt;
this.baseId = baseId;
this.indexType = indexType;
}

public UUID uuid() {
Expand Down Expand Up @@ -119,6 +122,15 @@ public Optional<Instant> createdAt() {
return Optional.ofNullable(createdAt);
}

/**
* Get the type of the index (e.g., BTREE, BITMAP, VECTOR).
*
* @return the index type, or null if unknown
*/
public IndexType indexType() {
return indexType;
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
Expand All @@ -132,14 +144,23 @@ public boolean equals(Object o) {
&& Objects.equals(fragments, index.fragments)
&& Arrays.equals(indexDetails, index.indexDetails)
&& Objects.equals(createdAt, index.createdAt)
&& Objects.equals(baseId, index.baseId);
&& Objects.equals(baseId, index.baseId)
&& indexType == index.indexType;
}

@Override
public int hashCode() {
int result =
Objects.hash(
uuid, fields, name, datasetVersion, indexVersion, createdAt, baseId, fragments);
uuid,
fields,
name,
datasetVersion,
indexVersion,
createdAt,
baseId,
fragments,
indexType);
result = 31 * result + Arrays.hashCode(indexDetails);
return result;
}
Expand All @@ -152,6 +173,7 @@ public String toString() {
.add("name", name)
.add("datasetVersion", datasetVersion)
.add("indexVersion", indexVersion)
.add("indexType", indexType)
.add("createdAt", createdAt)
.add("baseId", baseId)
.toString();
Expand All @@ -177,6 +199,7 @@ public static class Builder {
private int indexVersion;
private Instant createdAt;
private Integer baseId;
private IndexType indexType;

private Builder() {}

Expand Down Expand Up @@ -225,6 +248,11 @@ public Builder baseId(Integer baseId) {
return this;
}

public Builder indexType(IndexType indexType) {
this.indexType = indexType;
return this;
}

public Index build() {
return new Index(
uuid,
Expand All @@ -235,7 +263,8 @@ public Index build() {
indexDetails,
indexVersion,
createdAt,
baseId);
baseId,
indexType);
}
}
}