From dc444e7b2fc8c21b3ef0f77153dbf6d47f635b08 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Sat, 21 Feb 2026 17:30:55 -0600 Subject: [PATCH 1/7] added uri-based commit support Signed-off-by: Daniel Rammer --- java/lance-jni/src/blocking_dataset.rs | 101 ++++----- java/lance-jni/src/transaction.rs | 149 +++++++++++-- java/src/main/java/org/lance/Transaction.java | 197 +++++++++++++++++- .../test/java/org/lance/TransactionTest.java | 32 +++ 4 files changed, 399 insertions(+), 80 deletions(-) diff --git a/java/lance-jni/src/blocking_dataset.rs b/java/lance-jni/src/blocking_dataset.rs index 3d6c1ab8e64..e5fecf660a4 100644 --- a/java/lance-jni/src/blocking_dataset.rs +++ b/java/lance-jni/src/blocking_dataset.rs @@ -575,34 +575,11 @@ fn inner_create_with_ffi_stream<'local>( namespace_obj: JObject, // LanceNamespace (can be null) table_id_obj: JObject, // List (can be null) ) -> Result> { - use crate::namespace::{ - create_java_lance_namespace, BlockingDirectoryNamespace, BlockingRestNamespace, - }; - let stream_ptr = arrow_array_stream_addr as *mut FFI_ArrowArrayStream; let reader = unsafe { ArrowArrayStreamReader::from_raw(stream_ptr) }?; // Create the namespace wrapper for commit handling (if provided) - let namespace_info = if namespace_obj.is_null() { - None - } else { - let namespace: Arc = if is_directory_namespace(env, &namespace_obj)? { - let native_handle = get_native_namespace_handle(env, &namespace_obj)?; - let ns = unsafe { &*(native_handle as *const BlockingDirectoryNamespace) }; - ns.inner.clone() - } else if is_rest_namespace(env, &namespace_obj)? { - let native_handle = get_native_namespace_handle(env, &namespace_obj)?; - let ns = unsafe { &*(native_handle as *const BlockingRestNamespace) }; - ns.inner.clone() - } else { - // Custom Java implementation, create a Java bridge wrapper - create_java_lance_namespace(env, &namespace_obj)? - }; - - // Extract table_id from Java List - let table_id = env.get_strings(&table_id_obj)?; - Some((namespace, table_id)) - }; + let namespace_info = extract_namespace_info(env, &namespace_obj, &table_id_obj)?; create_dataset( env, @@ -1150,10 +1127,6 @@ fn inner_open_native<'local>( namespace_obj: JObject, // LanceNamespace object, null if no namespace table_id_obj: JObject, // List, null if no namespace ) -> Result> { - use crate::namespace::{ - create_java_lance_namespace, BlockingDirectoryNamespace, BlockingRestNamespace, - }; - let path_str: String = path.extract(env)?; let version = env.get_u64_opt(&version_obj)?; let block_size = env.get_int_opt(&block_size_obj)?; @@ -1170,31 +1143,10 @@ fn inner_open_native<'local>( storage_options_provider.map(|v| Arc::new(v) as Arc); // Extract namespace and table_id if provided (before get_bytes_opt which holds borrow) - let (namespace, table_id) = if !namespace_obj.is_null() { - // Check if it's a native implementation using instanceof checks - let ns_arc: Arc = if is_directory_namespace(env, &namespace_obj)? { - let native_handle = get_native_namespace_handle(env, &namespace_obj)?; - let ns = unsafe { &*(native_handle as *const BlockingDirectoryNamespace) }; - ns.inner.clone() - } else if is_rest_namespace(env, &namespace_obj)? { - let native_handle = get_native_namespace_handle(env, &namespace_obj)?; - let ns = unsafe { &*(native_handle as *const BlockingRestNamespace) }; - ns.inner.clone() - } else { - // Custom Java implementation, create a Java bridge wrapper - create_java_lance_namespace(env, &namespace_obj)? - }; - - // Extract table_id from List - let table_id = if !table_id_obj.is_null() { - Some(env.get_strings(&table_id_obj)?) - } else { - None - }; - - (Some(ns_arc), table_id) - } else { - (None, None) + let namespace_info = extract_namespace_info(env, &namespace_obj, &table_id_obj)?; + let (namespace, table_id) = match namespace_info { + Some((ns, tid)) => (Some(ns), Some(tid)), + None => (None, None), }; let serialized_manifest = env.get_bytes_opt(&serialized_manifest)?; @@ -1219,7 +1171,7 @@ fn inner_open_native<'local>( } /// Check if the Java object is an instance of DirectoryNamespace. -fn is_directory_namespace(env: &mut JNIEnv, namespace_obj: &JObject) -> Result { +pub(crate) fn is_directory_namespace(env: &mut JNIEnv, namespace_obj: &JObject) -> Result { let class = env .find_class("org/lance/namespace/DirectoryNamespace") .map_err(|e| { @@ -1230,7 +1182,7 @@ fn is_directory_namespace(env: &mut JNIEnv, namespace_obj: &JObject) -> Result Result { +pub(crate) fn is_rest_namespace(env: &mut JNIEnv, namespace_obj: &JObject) -> Result { let class = env .find_class("org/lance/namespace/RestNamespace") .map_err(|e| Error::runtime_error(format!("Failed to find RestNamespace class: {}", e)))?; @@ -1239,13 +1191,50 @@ fn is_rest_namespace(env: &mut JNIEnv, namespace_obj: &JObject) -> Result } /// Get the native handle from a Java LanceNamespace object. -fn get_native_namespace_handle(env: &mut JNIEnv, namespace_obj: &JObject) -> Result { +pub(crate) fn get_native_namespace_handle( + env: &mut JNIEnv, + namespace_obj: &JObject, +) -> Result { env.call_method(namespace_obj, "getNativeHandle", "()J", &[]) .map_err(|e| Error::runtime_error(format!("Failed to call getNativeHandle: {}", e)))? .j() .map_err(|e| Error::runtime_error(format!("getNativeHandle did not return a long: {}", e))) } +/// Extract namespace and table_id from Java objects into Rust types. +/// +/// Returns `None` if `namespace_obj` is null, otherwise returns the namespace +/// and table_id pair. +#[allow(clippy::type_complexity)] +pub(crate) fn extract_namespace_info( + env: &mut JNIEnv, + namespace_obj: &JObject, + table_id_obj: &JObject, +) -> Result, Vec)>> { + use crate::namespace::{ + create_java_lance_namespace, BlockingDirectoryNamespace, BlockingRestNamespace, + }; + + if namespace_obj.is_null() { + return Ok(None); + } + + let namespace: Arc = if is_directory_namespace(env, namespace_obj)? { + let native_handle = get_native_namespace_handle(env, namespace_obj)?; + let ns = unsafe { &*(native_handle as *const BlockingDirectoryNamespace) }; + ns.inner.clone() + } else if is_rest_namespace(env, namespace_obj)? { + let native_handle = get_native_namespace_handle(env, namespace_obj)?; + let ns = unsafe { &*(native_handle as *const BlockingRestNamespace) }; + ns.inner.clone() + } else { + create_java_lance_namespace(env, namespace_obj)? + }; + + let table_id = env.get_strings(table_id_obj)?; + Ok(Some((namespace, table_id))) +} + #[no_mangle] pub extern "system" fn Java_org_lance_Dataset_getFragmentsNative<'a>( mut env: JNIEnv<'a>, diff --git a/java/lance-jni/src/transaction.rs b/java/lance-jni/src/transaction.rs index ea5996aaeed..39bbf17fae7 100644 --- a/java/lance-jni/src/transaction.rs +++ b/java/lance-jni/src/transaction.rs @@ -1,12 +1,16 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors -use crate::blocking_dataset::{BlockingDataset, NATIVE_DATASET}; +use crate::blocking_dataset::{extract_namespace_info, BlockingDataset, NATIVE_DATASET}; use crate::error::Result; -use crate::traits::{export_vec, import_vec_from_method, FromJObjectWithEnv, IntoJava, JLance}; +use crate::storage_options::JavaStorageOptionsProvider; +use crate::traits::{ + export_vec, import_vec_from_method, FromJObjectWithEnv, FromJString, IntoJava, JLance, +}; use crate::utils::{to_java_map, to_rust_map}; use crate::Error; use crate::JNIEnvExt; +use crate::RT; use arrow::datatypes::Schema; use arrow_schema::ffi::FFI_ArrowSchema; use chrono::DateTime; @@ -17,9 +21,14 @@ use lance::dataset::transaction::{ DataReplacementGroup, Operation, RewriteGroup, RewrittenIndex, Transaction, TransactionBuilder, UpdateMap, UpdateMapEntry, UpdateMode, }; +use lance::dataset::CommitBuilder; +use lance::io::commit::namespace_manifest::LanceNamespaceExternalManifestStore; use lance::io::ObjectStoreParams; use lance::table::format::{Fragment, IndexMetadata}; use lance_core::datatypes::Schema as LanceSchema; +use lance_io::object_store::StorageOptionsProvider; +use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler; +use lance_table::io::commit::CommitHandler; use prost::Message; use prost_types::Any; use roaring::RoaringBitmap; @@ -656,7 +665,15 @@ fn inner_commit_transaction<'local>( ..Default::default() }; - let transaction = convert_to_rust_transaction(env, java_transaction, Some(&java_dataset))?; + let java_allocator = env + .call_method( + &java_dataset, + "allocator", + "()Lorg/apache/arrow/memory/BufferAllocator;", + &[], + )? + .l()?; + let transaction = convert_to_rust_transaction(env, java_transaction, Some(&java_allocator))?; let new_blocking_ds = { let mut dataset_guard = unsafe { env.get_rust_field::<_, _, BlockingDataset>(&java_dataset, NATIVE_DATASET) }?; @@ -673,7 +690,7 @@ fn inner_commit_transaction<'local>( fn convert_to_rust_transaction( env: &mut JNIEnv, java_transaction: JObject, - java_dataset: Option<&JObject>, + allocator: Option<&JObject>, ) -> Result { let read_ver = env.get_u64_from_method(&java_transaction, "readVersion")?; let uuid = env.get_string_from_method(&java_transaction, "uuid")?; @@ -685,7 +702,7 @@ fn convert_to_rust_transaction( &[], )? .l()?; - let op = convert_to_rust_operation(env, &op, java_dataset)?; + let op = convert_to_rust_operation(env, &op, allocator)?; let transaction_properties = env.get_optional_from_method( &java_transaction, @@ -704,22 +721,14 @@ fn convert_to_rust_transaction( fn convert_schema_from_operation( env: &mut JNIEnv, java_operation: &JObject, - java_dataset: &JObject, + java_allocator: &JObject, ) -> Result { - let java_buffer_allocator = env - .call_method( - java_dataset, - "allocator", - "()Lorg/apache/arrow/memory/BufferAllocator;", - &[], - )? - .l()?; let schema_ptr = env .call_method( java_operation, "exportSchema", "(Lorg/apache/arrow/memory/BufferAllocator;)J", - &[JValue::Object(&java_buffer_allocator)], + &[JValue::Object(java_allocator)], )? .j()?; let c_schema_ptr = schema_ptr as *mut FFI_ArrowSchema; @@ -734,12 +743,12 @@ fn convert_schema_from_operation( fn convert_to_rust_operation( env: &mut JNIEnv<'_>, java_operation: &JObject<'_>, - java_dataset: Option<&JObject<'_>>, + allocator: Option<&JObject<'_>>, ) -> Result { let op_name = env.get_string_from_method(java_operation, "name")?; let op = match op_name.as_str() { "Project" => Operation::Project { - schema: convert_schema_from_operation(env, java_operation, java_dataset.unwrap())?, + schema: convert_schema_from_operation(env, java_operation, allocator.unwrap())?, }, "UpdateConfig" => { let config_updates_obj = env @@ -860,7 +869,7 @@ fn convert_to_rust_operation( to_rust_map(env, &config_upsert_values) }, )?; - let schema = convert_schema_from_operation(env, java_operation, java_dataset.unwrap())?; + let schema = convert_schema_from_operation(env, java_operation, allocator.unwrap())?; Operation::Overwrite { fragments, schema, @@ -954,7 +963,7 @@ fn convert_to_rust_operation( })?; Operation::Merge { fragments, - schema: convert_schema_from_operation(env, java_operation, java_dataset.unwrap())?, + schema: convert_schema_from_operation(env, java_operation, allocator.unwrap())?, } } "Restore" => { @@ -1068,3 +1077,105 @@ fn export_update_map<'a>( } } } + +#[no_mangle] +pub extern "system" fn Java_org_lance_Transaction_nativeCommitTransactionToUri<'local>( + mut env: JNIEnv<'local>, + _cls: JObject, + uri: JString, + java_transaction: JObject, + enable_v2_manifest_paths: jboolean, + storage_options_provider_obj: JObject, + namespace_obj: JObject, + table_id_obj: JObject, + allocator_obj: JObject, +) -> JObject<'local> { + ok_or_throw!( + env, + inner_commit_transaction_to_uri( + &mut env, + uri, + java_transaction, + enable_v2_manifest_paths != 0, + storage_options_provider_obj, + namespace_obj, + table_id_obj, + allocator_obj, + ) + ) +} + +#[allow(clippy::too_many_arguments)] +fn inner_commit_transaction_to_uri<'local>( + env: &mut JNIEnv<'local>, + uri: JString, + java_transaction: JObject, + enable_v2_manifest_paths: bool, + storage_options_provider_obj: JObject, + namespace_obj: JObject, + table_id_obj: JObject, + allocator_obj: JObject, +) -> Result> { + let uri_str: String = uri.extract(env)?; + + // Extract write params from the transaction as storage options + let write_param_jobj = env + .call_method(&java_transaction, "writeParams", "()Ljava/util/Map;", &[])? + .l()?; + let write_param_jmap = JMap::from_env(env, &write_param_jobj)?; + let write_param = to_rust_map(env, &write_param_jmap)?; + + // Build storage options accessor + let storage_options_provider: Option = env + .get_optional(&storage_options_provider_obj, |env, provider_obj| { + JavaStorageOptionsProvider::new(env, provider_obj) + })?; + + let accessor = match (write_param.is_empty(), storage_options_provider) { + (false, Some(provider)) => Some(Arc::new( + lance::io::StorageOptionsAccessor::with_initial_and_provider( + write_param, + Arc::new(provider) as Arc, + ), + )), + (false, None) => Some(Arc::new( + lance::io::StorageOptionsAccessor::with_static_options(write_param), + )), + (true, Some(provider)) => Some(Arc::new(lance::io::StorageOptionsAccessor::with_provider( + Arc::new(provider) as Arc, + ))), + (true, None) => None, + }; + + let store_params = ObjectStoreParams { + storage_options_accessor: accessor, + ..Default::default() + }; + + // Convert Java transaction to Rust + let allocator_ref = if allocator_obj.is_null() { + None + } else { + Some(allocator_obj) + }; + let transaction = convert_to_rust_transaction(env, java_transaction, allocator_ref.as_ref())?; + + // Build CommitBuilder with URI + let mut builder = CommitBuilder::new(&*uri_str) + .with_store_params(store_params) + .enable_v2_manifest_paths(enable_v2_manifest_paths); + + // Set namespace commit handler if provided + let namespace_info = extract_namespace_info(env, &namespace_obj, &table_id_obj)?; + if let Some((ns, tid)) = namespace_info { + let external_store = LanceNamespaceExternalManifestStore::new(ns, tid); + let commit_handler: Arc = Arc::new(ExternalManifestCommitHandler { + external_manifest_store: Arc::new(external_store), + }); + builder = builder.with_commit_handler(commit_handler); + } + + let dataset = RT.block_on(builder.execute(transaction))?; + let blocking_ds = BlockingDataset { inner: dataset }; + blocking_ds.into_java(env) +} diff --git a/java/src/main/java/org/lance/Transaction.java b/java/src/main/java/org/lance/Transaction.java index 2d565c73258..3056cec9656 100644 --- a/java/src/main/java/org/lance/Transaction.java +++ b/java/src/main/java/org/lance/Transaction.java @@ -13,12 +13,15 @@ */ package org.lance; +import org.lance.io.StorageOptionsProvider; import org.lance.operation.Operation; import com.google.common.base.MoreObjects; +import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; @@ -38,6 +41,14 @@ public class Transaction { private final Dataset dataset; private final Operation operation; + // URI-based commit fields + private final String uri; + private final BufferAllocator allocator; + private final StorageOptionsProvider storageOptionsProvider; + private final Object namespace; + private final List tableId; + private final boolean enableV2ManifestPaths; + private Transaction( Dataset dataset, long readVersion, @@ -45,12 +56,46 @@ private Transaction( Operation operation, Map writeParams, Map transactionProperties) { + this( + dataset, + readVersion, + uuid, + operation, + writeParams, + transactionProperties, + null, + null, + null, + null, + null, + true); + } + + private Transaction( + Dataset dataset, + long readVersion, + String uuid, + Operation operation, + Map writeParams, + Map transactionProperties, + String uri, + BufferAllocator allocator, + StorageOptionsProvider storageOptionsProvider, + Object namespace, + List tableId, + boolean enableV2ManifestPaths) { this.dataset = dataset; this.readVersion = readVersion; this.uuid = uuid; this.operation = operation; this.writeParams = writeParams != null ? writeParams : new HashMap<>(); this.transactionProperties = Optional.ofNullable(transactionProperties); + this.uri = uri; + this.allocator = allocator; + this.storageOptionsProvider = storageOptionsProvider; + this.namespace = namespace; + this.tableId = tableId; + this.enableV2ManifestPaths = enableV2ManifestPaths; } public long readVersion() { @@ -73,11 +118,59 @@ public Optional> transactionProperties() { return transactionProperties; } + public String uri() { + return uri; + } + + public BufferAllocator allocator() { + return allocator; + } + + public StorageOptionsProvider storageOptionsProvider() { + return storageOptionsProvider; + } + + public Object namespace() { + return namespace; + } + + public List tableId() { + return tableId; + } + + public boolean enableV2ManifestPaths() { + return enableV2ManifestPaths; + } + + /** + * Commit the transaction and return a new Dataset. + * + *

When this transaction was built from an existing dataset, commits against that dataset. When + * built from a URI, creates a new dataset at the specified location. + * + * @return a new Dataset at the committed version + */ public Dataset commit() { - if (dataset == null) { - throw new UnsupportedOperationException("Transaction doesn't support create new dataset yet"); + if (dataset != null) { + return dataset.commitTransaction(this); + } + if (uri != null) { + return commitToUri(); } - return dataset.commitTransaction(this); + throw new IllegalStateException("Transaction requires either a dataset or a URI"); + } + + private Dataset commitToUri() { + Dataset result = + nativeCommitTransactionToUri( + uri, + this, + enableV2ManifestPaths, + storageOptionsProvider, + namespace, + tableId, + allocator); + return result; } public void release() { @@ -92,6 +185,7 @@ public String toString() { .add("operation", operation) .add("writeParams", writeParams) .add("transactionProperties", transactionProperties) + .add("uri", uri) .toString(); } @@ -111,19 +205,53 @@ public boolean equals(Object o) { && Objects.equals(transactionProperties, that.transactionProperties); } + private static native Dataset nativeCommitTransactionToUri( + String uri, + Transaction transaction, + boolean enableV2ManifestPaths, + Object storageOptionsProvider, + Object namespace, + Object tableId, + Object allocator); + public static class Builder { private final String uuid; - private final Dataset dataset; + private Dataset dataset; + private String uri; + private BufferAllocator allocator; private long readVersion; private Operation operation; private Map writeParams; private Map transactionProperties; + private StorageOptionsProvider storageOptionsProvider; + private Object namespace; + private List tableId; + private boolean enableV2ManifestPaths = true; + /** + * Create a builder for committing against an existing dataset. + * + * @param dataset the existing dataset to commit against + */ public Builder(Dataset dataset) { this.dataset = dataset; this.uuid = UUID.randomUUID().toString(); } + /** + * Create a builder for creating a new dataset at the given URI. + * + * @param uri the target URI for the new dataset + * @param allocator the Arrow buffer allocator for schema export + */ + public Builder(String uri, BufferAllocator allocator) { + Preconditions.checkNotNull(uri, "URI must not be null"); + Preconditions.checkNotNull(allocator, "Allocator must not be null"); + this.uri = uri; + this.allocator = allocator; + this.uuid = UUID.randomUUID().toString(); + } + public Builder readVersion(long readVersion) { this.readVersion = readVersion; return this; @@ -145,6 +273,54 @@ public Builder operation(Operation operation) { return this; } + /** + * Set the storage options provider for credential refresh during URI-based commits. + * + * @param provider the storage options provider + * @return this builder instance + */ + public Builder storageOptionsProvider(StorageOptionsProvider provider) { + this.storageOptionsProvider = provider; + return this; + } + + /** + * Set the namespace for managed versioning during URI-based commits. + * + * @param namespace the LanceNamespace instance + * @return this builder instance + */ + public Builder namespace(Object namespace) { + this.namespace = namespace; + return this; + } + + /** + * Set the table ID for namespace-based commit handling. + * + * @param tableId the table identifier (e.g., ["workspace", "table_name"]) + * @return this builder instance + */ + public Builder tableId(List tableId) { + this.tableId = tableId; + return this; + } + + /** + * Enable or disable v2 manifest paths for new datasets. + * + *

Defaults to true. V2 manifest paths allow constant-time lookups for the latest manifest on + * object storage. Warning: enabling this makes the dataset unreadable for Lance versions prior + * to 0.17.0. + * + * @param enable whether to enable v2 manifest paths + * @return this builder instance + */ + public Builder enableV2ManifestPaths(boolean enable) { + this.enableV2ManifestPaths = enable; + return this; + } + private void validateState() { if (operation != null) { throw new IllegalStateException( @@ -156,7 +332,18 @@ public Transaction build() { Preconditions.checkState(operation != null, "TransactionBuilder has no operations"); return new Transaction( - dataset, readVersion, uuid, operation, writeParams, transactionProperties); + dataset, + readVersion, + uuid, + operation, + writeParams, + transactionProperties, + uri, + allocator, + storageOptionsProvider, + namespace, + tableId, + enableV2ManifestPaths); } } } diff --git a/java/src/test/java/org/lance/TransactionTest.java b/java/src/test/java/org/lance/TransactionTest.java index c9d0e937263..d1d812046a3 100644 --- a/java/src/test/java/org/lance/TransactionTest.java +++ b/java/src/test/java/org/lance/TransactionTest.java @@ -19,8 +19,10 @@ import org.lance.index.scalar.ScalarIndexParams; import org.lance.operation.Append; import org.lance.operation.CreateIndex; +import org.lance.operation.Overwrite; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.types.pojo.Schema; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -109,4 +111,34 @@ public void testReadTransactionCreateIndex(@TempDir Path tempDir) { } } } + + @Test + public void testCommitToUri(@TempDir Path tempDir) { + String datasetPath = tempDir.resolve("testCommitToUri").toString(); + try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) { + TestUtils.SimpleTestDataset testDataset = + new TestUtils.SimpleTestDataset(allocator, datasetPath); + Schema schema = testDataset.getSchema(); + + // Create fragments at the dataset path + FragmentMetadata fragmentMeta = testDataset.createNewFragment(20); + + // Build a transaction targeting a URI (no existing dataset) + Transaction txn = + new Transaction.Builder(datasetPath, allocator) + .operation( + Overwrite.builder() + .fragments(Collections.singletonList(fragmentMeta)) + .schema(schema) + .build()) + .build(); + + try (Dataset committedDataset = txn.commit()) { + assertEquals(1, committedDataset.version()); + assertEquals(20, committedDataset.countRows()); + } finally { + txn.release(); + } + } + } } From e83e26fbd8664177bf279e60acace028900810d2 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Tue, 24 Feb 2026 13:49:18 -0600 Subject: [PATCH 2/7] added new CommitBuilder API to Java SDK Signed-off-by: Daniel Rammer --- java/lance-jni/src/blocking_dataset.rs | 16 +- java/lance-jni/src/delta.rs | 6 +- java/lance-jni/src/transaction.rs | 50 ++-- .../main/java/org/lance/CommitBuilder.java | 218 ++++++++++++++ java/src/main/java/org/lance/Dataset.java | 15 +- .../java/org/lance/SourcedTransaction.java | 149 ++++++++++ java/src/main/java/org/lance/Transaction.java | 273 +++--------------- java/src/test/java/org/lance/DatasetTest.java | 13 +- .../src/test/java/org/lance/FragmentTest.java | 4 +- .../org/lance/NamespaceIntegrationTest.java | 4 +- java/src/test/java/org/lance/TestUtils.java | 2 +- .../test/java/org/lance/TransactionTest.java | 6 +- .../java/org/lance/index/ScalarIndexTest.java | 6 +- .../java/org/lance/index/VectorIndexTest.java | 8 +- .../java/org/lance/operation/AppendTest.java | 8 +- .../lance/operation/DataReplacementTest.java | 6 +- .../java/org/lance/operation/DeleteTest.java | 5 +- .../java/org/lance/operation/MergeTest.java | 8 +- .../lance/operation/OperationTestBase.java | 4 +- .../org/lance/operation/OverwriteTest.java | 6 +- .../java/org/lance/operation/ProjectTest.java | 10 +- .../lance/operation/ReserveFragmentsTest.java | 11 +- .../java/org/lance/operation/RestoreTest.java | 9 +- .../java/org/lance/operation/RewriteTest.java | 8 +- .../org/lance/operation/TruncateTest.java | 4 +- .../org/lance/operation/UpdateConfigTest.java | 4 +- .../java/org/lance/operation/UpdateTest.java | 9 +- 27 files changed, 523 insertions(+), 339 deletions(-) create mode 100644 java/src/main/java/org/lance/CommitBuilder.java create mode 100644 java/src/main/java/org/lance/SourcedTransaction.java diff --git a/java/lance-jni/src/blocking_dataset.rs b/java/lance-jni/src/blocking_dataset.rs index e5fecf660a4..32cf9a14ce0 100644 --- a/java/lance-jni/src/blocking_dataset.rs +++ b/java/lance-jni/src/blocking_dataset.rs @@ -3,6 +3,9 @@ use crate::error::{Error, Result}; use crate::ffi::JNIEnvExt; +use crate::namespace::{ + create_java_lance_namespace, BlockingDirectoryNamespace, BlockingRestNamespace, +}; use crate::session::{handle_from_session, session_from_handle}; use crate::storage_options::JavaStorageOptionsProvider; use crate::traits::{export_vec, import_vec, FromJObjectWithEnv, FromJString}; @@ -1171,7 +1174,7 @@ fn inner_open_native<'local>( } /// Check if the Java object is an instance of DirectoryNamespace. -pub(crate) fn is_directory_namespace(env: &mut JNIEnv, namespace_obj: &JObject) -> Result { +fn is_directory_namespace(env: &mut JNIEnv, namespace_obj: &JObject) -> Result { let class = env .find_class("org/lance/namespace/DirectoryNamespace") .map_err(|e| { @@ -1182,7 +1185,7 @@ pub(crate) fn is_directory_namespace(env: &mut JNIEnv, namespace_obj: &JObject) } /// Check if the Java object is an instance of RestNamespace. -pub(crate) fn is_rest_namespace(env: &mut JNIEnv, namespace_obj: &JObject) -> Result { +fn is_rest_namespace(env: &mut JNIEnv, namespace_obj: &JObject) -> Result { let class = env .find_class("org/lance/namespace/RestNamespace") .map_err(|e| Error::runtime_error(format!("Failed to find RestNamespace class: {}", e)))?; @@ -1191,10 +1194,7 @@ pub(crate) fn is_rest_namespace(env: &mut JNIEnv, namespace_obj: &JObject) -> Re } /// Get the native handle from a Java LanceNamespace object. -pub(crate) fn get_native_namespace_handle( - env: &mut JNIEnv, - namespace_obj: &JObject, -) -> Result { +fn get_native_namespace_handle(env: &mut JNIEnv, namespace_obj: &JObject) -> Result { env.call_method(namespace_obj, "getNativeHandle", "()J", &[]) .map_err(|e| Error::runtime_error(format!("Failed to call getNativeHandle: {}", e)))? .j() @@ -1211,10 +1211,6 @@ pub(crate) fn extract_namespace_info( namespace_obj: &JObject, table_id_obj: &JObject, ) -> Result, Vec)>> { - use crate::namespace::{ - create_java_lance_namespace, BlockingDirectoryNamespace, BlockingRestNamespace, - }; - if namespace_obj.is_null() { return Ok(None); } diff --git a/java/lance-jni/src/delta.rs b/java/lance-jni/src/delta.rs index 8a407a19167..c07b94bcf1c 100755 --- a/java/lance-jni/src/delta.rs +++ b/java/lance-jni/src/delta.rs @@ -126,13 +126,9 @@ fn inner_list_transactions<'local>( RT.block_on(delta_guard.inner.list_transactions())? }; - let java_dataset = env - .get_field(&j_delta, "dataset", "Lorg/lance/Dataset;")? - .l()?; - let array_list = env.new_object("java/util/ArrayList", "()V", &[])?; for tx in txs.into_iter() { - let jtx = convert_to_java_transaction(env, tx, &java_dataset)?; + let jtx = convert_to_java_transaction(env, tx)?; env.call_method( &array_list, "add", diff --git a/java/lance-jni/src/transaction.rs b/java/lance-jni/src/transaction.rs index 39bbf17fae7..a91e393688c 100644 --- a/java/lance-jni/src/transaction.rs +++ b/java/lance-jni/src/transaction.rs @@ -295,7 +295,7 @@ fn inner_read_transaction<'local>( }; let transaction = match transaction { - Some(transaction) => convert_to_java_transaction(env, transaction, &java_dataset)?, + Some(transaction) => convert_to_java_transaction(env, transaction)?, None => JObject::null(), }; Ok(transaction) @@ -304,7 +304,6 @@ fn inner_read_transaction<'local>( pub(crate) fn convert_to_java_transaction<'local>( env: &mut JNIEnv<'local>, transaction: Transaction, - java_dataset: &JObject, ) -> Result> { let uuid = env.new_string(transaction.uuid)?; let transaction_properties = match transaction.transaction_properties { @@ -315,13 +314,11 @@ pub(crate) fn convert_to_java_transaction<'local>( let java_transaction = env.new_object( "org/lance/Transaction", - "(Lorg/lance/Dataset;JLjava/lang/String;Lorg/lance/operation/Operation;Ljava/util/Map;Ljava/util/Map;)V", + "(JLjava/lang/String;Lorg/lance/operation/Operation;Ljava/util/Map;)V", &[ - JValue::Object(java_dataset), JValue::Long(transaction.read_version as i64), JValue::Object(&uuid), JValue::Object(&operation), - JValue::Object(&JObject::null()), JValue::Object(&transaction_properties), ], )?; @@ -589,37 +586,42 @@ pub(crate) fn convert_to_java_schema<'local>( } #[no_mangle] -pub extern "system" fn Java_org_lance_Dataset_nativeCommitTransaction<'local>( +pub extern "system" fn Java_org_lance_CommitBuilder_nativeCommitToDataset<'local>( mut env: JNIEnv<'local>, + _cls: JObject, java_dataset: JObject, java_transaction: JObject, detached_jbool: jboolean, enable_v2_manifest_paths: jboolean, + write_params_obj: JObject, ) -> JObject<'local> { ok_or_throw!( env, - inner_commit_transaction( + inner_commit_to_dataset( &mut env, java_dataset, java_transaction, detached_jbool != 0, enable_v2_manifest_paths != 0, + write_params_obj, ) ) } -fn inner_commit_transaction<'local>( +fn inner_commit_to_dataset<'local>( env: &mut JNIEnv<'local>, java_dataset: JObject, java_transaction: JObject, detached: bool, enable_v2_manifest_paths: bool, + write_params_obj: JObject, ) -> Result> { - let write_param_jobj = env - .call_method(&java_transaction, "writeParams", "()Ljava/util/Map;", &[])? - .l()?; - let write_param_jmap = JMap::from_env(env, &write_param_jobj)?; - let write_param = to_rust_map(env, &write_param_jmap)?; + let write_param = if write_params_obj.is_null() { + HashMap::new() + } else { + let write_param_jmap = JMap::from_env(env, &write_params_obj)?; + to_rust_map(env, &write_param_jmap)? + }; // Get the Dataset's storage_options_accessor and merge with write_param let storage_options_accessor = { @@ -1079,7 +1081,7 @@ fn export_update_map<'a>( } #[no_mangle] -pub extern "system" fn Java_org_lance_Transaction_nativeCommitTransactionToUri<'local>( +pub extern "system" fn Java_org_lance_CommitBuilder_nativeCommitToUri<'local>( mut env: JNIEnv<'local>, _cls: JObject, uri: JString, @@ -1089,10 +1091,11 @@ pub extern "system" fn Java_org_lance_Transaction_nativeCommitTransactionToUri<' namespace_obj: JObject, table_id_obj: JObject, allocator_obj: JObject, + write_params_obj: JObject, ) -> JObject<'local> { ok_or_throw!( env, - inner_commit_transaction_to_uri( + inner_commit_to_uri( &mut env, uri, java_transaction, @@ -1101,12 +1104,13 @@ pub extern "system" fn Java_org_lance_Transaction_nativeCommitTransactionToUri<' namespace_obj, table_id_obj, allocator_obj, + write_params_obj, ) ) } #[allow(clippy::too_many_arguments)] -fn inner_commit_transaction_to_uri<'local>( +fn inner_commit_to_uri<'local>( env: &mut JNIEnv<'local>, uri: JString, java_transaction: JObject, @@ -1115,15 +1119,17 @@ fn inner_commit_transaction_to_uri<'local>( namespace_obj: JObject, table_id_obj: JObject, allocator_obj: JObject, + write_params_obj: JObject, ) -> Result> { let uri_str: String = uri.extract(env)?; - // Extract write params from the transaction as storage options - let write_param_jobj = env - .call_method(&java_transaction, "writeParams", "()Ljava/util/Map;", &[])? - .l()?; - let write_param_jmap = JMap::from_env(env, &write_param_jobj)?; - let write_param = to_rust_map(env, &write_param_jmap)?; + // Extract write params from parameter + let write_param = if write_params_obj.is_null() { + HashMap::new() + } else { + let write_param_jmap = JMap::from_env(env, &write_params_obj)?; + to_rust_map(env, &write_param_jmap)? + }; // Build storage options accessor let storage_options_provider: Option = env diff --git a/java/src/main/java/org/lance/CommitBuilder.java b/java/src/main/java/org/lance/CommitBuilder.java new file mode 100644 index 00000000000..c53742fd0cd --- /dev/null +++ b/java/src/main/java/org/lance/CommitBuilder.java @@ -0,0 +1,218 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance; + +import org.lance.io.StorageOptionsProvider; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.util.Preconditions; + +import java.util.List; +import java.util.Map; + +/** + * Builder for committing a {@link Transaction} to a Lance dataset. + * + *

Supports two modes: + * + *

    + *
  • Dataset-based commit: commits against an existing dataset. + *
  • URI-based commit: creates or updates a dataset at a URI. + *
+ * + *

Example usage (dataset-based): + * + *

{@code
+ * Transaction txn = new Transaction.Builder()
+ *     .readVersion(dataset.version())
+ *     .operation(Append.builder().fragments(fragments).build())
+ *     .build();
+ * try (Dataset committed = new CommitBuilder(dataset).execute(txn)) {
+ *     // use committed dataset
+ * } finally {
+ *     txn.release();
+ * }
+ * }
+ * + *

Example usage (URI-based): + * + *

{@code
+ * Transaction txn = new Transaction.Builder()
+ *     .operation(Overwrite.builder().fragments(fragments).schema(schema).build())
+ *     .build();
+ * try (Dataset committed = new CommitBuilder(uri, allocator).execute(txn)) {
+ *     // use committed dataset
+ * } finally {
+ *     txn.release();
+ * }
+ * }
+ */ +public class CommitBuilder { + static { + JniLoader.ensureLoaded(); + } + + private final Dataset dataset; + private final String uri; + private final BufferAllocator allocator; + + private Map writeParams; + private StorageOptionsProvider storageOptionsProvider; + private Object namespace; + private List tableId; + private boolean enableV2ManifestPaths = true; + private boolean detached = false; + + /** + * Create a commit builder for committing against an existing dataset. + * + * @param dataset the existing dataset to commit against + */ + public CommitBuilder(Dataset dataset) { + Preconditions.checkNotNull(dataset, "Dataset must not be null"); + this.dataset = dataset; + this.uri = null; + this.allocator = null; + } + + /** + * Create a commit builder for creating or updating a dataset at the given URI. + * + * @param uri the target URI for the dataset + * @param allocator the Arrow buffer allocator for schema export + */ + public CommitBuilder(String uri, BufferAllocator allocator) { + Preconditions.checkNotNull(uri, "URI must not be null"); + Preconditions.checkNotNull(allocator, "Allocator must not be null"); + this.dataset = null; + this.uri = uri; + this.allocator = allocator; + } + + /** + * Set write parameters (storage options) for the commit. + * + * @param writeParams the write parameters + * @return this builder instance + */ + public CommitBuilder writeParams(Map writeParams) { + this.writeParams = writeParams; + return this; + } + + /** + * Set the storage options provider for credential refresh during URI-based commits. + * + * @param provider the storage options provider + * @return this builder instance + */ + public CommitBuilder storageOptionsProvider(StorageOptionsProvider provider) { + this.storageOptionsProvider = provider; + return this; + } + + /** + * Set the namespace for managed versioning during URI-based commits. + * + * @param namespace the LanceNamespace instance + * @return this builder instance + */ + public CommitBuilder namespace(Object namespace) { + this.namespace = namespace; + return this; + } + + /** + * Set the table ID for namespace-based commit handling. + * + * @param tableId the table identifier (e.g., ["workspace", "table_name"]) + * @return this builder instance + */ + public CommitBuilder tableId(List tableId) { + this.tableId = tableId; + return this; + } + + /** + * Enable or disable v2 manifest paths for new datasets. + * + *

Defaults to true. V2 manifest paths allow constant-time lookups for the latest manifest on + * object storage. Warning: enabling this makes the dataset unreadable for Lance versions prior to + * 0.17.0. + * + * @param enable whether to enable v2 manifest paths + * @return this builder instance + */ + public CommitBuilder enableV2ManifestPaths(boolean enable) { + this.enableV2ManifestPaths = enable; + return this; + } + + /** + * Set whether the commit should be detached from the main dataset lineage. + * + * @param detached if true, the commit will not be part of the main dataset lineage + * @return this builder instance + */ + public CommitBuilder detached(boolean detached) { + this.detached = detached; + return this; + } + + /** + * Execute the commit with the given transaction. + * + *

The caller is responsible for calling {@link Transaction#release()} after this method + * returns. + * + * @param transaction the transaction to commit + * @return a new Dataset at the committed version + */ + public Dataset execute(Transaction transaction) { + Preconditions.checkNotNull(transaction, "Transaction must not be null"); + if (dataset != null) { + return nativeCommitToDataset( + dataset, transaction, detached, enableV2ManifestPaths, writeParams); + } + if (uri != null) { + return nativeCommitToUri( + uri, + transaction, + enableV2ManifestPaths, + storageOptionsProvider, + namespace, + tableId, + allocator, + writeParams); + } + throw new IllegalStateException("CommitBuilder requires either a dataset or a URI"); + } + + private static native Dataset nativeCommitToDataset( + Dataset dataset, + Transaction transaction, + boolean detached, + boolean enableV2ManifestPaths, + Map writeParams); + + private static native Dataset nativeCommitToUri( + String uri, + Transaction transaction, + boolean enableV2ManifestPaths, + Object storageOptionsProvider, + Object namespace, + Object tableId, + Object allocator, + Map writeParams); +} diff --git a/java/src/main/java/org/lance/Dataset.java b/java/src/main/java/org/lance/Dataset.java index ef5340f5744..e7536483450 100644 --- a/java/src/main/java/org/lance/Dataset.java +++ b/java/src/main/java/org/lance/Dataset.java @@ -515,10 +515,10 @@ public BufferAllocator allocator() { * Create a new transaction builder at current version for the dataset. The dataset itself will * not refresh after the transaction committed. * - * @return A new instance of {@link Transaction.Builder} linked to the opened dataset. + * @return A new instance of {@link SourcedTransaction.Builder} linked to the opened dataset. */ - public Transaction.Builder newTransactionBuilder() { - return new Transaction.Builder(this).readVersion(version()); + public SourcedTransaction.Builder newTransactionBuilder() { + return new SourcedTransaction.Builder(this); } /** @@ -549,7 +549,11 @@ public Dataset commitTransaction( Transaction transaction, boolean detached, boolean enableV2ManifestPaths) { Preconditions.checkNotNull(transaction); try { - Dataset dataset = nativeCommitTransaction(transaction, detached, enableV2ManifestPaths); + Dataset dataset = + new CommitBuilder(this) + .detached(detached) + .enableV2ManifestPaths(enableV2ManifestPaths) + .execute(transaction); if (selfManagedAllocator) { dataset.allocator = new RootAllocator(Long.MAX_VALUE); } else { @@ -561,9 +565,6 @@ public Dataset commitTransaction( } } - private native Dataset nativeCommitTransaction( - Transaction transaction, boolean detached, boolean enableV2ManifestPaths); - /** * Drop a Dataset. * diff --git a/java/src/main/java/org/lance/SourcedTransaction.java b/java/src/main/java/org/lance/SourcedTransaction.java new file mode 100644 index 00000000000..94551ffeb82 --- /dev/null +++ b/java/src/main/java/org/lance/SourcedTransaction.java @@ -0,0 +1,149 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance; + +import org.lance.operation.Operation; + +import org.apache.arrow.util.Preconditions; + +import java.util.Map; +import java.util.Optional; + +/** + * A convenience wrapper that pairs a {@link Transaction} with a {@link Dataset}, providing a simple + * commit workflow. + * + *

This replaces the old {@code Transaction} class's "sourced" role where the transaction held a + * reference to the dataset it was built from. + * + *

Example usage: + * + *

{@code
+ * SourcedTransaction txn = dataset.newSourcedTransactionBuilder()
+ *     .operation(Append.builder().fragments(fragments).build())
+ *     .build();
+ * try (Dataset committed = txn.commit()) {
+ *     // use committed dataset
+ * }
+ * }
+ */ +public class SourcedTransaction { + + private final Transaction transaction; + private final Dataset dataset; + + private SourcedTransaction(Transaction transaction, Dataset dataset) { + this.transaction = transaction; + this.dataset = dataset; + } + + /** Returns the underlying {@link Transaction}. */ + public Transaction transaction() { + return transaction; + } + + /** Delegates to {@link Transaction#readVersion()}. */ + public long readVersion() { + return transaction.readVersion(); + } + + /** Delegates to {@link Transaction#uuid()}. */ + public String uuid() { + return transaction.uuid(); + } + + /** Delegates to {@link Transaction#operation()}. */ + public Operation operation() { + return transaction.operation(); + } + + /** Delegates to {@link Transaction#transactionProperties()}. */ + public Optional> transactionProperties() { + return transaction.transactionProperties(); + } + + /** + * Commit this transaction against the source dataset. + * + * @return a new Dataset at the committed version + */ + public Dataset commit() { + return dataset.commitTransaction(transaction); + } + + /** + * Commit this transaction against the source dataset with additional options. + * + * @param detached if true, the commit will not be part of the main dataset lineage + * @param enableV2ManifestPaths if true, and this is a new dataset, uses the new V2 manifest paths + * @return a new Dataset at the committed version + */ + public Dataset commit(boolean detached, boolean enableV2ManifestPaths) { + return dataset.commitTransaction(transaction, detached, enableV2ManifestPaths); + } + + /** Release native resources held by the underlying transaction's operation. */ + public void release() { + transaction.release(); + } + + /** Builder for constructing {@link SourcedTransaction} instances from a {@link Dataset}. */ + public static class Builder { + private final Dataset dataset; + private long readVersion; + private Operation operation; + private Map transactionProperties; + + /** + * Create a builder for committing against an existing dataset. The read version defaults to the + * dataset's current version. + * + * @param dataset the existing dataset to commit against + */ + public Builder(Dataset dataset) { + this.dataset = dataset; + this.readVersion = dataset.version(); + } + + public Builder readVersion(long readVersion) { + this.readVersion = readVersion; + return this; + } + + public Builder operation(Operation operation) { + if (this.operation != null) { + throw new IllegalStateException( + String.format("Operation %s has been set", this.operation.name())); + } + this.operation = operation; + return this; + } + + public Builder transactionProperties(Map properties) { + this.transactionProperties = properties; + return this; + } + + public SourcedTransaction build() { + Preconditions.checkState(operation != null, "TransactionBuilder has no operations"); + Transaction transaction = + new Transaction.Builder() + .readVersion(readVersion) + .operation(operation) + .transactionProperties(transactionProperties) + .build(); + return new SourcedTransaction(transaction, dataset); + } + } +} diff --git a/java/src/main/java/org/lance/Transaction.java b/java/src/main/java/org/lance/Transaction.java index 3056cec9656..9f7ad436f36 100644 --- a/java/src/main/java/org/lance/Transaction.java +++ b/java/src/main/java/org/lance/Transaction.java @@ -13,89 +13,59 @@ */ package org.lance; -import org.lance.io.StorageOptionsProvider; import org.lance.operation.Operation; import com.google.common.base.MoreObjects; -import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; -import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.UUID; /** - * Align with the Transaction struct in rust. The transaction won't commit the status to original - * dataset. It will return a new dataset after committed. + * A pure data container representing a Lance transaction. + * + *

A Transaction holds the read version, a unique identifier, the operation to perform, and + * optional transaction properties. It does not contain commit configuration or execution logic. + * + *

To commit a transaction, use {@link CommitBuilder} or {@link SourcedTransaction}. */ public class Transaction { private final long readVersion; private final String uuid; - private final Map writeParams; - private final Optional> transactionProperties; - // Mainly for JNI usage - private final Dataset dataset; private final Operation operation; + private final Optional> transactionProperties; - // URI-based commit fields - private final String uri; - private final BufferAllocator allocator; - private final StorageOptionsProvider storageOptionsProvider; - private final Object namespace; - private final List tableId; - private final boolean enableV2ManifestPaths; - + /** + * Constructor used by JNI when reading transactions from native code. + * + * @param readVersion the version that was read when creating this transaction + * @param uuid the unique identifier for this transaction + * @param operation the operation to perform + * @param transactionProperties optional transaction properties + */ private Transaction( - Dataset dataset, long readVersion, String uuid, Operation operation, - Map writeParams, Map transactionProperties) { - this( - dataset, - readVersion, - uuid, - operation, - writeParams, - transactionProperties, - null, - null, - null, - null, - null, - true); - } - - private Transaction( - Dataset dataset, - long readVersion, - String uuid, - Operation operation, - Map writeParams, - Map transactionProperties, - String uri, - BufferAllocator allocator, - StorageOptionsProvider storageOptionsProvider, - Object namespace, - List tableId, - boolean enableV2ManifestPaths) { - this.dataset = dataset; this.readVersion = readVersion; this.uuid = uuid; this.operation = operation; - this.writeParams = writeParams != null ? writeParams : new HashMap<>(); this.transactionProperties = Optional.ofNullable(transactionProperties); - this.uri = uri; - this.allocator = allocator; - this.storageOptionsProvider = storageOptionsProvider; - this.namespace = namespace; - this.tableId = tableId; - this.enableV2ManifestPaths = enableV2ManifestPaths; + } + + /** + * Create a transaction with the given read version and operation. A random UUID is generated + * automatically. + * + * @param readVersion the version that was read when creating this transaction + * @param operation the operation to perform + */ + public Transaction(long readVersion, Operation operation) { + this(readVersion, UUID.randomUUID().toString(), operation, null); } public long readVersion() { @@ -110,69 +80,11 @@ public Operation operation() { return operation; } - public Map writeParams() { - return writeParams; - } - public Optional> transactionProperties() { return transactionProperties; } - public String uri() { - return uri; - } - - public BufferAllocator allocator() { - return allocator; - } - - public StorageOptionsProvider storageOptionsProvider() { - return storageOptionsProvider; - } - - public Object namespace() { - return namespace; - } - - public List tableId() { - return tableId; - } - - public boolean enableV2ManifestPaths() { - return enableV2ManifestPaths; - } - - /** - * Commit the transaction and return a new Dataset. - * - *

When this transaction was built from an existing dataset, commits against that dataset. When - * built from a URI, creates a new dataset at the specified location. - * - * @return a new Dataset at the committed version - */ - public Dataset commit() { - if (dataset != null) { - return dataset.commitTransaction(this); - } - if (uri != null) { - return commitToUri(); - } - throw new IllegalStateException("Transaction requires either a dataset or a URI"); - } - - private Dataset commitToUri() { - Dataset result = - nativeCommitTransactionToUri( - uri, - this, - enableV2ManifestPaths, - storageOptionsProvider, - namespace, - tableId, - allocator); - return result; - } - + /** Release native resources held by the operation. */ public void release() { operation.release(); } @@ -183,9 +95,7 @@ public String toString() { .add("readVersion", readVersion) .add("uuid", uuid) .add("operation", operation) - .add("writeParams", writeParams) .add("transactionProperties", transactionProperties) - .add("uri", uri) .toString(); } @@ -201,54 +111,22 @@ public boolean equals(Object o) { return readVersion == that.readVersion && uuid.equals(that.uuid) && Objects.equals(operation, that.operation) - && Objects.equals(writeParams, that.writeParams) && Objects.equals(transactionProperties, that.transactionProperties); } - private static native Dataset nativeCommitTransactionToUri( - String uri, - Transaction transaction, - boolean enableV2ManifestPaths, - Object storageOptionsProvider, - Object namespace, - Object tableId, - Object allocator); + @Override + public int hashCode() { + return Objects.hash(readVersion, uuid, operation, transactionProperties); + } + /** Builder for constructing {@link Transaction} instances. */ public static class Builder { - private final String uuid; - private Dataset dataset; - private String uri; - private BufferAllocator allocator; + private String uuid; private long readVersion; private Operation operation; - private Map writeParams; private Map transactionProperties; - private StorageOptionsProvider storageOptionsProvider; - private Object namespace; - private List tableId; - private boolean enableV2ManifestPaths = true; - /** - * Create a builder for committing against an existing dataset. - * - * @param dataset the existing dataset to commit against - */ - public Builder(Dataset dataset) { - this.dataset = dataset; - this.uuid = UUID.randomUUID().toString(); - } - - /** - * Create a builder for creating a new dataset at the given URI. - * - * @param uri the target URI for the new dataset - * @param allocator the Arrow buffer allocator for schema export - */ - public Builder(String uri, BufferAllocator allocator) { - Preconditions.checkNotNull(uri, "URI must not be null"); - Preconditions.checkNotNull(allocator, "Allocator must not be null"); - this.uri = uri; - this.allocator = allocator; + public Builder() { this.uuid = UUID.randomUUID().toString(); } @@ -257,93 +135,28 @@ public Builder readVersion(long readVersion) { return this; } - public Builder transactionProperties(Map properties) { - this.transactionProperties = properties; - return this; - } - - public Builder writeParams(Map writeParams) { - this.writeParams = writeParams; + public Builder uuid(String uuid) { + this.uuid = uuid; return this; } public Builder operation(Operation operation) { - validateState(); + if (this.operation != null) { + throw new IllegalStateException( + String.format("Operation %s has been set", this.operation.name())); + } this.operation = operation; return this; } - /** - * Set the storage options provider for credential refresh during URI-based commits. - * - * @param provider the storage options provider - * @return this builder instance - */ - public Builder storageOptionsProvider(StorageOptionsProvider provider) { - this.storageOptionsProvider = provider; - return this; - } - - /** - * Set the namespace for managed versioning during URI-based commits. - * - * @param namespace the LanceNamespace instance - * @return this builder instance - */ - public Builder namespace(Object namespace) { - this.namespace = namespace; - return this; - } - - /** - * Set the table ID for namespace-based commit handling. - * - * @param tableId the table identifier (e.g., ["workspace", "table_name"]) - * @return this builder instance - */ - public Builder tableId(List tableId) { - this.tableId = tableId; - return this; - } - - /** - * Enable or disable v2 manifest paths for new datasets. - * - *

Defaults to true. V2 manifest paths allow constant-time lookups for the latest manifest on - * object storage. Warning: enabling this makes the dataset unreadable for Lance versions prior - * to 0.17.0. - * - * @param enable whether to enable v2 manifest paths - * @return this builder instance - */ - public Builder enableV2ManifestPaths(boolean enable) { - this.enableV2ManifestPaths = enable; + public Builder transactionProperties(Map properties) { + this.transactionProperties = properties; return this; } - private void validateState() { - if (operation != null) { - throw new IllegalStateException( - String.format("Operation %s has been set", operation.name())); - } - } - public Transaction build() { Preconditions.checkState(operation != null, "TransactionBuilder has no operations"); - - return new Transaction( - dataset, - readVersion, - uuid, - operation, - writeParams, - transactionProperties, - uri, - allocator, - storageOptionsProvider, - namespace, - tableId, - enableV2ManifestPaths); + return new Transaction(readVersion, uuid, operation, transactionProperties); } } } diff --git a/java/src/test/java/org/lance/DatasetTest.java b/java/src/test/java/org/lance/DatasetTest.java index a08a608161d..cb9942bdaaf 100644 --- a/java/src/test/java/org/lance/DatasetTest.java +++ b/java/src/test/java/org/lance/DatasetTest.java @@ -1147,8 +1147,8 @@ void testCommitTransactionDetachedTrue(@TempDir Path tempDir) { long baseRowCount = base.countRows(); FragmentMetadata fragment = suite.createNewFragment(5); Append append = Append.builder().fragments(Collections.singletonList(fragment)).build(); - Transaction transaction = base.newTransactionBuilder().operation(append).build(); - try (Dataset committed = base.commitTransaction(transaction, true, false)) { + SourcedTransaction transaction = base.newTransactionBuilder().operation(append).build(); + try (Dataset committed = base.commitTransaction(transaction.transaction(), true, false)) { // Original dataset is not refreshed to the new version. assertEquals(baseVersion, base.version()); assertEquals(baseRowCount, base.countRows()); @@ -1176,11 +1176,11 @@ void testCommitTransactionDetachedTrueOnV1ManifestThrowsUnsupported(@TempDir Pat FragmentMetadata fragment = suite.createNewFragment(3); Append append = Append.builder().fragments(Collections.singletonList(fragment)).build(); - Transaction transaction = dataset.newTransactionBuilder().operation(append).build(); + SourcedTransaction transaction = dataset.newTransactionBuilder().operation(append).build(); UnsupportedOperationException ex = assertThrows( UnsupportedOperationException.class, - () -> dataset.commitTransaction(transaction, true, false)); + () -> dataset.commitTransaction(transaction.transaction(), true, false)); // Error should indicate detached commits are not supported on v1 manifests. assertNotNull(ex.getMessage()); @@ -1210,9 +1210,10 @@ void testEnableStableRowIds(@TempDir Path tempDir) throws Exception { FragmentMetadata frag1 = testDataset.createNewFragment(10); FragmentMetadata frag2 = testDataset.createNewFragment(10); - Transaction.Builder builder = new Transaction.Builder(dataset); + SourcedTransaction.Builder builder = new SourcedTransaction.Builder(dataset); Append append = Append.builder().fragments(Arrays.asList(frag1, frag2)).build(); - Transaction transaction = builder.operation(append).readVersion(dataset.version()).build(); + SourcedTransaction transaction = + builder.operation(append).readVersion(dataset.version()).build(); // Step2: if move-stable-rowid is enabled, the rowids of new fragments should be // consecutive. diff --git a/java/src/test/java/org/lance/FragmentTest.java b/java/src/test/java/org/lance/FragmentTest.java index c1e646f492b..9452b57e49d 100644 --- a/java/src/test/java/org/lance/FragmentTest.java +++ b/java/src/test/java/org/lance/FragmentTest.java @@ -279,7 +279,7 @@ void testMergeColumns(@TempDir Path tempDir) throws Exception { // Commit fragment FragmentOperation.Append appendOp = new FragmentOperation.Append(Arrays.asList(fragmentMeta)); - Transaction transaction; + SourcedTransaction transaction; try (Dataset dataset = Dataset.commit(allocator, datasetPath, appendOp, Optional.of(1L))) { assertEquals(2, dataset.version()); assertEquals(2, dataset.latestVersion()); @@ -293,7 +293,7 @@ void testMergeColumns(@TempDir Path tempDir) throws Exception { FragmentMergeResult mergeResult = testDataset.mergeColumn(fragment, 10); - Transaction.Builder builder = new Transaction.Builder(dataset); + SourcedTransaction.Builder builder = new SourcedTransaction.Builder(dataset); transaction = builder .operation( diff --git a/java/src/test/java/org/lance/NamespaceIntegrationTest.java b/java/src/test/java/org/lance/NamespaceIntegrationTest.java index 2d6f8ab1443..dc67d24ca3f 100644 --- a/java/src/test/java/org/lance/NamespaceIntegrationTest.java +++ b/java/src/test/java/org/lance/NamespaceIntegrationTest.java @@ -1414,8 +1414,8 @@ void testTransactionCommitWithNamespace() throws Exception { // Create and commit transaction Append appendOp = Append.builder().fragments(newFragments).build(); - Transaction transaction = - new Transaction.Builder(datasetWithProvider) + SourcedTransaction transaction = + new SourcedTransaction.Builder(datasetWithProvider) .readVersion(datasetWithProvider.version()) .operation(appendOp) .build(); diff --git a/java/src/test/java/org/lance/TestUtils.java b/java/src/test/java/org/lance/TestUtils.java index 4a884696e54..9e4764b4b73 100644 --- a/java/src/test/java/org/lance/TestUtils.java +++ b/java/src/test/java/org/lance/TestUtils.java @@ -760,7 +760,7 @@ public Dataset createAndAppendRows(int totalRows, int batches) { fragments.add(createBlobFragment(batchRows, Integer.MAX_VALUE)); } - Transaction txn = + SourcedTransaction txn = ds.newTransactionBuilder() .operation(org.lance.operation.Append.builder().fragments(fragments).build()) .build(); diff --git a/java/src/test/java/org/lance/TransactionTest.java b/java/src/test/java/org/lance/TransactionTest.java index d1d812046a3..46e9d5dfbcc 100644 --- a/java/src/test/java/org/lance/TransactionTest.java +++ b/java/src/test/java/org/lance/TransactionTest.java @@ -51,7 +51,7 @@ public void testTransaction(@TempDir Path tempDir) { Map properties = new HashMap<>(); properties.put("transactionType", "APPEND"); properties.put("createdBy", "testUser"); - Transaction appendTxn = + SourcedTransaction appendTxn = dataset .newTransactionBuilder() .operation( @@ -125,7 +125,7 @@ public void testCommitToUri(@TempDir Path tempDir) { // Build a transaction targeting a URI (no existing dataset) Transaction txn = - new Transaction.Builder(datasetPath, allocator) + new Transaction.Builder() .operation( Overwrite.builder() .fragments(Collections.singletonList(fragmentMeta)) @@ -133,7 +133,7 @@ public void testCommitToUri(@TempDir Path tempDir) { .build()) .build(); - try (Dataset committedDataset = txn.commit()) { + try (Dataset committedDataset = new CommitBuilder(datasetPath, allocator).execute(txn)) { assertEquals(1, committedDataset.version()); assertEquals(20, committedDataset.countRows()); } finally { diff --git a/java/src/test/java/org/lance/index/ScalarIndexTest.java b/java/src/test/java/org/lance/index/ScalarIndexTest.java index 8b756633692..3c84d692b38 100644 --- a/java/src/test/java/org/lance/index/ScalarIndexTest.java +++ b/java/src/test/java/org/lance/index/ScalarIndexTest.java @@ -15,8 +15,8 @@ import org.lance.Dataset; import org.lance.Fragment; +import org.lance.SourcedTransaction; import org.lance.TestUtils; -import org.lance.Transaction; import org.lance.WriteParams; import org.lance.index.scalar.ScalarIndexParams; import org.lance.ipc.LanceScanner; @@ -167,7 +167,7 @@ public void testCreateBTreeIndexDistributively(@TempDir Path tempDir) throws Exc CreateIndex createIndexOp = CreateIndex.builder().withNewIndices(Collections.singletonList(index)).build(); - Transaction createIndexTx = + SourcedTransaction createIndexTx = dataset.newTransactionBuilder().operation(createIndexOp).build(); try (Dataset newDataset = createIndexTx.commit()) { @@ -244,7 +244,7 @@ public void testRangedBTreeIndex(@TempDir Path tempDir) throws Exception { CreateIndex createIndexOp = CreateIndex.builder().withNewIndices(Collections.singletonList(index)).build(); - Transaction createIndexTx = + SourcedTransaction createIndexTx = dataset.newTransactionBuilder().operation(createIndexOp).build(); try (Dataset newDataset = createIndexTx.commit()) { diff --git a/java/src/test/java/org/lance/index/VectorIndexTest.java b/java/src/test/java/org/lance/index/VectorIndexTest.java index 771505b9efd..912973b04e1 100755 --- a/java/src/test/java/org/lance/index/VectorIndexTest.java +++ b/java/src/test/java/org/lance/index/VectorIndexTest.java @@ -15,8 +15,8 @@ import org.lance.Dataset; import org.lance.Fragment; +import org.lance.SourcedTransaction; import org.lance.TestVectorDataset; -import org.lance.Transaction; import org.lance.index.vector.IvfBuildParams; import org.lance.index.vector.PQBuildParams; import org.lance.index.vector.RQBuildParams; @@ -130,7 +130,7 @@ public void testCreateIvfFlatIndexDistributively(@TempDir Path tempDir) throws E CreateIndex createIndexOp = CreateIndex.builder().withNewIndices(Collections.singletonList(index)).build(); - Transaction createIndexTx = + SourcedTransaction createIndexTx = dataset.newTransactionBuilder().operation(createIndexOp).build(); try (Dataset newDataset = createIndexTx.commit()) { @@ -248,7 +248,7 @@ public void testCreateIvfPqIndexDistributively(@TempDir Path tempDir) throws Exc CreateIndex createIndexOp = CreateIndex.builder().withNewIndices(Collections.singletonList(index)).build(); - Transaction createIndexTx = + SourcedTransaction createIndexTx = dataset.newTransactionBuilder().operation(createIndexOp).build(); try (Dataset newDataset = createIndexTx.commit()) { @@ -350,7 +350,7 @@ public void testCreateIvfSqIndexDistributively(@TempDir Path tempDir) throws Exc CreateIndex createIndexOp = CreateIndex.builder().withNewIndices(Collections.singletonList(index)).build(); - Transaction createIndexTx = + SourcedTransaction createIndexTx = dataset.newTransactionBuilder().operation(createIndexOp).build(); try (Dataset newDataset = createIndexTx.commit()) { diff --git a/java/src/test/java/org/lance/operation/AppendTest.java b/java/src/test/java/org/lance/operation/AppendTest.java index 2575d3afd00..0c93d3a0ee6 100644 --- a/java/src/test/java/org/lance/operation/AppendTest.java +++ b/java/src/test/java/org/lance/operation/AppendTest.java @@ -15,8 +15,8 @@ import org.lance.Dataset; import org.lance.FragmentMetadata; +import org.lance.SourcedTransaction; import org.lance.TestUtils; -import org.lance.Transaction; import org.apache.arrow.memory.RootAllocator; import org.junit.jupiter.api.Test; @@ -62,7 +62,7 @@ void testAppendMultipleFragments(@TempDir Path tempDir) { testDataset.createNewFragment(rowCount), testDataset.createNewFragment(rowCount)); - Transaction transaction = + SourcedTransaction transaction = dataset .newTransactionBuilder() .operation(Append.builder().fragments(fragments).build()) @@ -72,7 +72,7 @@ void testAppendMultipleFragments(@TempDir Path tempDir) { assertEquals(2, dataset.version()); assertEquals(rowCount * 3, dataset.countRows()); assertEquals(3, dataset.getFragments().size()); - assertEquals(transaction, dataset.readTransaction().orElse(null)); + assertEquals(transaction.transaction(), dataset.readTransaction().orElse(null)); } } } @@ -88,7 +88,7 @@ void testAppendEmptyFragmentList(@TempDir Path tempDir) { assertThrows( IllegalArgumentException.class, () -> { - Transaction transaction = + SourcedTransaction transaction = dataset .newTransactionBuilder() .operation(Append.builder().fragments(new ArrayList<>()).build()) diff --git a/java/src/test/java/org/lance/operation/DataReplacementTest.java b/java/src/test/java/org/lance/operation/DataReplacementTest.java index 6d0586cc2b1..c75ef55567d 100644 --- a/java/src/test/java/org/lance/operation/DataReplacementTest.java +++ b/java/src/test/java/org/lance/operation/DataReplacementTest.java @@ -16,8 +16,8 @@ import org.lance.Dataset; import org.lance.Fragment; import org.lance.FragmentMetadata; +import org.lance.SourcedTransaction; import org.lance.TestUtils; -import org.lance.Transaction; import org.lance.WriteParams; import org.lance.fragment.DataFile; import org.lance.ipc.LanceScanner; @@ -71,7 +71,7 @@ void testDataReplacement(@TempDir Path tempDir) throws Exception { List fragmentMetas = Fragment.create(datasetPath, allocator, idRoot, new WriteParams.Builder().build()); - Transaction appendTxn = + SourcedTransaction appendTxn = dataset .newTransactionBuilder() .operation(Append.builder().fragments(fragmentMetas).build()) @@ -124,7 +124,7 @@ void testDataReplacement(@TempDir Path tempDir) throws Exception { Collections.singletonList( new DataReplacement.DataReplacementGroup( fragmentMetas.get(0).getId(), datafile)); - Transaction replaceTxn = + SourcedTransaction replaceTxn = initDataset .newTransactionBuilder() .operation(DataReplacement.builder().replacements(replacementGroups).build()) diff --git a/java/src/test/java/org/lance/operation/DeleteTest.java b/java/src/test/java/org/lance/operation/DeleteTest.java index 2151b31f9de..20a4929b4e0 100644 --- a/java/src/test/java/org/lance/operation/DeleteTest.java +++ b/java/src/test/java/org/lance/operation/DeleteTest.java @@ -15,6 +15,7 @@ import org.lance.Dataset; import org.lance.FragmentMetadata; +import org.lance.SourcedTransaction; import org.lance.TestUtils; import org.lance.Transaction; @@ -42,7 +43,7 @@ void testDelete(@TempDir Path tempDir) { int rowCount = 20; FragmentMetadata fragmentMeta0 = testDataset.createNewFragment(rowCount); FragmentMetadata fragmentMeta1 = testDataset.createNewFragment(rowCount); - Transaction transaction = + SourcedTransaction transaction = dataset .newTransactionBuilder() .operation( @@ -60,7 +61,7 @@ void testDelete(@TempDir Path tempDir) { .map(t -> Long.valueOf(t.getId())) .collect(Collectors.toList()); - Transaction delete = + SourcedTransaction delete = dataset .newTransactionBuilder() .operation( diff --git a/java/src/test/java/org/lance/operation/MergeTest.java b/java/src/test/java/org/lance/operation/MergeTest.java index 8268aa23071..ef01d9654a3 100644 --- a/java/src/test/java/org/lance/operation/MergeTest.java +++ b/java/src/test/java/org/lance/operation/MergeTest.java @@ -15,8 +15,8 @@ import org.lance.Dataset; import org.lance.FragmentMetadata; +import org.lance.SourcedTransaction; import org.lance.TestUtils; -import org.lance.Transaction; import org.lance.fragment.DataFile; import org.lance.ipc.LanceScanner; @@ -90,7 +90,7 @@ void testMergeNewColumn(@TempDir Path tempDir) throws Exception { fragmentMeta.getDeletionFile(), fragmentMeta.getRowIdMeta()); - Transaction mergeTransaction = + SourcedTransaction mergeTransaction = initialDataset .newTransactionBuilder() .operation( @@ -169,7 +169,7 @@ void testReplaceAsDiffColumns(@TempDir Path tempDir) throws Exception { fragmentMeta.getDeletionFile(), fragmentMeta.getRowIdMeta()); - Transaction mergeTransaction = + SourcedTransaction mergeTransaction = initialDataset .newTransactionBuilder() .operation( @@ -254,7 +254,7 @@ void testMergeExistingColumn(@TempDir Path tempDir) throws Exception { fragmentMeta.getDeletionFile(), fragmentMeta.getRowIdMeta()); - Transaction mergeTransaction = + SourcedTransaction mergeTransaction = initialDataset .newTransactionBuilder() .operation( diff --git a/java/src/test/java/org/lance/operation/OperationTestBase.java b/java/src/test/java/org/lance/operation/OperationTestBase.java index f89aeb6e829..7a522f60ac2 100644 --- a/java/src/test/java/org/lance/operation/OperationTestBase.java +++ b/java/src/test/java/org/lance/operation/OperationTestBase.java @@ -15,8 +15,8 @@ import org.lance.Dataset; import org.lance.FragmentMetadata; +import org.lance.SourcedTransaction; import org.lance.TestUtils; -import org.lance.Transaction; import org.lance.file.LanceFileWriter; import org.lance.fragment.DataFile; @@ -53,7 +53,7 @@ protected Dataset createAndAppendRows(TestUtils.SimpleTestDataset suite, int row dataset = suite.createEmptyDataset(); FragmentMetadata fragmentMeta = suite.createNewFragment(rowCount); - Transaction appendTxn = + SourcedTransaction appendTxn = dataset .newTransactionBuilder() .operation(Append.builder().fragments(Collections.singletonList(fragmentMeta)).build()) diff --git a/java/src/test/java/org/lance/operation/OverwriteTest.java b/java/src/test/java/org/lance/operation/OverwriteTest.java index f5a90de5b49..7f6bd15e4ab 100644 --- a/java/src/test/java/org/lance/operation/OverwriteTest.java +++ b/java/src/test/java/org/lance/operation/OverwriteTest.java @@ -16,8 +16,8 @@ import org.lance.Dataset; import org.lance.Fragment; import org.lance.FragmentMetadata; +import org.lance.SourcedTransaction; import org.lance.TestUtils; -import org.lance.Transaction; import org.lance.ipc.LanceScanner; import org.apache.arrow.memory.RootAllocator; @@ -43,7 +43,7 @@ void testOverwrite(@TempDir Path tempDir) throws Exception { // Commit fragment int rowCount = 20; FragmentMetadata fragmentMeta = testDataset.createNewFragment(rowCount); - Transaction transaction = + SourcedTransaction transaction = dataset .newTransactionBuilder() .operation( @@ -91,7 +91,7 @@ void testOverwrite(@TempDir Path tempDir) throws Exception { Schema schemaRes = scanner.schema(); assertEquals(testDataset.getSchema(), schemaRes); } - assertEquals(transaction, dataset.readTransaction().orElse(null)); + assertEquals(transaction.transaction(), dataset.readTransaction().orElse(null)); } } } diff --git a/java/src/test/java/org/lance/operation/ProjectTest.java b/java/src/test/java/org/lance/operation/ProjectTest.java index a8124a672ca..5fef1f078aa 100644 --- a/java/src/test/java/org/lance/operation/ProjectTest.java +++ b/java/src/test/java/org/lance/operation/ProjectTest.java @@ -14,8 +14,8 @@ package org.lance.operation; import org.lance.Dataset; +import org.lance.SourcedTransaction; import org.lance.TestUtils; -import org.lance.Transaction; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.types.pojo.Field; @@ -43,7 +43,7 @@ void testProjection(@TempDir Path tempDir) { assertEquals(testDataset.getSchema(), dataset.getSchema()); List fieldList = new ArrayList<>(testDataset.getSchema().getFields()); Collections.reverse(fieldList); - Transaction txn1 = + SourcedTransaction txn1 = dataset .newTransactionBuilder() .operation(Project.builder().schema(new Schema(fieldList)).build()) @@ -54,7 +54,7 @@ void testProjection(@TempDir Path tempDir) { assertEquals(2, committedDataset.version()); assertEquals(new Schema(fieldList), committedDataset.getSchema()); fieldList.remove(1); - Transaction txn2 = + SourcedTransaction txn2 = committedDataset .newTransactionBuilder() .operation(Project.builder().schema(new Schema(fieldList)).build()) @@ -64,8 +64,8 @@ void testProjection(@TempDir Path tempDir) { assertEquals(2, committedDataset.version()); assertEquals(3, committedDataset2.version()); assertEquals(new Schema(fieldList), committedDataset2.getSchema()); - assertEquals(txn1, committedDataset.readTransaction().orElse(null)); - assertEquals(txn2, committedDataset2.readTransaction().orElse(null)); + assertEquals(txn1.transaction(), committedDataset.readTransaction().orElse(null)); + assertEquals(txn2.transaction(), committedDataset2.readTransaction().orElse(null)); } } } diff --git a/java/src/test/java/org/lance/operation/ReserveFragmentsTest.java b/java/src/test/java/org/lance/operation/ReserveFragmentsTest.java index 14089482c08..3b0d267aec1 100644 --- a/java/src/test/java/org/lance/operation/ReserveFragmentsTest.java +++ b/java/src/test/java/org/lance/operation/ReserveFragmentsTest.java @@ -16,8 +16,8 @@ import org.lance.Dataset; import org.lance.Fragment; import org.lance.FragmentMetadata; +import org.lance.SourcedTransaction; import org.lance.TestUtils; -import org.lance.Transaction; import org.apache.arrow.memory.RootAllocator; import org.junit.jupiter.api.Assertions; @@ -42,7 +42,7 @@ void testReserveFragments(@TempDir Path tempDir) throws Exception { // Create an initial fragment to establish a baseline fragment ID FragmentMetadata initialFragmentMeta = testDataset.createNewFragment(10); - Transaction appendTransaction = + SourcedTransaction appendTransaction = dataset .newTransactionBuilder() .operation( @@ -53,7 +53,7 @@ void testReserveFragments(@TempDir Path tempDir) throws Exception { try (Dataset datasetWithFragment = appendTransaction.commit()) { // Reserve fragment IDs int numFragmentsToReserve = 5; - Transaction reserveTransaction = + SourcedTransaction reserveTransaction = datasetWithFragment .newTransactionBuilder() .operation( @@ -62,7 +62,7 @@ void testReserveFragments(@TempDir Path tempDir) throws Exception { try (Dataset datasetWithReservedFragments = reserveTransaction.commit()) { // Create a new fragment and verify its ID reflects the reservation FragmentMetadata newFragmentMeta = testDataset.createNewFragment(10); - Transaction appendTransaction2 = + SourcedTransaction appendTransaction2 = datasetWithReservedFragments .newTransactionBuilder() .operation( @@ -89,7 +89,8 @@ void testReserveFragments(@TempDir Path tempDir) throws Exception { // Verify the transaction is recorded assertEquals( - reserveTransaction, datasetWithReservedFragments.readTransaction().orElse(null)); + reserveTransaction.transaction(), + datasetWithReservedFragments.readTransaction().orElse(null)); } } } diff --git a/java/src/test/java/org/lance/operation/RestoreTest.java b/java/src/test/java/org/lance/operation/RestoreTest.java index 751263f4c06..ff2604bc440 100644 --- a/java/src/test/java/org/lance/operation/RestoreTest.java +++ b/java/src/test/java/org/lance/operation/RestoreTest.java @@ -15,8 +15,8 @@ import org.lance.Dataset; import org.lance.FragmentMetadata; +import org.lance.SourcedTransaction; import org.lance.TestUtils; -import org.lance.Transaction; import org.apache.arrow.memory.RootAllocator; import org.junit.jupiter.api.Test; @@ -43,7 +43,7 @@ void testRestore(@TempDir Path tempDir) { // Append data to create a new version int rowCount = 20; FragmentMetadata fragmentMeta = testDataset.createNewFragment(rowCount); - Transaction transaction = + SourcedTransaction transaction = dataset .newTransactionBuilder() .operation( @@ -56,7 +56,7 @@ void testRestore(@TempDir Path tempDir) { assertEquals(rowCount, modifiedDataset.countRows()); // Restore to the initial version - Transaction restoreTransaction = + SourcedTransaction restoreTransaction = modifiedDataset .newTransactionBuilder() .operation(new Restore.Builder().version(initialVersion).build()) @@ -66,7 +66,8 @@ void testRestore(@TempDir Path tempDir) { assertEquals(initialVersion + 2, restoredDataset.version()); // Initial dataset had 0 rows assertEquals(0, restoredDataset.countRows()); - assertEquals(restoreTransaction, restoredDataset.readTransaction().orElse(null)); + assertEquals( + restoreTransaction.transaction(), restoredDataset.readTransaction().orElse(null)); } } } diff --git a/java/src/test/java/org/lance/operation/RewriteTest.java b/java/src/test/java/org/lance/operation/RewriteTest.java index 4a73e2807e8..41d050d0e32 100644 --- a/java/src/test/java/org/lance/operation/RewriteTest.java +++ b/java/src/test/java/org/lance/operation/RewriteTest.java @@ -15,8 +15,8 @@ import org.lance.Dataset; import org.lance.FragmentMetadata; +import org.lance.SourcedTransaction; import org.lance.TestUtils; -import org.lance.Transaction; import org.apache.arrow.memory.RootAllocator; import org.junit.jupiter.api.Test; @@ -44,7 +44,7 @@ void testRewrite(@TempDir Path tempDir) { FragmentMetadata fragmentMeta1 = testDataset.createNewFragment(rowCount); FragmentMetadata fragmentMeta2 = testDataset.createNewFragment(rowCount); - Transaction appendTx = + SourcedTransaction appendTx = dataset .newTransactionBuilder() .operation( @@ -72,7 +72,7 @@ void testRewrite(@TempDir Path tempDir) { groups.add(group); // Create and commit the rewrite transaction - Transaction rewriteTx = + SourcedTransaction rewriteTx = datasetWithData .newTransactionBuilder() .operation(Rewrite.builder().groups(groups).build()) @@ -84,7 +84,7 @@ void testRewrite(@TempDir Path tempDir) { assertEquals(rowCount * 2, rewrittenDataset.countRows()); // Verify that the transaction was recorded - assertEquals(rewriteTx, rewrittenDataset.readTransaction().orElse(null)); + assertEquals(rewriteTx.transaction(), rewrittenDataset.readTransaction().orElse(null)); } } } diff --git a/java/src/test/java/org/lance/operation/TruncateTest.java b/java/src/test/java/org/lance/operation/TruncateTest.java index 93f5b689e8c..8c7e1d12238 100644 --- a/java/src/test/java/org/lance/operation/TruncateTest.java +++ b/java/src/test/java/org/lance/operation/TruncateTest.java @@ -15,8 +15,8 @@ import org.lance.Dataset; import org.lance.FragmentMetadata; +import org.lance.SourcedTransaction; import org.lance.TestUtils; -import org.lance.Transaction; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.types.pojo.Schema; @@ -40,7 +40,7 @@ void testTruncateTable(@TempDir Path tempDir) throws Exception { // Append some data int rowCount = 20; FragmentMetadata fragmentMeta = testDataset.createNewFragment(rowCount); - Transaction transaction = + SourcedTransaction transaction = dataset .newTransactionBuilder() .operation( diff --git a/java/src/test/java/org/lance/operation/UpdateConfigTest.java b/java/src/test/java/org/lance/operation/UpdateConfigTest.java index 14c3ea444f0..887fc412133 100644 --- a/java/src/test/java/org/lance/operation/UpdateConfigTest.java +++ b/java/src/test/java/org/lance/operation/UpdateConfigTest.java @@ -14,8 +14,8 @@ package org.lance.operation; import org.lance.Dataset; +import org.lance.SourcedTransaction; import org.lance.TestUtils; -import org.lance.Transaction; import org.apache.arrow.memory.RootAllocator; import org.junit.jupiter.api.Test; @@ -45,7 +45,7 @@ void testUpdateConfig(@TempDir Path tempDir) { UpdateMap configUpdates = UpdateMap.builder().updates(configValues).replace(false).build(); - Transaction transaction = + SourcedTransaction transaction = dataset .newTransactionBuilder() .operation(UpdateConfig.builder().configUpdates(configUpdates).build()) diff --git a/java/src/test/java/org/lance/operation/UpdateTest.java b/java/src/test/java/org/lance/operation/UpdateTest.java index 0e3e2276204..dd1eef23076 100644 --- a/java/src/test/java/org/lance/operation/UpdateTest.java +++ b/java/src/test/java/org/lance/operation/UpdateTest.java @@ -16,6 +16,7 @@ import org.lance.Dataset; import org.lance.Fragment; import org.lance.FragmentMetadata; +import org.lance.SourcedTransaction; import org.lance.TestUtils; import org.lance.Transaction; import org.lance.fragment.FragmentUpdateResult; @@ -54,7 +55,7 @@ void testUpdate(@TempDir Path tempDir) throws Exception { // Commit fragment int rowCount = 20; FragmentMetadata fragmentMeta = testDataset.createNewFragment(rowCount); - Transaction transaction = + SourcedTransaction transaction = dataset .newTransactionBuilder() .operation( @@ -99,7 +100,7 @@ void testUpdate(@TempDir Path tempDir) throws Exception { assertEquals(rowCount, dataset.countRows()); Transaction txn = dataset.readTransaction().orElse(null); - assertEquals(transaction, txn); + assertEquals(transaction.transaction(), txn); } } } @@ -122,7 +123,7 @@ void testUpdateColumns(@TempDir Path tempDir) throws Exception { */ int rowCount = 6; FragmentMetadata fragmentMeta = testDataset.createNewFragment(rowCount); - Transaction appendTransaction = + SourcedTransaction appendTransaction = dataset .newTransactionBuilder() .operation( @@ -145,7 +146,7 @@ void testUpdateColumns(@TempDir Path tempDir) throws Exception { * 3: | null | null | */ FragmentUpdateResult updateResult = testDataset.updateColumn(targetFragment, updateRowCount); - Transaction updateTransaction = + SourcedTransaction updateTransaction = dataset .newTransactionBuilder() .operation( From 666ad2abe091e7f5adfb3d96557bda929803f9b7 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Tue, 24 Feb 2026 14:11:12 -0600 Subject: [PATCH 3/7] enable all options in rust SDK Signed-off-by: Daniel Rammer --- java/lance-jni/src/blocking_dataset.rs | 30 +++-- java/lance-jni/src/transaction.rs | 111 +++++++++++++++++- .../main/java/org/lance/CommitBuilder.java | 89 +++++++++++++- .../java/org/lance/SourcedTransaction.java | 18 +++ java/src/main/java/org/lance/Transaction.java | 29 ++++- 5 files changed, 261 insertions(+), 16 deletions(-) diff --git a/java/lance-jni/src/blocking_dataset.rs b/java/lance-jni/src/blocking_dataset.rs index 32cf9a14ce0..b6431d17e99 100644 --- a/java/lance-jni/src/blocking_dataset.rs +++ b/java/lance-jni/src/blocking_dataset.rs @@ -44,6 +44,7 @@ use lance::session::Session as LanceSession; use lance::table::format::IndexMetadata; use lance::table::format::{BasePath, Fragment}; use lance_core::datatypes::Schema as LanceSchema; +use lance_file::version::LanceFileVersion; use lance_index::optimize::OptimizeOptions; use lance_index::scalar::btree::BTreeParameters; use lance_index::DatasetIndexExt; @@ -325,20 +326,35 @@ impl BlockingDataset { Ok(indexes) } + #[allow(clippy::too_many_arguments)] pub fn commit_transaction( &mut self, transaction: Transaction, store_params: ObjectStoreParams, detached: bool, enable_v2_manifest_paths: bool, + use_stable_row_ids: Option, + storage_format: Option, + max_retries: u32, + skip_auto_cleanup: bool, ) -> Result { - let new_dataset = RT.block_on( - CommitBuilder::new(Arc::new(self.clone().inner)) - .with_store_params(store_params) - .with_detached(detached) - .enable_v2_manifest_paths(enable_v2_manifest_paths) - .execute(transaction), - )?; + let mut builder = CommitBuilder::new(Arc::new(self.clone().inner)) + .with_store_params(store_params) + .with_detached(detached) + .enable_v2_manifest_paths(enable_v2_manifest_paths); + if let Some(use_stable) = use_stable_row_ids { + builder = builder.use_stable_row_ids(use_stable); + } + if let Some(format) = storage_format { + builder = builder.with_storage_format(format); + } + if max_retries > 0 { + builder = builder.with_max_retries(max_retries); + } + if skip_auto_cleanup { + builder = builder.with_skip_auto_cleanup(true); + } + let new_dataset = RT.block_on(builder.execute(transaction))?; Ok(BlockingDataset { inner: new_dataset }) } diff --git a/java/lance-jni/src/transaction.rs b/java/lance-jni/src/transaction.rs index a91e393688c..678d5568db6 100644 --- a/java/lance-jni/src/transaction.rs +++ b/java/lance-jni/src/transaction.rs @@ -15,7 +15,7 @@ use arrow::datatypes::Schema; use arrow_schema::ffi::FFI_ArrowSchema; use chrono::DateTime; use jni::objects::{JByteArray, JLongArray, JMap, JObject, JString, JValue, JValueGen}; -use jni::sys::jboolean; +use jni::sys::{jboolean, jint}; use jni::JNIEnv; use lance::dataset::transaction::{ DataReplacementGroup, Operation, RewriteGroup, RewrittenIndex, Transaction, TransactionBuilder, @@ -26,6 +26,7 @@ use lance::io::commit::namespace_manifest::LanceNamespaceExternalManifestStore; use lance::io::ObjectStoreParams; use lance::table::format::{Fragment, IndexMetadata}; use lance_core::datatypes::Schema as LanceSchema; +use lance_file::version::LanceFileVersion; use lance_io::object_store::StorageOptionsProvider; use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler; use lance_table::io::commit::CommitHandler; @@ -306,6 +307,10 @@ pub(crate) fn convert_to_java_transaction<'local>( transaction: Transaction, ) -> Result> { let uuid = env.new_string(transaction.uuid)?; + let tag = match transaction.tag { + Some(tag) => JObject::from(env.new_string(tag)?), + None => JObject::null(), + }; let transaction_properties = match transaction.transaction_properties { Some(properties) => to_java_map(env, &properties)?, _ => JObject::null(), @@ -314,11 +319,12 @@ pub(crate) fn convert_to_java_transaction<'local>( let java_transaction = env.new_object( "org/lance/Transaction", - "(JLjava/lang/String;Lorg/lance/operation/Operation;Ljava/util/Map;)V", + "(JLjava/lang/String;Lorg/lance/operation/Operation;Ljava/lang/String;Ljava/util/Map;)V", &[ JValue::Long(transaction.read_version as i64), JValue::Object(&uuid), JValue::Object(&operation), + JValue::Object(&tag), JValue::Object(&transaction_properties), ], )?; @@ -585,7 +591,23 @@ pub(crate) fn convert_to_java_schema<'local>( .l()?) } +fn parse_storage_format(name: &str) -> Result { + match name.to_lowercase().as_str() { + "legacy" => Ok(LanceFileVersion::Legacy), + "v2_0" | "v2.0" => Ok(LanceFileVersion::V2_0), + "stable" => Ok(LanceFileVersion::Stable), + "v2_1" | "v2.1" => Ok(LanceFileVersion::V2_1), + "next" => Ok(LanceFileVersion::Next), + "v2_2" | "v2.2" => Ok(LanceFileVersion::V2_2), + _ => Err(Error::input_error(format!( + "Unknown storage format: {}", + name + ))), + } +} + #[no_mangle] +#[allow(clippy::too_many_arguments)] pub extern "system" fn Java_org_lance_CommitBuilder_nativeCommitToDataset<'local>( mut env: JNIEnv<'local>, _cls: JObject, @@ -594,6 +616,10 @@ pub extern "system" fn Java_org_lance_CommitBuilder_nativeCommitToDataset<'local detached_jbool: jboolean, enable_v2_manifest_paths: jboolean, write_params_obj: JObject, + use_stable_row_ids_obj: JObject, + storage_format_obj: JObject, + max_retries: jint, + skip_auto_cleanup: jboolean, ) -> JObject<'local> { ok_or_throw!( env, @@ -604,10 +630,15 @@ pub extern "system" fn Java_org_lance_CommitBuilder_nativeCommitToDataset<'local detached_jbool != 0, enable_v2_manifest_paths != 0, write_params_obj, + use_stable_row_ids_obj, + storage_format_obj, + max_retries as u32, + skip_auto_cleanup != 0, ) ) } +#[allow(clippy::too_many_arguments)] fn inner_commit_to_dataset<'local>( env: &mut JNIEnv<'local>, java_dataset: JObject, @@ -615,6 +646,10 @@ fn inner_commit_to_dataset<'local>( detached: bool, enable_v2_manifest_paths: bool, write_params_obj: JObject, + use_stable_row_ids_obj: JObject, + storage_format_obj: JObject, + max_retries: u32, + skip_auto_cleanup: bool, ) -> Result> { let write_param = if write_params_obj.is_null() { HashMap::new() @@ -623,6 +658,24 @@ fn inner_commit_to_dataset<'local>( to_rust_map(env, &write_param_jmap)? }; + // Parse optional use_stable_row_ids (boxed Boolean) + let use_stable_row_ids = if use_stable_row_ids_obj.is_null() { + None + } else { + let val = env + .call_method(&use_stable_row_ids_obj, "booleanValue", "()Z", &[])? + .z()?; + Some(val) + }; + + // Parse optional storage format string + let storage_format = if storage_format_obj.is_null() { + None + } else { + let format_str: String = JString::from(storage_format_obj).extract(env)?; + Some(parse_storage_format(&format_str)?) + }; + // Get the Dataset's storage_options_accessor and merge with write_param let storage_options_accessor = { let dataset_guard = @@ -684,6 +737,10 @@ fn inner_commit_to_dataset<'local>( store_params, detached, enable_v2_manifest_paths, + use_stable_row_ids, + storage_format, + max_retries, + skip_auto_cleanup, )? }; new_blocking_ds.into_java(env) @@ -706,6 +763,11 @@ fn convert_to_rust_transaction( .l()?; let op = convert_to_rust_operation(env, &op, allocator)?; + let tag = env.get_optional_from_method(&java_transaction, "tag", |env, tag_obj| { + let tag_str = JString::from(tag_obj); + tag_str.extract(env) + })?; + let transaction_properties = env.get_optional_from_method( &java_transaction, "transactionProperties", @@ -716,6 +778,7 @@ fn convert_to_rust_transaction( )?; Ok(TransactionBuilder::new(read_ver, op) .uuid(uuid) + .tag(tag) .transaction_properties(transaction_properties.map(Arc::new)) .build()) } @@ -1081,6 +1144,7 @@ fn export_update_map<'a>( } #[no_mangle] +#[allow(clippy::too_many_arguments)] pub extern "system" fn Java_org_lance_CommitBuilder_nativeCommitToUri<'local>( mut env: JNIEnv<'local>, _cls: JObject, @@ -1092,6 +1156,10 @@ pub extern "system" fn Java_org_lance_CommitBuilder_nativeCommitToUri<'local>( table_id_obj: JObject, allocator_obj: JObject, write_params_obj: JObject, + use_stable_row_ids_obj: JObject, + storage_format_obj: JObject, + max_retries: jint, + skip_auto_cleanup: jboolean, ) -> JObject<'local> { ok_or_throw!( env, @@ -1105,6 +1173,10 @@ pub extern "system" fn Java_org_lance_CommitBuilder_nativeCommitToUri<'local>( table_id_obj, allocator_obj, write_params_obj, + use_stable_row_ids_obj, + storage_format_obj, + max_retries as u32, + skip_auto_cleanup != 0, ) ) } @@ -1120,6 +1192,10 @@ fn inner_commit_to_uri<'local>( table_id_obj: JObject, allocator_obj: JObject, write_params_obj: JObject, + use_stable_row_ids_obj: JObject, + storage_format_obj: JObject, + max_retries: u32, + skip_auto_cleanup: bool, ) -> Result> { let uri_str: String = uri.extract(env)?; @@ -1131,6 +1207,24 @@ fn inner_commit_to_uri<'local>( to_rust_map(env, &write_param_jmap)? }; + // Parse optional use_stable_row_ids (boxed Boolean) + let use_stable_row_ids = if use_stable_row_ids_obj.is_null() { + None + } else { + let val = env + .call_method(&use_stable_row_ids_obj, "booleanValue", "()Z", &[])? + .z()?; + Some(val) + }; + + // Parse optional storage format string + let storage_format = if storage_format_obj.is_null() { + None + } else { + let format_str: String = JString::from(storage_format_obj).extract(env)?; + Some(parse_storage_format(&format_str)?) + }; + // Build storage options accessor let storage_options_provider: Option = env .get_optional(&storage_options_provider_obj, |env, provider_obj| { @@ -1171,6 +1265,19 @@ fn inner_commit_to_uri<'local>( .with_store_params(store_params) .enable_v2_manifest_paths(enable_v2_manifest_paths); + if let Some(use_stable) = use_stable_row_ids { + builder = builder.use_stable_row_ids(use_stable); + } + if let Some(format) = storage_format { + builder = builder.with_storage_format(format); + } + if max_retries > 0 { + builder = builder.with_max_retries(max_retries); + } + if skip_auto_cleanup { + builder = builder.with_skip_auto_cleanup(true); + } + // Set namespace commit handler if provided let namespace_info = extract_namespace_info(env, &namespace_obj, &table_id_obj)?; if let Some((ns, tid)) = namespace_info { diff --git a/java/src/main/java/org/lance/CommitBuilder.java b/java/src/main/java/org/lance/CommitBuilder.java index c53742fd0cd..4429ffb5c10 100644 --- a/java/src/main/java/org/lance/CommitBuilder.java +++ b/java/src/main/java/org/lance/CommitBuilder.java @@ -73,6 +73,10 @@ public class CommitBuilder { private List tableId; private boolean enableV2ManifestPaths = true; private boolean detached = false; + private Boolean useStableRowIds; + private String storageFormat; + private int maxRetries = 0; + private boolean skipAutoCleanup = false; /** * Create a commit builder for committing against an existing dataset. @@ -170,6 +174,63 @@ public CommitBuilder detached(boolean detached) { return this; } + /** + * Whether to use stable row ids. This makes the {@code _rowid} column stable after compaction, + * but not updates. + * + *

This is only used for new datasets. Existing datasets will use their existing setting. + * Default is false. + * + * @param useStableRowIds whether to use stable row ids + * @return this builder instance + */ + public CommitBuilder useStableRowIds(boolean useStableRowIds) { + this.useStableRowIds = useStableRowIds; + return this; + } + + /** + * Set the storage format to use for the dataset. + * + *

This is only needed when creating a new empty table. If any data files are passed, the + * storage format will be inferred from the data files. Valid values: "legacy", "v2_0", "stable", + * "v2_1", "next", "v2_2". + * + * @param storageFormat the storage format name + * @return this builder instance + */ + public CommitBuilder storageFormat(String storageFormat) { + this.storageFormat = storageFormat; + return this; + } + + /** + * Set the maximum number of retries for commit operations. + * + *

If a commit operation fails, it will be retried up to {@code maxRetries} times. Default is + * 0. + * + * @param maxRetries the maximum number of retries + * @return this builder instance + */ + public CommitBuilder maxRetries(int maxRetries) { + this.maxRetries = maxRetries; + return this; + } + + /** + * Set whether to skip automatic cleanup after commit. + * + *

Default is false. + * + * @param skipAutoCleanup if true, skip automatic cleanup + * @return this builder instance + */ + public CommitBuilder skipAutoCleanup(boolean skipAutoCleanup) { + this.skipAutoCleanup = skipAutoCleanup; + return this; + } + /** * Execute the commit with the given transaction. * @@ -183,7 +244,15 @@ public Dataset execute(Transaction transaction) { Preconditions.checkNotNull(transaction, "Transaction must not be null"); if (dataset != null) { return nativeCommitToDataset( - dataset, transaction, detached, enableV2ManifestPaths, writeParams); + dataset, + transaction, + detached, + enableV2ManifestPaths, + writeParams, + useStableRowIds, + storageFormat, + maxRetries, + skipAutoCleanup); } if (uri != null) { return nativeCommitToUri( @@ -194,7 +263,11 @@ public Dataset execute(Transaction transaction) { namespace, tableId, allocator, - writeParams); + writeParams, + useStableRowIds, + storageFormat, + maxRetries, + skipAutoCleanup); } throw new IllegalStateException("CommitBuilder requires either a dataset or a URI"); } @@ -204,7 +277,11 @@ private static native Dataset nativeCommitToDataset( Transaction transaction, boolean detached, boolean enableV2ManifestPaths, - Map writeParams); + Map writeParams, + Boolean useStableRowIds, + String storageFormat, + int maxRetries, + boolean skipAutoCleanup); private static native Dataset nativeCommitToUri( String uri, @@ -214,5 +291,9 @@ private static native Dataset nativeCommitToUri( Object namespace, Object tableId, Object allocator, - Map writeParams); + Map writeParams, + Boolean useStableRowIds, + String storageFormat, + int maxRetries, + boolean skipAutoCleanup); } diff --git a/java/src/main/java/org/lance/SourcedTransaction.java b/java/src/main/java/org/lance/SourcedTransaction.java index 94551ffeb82..bd28456a292 100644 --- a/java/src/main/java/org/lance/SourcedTransaction.java +++ b/java/src/main/java/org/lance/SourcedTransaction.java @@ -68,6 +68,11 @@ public Operation operation() { return transaction.operation(); } + /** Delegates to {@link Transaction#tag()}. */ + public Optional tag() { + return transaction.tag(); + } + /** Delegates to {@link Transaction#transactionProperties()}. */ public Optional> transactionProperties() { return transaction.transactionProperties(); @@ -103,6 +108,7 @@ public static class Builder { private final Dataset dataset; private long readVersion; private Operation operation; + private String tag; private Map transactionProperties; /** @@ -130,6 +136,17 @@ public Builder operation(Operation operation) { return this; } + /** + * Set an optional tag for the transaction. + * + * @param tag the tag string + * @return this builder instance + */ + public Builder tag(String tag) { + this.tag = tag; + return this; + } + public Builder transactionProperties(Map properties) { this.transactionProperties = properties; return this; @@ -141,6 +158,7 @@ public SourcedTransaction build() { new Transaction.Builder() .readVersion(readVersion) .operation(operation) + .tag(tag) .transactionProperties(transactionProperties) .build(); return new SourcedTransaction(transaction, dataset); diff --git a/java/src/main/java/org/lance/Transaction.java b/java/src/main/java/org/lance/Transaction.java index 9f7ad436f36..5f7b06ec05d 100644 --- a/java/src/main/java/org/lance/Transaction.java +++ b/java/src/main/java/org/lance/Transaction.java @@ -36,6 +36,7 @@ public class Transaction { private final long readVersion; private final String uuid; private final Operation operation; + private final Optional tag; private final Optional> transactionProperties; /** @@ -44,16 +45,19 @@ public class Transaction { * @param readVersion the version that was read when creating this transaction * @param uuid the unique identifier for this transaction * @param operation the operation to perform + * @param tag optional tag for the transaction * @param transactionProperties optional transaction properties */ private Transaction( long readVersion, String uuid, Operation operation, + String tag, Map transactionProperties) { this.readVersion = readVersion; this.uuid = uuid; this.operation = operation; + this.tag = Optional.ofNullable(tag); this.transactionProperties = Optional.ofNullable(transactionProperties); } @@ -65,7 +69,7 @@ private Transaction( * @param operation the operation to perform */ public Transaction(long readVersion, Operation operation) { - this(readVersion, UUID.randomUUID().toString(), operation, null); + this(readVersion, UUID.randomUUID().toString(), operation, null, null); } public long readVersion() { @@ -80,6 +84,11 @@ public Operation operation() { return operation; } + /** Returns the optional tag for this transaction. */ + public Optional tag() { + return tag; + } + public Optional> transactionProperties() { return transactionProperties; } @@ -95,6 +104,7 @@ public String toString() { .add("readVersion", readVersion) .add("uuid", uuid) .add("operation", operation) + .add("tag", tag) .add("transactionProperties", transactionProperties) .toString(); } @@ -111,12 +121,13 @@ public boolean equals(Object o) { return readVersion == that.readVersion && uuid.equals(that.uuid) && Objects.equals(operation, that.operation) + && Objects.equals(tag, that.tag) && Objects.equals(transactionProperties, that.transactionProperties); } @Override public int hashCode() { - return Objects.hash(readVersion, uuid, operation, transactionProperties); + return Objects.hash(readVersion, uuid, operation, tag, transactionProperties); } /** Builder for constructing {@link Transaction} instances. */ @@ -124,6 +135,7 @@ public static class Builder { private String uuid; private long readVersion; private Operation operation; + private String tag; private Map transactionProperties; public Builder() { @@ -149,6 +161,17 @@ public Builder operation(Operation operation) { return this; } + /** + * Set an optional tag for the transaction. + * + * @param tag the tag string + * @return this builder instance + */ + public Builder tag(String tag) { + this.tag = tag; + return this; + } + public Builder transactionProperties(Map properties) { this.transactionProperties = properties; return this; @@ -156,7 +179,7 @@ public Builder transactionProperties(Map properties) { public Transaction build() { Preconditions.checkState(operation != null, "TransactionBuilder has no operations"); - return new Transaction(readVersion, uuid, operation, transactionProperties); + return new Transaction(readVersion, uuid, operation, tag, transactionProperties); } } } From 498d56e7614dd13e28653b61715a1bc98355fa30 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Tue, 24 Feb 2026 14:21:44 -0600 Subject: [PATCH 4/7] updated unit tests to use CommitBuilder Signed-off-by: Daniel Rammer --- .../main/java/org/lance/CommitBuilder.java | 52 ++++++++------- java/src/main/java/org/lance/Dataset.java | 5 ++ .../java/org/lance/operation/AppendTest.java | 27 +++++--- .../lance/operation/DataReplacementTest.java | 23 ++++--- .../java/org/lance/operation/DeleteTest.java | 24 ++++--- .../java/org/lance/operation/MergeTest.java | 33 ++++++---- .../lance/operation/OperationTestBase.java | 15 +++-- .../org/lance/operation/OverwriteTest.java | 28 +++++---- .../java/org/lance/operation/ProjectTest.java | 27 ++++---- .../lance/operation/ReserveFragmentsTest.java | 39 +++++++----- .../java/org/lance/operation/RestoreTest.java | 26 ++++---- .../java/org/lance/operation/RewriteTest.java | 25 +++++--- .../org/lance/operation/TruncateTest.java | 13 ++-- .../org/lance/operation/UpdateConfigTest.java | 43 ++++++++----- .../java/org/lance/operation/UpdateTest.java | 63 +++++++++++-------- 15 files changed, 266 insertions(+), 177 deletions(-) diff --git a/java/src/main/java/org/lance/CommitBuilder.java b/java/src/main/java/org/lance/CommitBuilder.java index 4429ffb5c10..6202a2df683 100644 --- a/java/src/main/java/org/lance/CommitBuilder.java +++ b/java/src/main/java/org/lance/CommitBuilder.java @@ -243,31 +243,37 @@ public CommitBuilder skipAutoCleanup(boolean skipAutoCleanup) { public Dataset execute(Transaction transaction) { Preconditions.checkNotNull(transaction, "Transaction must not be null"); if (dataset != null) { - return nativeCommitToDataset( - dataset, - transaction, - detached, - enableV2ManifestPaths, - writeParams, - useStableRowIds, - storageFormat, - maxRetries, - skipAutoCleanup); + Dataset result = + nativeCommitToDataset( + dataset, + transaction, + detached, + enableV2ManifestPaths, + writeParams, + useStableRowIds, + storageFormat, + maxRetries, + skipAutoCleanup); + result.setAllocator(dataset.allocator()); + return result; } if (uri != null) { - return nativeCommitToUri( - uri, - transaction, - enableV2ManifestPaths, - storageOptionsProvider, - namespace, - tableId, - allocator, - writeParams, - useStableRowIds, - storageFormat, - maxRetries, - skipAutoCleanup); + Dataset result = + nativeCommitToUri( + uri, + transaction, + enableV2ManifestPaths, + storageOptionsProvider, + namespace, + tableId, + allocator, + writeParams, + useStableRowIds, + storageFormat, + maxRetries, + skipAutoCleanup); + result.setAllocator(allocator); + return result; } throw new IllegalStateException("CommitBuilder requires either a dataset or a URI"); } diff --git a/java/src/main/java/org/lance/Dataset.java b/java/src/main/java/org/lance/Dataset.java index e7536483450..4e8f2fd0c26 100644 --- a/java/src/main/java/org/lance/Dataset.java +++ b/java/src/main/java/org/lance/Dataset.java @@ -511,6 +511,11 @@ public BufferAllocator allocator() { return allocator; } + /** Package-private setter for allocator, used by {@link CommitBuilder}. */ + void setAllocator(BufferAllocator allocator) { + this.allocator = allocator; + } + /** * Create a new transaction builder at current version for the dataset. The dataset itself will * not refresh after the transaction committed. diff --git a/java/src/test/java/org/lance/operation/AppendTest.java b/java/src/test/java/org/lance/operation/AppendTest.java index 0c93d3a0ee6..325c033b769 100644 --- a/java/src/test/java/org/lance/operation/AppendTest.java +++ b/java/src/test/java/org/lance/operation/AppendTest.java @@ -13,10 +13,11 @@ */ package org.lance.operation; +import org.lance.CommitBuilder; import org.lance.Dataset; import org.lance.FragmentMetadata; -import org.lance.SourcedTransaction; import org.lance.TestUtils; +import org.lance.Transaction; import org.apache.arrow.memory.RootAllocator; import org.junit.jupiter.api.Test; @@ -62,17 +63,19 @@ void testAppendMultipleFragments(@TempDir Path tempDir) { testDataset.createNewFragment(rowCount), testDataset.createNewFragment(rowCount)); - SourcedTransaction transaction = - dataset - .newTransactionBuilder() + Transaction txn = + new Transaction.Builder() + .readVersion(dataset.version()) .operation(Append.builder().fragments(fragments).build()) .build(); - try (Dataset dataset = transaction.commit()) { + try (Dataset dataset = new CommitBuilder(this.dataset).execute(txn)) { assertEquals(2, dataset.version()); assertEquals(rowCount * 3, dataset.countRows()); assertEquals(3, dataset.getFragments().size()); - assertEquals(transaction.transaction(), dataset.readTransaction().orElse(null)); + assertEquals(txn, dataset.readTransaction().orElse(null)); + } finally { + txn.release(); } } } @@ -88,12 +91,16 @@ void testAppendEmptyFragmentList(@TempDir Path tempDir) { assertThrows( IllegalArgumentException.class, () -> { - SourcedTransaction transaction = - dataset - .newTransactionBuilder() + Transaction txn = + new Transaction.Builder() + .readVersion(dataset.version()) .operation(Append.builder().fragments(new ArrayList<>()).build()) .build(); - transaction.commit().close(); + try { + new CommitBuilder(dataset).execute(txn).close(); + } finally { + txn.release(); + } }); } } diff --git a/java/src/test/java/org/lance/operation/DataReplacementTest.java b/java/src/test/java/org/lance/operation/DataReplacementTest.java index c75ef55567d..916ee8a4fbf 100644 --- a/java/src/test/java/org/lance/operation/DataReplacementTest.java +++ b/java/src/test/java/org/lance/operation/DataReplacementTest.java @@ -13,11 +13,12 @@ */ package org.lance.operation; +import org.lance.CommitBuilder; import org.lance.Dataset; import org.lance.Fragment; import org.lance.FragmentMetadata; -import org.lance.SourcedTransaction; import org.lance.TestUtils; +import org.lance.Transaction; import org.lance.WriteParams; import org.lance.fragment.DataFile; import org.lance.ipc.LanceScanner; @@ -71,13 +72,13 @@ void testDataReplacement(@TempDir Path tempDir) throws Exception { List fragmentMetas = Fragment.create(datasetPath, allocator, idRoot, new WriteParams.Builder().build()); - SourcedTransaction appendTxn = - dataset - .newTransactionBuilder() + Transaction appendTxn = + new Transaction.Builder() + .readVersion(dataset.version()) .operation(Append.builder().fragments(fragmentMetas).build()) .build(); - try (Dataset initDataset = appendTxn.commit()) { + try (Dataset initDataset = new CommitBuilder(dataset).execute(appendTxn)) { assertEquals(2, initDataset.version()); assertEquals(rowCount, initDataset.countRows()); @@ -124,13 +125,13 @@ void testDataReplacement(@TempDir Path tempDir) throws Exception { Collections.singletonList( new DataReplacement.DataReplacementGroup( fragmentMetas.get(0).getId(), datafile)); - SourcedTransaction replaceTxn = - initDataset - .newTransactionBuilder() + Transaction replaceTxn = + new Transaction.Builder() + .readVersion(initDataset.version()) .operation(DataReplacement.builder().replacements(replacementGroups).build()) .build(); - try (Dataset datasetWithAddress = replaceTxn.commit()) { + try (Dataset datasetWithAddress = new CommitBuilder(initDataset).execute(replaceTxn)) { assertEquals(4, datasetWithAddress.version()); assertEquals(rowCount, datasetWithAddress.countRows()); @@ -151,8 +152,12 @@ void testDataReplacement(@TempDir Path tempDir) throws Exception { } } } + } finally { + replaceTxn.release(); } } + } finally { + appendTxn.release(); } } } diff --git a/java/src/test/java/org/lance/operation/DeleteTest.java b/java/src/test/java/org/lance/operation/DeleteTest.java index 20a4929b4e0..1bc1cbb9187 100644 --- a/java/src/test/java/org/lance/operation/DeleteTest.java +++ b/java/src/test/java/org/lance/operation/DeleteTest.java @@ -13,9 +13,9 @@ */ package org.lance.operation; +import org.lance.CommitBuilder; import org.lance.Dataset; import org.lance.FragmentMetadata; -import org.lance.SourcedTransaction; import org.lance.TestUtils; import org.lance.Transaction; @@ -43,15 +43,17 @@ void testDelete(@TempDir Path tempDir) { int rowCount = 20; FragmentMetadata fragmentMeta0 = testDataset.createNewFragment(rowCount); FragmentMetadata fragmentMeta1 = testDataset.createNewFragment(rowCount); - SourcedTransaction transaction = - dataset - .newTransactionBuilder() + Transaction appendTxn = + new Transaction.Builder() + .readVersion(dataset.version()) .operation( Append.builder().fragments(Arrays.asList(fragmentMeta0, fragmentMeta1)).build()) .build(); - try (Dataset dataset = transaction.commit()) { + try (Dataset dataset = new CommitBuilder(this.dataset).execute(appendTxn)) { assertEquals(2, dataset.version()); assertEquals(2, dataset.latestVersion()); + } finally { + appendTxn.release(); } dataset = Dataset.open(datasetPath, allocator); @@ -61,17 +63,19 @@ void testDelete(@TempDir Path tempDir) { .map(t -> Long.valueOf(t.getId())) .collect(Collectors.toList()); - SourcedTransaction delete = - dataset - .newTransactionBuilder() + Transaction deleteTxn = + new Transaction.Builder() + .readVersion(dataset.version()) .operation( Delete.builder().deletedFragmentIds(deletedFragmentIds).predicate("1=1").build()) .build(); - try (Dataset dataset = delete.commit()) { + try (Dataset dataset = new CommitBuilder(this.dataset).execute(deleteTxn)) { Transaction txn = dataset.readTransaction().get(); Delete execDelete = (Delete) txn.operation(); - assertEquals(delete.operation(), execDelete); + assertEquals(deleteTxn.operation(), execDelete); assertEquals(0, dataset.countRows()); + } finally { + deleteTxn.release(); } } } diff --git a/java/src/test/java/org/lance/operation/MergeTest.java b/java/src/test/java/org/lance/operation/MergeTest.java index ef01d9654a3..3c8f3c14456 100644 --- a/java/src/test/java/org/lance/operation/MergeTest.java +++ b/java/src/test/java/org/lance/operation/MergeTest.java @@ -13,10 +13,11 @@ */ package org.lance.operation; +import org.lance.CommitBuilder; import org.lance.Dataset; import org.lance.FragmentMetadata; -import org.lance.SourcedTransaction; import org.lance.TestUtils; +import org.lance.Transaction; import org.lance.fragment.DataFile; import org.lance.ipc.LanceScanner; @@ -90,9 +91,9 @@ void testMergeNewColumn(@TempDir Path tempDir) throws Exception { fragmentMeta.getDeletionFile(), fragmentMeta.getRowIdMeta()); - SourcedTransaction mergeTransaction = - initialDataset - .newTransactionBuilder() + Transaction mergeTxn = + new Transaction.Builder() + .readVersion(initialDataset.version()) .operation( Merge.builder() .fragments(Collections.singletonList(evolvedFragment)) @@ -100,7 +101,7 @@ void testMergeNewColumn(@TempDir Path tempDir) throws Exception { .build()) .build(); - try (Dataset evolvedDataset = mergeTransaction.commit()) { + try (Dataset evolvedDataset = new CommitBuilder(initialDataset).execute(mergeTxn)) { Assertions.assertEquals(3, evolvedDataset.version()); Assertions.assertEquals(rowCount, evolvedDataset.countRows()); Assertions.assertEquals(evolvedSchema, evolvedDataset.getSchema()); @@ -123,6 +124,8 @@ void testMergeNewColumn(@TempDir Path tempDir) throws Exception { } } } + } finally { + mergeTxn.release(); } } } @@ -169,9 +172,9 @@ void testReplaceAsDiffColumns(@TempDir Path tempDir) throws Exception { fragmentMeta.getDeletionFile(), fragmentMeta.getRowIdMeta()); - SourcedTransaction mergeTransaction = - initialDataset - .newTransactionBuilder() + Transaction mergeTxn = + new Transaction.Builder() + .readVersion(initialDataset.version()) .operation( Merge.builder() .fragments(Collections.singletonList(evolvedFragment)) @@ -179,7 +182,7 @@ void testReplaceAsDiffColumns(@TempDir Path tempDir) throws Exception { .build()) .build(); - try (Dataset evolvedDataset = mergeTransaction.commit()) { + try (Dataset evolvedDataset = new CommitBuilder(initialDataset).execute(mergeTxn)) { Assertions.assertEquals(3, evolvedDataset.version()); Assertions.assertEquals(rowCount, evolvedDataset.countRows()); Assertions.assertEquals(evolvedSchema, evolvedDataset.getSchema()); @@ -202,6 +205,8 @@ void testReplaceAsDiffColumns(@TempDir Path tempDir) throws Exception { } } } + } finally { + mergeTxn.release(); } } } @@ -254,9 +259,9 @@ void testMergeExistingColumn(@TempDir Path tempDir) throws Exception { fragmentMeta.getDeletionFile(), fragmentMeta.getRowIdMeta()); - SourcedTransaction mergeTransaction = - initialDataset - .newTransactionBuilder() + Transaction mergeTxn = + new Transaction.Builder() + .readVersion(initialDataset.version()) .operation( Merge.builder() .fragments(Collections.singletonList(evolvedFragment)) @@ -264,7 +269,7 @@ void testMergeExistingColumn(@TempDir Path tempDir) throws Exception { .build()) .build(); - try (Dataset mergedDataset = mergeTransaction.commit()) { + try (Dataset mergedDataset = new CommitBuilder(initialDataset).execute(mergeTxn)) { Assertions.assertEquals(3, mergedDataset.version()); Assertions.assertEquals(rowCount, mergedDataset.countRows()); @@ -282,6 +287,8 @@ void testMergeExistingColumn(@TempDir Path tempDir) throws Exception { } } } + } finally { + mergeTxn.release(); } } } diff --git a/java/src/test/java/org/lance/operation/OperationTestBase.java b/java/src/test/java/org/lance/operation/OperationTestBase.java index 7a522f60ac2..bd1c1cdb81a 100644 --- a/java/src/test/java/org/lance/operation/OperationTestBase.java +++ b/java/src/test/java/org/lance/operation/OperationTestBase.java @@ -13,10 +13,11 @@ */ package org.lance.operation; +import org.lance.CommitBuilder; import org.lance.Dataset; import org.lance.FragmentMetadata; -import org.lance.SourcedTransaction; import org.lance.TestUtils; +import org.lance.Transaction; import org.lance.file.LanceFileWriter; import org.lance.fragment.DataFile; @@ -53,12 +54,16 @@ protected Dataset createAndAppendRows(TestUtils.SimpleTestDataset suite, int row dataset = suite.createEmptyDataset(); FragmentMetadata fragmentMeta = suite.createNewFragment(rowCount); - SourcedTransaction appendTxn = - dataset - .newTransactionBuilder() + Transaction txn = + new Transaction.Builder() + .readVersion(dataset.version()) .operation(Append.builder().fragments(Collections.singletonList(fragmentMeta)).build()) .build(); - return appendTxn.commit(); + try { + return new CommitBuilder(dataset).execute(txn); + } finally { + txn.release(); + } } /** diff --git a/java/src/test/java/org/lance/operation/OverwriteTest.java b/java/src/test/java/org/lance/operation/OverwriteTest.java index 7f6bd15e4ab..e29ccdb75e1 100644 --- a/java/src/test/java/org/lance/operation/OverwriteTest.java +++ b/java/src/test/java/org/lance/operation/OverwriteTest.java @@ -13,11 +13,12 @@ */ package org.lance.operation; +import org.lance.CommitBuilder; import org.lance.Dataset; import org.lance.Fragment; import org.lance.FragmentMetadata; -import org.lance.SourcedTransaction; import org.lance.TestUtils; +import org.lance.Transaction; import org.lance.ipc.LanceScanner; import org.apache.arrow.memory.RootAllocator; @@ -43,16 +44,16 @@ void testOverwrite(@TempDir Path tempDir) throws Exception { // Commit fragment int rowCount = 20; FragmentMetadata fragmentMeta = testDataset.createNewFragment(rowCount); - SourcedTransaction transaction = - dataset - .newTransactionBuilder() + Transaction txn = + new Transaction.Builder() + .readVersion(dataset.version()) .operation( Overwrite.builder() .fragments(Collections.singletonList(fragmentMeta)) .schema(testDataset.getSchema()) .build()) .build(); - try (Dataset dataset = transaction.commit()) { + try (Dataset dataset = new CommitBuilder(this.dataset).execute(txn)) { assertEquals(2, dataset.version()); assertEquals(2, dataset.latestVersion()); assertEquals(rowCount, dataset.countRows()); @@ -62,14 +63,16 @@ void testOverwrite(@TempDir Path tempDir) throws Exception { Schema schemaRes = scanner.schema(); assertEquals(testDataset.getSchema(), schemaRes); } + } finally { + txn.release(); } // Commit fragment again rowCount = 40; fragmentMeta = testDataset.createNewFragment(rowCount); - transaction = - dataset - .newTransactionBuilder() + Transaction txn2 = + new Transaction.Builder() + .readVersion(dataset.version()) .operation( Overwrite.builder() .fragments(Collections.singletonList(fragmentMeta)) @@ -78,9 +81,8 @@ void testOverwrite(@TempDir Path tempDir) throws Exception { .build()) .transactionProperties(Collections.singletonMap("key", "value")) .build(); - assertEquals( - "value", transaction.transactionProperties().map(m -> m.get("key")).orElse(null)); - try (Dataset dataset = transaction.commit()) { + assertEquals("value", txn2.transactionProperties().map(m -> m.get("key")).orElse(null)); + try (Dataset dataset = new CommitBuilder(this.dataset).execute(txn2)) { assertEquals(3, dataset.version()); assertEquals(3, dataset.latestVersion()); assertEquals(rowCount, dataset.countRows()); @@ -91,7 +93,9 @@ void testOverwrite(@TempDir Path tempDir) throws Exception { Schema schemaRes = scanner.schema(); assertEquals(testDataset.getSchema(), schemaRes); } - assertEquals(transaction.transaction(), dataset.readTransaction().orElse(null)); + assertEquals(txn2, dataset.readTransaction().orElse(null)); + } finally { + txn2.release(); } } } diff --git a/java/src/test/java/org/lance/operation/ProjectTest.java b/java/src/test/java/org/lance/operation/ProjectTest.java index 5fef1f078aa..a5b54241965 100644 --- a/java/src/test/java/org/lance/operation/ProjectTest.java +++ b/java/src/test/java/org/lance/operation/ProjectTest.java @@ -13,9 +13,10 @@ */ package org.lance.operation; +import org.lance.CommitBuilder; import org.lance.Dataset; -import org.lance.SourcedTransaction; import org.lance.TestUtils; +import org.lance.Transaction; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.types.pojo.Field; @@ -43,30 +44,34 @@ void testProjection(@TempDir Path tempDir) { assertEquals(testDataset.getSchema(), dataset.getSchema()); List fieldList = new ArrayList<>(testDataset.getSchema().getFields()); Collections.reverse(fieldList); - SourcedTransaction txn1 = - dataset - .newTransactionBuilder() + Transaction txn1 = + new Transaction.Builder() + .readVersion(dataset.version()) .operation(Project.builder().schema(new Schema(fieldList)).build()) .build(); - try (Dataset committedDataset = txn1.commit()) { + try (Dataset committedDataset = new CommitBuilder(dataset).execute(txn1)) { assertEquals(1, txn1.readVersion()); assertEquals(1, dataset.version()); assertEquals(2, committedDataset.version()); assertEquals(new Schema(fieldList), committedDataset.getSchema()); fieldList.remove(1); - SourcedTransaction txn2 = - committedDataset - .newTransactionBuilder() + Transaction txn2 = + new Transaction.Builder() + .readVersion(committedDataset.version()) .operation(Project.builder().schema(new Schema(fieldList)).build()) .build(); - try (Dataset committedDataset2 = txn2.commit()) { + try (Dataset committedDataset2 = new CommitBuilder(committedDataset).execute(txn2)) { assertEquals(2, txn2.readVersion()); assertEquals(2, committedDataset.version()); assertEquals(3, committedDataset2.version()); assertEquals(new Schema(fieldList), committedDataset2.getSchema()); - assertEquals(txn1.transaction(), committedDataset.readTransaction().orElse(null)); - assertEquals(txn2.transaction(), committedDataset2.readTransaction().orElse(null)); + assertEquals(txn1, committedDataset.readTransaction().orElse(null)); + assertEquals(txn2, committedDataset2.readTransaction().orElse(null)); + } finally { + txn2.release(); } + } finally { + txn1.release(); } } } diff --git a/java/src/test/java/org/lance/operation/ReserveFragmentsTest.java b/java/src/test/java/org/lance/operation/ReserveFragmentsTest.java index 3b0d267aec1..258aca84164 100644 --- a/java/src/test/java/org/lance/operation/ReserveFragmentsTest.java +++ b/java/src/test/java/org/lance/operation/ReserveFragmentsTest.java @@ -13,11 +13,12 @@ */ package org.lance.operation; +import org.lance.CommitBuilder; import org.lance.Dataset; import org.lance.Fragment; import org.lance.FragmentMetadata; -import org.lance.SourcedTransaction; import org.lance.TestUtils; +import org.lance.Transaction; import org.apache.arrow.memory.RootAllocator; import org.junit.jupiter.api.Assertions; @@ -42,35 +43,37 @@ void testReserveFragments(@TempDir Path tempDir) throws Exception { // Create an initial fragment to establish a baseline fragment ID FragmentMetadata initialFragmentMeta = testDataset.createNewFragment(10); - SourcedTransaction appendTransaction = - dataset - .newTransactionBuilder() + Transaction appendTxn = + new Transaction.Builder() + .readVersion(dataset.version()) .operation( Append.builder() .fragments(Collections.singletonList(initialFragmentMeta)) .build()) .build(); - try (Dataset datasetWithFragment = appendTransaction.commit()) { + try (Dataset datasetWithFragment = new CommitBuilder(dataset).execute(appendTxn)) { // Reserve fragment IDs int numFragmentsToReserve = 5; - SourcedTransaction reserveTransaction = - datasetWithFragment - .newTransactionBuilder() + Transaction reserveTxn = + new Transaction.Builder() + .readVersion(datasetWithFragment.version()) .operation( new ReserveFragments.Builder().numFragments(numFragmentsToReserve).build()) .build(); - try (Dataset datasetWithReservedFragments = reserveTransaction.commit()) { + try (Dataset datasetWithReservedFragments = + new CommitBuilder(datasetWithFragment).execute(reserveTxn)) { // Create a new fragment and verify its ID reflects the reservation FragmentMetadata newFragmentMeta = testDataset.createNewFragment(10); - SourcedTransaction appendTransaction2 = - datasetWithReservedFragments - .newTransactionBuilder() + Transaction appendTxn2 = + new Transaction.Builder() + .readVersion(datasetWithReservedFragments.version()) .operation( Append.builder() .fragments(Collections.singletonList(newFragmentMeta)) .build()) .build(); - try (Dataset finalDataset = appendTransaction2.commit()) { + try (Dataset finalDataset = + new CommitBuilder(datasetWithReservedFragments).execute(appendTxn2)) { // Verify the fragment IDs were properly reserved // The new fragment should have an ID that's at least numFragmentsToReserve higher // than it would have been without the reservation @@ -88,11 +91,15 @@ void testReserveFragments(@TempDir Path tempDir) throws Exception { firstFragment.metadata().getId() + 1, secondFragment.getId()); // Verify the transaction is recorded - assertEquals( - reserveTransaction.transaction(), - datasetWithReservedFragments.readTransaction().orElse(null)); + assertEquals(reserveTxn, datasetWithReservedFragments.readTransaction().orElse(null)); + } finally { + appendTxn2.release(); } + } finally { + reserveTxn.release(); } + } finally { + appendTxn.release(); } } } diff --git a/java/src/test/java/org/lance/operation/RestoreTest.java b/java/src/test/java/org/lance/operation/RestoreTest.java index ff2604bc440..2e27db6626a 100644 --- a/java/src/test/java/org/lance/operation/RestoreTest.java +++ b/java/src/test/java/org/lance/operation/RestoreTest.java @@ -13,10 +13,11 @@ */ package org.lance.operation; +import org.lance.CommitBuilder; import org.lance.Dataset; import org.lance.FragmentMetadata; -import org.lance.SourcedTransaction; import org.lance.TestUtils; +import org.lance.Transaction; import org.apache.arrow.memory.RootAllocator; import org.junit.jupiter.api.Test; @@ -43,32 +44,35 @@ void testRestore(@TempDir Path tempDir) { // Append data to create a new version int rowCount = 20; FragmentMetadata fragmentMeta = testDataset.createNewFragment(rowCount); - SourcedTransaction transaction = - dataset - .newTransactionBuilder() + Transaction appendTxn = + new Transaction.Builder() + .readVersion(dataset.version()) .operation( Append.builder().fragments(Collections.singletonList(fragmentMeta)).build()) .build(); - try (Dataset modifiedDataset = transaction.commit()) { + try (Dataset modifiedDataset = new CommitBuilder(dataset).execute(appendTxn)) { // Verify the dataset was modified long newVersion = modifiedDataset.version(); assertEquals(initialVersion + 1, newVersion); assertEquals(rowCount, modifiedDataset.countRows()); // Restore to the initial version - SourcedTransaction restoreTransaction = - modifiedDataset - .newTransactionBuilder() + Transaction restoreTxn = + new Transaction.Builder() + .readVersion(modifiedDataset.version()) .operation(new Restore.Builder().version(initialVersion).build()) .build(); - try (Dataset restoredDataset = restoreTransaction.commit()) { + try (Dataset restoredDataset = new CommitBuilder(modifiedDataset).execute(restoreTxn)) { // Verify the dataset was restored to the initial version, but the version increases assertEquals(initialVersion + 2, restoredDataset.version()); // Initial dataset had 0 rows assertEquals(0, restoredDataset.countRows()); - assertEquals( - restoreTransaction.transaction(), restoredDataset.readTransaction().orElse(null)); + assertEquals(restoreTxn, restoredDataset.readTransaction().orElse(null)); + } finally { + restoreTxn.release(); } + } finally { + appendTxn.release(); } } } diff --git a/java/src/test/java/org/lance/operation/RewriteTest.java b/java/src/test/java/org/lance/operation/RewriteTest.java index 41d050d0e32..3f0f7f13b84 100644 --- a/java/src/test/java/org/lance/operation/RewriteTest.java +++ b/java/src/test/java/org/lance/operation/RewriteTest.java @@ -13,10 +13,11 @@ */ package org.lance.operation; +import org.lance.CommitBuilder; import org.lance.Dataset; import org.lance.FragmentMetadata; -import org.lance.SourcedTransaction; import org.lance.TestUtils; +import org.lance.Transaction; import org.apache.arrow.memory.RootAllocator; import org.junit.jupiter.api.Test; @@ -44,14 +45,14 @@ void testRewrite(@TempDir Path tempDir) { FragmentMetadata fragmentMeta1 = testDataset.createNewFragment(rowCount); FragmentMetadata fragmentMeta2 = testDataset.createNewFragment(rowCount); - SourcedTransaction appendTx = - dataset - .newTransactionBuilder() + Transaction appendTxn = + new Transaction.Builder() + .readVersion(dataset.version()) .operation( Append.builder().fragments(Arrays.asList(fragmentMeta1, fragmentMeta2)).build()) .build(); - try (Dataset datasetWithData = appendTx.commit()) { + try (Dataset datasetWithData = new CommitBuilder(dataset).execute(appendTxn)) { assertEquals(2, datasetWithData.version()); assertEquals(rowCount * 2, datasetWithData.countRows()); @@ -72,20 +73,24 @@ void testRewrite(@TempDir Path tempDir) { groups.add(group); // Create and commit the rewrite transaction - SourcedTransaction rewriteTx = - datasetWithData - .newTransactionBuilder() + Transaction rewriteTxn = + new Transaction.Builder() + .readVersion(datasetWithData.version()) .operation(Rewrite.builder().groups(groups).build()) .build(); - try (Dataset rewrittenDataset = rewriteTx.commit()) { + try (Dataset rewrittenDataset = new CommitBuilder(datasetWithData).execute(rewriteTxn)) { assertEquals(3, rewrittenDataset.version()); // The row count should remain the same since we're just rewriting assertEquals(rowCount * 2, rewrittenDataset.countRows()); // Verify that the transaction was recorded - assertEquals(rewriteTx.transaction(), rewrittenDataset.readTransaction().orElse(null)); + assertEquals(rewriteTxn, rewrittenDataset.readTransaction().orElse(null)); + } finally { + rewriteTxn.release(); } + } finally { + appendTxn.release(); } } } diff --git a/java/src/test/java/org/lance/operation/TruncateTest.java b/java/src/test/java/org/lance/operation/TruncateTest.java index 8c7e1d12238..80ac8dcb76b 100644 --- a/java/src/test/java/org/lance/operation/TruncateTest.java +++ b/java/src/test/java/org/lance/operation/TruncateTest.java @@ -13,10 +13,11 @@ */ package org.lance.operation; +import org.lance.CommitBuilder; import org.lance.Dataset; import org.lance.FragmentMetadata; -import org.lance.SourcedTransaction; import org.lance.TestUtils; +import org.lance.Transaction; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.types.pojo.Schema; @@ -40,15 +41,15 @@ void testTruncateTable(@TempDir Path tempDir) throws Exception { // Append some data int rowCount = 20; FragmentMetadata fragmentMeta = testDataset.createNewFragment(rowCount); - SourcedTransaction transaction = - dataset - .newTransactionBuilder() + Transaction txn = + new Transaction.Builder() + .readVersion(dataset.version()) .operation( Append.builder() .fragments(java.util.Collections.singletonList(fragmentMeta)) .build()) .build(); - try (Dataset ds1 = transaction.commit()) { + try (Dataset ds1 = new CommitBuilder(dataset).execute(txn)) { assertEquals(rowCount, ds1.countRows()); // Truncate to empty while preserving schema @@ -59,6 +60,8 @@ void testTruncateTable(@TempDir Path tempDir) throws Exception { Schema schemaRes = scanner.schema(); assertEquals(testDataset.getSchema(), schemaRes); } + } finally { + txn.release(); } } } diff --git a/java/src/test/java/org/lance/operation/UpdateConfigTest.java b/java/src/test/java/org/lance/operation/UpdateConfigTest.java index 887fc412133..653fae85c4d 100644 --- a/java/src/test/java/org/lance/operation/UpdateConfigTest.java +++ b/java/src/test/java/org/lance/operation/UpdateConfigTest.java @@ -13,9 +13,10 @@ */ package org.lance.operation; +import org.lance.CommitBuilder; import org.lance.Dataset; -import org.lance.SourcedTransaction; import org.lance.TestUtils; +import org.lance.Transaction; import org.apache.arrow.memory.RootAllocator; import org.junit.jupiter.api.Test; @@ -45,12 +46,12 @@ void testUpdateConfig(@TempDir Path tempDir) { UpdateMap configUpdates = UpdateMap.builder().updates(configValues).replace(false).build(); - SourcedTransaction transaction = - dataset - .newTransactionBuilder() + Transaction txn = + new Transaction.Builder() + .readVersion(dataset.version()) .operation(UpdateConfig.builder().configUpdates(configUpdates).build()) .build(); - try (Dataset updatedDataset = transaction.commit()) { + try (Dataset updatedDataset = new CommitBuilder(dataset).execute(txn)) { assertEquals(2, updatedDataset.version()); assertEquals("value1", updatedDataset.getConfig().get("key1")); assertEquals("value2", updatedDataset.getConfig().get("key2")); @@ -62,12 +63,12 @@ void testUpdateConfig(@TempDir Path tempDir) { UpdateMap configDeleteUpdates = UpdateMap.builder().updates(deleteUpdates).replace(false).build(); - transaction = - updatedDataset - .newTransactionBuilder() + Transaction txn2 = + new Transaction.Builder() + .readVersion(updatedDataset.version()) .operation(UpdateConfig.builder().configUpdates(configDeleteUpdates).build()) .build(); - try (Dataset updatedDataset2 = transaction.commit()) { + try (Dataset updatedDataset2 = new CommitBuilder(updatedDataset).execute(txn2)) { assertEquals(3, updatedDataset2.version()); assertNull(updatedDataset2.getConfig().get("key1")); assertEquals("value2", updatedDataset2.getConfig().get("key2")); @@ -80,13 +81,13 @@ void testUpdateConfig(@TempDir Path tempDir) { UpdateMap schemaMetadataUpdates = UpdateMap.builder().updates(schemaMetadataMap).replace(false).build(); - transaction = - updatedDataset2 - .newTransactionBuilder() + Transaction txn3 = + new Transaction.Builder() + .readVersion(updatedDataset2.version()) .operation( UpdateConfig.builder().schemaMetadataUpdates(schemaMetadataUpdates).build()) .build(); - try (Dataset updatedDataset3 = transaction.commit()) { + try (Dataset updatedDataset3 = new CommitBuilder(updatedDataset2).execute(txn3)) { assertEquals(4, updatedDataset3.version()); assertEquals( "schema_value1", updatedDataset3.getLanceSchema().metadata().get("schema_key1")); @@ -110,13 +111,13 @@ void testUpdateConfig(@TempDir Path tempDir) { fieldMetadataUpdates.put(0, field0UpdateMap); fieldMetadataUpdates.put(1, field1UpdateMap); - transaction = - updatedDataset3 - .newTransactionBuilder() + Transaction txn4 = + new Transaction.Builder() + .readVersion(updatedDataset3.version()) .operation( UpdateConfig.builder().fieldMetadataUpdates(fieldMetadataUpdates).build()) .build(); - try (Dataset updatedDataset4 = transaction.commit()) { + try (Dataset updatedDataset4 = new CommitBuilder(updatedDataset3).execute(txn4)) { assertEquals(5, updatedDataset4.version()); // Verify field metadata for field 0 @@ -129,9 +130,17 @@ void testUpdateConfig(@TempDir Path tempDir) { updatedDataset4.getLanceSchema().fields().get(1).getMetadata(); assertEquals("field1_value1", field1Result.get("field1_key1")); assertEquals("field1_value2", field1Result.get("field1_key2")); + } finally { + txn4.release(); } + } finally { + txn3.release(); } + } finally { + txn2.release(); } + } finally { + txn.release(); } } } diff --git a/java/src/test/java/org/lance/operation/UpdateTest.java b/java/src/test/java/org/lance/operation/UpdateTest.java index dd1eef23076..4a3ad2a3b3a 100644 --- a/java/src/test/java/org/lance/operation/UpdateTest.java +++ b/java/src/test/java/org/lance/operation/UpdateTest.java @@ -13,10 +13,10 @@ */ package org.lance.operation; +import org.lance.CommitBuilder; import org.lance.Dataset; import org.lance.Fragment; import org.lance.FragmentMetadata; -import org.lance.SourcedTransaction; import org.lance.TestUtils; import org.lance.Transaction; import org.lance.fragment.FragmentUpdateResult; @@ -55,35 +55,42 @@ void testUpdate(@TempDir Path tempDir) throws Exception { // Commit fragment int rowCount = 20; FragmentMetadata fragmentMeta = testDataset.createNewFragment(rowCount); - SourcedTransaction transaction = - dataset - .newTransactionBuilder() + Transaction appendTxn = + new Transaction.Builder() + .readVersion(dataset.version()) .operation( Append.builder().fragments(Collections.singletonList(fragmentMeta)).build()) .build(); - try (Dataset dataset = transaction.commit()) { + try (Dataset dataset = new CommitBuilder(this.dataset).execute(appendTxn)) { assertEquals(2, dataset.version()); assertEquals(2, dataset.latestVersion()); assertEquals(rowCount, dataset.countRows()); assertThrows( IllegalArgumentException.class, - () -> - dataset - .newTransactionBuilder() - .operation(Append.builder().fragments(new ArrayList<>()).build()) - .build() - .commit() - .close()); + () -> { + Transaction txn = + new Transaction.Builder() + .readVersion(dataset.version()) + .operation(Append.builder().fragments(new ArrayList<>()).build()) + .build(); + try { + new CommitBuilder(dataset).execute(txn).close(); + } finally { + txn.release(); + } + }); + } finally { + appendTxn.release(); } dataset = Dataset.open(datasetPath, allocator); // Update fragments rowCount = 40; FragmentMetadata newFragment = testDataset.createNewFragment(rowCount); - transaction = - dataset - .newTransactionBuilder() + Transaction updateTxn = + new Transaction.Builder() + .readVersion(dataset.version()) .operation( Update.builder() .removedFragmentIds( @@ -94,13 +101,15 @@ void testUpdate(@TempDir Path tempDir) throws Exception { .build()) .build(); - try (Dataset dataset = transaction.commit()) { + try (Dataset dataset = new CommitBuilder(this.dataset).execute(updateTxn)) { assertEquals(3, dataset.version()); assertEquals(3, dataset.latestVersion()); assertEquals(rowCount, dataset.countRows()); Transaction txn = dataset.readTransaction().orElse(null); - assertEquals(transaction.transaction(), txn); + assertEquals(updateTxn, txn); + } finally { + updateTxn.release(); } } } @@ -123,16 +132,18 @@ void testUpdateColumns(@TempDir Path tempDir) throws Exception { */ int rowCount = 6; FragmentMetadata fragmentMeta = testDataset.createNewFragment(rowCount); - SourcedTransaction appendTransaction = - dataset - .newTransactionBuilder() + Transaction appendTxn = + new Transaction.Builder() + .readVersion(dataset.version()) .operation( Append.builder().fragments(Collections.singletonList(fragmentMeta)).build()) .build(); - try (Dataset dataset = appendTransaction.commit()) { + try (Dataset dataset = new CommitBuilder(this.dataset).execute(appendTxn)) { assertEquals(2, dataset.version()); assertEquals(2, dataset.latestVersion()); assertEquals(rowCount, dataset.countRows()); + } finally { + appendTxn.release(); } dataset = Dataset.open(datasetPath, allocator); @@ -146,9 +157,9 @@ void testUpdateColumns(@TempDir Path tempDir) throws Exception { * 3: | null | null | */ FragmentUpdateResult updateResult = testDataset.updateColumn(targetFragment, updateRowCount); - SourcedTransaction updateTransaction = - dataset - .newTransactionBuilder() + Transaction updateTxn = + new Transaction.Builder() + .readVersion(dataset.version()) .operation( Update.builder() .updatedFragments( @@ -156,7 +167,7 @@ void testUpdateColumns(@TempDir Path tempDir) throws Exception { .fieldsModified(updateResult.getFieldsModified()) .build()) .build(); - try (Dataset dataset = updateTransaction.commit()) { + try (Dataset dataset = new CommitBuilder(this.dataset).execute(updateTxn)) { assertEquals(3, dataset.version()); assertEquals(3, dataset.latestVersion()); Fragment fragment = dataset.getFragments().get(0); @@ -200,6 +211,8 @@ void testUpdateColumns(@TempDir Path tempDir) throws Exception { assertEquals(expectNames, actualNames); assertEquals(expectTimeStamps, actualTimeStamps); } + } finally { + updateTxn.release(); } } } From 81b5c1da18aad7ade9198f02a49ee43c41ec8596 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Tue, 24 Feb 2026 15:04:56 -0600 Subject: [PATCH 5/7] made Transaction auto-closeable Signed-off-by: Daniel Rammer --- .../main/java/org/lance/CommitBuilder.java | 16 +- java/src/main/java/org/lance/Dataset.java | 24 +-- .../java/org/lance/SourcedTransaction.java | 11 +- java/src/main/java/org/lance/Transaction.java | 7 +- .../src/test/java/org/lance/FragmentTest.java | 60 +++--- .../test/java/org/lance/TransactionTest.java | 14 +- .../java/org/lance/operation/AppendTest.java | 24 +-- .../lance/operation/DataReplacementTest.java | 145 +++++++------- .../java/org/lance/operation/DeleteTest.java | 27 ++- .../java/org/lance/operation/MergeTest.java | 130 ++++++------- .../lance/operation/OperationTestBase.java | 7 +- .../org/lance/operation/OverwriteTest.java | 51 +++-- .../java/org/lance/operation/ProjectTest.java | 40 ++-- .../lance/operation/ReserveFragmentsTest.java | 84 ++++---- .../java/org/lance/operation/RestoreTest.java | 43 ++--- .../java/org/lance/operation/RewriteTest.java | 76 ++++---- .../org/lance/operation/TruncateTest.java | 20 +- .../org/lance/operation/UpdateConfigTest.java | 179 +++++++++--------- .../java/org/lance/operation/UpdateTest.java | 162 ++++++++-------- 19 files changed, 526 insertions(+), 594 deletions(-) diff --git a/java/src/main/java/org/lance/CommitBuilder.java b/java/src/main/java/org/lance/CommitBuilder.java index 6202a2df683..a7c7fd18bf7 100644 --- a/java/src/main/java/org/lance/CommitBuilder.java +++ b/java/src/main/java/org/lance/CommitBuilder.java @@ -34,27 +34,23 @@ *

Example usage (dataset-based): * *

{@code
- * Transaction txn = new Transaction.Builder()
+ * try (Transaction txn = new Transaction.Builder()
  *     .readVersion(dataset.version())
  *     .operation(Append.builder().fragments(fragments).build())
  *     .build();
- * try (Dataset committed = new CommitBuilder(dataset).execute(txn)) {
+ *     Dataset committed = new CommitBuilder(dataset).execute(txn)) {
  *     // use committed dataset
- * } finally {
- *     txn.release();
  * }
  * }
* *

Example usage (URI-based): * *

{@code
- * Transaction txn = new Transaction.Builder()
+ * try (Transaction txn = new Transaction.Builder()
  *     .operation(Overwrite.builder().fragments(fragments).schema(schema).build())
  *     .build();
- * try (Dataset committed = new CommitBuilder(uri, allocator).execute(txn)) {
+ *     Dataset committed = new CommitBuilder(uri, allocator).execute(txn)) {
  *     // use committed dataset
- * } finally {
- *     txn.release();
  * }
  * }
*/ @@ -234,8 +230,8 @@ public CommitBuilder skipAutoCleanup(boolean skipAutoCleanup) { /** * Execute the commit with the given transaction. * - *

The caller is responsible for calling {@link Transaction#release()} after this method - * returns. + *

The caller is responsible for closing the transaction (via try-with-resources or {@link + * Transaction#close()}) to release any native resources held by the operation. * * @param transaction the transaction to commit * @return a new Dataset at the committed version diff --git a/java/src/main/java/org/lance/Dataset.java b/java/src/main/java/org/lance/Dataset.java index 4e8f2fd0c26..164e998d80a 100644 --- a/java/src/main/java/org/lance/Dataset.java +++ b/java/src/main/java/org/lance/Dataset.java @@ -553,21 +553,17 @@ public Dataset commitTransaction(Transaction transaction) { public Dataset commitTransaction( Transaction transaction, boolean detached, boolean enableV2ManifestPaths) { Preconditions.checkNotNull(transaction); - try { - Dataset dataset = - new CommitBuilder(this) - .detached(detached) - .enableV2ManifestPaths(enableV2ManifestPaths) - .execute(transaction); - if (selfManagedAllocator) { - dataset.allocator = new RootAllocator(Long.MAX_VALUE); - } else { - dataset.allocator = allocator; - } - return dataset; - } finally { - transaction.release(); + Dataset dataset = + new CommitBuilder(this) + .detached(detached) + .enableV2ManifestPaths(enableV2ManifestPaths) + .execute(transaction); + if (selfManagedAllocator) { + dataset.allocator = new RootAllocator(Long.MAX_VALUE); + } else { + dataset.allocator = allocator; } + return dataset; } /** diff --git a/java/src/main/java/org/lance/SourcedTransaction.java b/java/src/main/java/org/lance/SourcedTransaction.java index bd28456a292..2630c263b43 100644 --- a/java/src/main/java/org/lance/SourcedTransaction.java +++ b/java/src/main/java/org/lance/SourcedTransaction.java @@ -30,15 +30,15 @@ *

Example usage: * *

{@code
- * SourcedTransaction txn = dataset.newSourcedTransactionBuilder()
+ * try (SourcedTransaction txn = dataset.newSourcedTransactionBuilder()
  *     .operation(Append.builder().fragments(fragments).build())
  *     .build();
- * try (Dataset committed = txn.commit()) {
+ *     Dataset committed = txn.commit()) {
  *     // use committed dataset
  * }
  * }
*/ -public class SourcedTransaction { +public class SourcedTransaction implements AutoCloseable { private final Transaction transaction; private final Dataset dataset; @@ -99,8 +99,9 @@ public Dataset commit(boolean detached, boolean enableV2ManifestPaths) { } /** Release native resources held by the underlying transaction's operation. */ - public void release() { - transaction.release(); + @Override + public void close() { + transaction.close(); } /** Builder for constructing {@link SourcedTransaction} instances from a {@link Dataset}. */ diff --git a/java/src/main/java/org/lance/Transaction.java b/java/src/main/java/org/lance/Transaction.java index 5f7b06ec05d..92ef11551fe 100644 --- a/java/src/main/java/org/lance/Transaction.java +++ b/java/src/main/java/org/lance/Transaction.java @@ -31,7 +31,7 @@ * *

To commit a transaction, use {@link CommitBuilder} or {@link SourcedTransaction}. */ -public class Transaction { +public class Transaction implements AutoCloseable { private final long readVersion; private final String uuid; @@ -93,8 +93,9 @@ public Optional> transactionProperties() { return transactionProperties; } - /** Release native resources held by the operation. */ - public void release() { + /** Release native resources held by the operation (e.g. Arrow C schemas). */ + @Override + public void close() { operation.release(); } diff --git a/java/src/test/java/org/lance/FragmentTest.java b/java/src/test/java/org/lance/FragmentTest.java index 9452b57e49d..078af0ac1de 100644 --- a/java/src/test/java/org/lance/FragmentTest.java +++ b/java/src/test/java/org/lance/FragmentTest.java @@ -279,7 +279,6 @@ void testMergeColumns(@TempDir Path tempDir) throws Exception { // Commit fragment FragmentOperation.Append appendOp = new FragmentOperation.Append(Arrays.asList(fragmentMeta)); - SourcedTransaction transaction; try (Dataset dataset = Dataset.commit(allocator, datasetPath, appendOp, Optional.of(1L))) { assertEquals(2, dataset.version()); assertEquals(2, dataset.latestVersion()); @@ -293,43 +292,40 @@ void testMergeColumns(@TempDir Path tempDir) throws Exception { FragmentMergeResult mergeResult = testDataset.mergeColumn(fragment, 10); - SourcedTransaction.Builder builder = new SourcedTransaction.Builder(dataset); - transaction = - builder + try (SourcedTransaction transaction = + new SourcedTransaction.Builder(dataset) .operation( Merge.builder() .fragments(Collections.singletonList(mergeResult.getFragmentMetadata())) .schema(mergeResult.getSchema().asArrowSchema()) .build()) .readVersion(dataset.version()) - .build(); - - assertNotNull(transaction); - - try (Dataset newDs = transaction.commit()) { - assertEquals(3, newDs.version()); - assertEquals(3, newDs.latestVersion()); - Fragment newFrag = newDs.getFragments().get(0); - try (LanceScanner scanner = newFrag.newScan()) { - Schema schemaRes = scanner.schema(); - assertTrue( - schemaRes.getFields().stream() - .anyMatch(field -> field.getName().equals("new_col1"))); - assertTrue( - schemaRes.getFields().stream() - .anyMatch(field -> field.getName().equals("new_col2"))); - - try (ArrowReader reader = scanner.scanBatches()) { - assertTrue(reader.loadNextBatch()); - VectorSchemaRoot root = reader.getVectorSchemaRoot(); - VarCharVector newCol1Vec = (VarCharVector) root.getVector("new_col1"); - VarCharVector newCol2Vec = (VarCharVector) root.getVector("new_col2"); - assertEquals(21, newCol2Vec.getValueCount()); - - // The first 10 rows are not null - assertNotNull(newCol1Vec.get(9)); - // Remaining rows are null - assertNull(newCol1Vec.get(10)); + .build()) { + try (Dataset newDs = transaction.commit()) { + assertEquals(3, newDs.version()); + assertEquals(3, newDs.latestVersion()); + Fragment newFrag = newDs.getFragments().get(0); + try (LanceScanner scanner = newFrag.newScan()) { + Schema schemaRes = scanner.schema(); + assertTrue( + schemaRes.getFields().stream() + .anyMatch(field -> field.getName().equals("new_col1"))); + assertTrue( + schemaRes.getFields().stream() + .anyMatch(field -> field.getName().equals("new_col2"))); + + try (ArrowReader reader = scanner.scanBatches()) { + assertTrue(reader.loadNextBatch()); + VectorSchemaRoot root = reader.getVectorSchemaRoot(); + VarCharVector newCol1Vec = (VarCharVector) root.getVector("new_col1"); + VarCharVector newCol2Vec = (VarCharVector) root.getVector("new_col2"); + assertEquals(21, newCol2Vec.getValueCount()); + + // The first 10 rows are not null + assertNotNull(newCol1Vec.get(9)); + // Remaining rows are null + assertNull(newCol1Vec.get(10)); + } } } } diff --git a/java/src/test/java/org/lance/TransactionTest.java b/java/src/test/java/org/lance/TransactionTest.java index 46e9d5dfbcc..1767bc7a7d5 100644 --- a/java/src/test/java/org/lance/TransactionTest.java +++ b/java/src/test/java/org/lance/TransactionTest.java @@ -124,20 +124,18 @@ public void testCommitToUri(@TempDir Path tempDir) { FragmentMetadata fragmentMeta = testDataset.createNewFragment(20); // Build a transaction targeting a URI (no existing dataset) - Transaction txn = + try (Transaction txn = new Transaction.Builder() .operation( Overwrite.builder() .fragments(Collections.singletonList(fragmentMeta)) .schema(schema) .build()) - .build(); - - try (Dataset committedDataset = new CommitBuilder(datasetPath, allocator).execute(txn)) { - assertEquals(1, committedDataset.version()); - assertEquals(20, committedDataset.countRows()); - } finally { - txn.release(); + .build()) { + try (Dataset committedDataset = new CommitBuilder(datasetPath, allocator).execute(txn)) { + assertEquals(1, committedDataset.version()); + assertEquals(20, committedDataset.countRows()); + } } } } diff --git a/java/src/test/java/org/lance/operation/AppendTest.java b/java/src/test/java/org/lance/operation/AppendTest.java index 325c033b769..5d62b429fe1 100644 --- a/java/src/test/java/org/lance/operation/AppendTest.java +++ b/java/src/test/java/org/lance/operation/AppendTest.java @@ -63,19 +63,16 @@ void testAppendMultipleFragments(@TempDir Path tempDir) { testDataset.createNewFragment(rowCount), testDataset.createNewFragment(rowCount)); - Transaction txn = + try (Transaction txn = new Transaction.Builder() .readVersion(dataset.version()) .operation(Append.builder().fragments(fragments).build()) - .build(); - - try (Dataset dataset = new CommitBuilder(this.dataset).execute(txn)) { - assertEquals(2, dataset.version()); - assertEquals(rowCount * 3, dataset.countRows()); - assertEquals(3, dataset.getFragments().size()); - assertEquals(txn, dataset.readTransaction().orElse(null)); - } finally { - txn.release(); + .build()) { + try (Dataset dataset = new CommitBuilder(this.dataset).execute(txn)) { + assertEquals(2, dataset.version()); + assertEquals(rowCount * 3, dataset.countRows()); + assertEquals(3, dataset.getFragments().size()); + } } } } @@ -91,15 +88,12 @@ void testAppendEmptyFragmentList(@TempDir Path tempDir) { assertThrows( IllegalArgumentException.class, () -> { - Transaction txn = + try (Transaction txn = new Transaction.Builder() .readVersion(dataset.version()) .operation(Append.builder().fragments(new ArrayList<>()).build()) - .build(); - try { + .build()) { new CommitBuilder(dataset).execute(txn).close(); - } finally { - txn.release(); } }); } diff --git a/java/src/test/java/org/lance/operation/DataReplacementTest.java b/java/src/test/java/org/lance/operation/DataReplacementTest.java index 916ee8a4fbf..0ba59dd8aa1 100644 --- a/java/src/test/java/org/lance/operation/DataReplacementTest.java +++ b/java/src/test/java/org/lance/operation/DataReplacementTest.java @@ -72,92 +72,91 @@ void testDataReplacement(@TempDir Path tempDir) throws Exception { List fragmentMetas = Fragment.create(datasetPath, allocator, idRoot, new WriteParams.Builder().build()); - Transaction appendTxn = + try (Transaction appendTxn = new Transaction.Builder() .readVersion(dataset.version()) .operation(Append.builder().fragments(fragmentMetas).build()) - .build(); - - try (Dataset initDataset = new CommitBuilder(dataset).execute(appendTxn)) { - assertEquals(2, initDataset.version()); - assertEquals(rowCount, initDataset.countRows()); - - // step 3. use dataset.addColumn to add a new column named as address with all null values - Field addressField = Field.nullable("address", new ArrowType.Utf8()); - Schema addressSchema = new Schema(Collections.singletonList(addressField), null); - initDataset.addColumns(addressSchema); - - try (LanceScanner scanner = initDataset.newScan()) { - try (ArrowReader resultReader = scanner.scanBatches()) { - assertTrue(resultReader.loadNextBatch()); - VectorSchemaRoot batch = resultReader.getVectorSchemaRoot(); - assertEquals(rowCount, initDataset.countRows()); - assertEquals(rowCount, batch.getRowCount()); - - // verify all null values - VarCharVector resultNameVector = (VarCharVector) batch.getVector("address"); - for (int i = 0; i < rowCount; i++) { - Assertions.assertTrue(resultNameVector.isNull(i)); + .build()) { + try (Dataset initDataset = new CommitBuilder(dataset).execute(appendTxn)) { + assertEquals(2, initDataset.version()); + assertEquals(rowCount, initDataset.countRows()); + + // step 3. use dataset.addColumn to add a new column named as address with all null + // values + Field addressField = Field.nullable("address", new ArrowType.Utf8()); + Schema addressSchema = new Schema(Collections.singletonList(addressField), null); + initDataset.addColumns(addressSchema); + + try (LanceScanner scanner = initDataset.newScan()) { + try (ArrowReader resultReader = scanner.scanBatches()) { + assertTrue(resultReader.loadNextBatch()); + VectorSchemaRoot batch = resultReader.getVectorSchemaRoot(); + assertEquals(rowCount, initDataset.countRows()); + assertEquals(rowCount, batch.getRowCount()); + + // verify all null values + VarCharVector resultNameVector = (VarCharVector) batch.getVector("address"); + for (int i = 0; i < rowCount; i++) { + Assertions.assertTrue(resultNameVector.isNull(i)); + } } } - } - // step 4. use DataReplacement transaction to replace null values - try (VectorSchemaRoot replaceVectorRoot = - VectorSchemaRoot.create(addressSchema, allocator)) { - replaceVectorRoot.allocateNew(); - VarCharVector addressVector = (VarCharVector) replaceVectorRoot.getVector("address"); + // step 4. use DataReplacement transaction to replace null values + try (VectorSchemaRoot replaceVectorRoot = + VectorSchemaRoot.create(addressSchema, allocator)) { + replaceVectorRoot.allocateNew(); + VarCharVector addressVector = (VarCharVector) replaceVectorRoot.getVector("address"); - for (int i = 0; i < rowCount; i++) { - String name = "District " + i; - addressVector.setSafe(i, name.getBytes(StandardCharsets.UTF_8)); - } - replaceVectorRoot.setRowCount(rowCount); - - DataFile datafile = - writeLanceDataFile( - dataset.allocator(), - datasetPath, - replaceVectorRoot, - new int[] {2}, - new int[] {0}); - List replacementGroups = - Collections.singletonList( - new DataReplacement.DataReplacementGroup( - fragmentMetas.get(0).getId(), datafile)); - Transaction replaceTxn = - new Transaction.Builder() - .readVersion(initDataset.version()) - .operation(DataReplacement.builder().replacements(replacementGroups).build()) - .build(); - - try (Dataset datasetWithAddress = new CommitBuilder(initDataset).execute(replaceTxn)) { - assertEquals(4, datasetWithAddress.version()); - assertEquals(rowCount, datasetWithAddress.countRows()); - - try (LanceScanner scanner = datasetWithAddress.newScan()) { - try (ArrowReader resultReader = scanner.scanBatches()) { - assertTrue(resultReader.loadNextBatch()); - VectorSchemaRoot batch = resultReader.getVectorSchemaRoot(); + for (int i = 0; i < rowCount; i++) { + String name = "District " + i; + addressVector.setSafe(i, name.getBytes(StandardCharsets.UTF_8)); + } + replaceVectorRoot.setRowCount(rowCount); + + DataFile datafile = + writeLanceDataFile( + dataset.allocator(), + datasetPath, + replaceVectorRoot, + new int[] {2}, + new int[] {0}); + List replacementGroups = + Collections.singletonList( + new DataReplacement.DataReplacementGroup( + fragmentMetas.get(0).getId(), datafile)); + try (Transaction replaceTxn = + new Transaction.Builder() + .readVersion(initDataset.version()) + .operation(DataReplacement.builder().replacements(replacementGroups).build()) + .build()) { + try (Dataset datasetWithAddress = + new CommitBuilder(initDataset).execute(replaceTxn)) { + assertEquals(4, datasetWithAddress.version()); assertEquals(rowCount, datasetWithAddress.countRows()); - assertEquals(rowCount, batch.getRowCount()); - - // verify all address values not null - VarCharVector resultNameVector = (VarCharVector) batch.getVector("address"); - for (int i = 0; i < rowCount; i++) { - Assertions.assertFalse(resultNameVector.isNull(i)); - String expectedName = "District " + i; - String actualName = new String(resultNameVector.get(i), StandardCharsets.UTF_8); - assertEquals(expectedName, actualName); + + try (LanceScanner scanner = datasetWithAddress.newScan()) { + try (ArrowReader resultReader = scanner.scanBatches()) { + assertTrue(resultReader.loadNextBatch()); + VectorSchemaRoot batch = resultReader.getVectorSchemaRoot(); + assertEquals(rowCount, datasetWithAddress.countRows()); + assertEquals(rowCount, batch.getRowCount()); + + // verify all address values not null + VarCharVector resultNameVector = (VarCharVector) batch.getVector("address"); + for (int i = 0; i < rowCount; i++) { + Assertions.assertFalse(resultNameVector.isNull(i)); + String expectedName = "District " + i; + String actualName = + new String(resultNameVector.get(i), StandardCharsets.UTF_8); + assertEquals(expectedName, actualName); + } + } } } } - } finally { - replaceTxn.release(); } } - } finally { - appendTxn.release(); } } } diff --git a/java/src/test/java/org/lance/operation/DeleteTest.java b/java/src/test/java/org/lance/operation/DeleteTest.java index 1bc1cbb9187..4afa9ca976d 100644 --- a/java/src/test/java/org/lance/operation/DeleteTest.java +++ b/java/src/test/java/org/lance/operation/DeleteTest.java @@ -43,17 +43,16 @@ void testDelete(@TempDir Path tempDir) { int rowCount = 20; FragmentMetadata fragmentMeta0 = testDataset.createNewFragment(rowCount); FragmentMetadata fragmentMeta1 = testDataset.createNewFragment(rowCount); - Transaction appendTxn = + try (Transaction appendTxn = new Transaction.Builder() .readVersion(dataset.version()) .operation( Append.builder().fragments(Arrays.asList(fragmentMeta0, fragmentMeta1)).build()) - .build(); - try (Dataset dataset = new CommitBuilder(this.dataset).execute(appendTxn)) { - assertEquals(2, dataset.version()); - assertEquals(2, dataset.latestVersion()); - } finally { - appendTxn.release(); + .build()) { + try (Dataset dataset = new CommitBuilder(this.dataset).execute(appendTxn)) { + assertEquals(2, dataset.version()); + assertEquals(2, dataset.latestVersion()); + } } dataset = Dataset.open(datasetPath, allocator); @@ -63,19 +62,15 @@ void testDelete(@TempDir Path tempDir) { .map(t -> Long.valueOf(t.getId())) .collect(Collectors.toList()); - Transaction deleteTxn = + try (Transaction deleteTxn = new Transaction.Builder() .readVersion(dataset.version()) .operation( Delete.builder().deletedFragmentIds(deletedFragmentIds).predicate("1=1").build()) - .build(); - try (Dataset dataset = new CommitBuilder(this.dataset).execute(deleteTxn)) { - Transaction txn = dataset.readTransaction().get(); - Delete execDelete = (Delete) txn.operation(); - assertEquals(deleteTxn.operation(), execDelete); - assertEquals(0, dataset.countRows()); - } finally { - deleteTxn.release(); + .build()) { + try (Dataset dataset = new CommitBuilder(this.dataset).execute(deleteTxn)) { + assertEquals(0, dataset.countRows()); + } } } } diff --git a/java/src/test/java/org/lance/operation/MergeTest.java b/java/src/test/java/org/lance/operation/MergeTest.java index 3c8f3c14456..243ea352450 100644 --- a/java/src/test/java/org/lance/operation/MergeTest.java +++ b/java/src/test/java/org/lance/operation/MergeTest.java @@ -91,7 +91,7 @@ void testMergeNewColumn(@TempDir Path tempDir) throws Exception { fragmentMeta.getDeletionFile(), fragmentMeta.getRowIdMeta()); - Transaction mergeTxn = + try (Transaction mergeTxn = new Transaction.Builder() .readVersion(initialDataset.version()) .operation( @@ -99,33 +99,31 @@ void testMergeNewColumn(@TempDir Path tempDir) throws Exception { .fragments(Collections.singletonList(evolvedFragment)) .schema(evolvedSchema) .build()) - .build(); - - try (Dataset evolvedDataset = new CommitBuilder(initialDataset).execute(mergeTxn)) { - Assertions.assertEquals(3, evolvedDataset.version()); - Assertions.assertEquals(rowCount, evolvedDataset.countRows()); - Assertions.assertEquals(evolvedSchema, evolvedDataset.getSchema()); - Assertions.assertEquals(3, evolvedDataset.getSchema().getFields().size()); - // Verify merged data - try (LanceScanner scanner = evolvedDataset.newScan()) { - try (ArrowReader resultReader = scanner.scanBatches()) { - Assertions.assertTrue(resultReader.loadNextBatch()); - VectorSchemaRoot batch = resultReader.getVectorSchemaRoot(); - Assertions.assertEquals(rowCount, batch.getRowCount()); - Assertions.assertEquals(3, batch.getSchema().getFields().size()); - // Verify age column - IntVector ageResultVector = (IntVector) batch.getVector("age"); - for (int i = 0; i < rowCount; i++) { - Assertions.assertEquals(20 + i, ageResultVector.get(i)); - } - IntVector idResultVector = (IntVector) batch.getVector("id"); - for (int i = 0; i < rowCount; i++) { - Assertions.assertEquals(i, idResultVector.get(i)); + .build()) { + try (Dataset evolvedDataset = new CommitBuilder(initialDataset).execute(mergeTxn)) { + Assertions.assertEquals(3, evolvedDataset.version()); + Assertions.assertEquals(rowCount, evolvedDataset.countRows()); + Assertions.assertEquals(evolvedSchema, evolvedDataset.getSchema()); + Assertions.assertEquals(3, evolvedDataset.getSchema().getFields().size()); + // Verify merged data + try (LanceScanner scanner = evolvedDataset.newScan()) { + try (ArrowReader resultReader = scanner.scanBatches()) { + Assertions.assertTrue(resultReader.loadNextBatch()); + VectorSchemaRoot batch = resultReader.getVectorSchemaRoot(); + Assertions.assertEquals(rowCount, batch.getRowCount()); + Assertions.assertEquals(3, batch.getSchema().getFields().size()); + // Verify age column + IntVector ageResultVector = (IntVector) batch.getVector("age"); + for (int i = 0; i < rowCount; i++) { + Assertions.assertEquals(20 + i, ageResultVector.get(i)); + } + IntVector idResultVector = (IntVector) batch.getVector("id"); + for (int i = 0; i < rowCount; i++) { + Assertions.assertEquals(i, idResultVector.get(i)); + } } } } - } finally { - mergeTxn.release(); } } } @@ -172,7 +170,7 @@ void testReplaceAsDiffColumns(@TempDir Path tempDir) throws Exception { fragmentMeta.getDeletionFile(), fragmentMeta.getRowIdMeta()); - Transaction mergeTxn = + try (Transaction mergeTxn = new Transaction.Builder() .readVersion(initialDataset.version()) .operation( @@ -180,33 +178,31 @@ void testReplaceAsDiffColumns(@TempDir Path tempDir) throws Exception { .fragments(Collections.singletonList(evolvedFragment)) .schema(evolvedSchema) .build()) - .build(); - - try (Dataset evolvedDataset = new CommitBuilder(initialDataset).execute(mergeTxn)) { - Assertions.assertEquals(3, evolvedDataset.version()); - Assertions.assertEquals(rowCount, evolvedDataset.countRows()); - Assertions.assertEquals(evolvedSchema, evolvedDataset.getSchema()); - Assertions.assertEquals(2, evolvedDataset.getSchema().getFields().size()); - // Verify merged data - try (LanceScanner scanner = evolvedDataset.newScan()) { - try (ArrowReader resultReader = scanner.scanBatches()) { - Assertions.assertTrue(resultReader.loadNextBatch()); - VectorSchemaRoot batch = resultReader.getVectorSchemaRoot(); - Assertions.assertEquals(rowCount, batch.getRowCount()); - Assertions.assertEquals(2, batch.getSchema().getFields().size()); - // Verify age column - IntVector ageResultVector = (IntVector) batch.getVector("age"); - for (int i = 0; i < rowCount; i++) { - Assertions.assertEquals(20 + i, ageResultVector.get(i)); - } - IntVector idResultVector = (IntVector) batch.getVector("id"); - for (int i = 0; i < rowCount; i++) { - Assertions.assertEquals(i, idResultVector.get(i)); + .build()) { + try (Dataset evolvedDataset = new CommitBuilder(initialDataset).execute(mergeTxn)) { + Assertions.assertEquals(3, evolvedDataset.version()); + Assertions.assertEquals(rowCount, evolvedDataset.countRows()); + Assertions.assertEquals(evolvedSchema, evolvedDataset.getSchema()); + Assertions.assertEquals(2, evolvedDataset.getSchema().getFields().size()); + // Verify merged data + try (LanceScanner scanner = evolvedDataset.newScan()) { + try (ArrowReader resultReader = scanner.scanBatches()) { + Assertions.assertTrue(resultReader.loadNextBatch()); + VectorSchemaRoot batch = resultReader.getVectorSchemaRoot(); + Assertions.assertEquals(rowCount, batch.getRowCount()); + Assertions.assertEquals(2, batch.getSchema().getFields().size()); + // Verify age column + IntVector ageResultVector = (IntVector) batch.getVector("age"); + for (int i = 0; i < rowCount; i++) { + Assertions.assertEquals(20 + i, ageResultVector.get(i)); + } + IntVector idResultVector = (IntVector) batch.getVector("id"); + for (int i = 0; i < rowCount; i++) { + Assertions.assertEquals(i, idResultVector.get(i)); + } } } } - } finally { - mergeTxn.release(); } } } @@ -259,7 +255,7 @@ void testMergeExistingColumn(@TempDir Path tempDir) throws Exception { fragmentMeta.getDeletionFile(), fragmentMeta.getRowIdMeta()); - Transaction mergeTxn = + try (Transaction mergeTxn = new Transaction.Builder() .readVersion(initialDataset.version()) .operation( @@ -267,28 +263,26 @@ void testMergeExistingColumn(@TempDir Path tempDir) throws Exception { .fragments(Collections.singletonList(evolvedFragment)) .schema(testDataset.getSchema()) .build()) - .build(); - - try (Dataset mergedDataset = new CommitBuilder(initialDataset).execute(mergeTxn)) { - Assertions.assertEquals(3, mergedDataset.version()); - Assertions.assertEquals(rowCount, mergedDataset.countRows()); + .build()) { + try (Dataset mergedDataset = new CommitBuilder(initialDataset).execute(mergeTxn)) { + Assertions.assertEquals(3, mergedDataset.version()); + Assertions.assertEquals(rowCount, mergedDataset.countRows()); - // Verify updated data - try (LanceScanner scanner = mergedDataset.newScan()) { - try (ArrowReader resultReader = scanner.scanBatches()) { - Assertions.assertTrue(resultReader.loadNextBatch()); - VectorSchemaRoot batch = resultReader.getVectorSchemaRoot(); + // Verify updated data + try (LanceScanner scanner = mergedDataset.newScan()) { + try (ArrowReader resultReader = scanner.scanBatches()) { + Assertions.assertTrue(resultReader.loadNextBatch()); + VectorSchemaRoot batch = resultReader.getVectorSchemaRoot(); - VarCharVector nameResultVector = (VarCharVector) batch.getVector("name"); - for (int i = 0; i < rowCount; i++) { - String expectedName = "UpdatedName_" + i; - String actualName = new String(nameResultVector.get(i), StandardCharsets.UTF_8); - Assertions.assertEquals(expectedName, actualName); + VarCharVector nameResultVector = (VarCharVector) batch.getVector("name"); + for (int i = 0; i < rowCount; i++) { + String expectedName = "UpdatedName_" + i; + String actualName = new String(nameResultVector.get(i), StandardCharsets.UTF_8); + Assertions.assertEquals(expectedName, actualName); + } } } } - } finally { - mergeTxn.release(); } } } diff --git a/java/src/test/java/org/lance/operation/OperationTestBase.java b/java/src/test/java/org/lance/operation/OperationTestBase.java index bd1c1cdb81a..5f2c2a46d99 100644 --- a/java/src/test/java/org/lance/operation/OperationTestBase.java +++ b/java/src/test/java/org/lance/operation/OperationTestBase.java @@ -54,15 +54,12 @@ protected Dataset createAndAppendRows(TestUtils.SimpleTestDataset suite, int row dataset = suite.createEmptyDataset(); FragmentMetadata fragmentMeta = suite.createNewFragment(rowCount); - Transaction txn = + try (Transaction txn = new Transaction.Builder() .readVersion(dataset.version()) .operation(Append.builder().fragments(Collections.singletonList(fragmentMeta)).build()) - .build(); - try { + .build()) { return new CommitBuilder(dataset).execute(txn); - } finally { - txn.release(); } } diff --git a/java/src/test/java/org/lance/operation/OverwriteTest.java b/java/src/test/java/org/lance/operation/OverwriteTest.java index e29ccdb75e1..3b7edf7f9c6 100644 --- a/java/src/test/java/org/lance/operation/OverwriteTest.java +++ b/java/src/test/java/org/lance/operation/OverwriteTest.java @@ -44,7 +44,7 @@ void testOverwrite(@TempDir Path tempDir) throws Exception { // Commit fragment int rowCount = 20; FragmentMetadata fragmentMeta = testDataset.createNewFragment(rowCount); - Transaction txn = + try (Transaction txn = new Transaction.Builder() .readVersion(dataset.version()) .operation( @@ -52,25 +52,24 @@ void testOverwrite(@TempDir Path tempDir) throws Exception { .fragments(Collections.singletonList(fragmentMeta)) .schema(testDataset.getSchema()) .build()) - .build(); - try (Dataset dataset = new CommitBuilder(this.dataset).execute(txn)) { - assertEquals(2, dataset.version()); - assertEquals(2, dataset.latestVersion()); - assertEquals(rowCount, dataset.countRows()); - Fragment fragment = dataset.getFragments().get(0); + .build()) { + try (Dataset dataset = new CommitBuilder(this.dataset).execute(txn)) { + assertEquals(2, dataset.version()); + assertEquals(2, dataset.latestVersion()); + assertEquals(rowCount, dataset.countRows()); + Fragment fragment = dataset.getFragments().get(0); - try (LanceScanner scanner = fragment.newScan()) { - Schema schemaRes = scanner.schema(); - assertEquals(testDataset.getSchema(), schemaRes); + try (LanceScanner scanner = fragment.newScan()) { + Schema schemaRes = scanner.schema(); + assertEquals(testDataset.getSchema(), schemaRes); + } } - } finally { - txn.release(); } // Commit fragment again rowCount = 40; fragmentMeta = testDataset.createNewFragment(rowCount); - Transaction txn2 = + try (Transaction txn2 = new Transaction.Builder() .readVersion(dataset.version()) .operation( @@ -80,22 +79,20 @@ void testOverwrite(@TempDir Path tempDir) throws Exception { .configUpsertValues(Collections.singletonMap("config_key", "config_value")) .build()) .transactionProperties(Collections.singletonMap("key", "value")) - .build(); - assertEquals("value", txn2.transactionProperties().map(m -> m.get("key")).orElse(null)); - try (Dataset dataset = new CommitBuilder(this.dataset).execute(txn2)) { - assertEquals(3, dataset.version()); - assertEquals(3, dataset.latestVersion()); - assertEquals(rowCount, dataset.countRows()); - assertEquals("config_value", dataset.getConfig().get("config_key")); - Fragment fragment = dataset.getFragments().get(0); + .build()) { + assertEquals("value", txn2.transactionProperties().map(m -> m.get("key")).orElse(null)); + try (Dataset dataset = new CommitBuilder(this.dataset).execute(txn2)) { + assertEquals(3, dataset.version()); + assertEquals(3, dataset.latestVersion()); + assertEquals(rowCount, dataset.countRows()); + assertEquals("config_value", dataset.getConfig().get("config_key")); + Fragment fragment = dataset.getFragments().get(0); - try (LanceScanner scanner = fragment.newScan()) { - Schema schemaRes = scanner.schema(); - assertEquals(testDataset.getSchema(), schemaRes); + try (LanceScanner scanner = fragment.newScan()) { + Schema schemaRes = scanner.schema(); + assertEquals(testDataset.getSchema(), schemaRes); + } } - assertEquals(txn2, dataset.readTransaction().orElse(null)); - } finally { - txn2.release(); } } } diff --git a/java/src/test/java/org/lance/operation/ProjectTest.java b/java/src/test/java/org/lance/operation/ProjectTest.java index a5b54241965..fa0c92cc15f 100644 --- a/java/src/test/java/org/lance/operation/ProjectTest.java +++ b/java/src/test/java/org/lance/operation/ProjectTest.java @@ -44,34 +44,28 @@ void testProjection(@TempDir Path tempDir) { assertEquals(testDataset.getSchema(), dataset.getSchema()); List fieldList = new ArrayList<>(testDataset.getSchema().getFields()); Collections.reverse(fieldList); - Transaction txn1 = + try (Transaction txn1 = new Transaction.Builder() .readVersion(dataset.version()) .operation(Project.builder().schema(new Schema(fieldList)).build()) - .build(); - try (Dataset committedDataset = new CommitBuilder(dataset).execute(txn1)) { - assertEquals(1, txn1.readVersion()); - assertEquals(1, dataset.version()); - assertEquals(2, committedDataset.version()); - assertEquals(new Schema(fieldList), committedDataset.getSchema()); - fieldList.remove(1); - Transaction txn2 = - new Transaction.Builder() - .readVersion(committedDataset.version()) - .operation(Project.builder().schema(new Schema(fieldList)).build()) - .build(); - try (Dataset committedDataset2 = new CommitBuilder(committedDataset).execute(txn2)) { - assertEquals(2, txn2.readVersion()); + .build()) { + try (Dataset committedDataset = new CommitBuilder(dataset).execute(txn1)) { + assertEquals(1, dataset.version()); assertEquals(2, committedDataset.version()); - assertEquals(3, committedDataset2.version()); - assertEquals(new Schema(fieldList), committedDataset2.getSchema()); - assertEquals(txn1, committedDataset.readTransaction().orElse(null)); - assertEquals(txn2, committedDataset2.readTransaction().orElse(null)); - } finally { - txn2.release(); + assertEquals(new Schema(fieldList), committedDataset.getSchema()); + fieldList.remove(1); + try (Transaction txn2 = + new Transaction.Builder() + .readVersion(committedDataset.version()) + .operation(Project.builder().schema(new Schema(fieldList)).build()) + .build()) { + try (Dataset committedDataset2 = new CommitBuilder(committedDataset).execute(txn2)) { + assertEquals(2, committedDataset.version()); + assertEquals(3, committedDataset2.version()); + assertEquals(new Schema(fieldList), committedDataset2.getSchema()); + } + } } - } finally { - txn1.release(); } } } diff --git a/java/src/test/java/org/lance/operation/ReserveFragmentsTest.java b/java/src/test/java/org/lance/operation/ReserveFragmentsTest.java index 258aca84164..9299fa7b1bd 100644 --- a/java/src/test/java/org/lance/operation/ReserveFragmentsTest.java +++ b/java/src/test/java/org/lance/operation/ReserveFragmentsTest.java @@ -43,63 +43,57 @@ void testReserveFragments(@TempDir Path tempDir) throws Exception { // Create an initial fragment to establish a baseline fragment ID FragmentMetadata initialFragmentMeta = testDataset.createNewFragment(10); - Transaction appendTxn = + try (Transaction appendTxn = new Transaction.Builder() .readVersion(dataset.version()) .operation( Append.builder() .fragments(Collections.singletonList(initialFragmentMeta)) .build()) - .build(); - try (Dataset datasetWithFragment = new CommitBuilder(dataset).execute(appendTxn)) { - // Reserve fragment IDs - int numFragmentsToReserve = 5; - Transaction reserveTxn = - new Transaction.Builder() - .readVersion(datasetWithFragment.version()) - .operation( - new ReserveFragments.Builder().numFragments(numFragmentsToReserve).build()) - .build(); - try (Dataset datasetWithReservedFragments = - new CommitBuilder(datasetWithFragment).execute(reserveTxn)) { - // Create a new fragment and verify its ID reflects the reservation - FragmentMetadata newFragmentMeta = testDataset.createNewFragment(10); - Transaction appendTxn2 = + .build()) { + try (Dataset datasetWithFragment = new CommitBuilder(dataset).execute(appendTxn)) { + // Reserve fragment IDs + int numFragmentsToReserve = 5; + try (Transaction reserveTxn = new Transaction.Builder() - .readVersion(datasetWithReservedFragments.version()) + .readVersion(datasetWithFragment.version()) .operation( - Append.builder() - .fragments(Collections.singletonList(newFragmentMeta)) - .build()) - .build(); - try (Dataset finalDataset = - new CommitBuilder(datasetWithReservedFragments).execute(appendTxn2)) { - // Verify the fragment IDs were properly reserved - // The new fragment should have an ID that's at least numFragmentsToReserve higher - // than it would have been without the reservation - List fragments = finalDataset.getFragments(); - assertEquals(2, fragments.size()); + new ReserveFragments.Builder().numFragments(numFragmentsToReserve).build()) + .build()) { + try (Dataset datasetWithReservedFragments = + new CommitBuilder(datasetWithFragment).execute(reserveTxn)) { + // Create a new fragment and verify its ID reflects the reservation + FragmentMetadata newFragmentMeta = testDataset.createNewFragment(10); + try (Transaction appendTxn2 = + new Transaction.Builder() + .readVersion(datasetWithReservedFragments.version()) + .operation( + Append.builder() + .fragments(Collections.singletonList(newFragmentMeta)) + .build()) + .build()) { + try (Dataset finalDataset = + new CommitBuilder(datasetWithReservedFragments).execute(appendTxn2)) { + // Verify the fragment IDs were properly reserved + // The new fragment should have an ID that's at least numFragmentsToReserve + // higher than it would have been without the reservation + List fragments = finalDataset.getFragments(); + assertEquals(2, fragments.size()); - // The first fragment ID is typically 0, and the second would normally be 1 - // But after reserving 5 fragments, the second fragment ID should be at least 6 - Fragment firstFragment = fragments.get(0); - Fragment secondFragment = fragments.get(1); + // The first fragment ID is typically 0, and the second would normally be 1 + // But after reserving 5 fragments, the second fragment ID should be at least 6 + Fragment firstFragment = fragments.get(0); + Fragment secondFragment = fragments.get(1); - // Check that the second fragment has a significantly higher ID than the first - // This is an indirect way to verify that fragment IDs were reserved - Assertions.assertNotEquals( - firstFragment.metadata().getId() + 1, secondFragment.getId()); - - // Verify the transaction is recorded - assertEquals(reserveTxn, datasetWithReservedFragments.readTransaction().orElse(null)); - } finally { - appendTxn2.release(); + // Check that the second fragment has a significantly higher ID than the first + // This is an indirect way to verify that fragment IDs were reserved + Assertions.assertNotEquals( + firstFragment.metadata().getId() + 1, secondFragment.getId()); + } + } + } } - } finally { - reserveTxn.release(); } - } finally { - appendTxn.release(); } } } diff --git a/java/src/test/java/org/lance/operation/RestoreTest.java b/java/src/test/java/org/lance/operation/RestoreTest.java index 2e27db6626a..98e9d779ade 100644 --- a/java/src/test/java/org/lance/operation/RestoreTest.java +++ b/java/src/test/java/org/lance/operation/RestoreTest.java @@ -44,35 +44,32 @@ void testRestore(@TempDir Path tempDir) { // Append data to create a new version int rowCount = 20; FragmentMetadata fragmentMeta = testDataset.createNewFragment(rowCount); - Transaction appendTxn = + try (Transaction appendTxn = new Transaction.Builder() .readVersion(dataset.version()) .operation( Append.builder().fragments(Collections.singletonList(fragmentMeta)).build()) - .build(); - try (Dataset modifiedDataset = new CommitBuilder(dataset).execute(appendTxn)) { - // Verify the dataset was modified - long newVersion = modifiedDataset.version(); - assertEquals(initialVersion + 1, newVersion); - assertEquals(rowCount, modifiedDataset.countRows()); + .build()) { + try (Dataset modifiedDataset = new CommitBuilder(dataset).execute(appendTxn)) { + // Verify the dataset was modified + long newVersion = modifiedDataset.version(); + assertEquals(initialVersion + 1, newVersion); + assertEquals(rowCount, modifiedDataset.countRows()); - // Restore to the initial version - Transaction restoreTxn = - new Transaction.Builder() - .readVersion(modifiedDataset.version()) - .operation(new Restore.Builder().version(initialVersion).build()) - .build(); - try (Dataset restoredDataset = new CommitBuilder(modifiedDataset).execute(restoreTxn)) { - // Verify the dataset was restored to the initial version, but the version increases - assertEquals(initialVersion + 2, restoredDataset.version()); - // Initial dataset had 0 rows - assertEquals(0, restoredDataset.countRows()); - assertEquals(restoreTxn, restoredDataset.readTransaction().orElse(null)); - } finally { - restoreTxn.release(); + // Restore to the initial version + try (Transaction restoreTxn = + new Transaction.Builder() + .readVersion(modifiedDataset.version()) + .operation(new Restore.Builder().version(initialVersion).build()) + .build()) { + try (Dataset restoredDataset = new CommitBuilder(modifiedDataset).execute(restoreTxn)) { + // Verify the dataset was restored to the initial version, but the version increases + assertEquals(initialVersion + 2, restoredDataset.version()); + // Initial dataset had 0 rows + assertEquals(0, restoredDataset.countRows()); + } + } } - } finally { - appendTxn.release(); } } } diff --git a/java/src/test/java/org/lance/operation/RewriteTest.java b/java/src/test/java/org/lance/operation/RewriteTest.java index 3f0f7f13b84..f2081ab8895 100644 --- a/java/src/test/java/org/lance/operation/RewriteTest.java +++ b/java/src/test/java/org/lance/operation/RewriteTest.java @@ -45,52 +45,46 @@ void testRewrite(@TempDir Path tempDir) { FragmentMetadata fragmentMeta1 = testDataset.createNewFragment(rowCount); FragmentMetadata fragmentMeta2 = testDataset.createNewFragment(rowCount); - Transaction appendTxn = + try (Transaction appendTxn = new Transaction.Builder() .readVersion(dataset.version()) .operation( Append.builder().fragments(Arrays.asList(fragmentMeta1, fragmentMeta2)).build()) - .build(); - - try (Dataset datasetWithData = new CommitBuilder(dataset).execute(appendTxn)) { - assertEquals(2, datasetWithData.version()); - assertEquals(rowCount * 2, datasetWithData.countRows()); - - // Now create a rewrite operation - List groups = new ArrayList<>(); - - // Create a rewrite group with old fragments and new fragments - List oldFragments = new ArrayList<>(); - oldFragments.add(fragmentMeta1); - - List newFragments = new ArrayList<>(); - FragmentMetadata newFragmentMeta = testDataset.createNewFragment(rowCount); - newFragments.add(newFragmentMeta); - - RewriteGroup group = - RewriteGroup.builder().oldFragments(oldFragments).newFragments(newFragments).build(); - - groups.add(group); - - // Create and commit the rewrite transaction - Transaction rewriteTxn = - new Transaction.Builder() - .readVersion(datasetWithData.version()) - .operation(Rewrite.builder().groups(groups).build()) - .build(); - - try (Dataset rewrittenDataset = new CommitBuilder(datasetWithData).execute(rewriteTxn)) { - assertEquals(3, rewrittenDataset.version()); - // The row count should remain the same since we're just rewriting - assertEquals(rowCount * 2, rewrittenDataset.countRows()); - - // Verify that the transaction was recorded - assertEquals(rewriteTxn, rewrittenDataset.readTransaction().orElse(null)); - } finally { - rewriteTxn.release(); + .build()) { + try (Dataset datasetWithData = new CommitBuilder(dataset).execute(appendTxn)) { + assertEquals(2, datasetWithData.version()); + assertEquals(rowCount * 2, datasetWithData.countRows()); + + // Now create a rewrite operation + List groups = new ArrayList<>(); + + // Create a rewrite group with old fragments and new fragments + List oldFragments = new ArrayList<>(); + oldFragments.add(fragmentMeta1); + + List newFragments = new ArrayList<>(); + FragmentMetadata newFragmentMeta = testDataset.createNewFragment(rowCount); + newFragments.add(newFragmentMeta); + + RewriteGroup group = + RewriteGroup.builder().oldFragments(oldFragments).newFragments(newFragments).build(); + + groups.add(group); + + // Create and commit the rewrite transaction + try (Transaction rewriteTxn = + new Transaction.Builder() + .readVersion(datasetWithData.version()) + .operation(Rewrite.builder().groups(groups).build()) + .build()) { + try (Dataset rewrittenDataset = + new CommitBuilder(datasetWithData).execute(rewriteTxn)) { + assertEquals(3, rewrittenDataset.version()); + // The row count should remain the same since we're just rewriting + assertEquals(rowCount * 2, rewrittenDataset.countRows()); + } + } } - } finally { - appendTxn.release(); } } } diff --git a/java/src/test/java/org/lance/operation/TruncateTest.java b/java/src/test/java/org/lance/operation/TruncateTest.java index 80ac8dcb76b..e316c969362 100644 --- a/java/src/test/java/org/lance/operation/TruncateTest.java +++ b/java/src/test/java/org/lance/operation/TruncateTest.java @@ -41,15 +41,15 @@ void testTruncateTable(@TempDir Path tempDir) throws Exception { // Append some data int rowCount = 20; FragmentMetadata fragmentMeta = testDataset.createNewFragment(rowCount); - Transaction txn = - new Transaction.Builder() - .readVersion(dataset.version()) - .operation( - Append.builder() - .fragments(java.util.Collections.singletonList(fragmentMeta)) - .build()) - .build(); - try (Dataset ds1 = new CommitBuilder(dataset).execute(txn)) { + try (Transaction txn = + new Transaction.Builder() + .readVersion(dataset.version()) + .operation( + Append.builder() + .fragments(java.util.Collections.singletonList(fragmentMeta)) + .build()) + .build(); + Dataset ds1 = new CommitBuilder(dataset).execute(txn)) { assertEquals(rowCount, ds1.countRows()); // Truncate to empty while preserving schema @@ -60,8 +60,6 @@ void testTruncateTable(@TempDir Path tempDir) throws Exception { Schema schemaRes = scanner.schema(); assertEquals(testDataset.getSchema(), schemaRes); } - } finally { - txn.release(); } } } diff --git a/java/src/test/java/org/lance/operation/UpdateConfigTest.java b/java/src/test/java/org/lance/operation/UpdateConfigTest.java index 653fae85c4d..aba003846c8 100644 --- a/java/src/test/java/org/lance/operation/UpdateConfigTest.java +++ b/java/src/test/java/org/lance/operation/UpdateConfigTest.java @@ -46,101 +46,104 @@ void testUpdateConfig(@TempDir Path tempDir) { UpdateMap configUpdates = UpdateMap.builder().updates(configValues).replace(false).build(); - Transaction txn = + try (Transaction txn = new Transaction.Builder() .readVersion(dataset.version()) .operation(UpdateConfig.builder().configUpdates(configUpdates).build()) - .build(); - try (Dataset updatedDataset = new CommitBuilder(dataset).execute(txn)) { - assertEquals(2, updatedDataset.version()); - assertEquals("value1", updatedDataset.getConfig().get("key1")); - assertEquals("value2", updatedDataset.getConfig().get("key2")); - - // Test 2: Delete configuration key using configUpdates with null value - Map deleteUpdates = new HashMap<>(); - deleteUpdates.put("key1", null); // null value means delete - - UpdateMap configDeleteUpdates = - UpdateMap.builder().updates(deleteUpdates).replace(false).build(); - - Transaction txn2 = - new Transaction.Builder() - .readVersion(updatedDataset.version()) - .operation(UpdateConfig.builder().configUpdates(configDeleteUpdates).build()) - .build(); - try (Dataset updatedDataset2 = new CommitBuilder(updatedDataset).execute(txn2)) { - assertEquals(3, updatedDataset2.version()); - assertNull(updatedDataset2.getConfig().get("key1")); - assertEquals("value2", updatedDataset2.getConfig().get("key2")); - - // Test 3: Update schema metadata using schemaMetadataUpdates - Map schemaMetadataMap = new HashMap<>(); - schemaMetadataMap.put("schema_key1", "schema_value1"); - schemaMetadataMap.put("schema_key2", "schema_value2"); - - UpdateMap schemaMetadataUpdates = - UpdateMap.builder().updates(schemaMetadataMap).replace(false).build(); - - Transaction txn3 = + .build()) { + try (Dataset updatedDataset = new CommitBuilder(dataset).execute(txn)) { + assertEquals(2, updatedDataset.version()); + assertEquals("value1", updatedDataset.getConfig().get("key1")); + assertEquals("value2", updatedDataset.getConfig().get("key2")); + + // Test 2: Delete configuration key using configUpdates with null value + Map deleteUpdates = new HashMap<>(); + deleteUpdates.put("key1", null); // null value means delete + + UpdateMap configDeleteUpdates = + UpdateMap.builder().updates(deleteUpdates).replace(false).build(); + + try (Transaction txn2 = new Transaction.Builder() - .readVersion(updatedDataset2.version()) - .operation( - UpdateConfig.builder().schemaMetadataUpdates(schemaMetadataUpdates).build()) - .build(); - try (Dataset updatedDataset3 = new CommitBuilder(updatedDataset2).execute(txn3)) { - assertEquals(4, updatedDataset3.version()); - assertEquals( - "schema_value1", updatedDataset3.getLanceSchema().metadata().get("schema_key1")); - assertEquals( - "schema_value2", updatedDataset3.getLanceSchema().metadata().get("schema_key2")); - - // Test 4: Update field metadata using fieldMetadataUpdates - Map fieldMetadataUpdates = new HashMap<>(); - - Map field0Updates = new HashMap<>(); - field0Updates.put("field0_key1", "field0_value1"); - UpdateMap field0UpdateMap = - UpdateMap.builder().updates(field0Updates).replace(false).build(); - - Map field1Updates = new HashMap<>(); - field1Updates.put("field1_key1", "field1_value1"); - field1Updates.put("field1_key2", "field1_value2"); - UpdateMap field1UpdateMap = - UpdateMap.builder().updates(field1Updates).replace(false).build(); - - fieldMetadataUpdates.put(0, field0UpdateMap); - fieldMetadataUpdates.put(1, field1UpdateMap); - - Transaction txn4 = - new Transaction.Builder() - .readVersion(updatedDataset3.version()) - .operation( - UpdateConfig.builder().fieldMetadataUpdates(fieldMetadataUpdates).build()) - .build(); - try (Dataset updatedDataset4 = new CommitBuilder(updatedDataset3).execute(txn4)) { - assertEquals(5, updatedDataset4.version()); - - // Verify field metadata for field 0 - Map fieldMetadata0 = - updatedDataset4.getLanceSchema().fields().get(0).getMetadata(); - assertEquals("field0_value1", fieldMetadata0.get("field0_key1")); - - // Verify field metadata for field 1 - Map field1Result = - updatedDataset4.getLanceSchema().fields().get(1).getMetadata(); - assertEquals("field1_value1", field1Result.get("field1_key1")); - assertEquals("field1_value2", field1Result.get("field1_key2")); - } finally { - txn4.release(); + .readVersion(updatedDataset.version()) + .operation(UpdateConfig.builder().configUpdates(configDeleteUpdates).build()) + .build()) { + try (Dataset updatedDataset2 = new CommitBuilder(updatedDataset).execute(txn2)) { + assertEquals(3, updatedDataset2.version()); + assertNull(updatedDataset2.getConfig().get("key1")); + assertEquals("value2", updatedDataset2.getConfig().get("key2")); + + // Test 3: Update schema metadata using schemaMetadataUpdates + Map schemaMetadataMap = new HashMap<>(); + schemaMetadataMap.put("schema_key1", "schema_value1"); + schemaMetadataMap.put("schema_key2", "schema_value2"); + + UpdateMap schemaMetadataUpdates = + UpdateMap.builder().updates(schemaMetadataMap).replace(false).build(); + + try (Transaction txn3 = + new Transaction.Builder() + .readVersion(updatedDataset2.version()) + .operation( + UpdateConfig.builder() + .schemaMetadataUpdates(schemaMetadataUpdates) + .build()) + .build()) { + try (Dataset updatedDataset3 = new CommitBuilder(updatedDataset2).execute(txn3)) { + assertEquals(4, updatedDataset3.version()); + assertEquals( + "schema_value1", + updatedDataset3.getLanceSchema().metadata().get("schema_key1")); + assertEquals( + "schema_value2", + updatedDataset3.getLanceSchema().metadata().get("schema_key2")); + + // Test 4: Update field metadata using fieldMetadataUpdates + Map fieldMetadataUpdates = new HashMap<>(); + + Map field0Updates = new HashMap<>(); + field0Updates.put("field0_key1", "field0_value1"); + UpdateMap field0UpdateMap = + UpdateMap.builder().updates(field0Updates).replace(false).build(); + + Map field1Updates = new HashMap<>(); + field1Updates.put("field1_key1", "field1_value1"); + field1Updates.put("field1_key2", "field1_value2"); + UpdateMap field1UpdateMap = + UpdateMap.builder().updates(field1Updates).replace(false).build(); + + fieldMetadataUpdates.put(0, field0UpdateMap); + fieldMetadataUpdates.put(1, field1UpdateMap); + + try (Transaction txn4 = + new Transaction.Builder() + .readVersion(updatedDataset3.version()) + .operation( + UpdateConfig.builder() + .fieldMetadataUpdates(fieldMetadataUpdates) + .build()) + .build()) { + try (Dataset updatedDataset4 = + new CommitBuilder(updatedDataset3).execute(txn4)) { + assertEquals(5, updatedDataset4.version()); + + // Verify field metadata for field 0 + Map fieldMetadata0 = + updatedDataset4.getLanceSchema().fields().get(0).getMetadata(); + assertEquals("field0_value1", fieldMetadata0.get("field0_key1")); + + // Verify field metadata for field 1 + Map field1Result = + updatedDataset4.getLanceSchema().fields().get(1).getMetadata(); + assertEquals("field1_value1", field1Result.get("field1_key1")); + assertEquals("field1_value2", field1Result.get("field1_key2")); + } + } + } + } } - } finally { - txn3.release(); } - } finally { - txn2.release(); } - } finally { - txn.release(); } } } diff --git a/java/src/test/java/org/lance/operation/UpdateTest.java b/java/src/test/java/org/lance/operation/UpdateTest.java index 4a3ad2a3b3a..bb39a5f4d12 100644 --- a/java/src/test/java/org/lance/operation/UpdateTest.java +++ b/java/src/test/java/org/lance/operation/UpdateTest.java @@ -55,40 +55,35 @@ void testUpdate(@TempDir Path tempDir) throws Exception { // Commit fragment int rowCount = 20; FragmentMetadata fragmentMeta = testDataset.createNewFragment(rowCount); - Transaction appendTxn = + try (Transaction appendTxn = new Transaction.Builder() .readVersion(dataset.version()) .operation( Append.builder().fragments(Collections.singletonList(fragmentMeta)).build()) - .build(); - - try (Dataset dataset = new CommitBuilder(this.dataset).execute(appendTxn)) { - assertEquals(2, dataset.version()); - assertEquals(2, dataset.latestVersion()); - assertEquals(rowCount, dataset.countRows()); - assertThrows( - IllegalArgumentException.class, - () -> { - Transaction txn = - new Transaction.Builder() - .readVersion(dataset.version()) - .operation(Append.builder().fragments(new ArrayList<>()).build()) - .build(); - try { - new CommitBuilder(dataset).execute(txn).close(); - } finally { - txn.release(); - } - }); - } finally { - appendTxn.release(); + .build()) { + try (Dataset dataset = new CommitBuilder(this.dataset).execute(appendTxn)) { + assertEquals(2, dataset.version()); + assertEquals(2, dataset.latestVersion()); + assertEquals(rowCount, dataset.countRows()); + assertThrows( + IllegalArgumentException.class, + () -> { + try (Transaction txn = + new Transaction.Builder() + .readVersion(dataset.version()) + .operation(Append.builder().fragments(new ArrayList<>()).build()) + .build()) { + new CommitBuilder(dataset).execute(txn).close(); + } + }); + } } dataset = Dataset.open(datasetPath, allocator); // Update fragments rowCount = 40; FragmentMetadata newFragment = testDataset.createNewFragment(rowCount); - Transaction updateTxn = + try (Transaction updateTxn = new Transaction.Builder() .readVersion(dataset.version()) .operation( @@ -99,17 +94,12 @@ void testUpdate(@TempDir Path tempDir) throws Exception { .newFragments(Collections.singletonList(newFragment)) .updateMode(Optional.of(UpdateMode.RewriteRows)) .build()) - .build(); - - try (Dataset dataset = new CommitBuilder(this.dataset).execute(updateTxn)) { - assertEquals(3, dataset.version()); - assertEquals(3, dataset.latestVersion()); - assertEquals(rowCount, dataset.countRows()); - - Transaction txn = dataset.readTransaction().orElse(null); - assertEquals(updateTxn, txn); - } finally { - updateTxn.release(); + .build()) { + try (Dataset dataset = new CommitBuilder(this.dataset).execute(updateTxn)) { + assertEquals(3, dataset.version()); + assertEquals(3, dataset.latestVersion()); + assertEquals(rowCount, dataset.countRows()); + } } } } @@ -132,18 +122,17 @@ void testUpdateColumns(@TempDir Path tempDir) throws Exception { */ int rowCount = 6; FragmentMetadata fragmentMeta = testDataset.createNewFragment(rowCount); - Transaction appendTxn = + try (Transaction appendTxn = new Transaction.Builder() .readVersion(dataset.version()) .operation( Append.builder().fragments(Collections.singletonList(fragmentMeta)).build()) - .build(); - try (Dataset dataset = new CommitBuilder(this.dataset).execute(appendTxn)) { - assertEquals(2, dataset.version()); - assertEquals(2, dataset.latestVersion()); - assertEquals(rowCount, dataset.countRows()); - } finally { - appendTxn.release(); + .build()) { + try (Dataset dataset = new CommitBuilder(this.dataset).execute(appendTxn)) { + assertEquals(2, dataset.version()); + assertEquals(2, dataset.latestVersion()); + assertEquals(rowCount, dataset.countRows()); + } } dataset = Dataset.open(datasetPath, allocator); @@ -157,7 +146,7 @@ void testUpdateColumns(@TempDir Path tempDir) throws Exception { * 3: | null | null | */ FragmentUpdateResult updateResult = testDataset.updateColumn(targetFragment, updateRowCount); - Transaction updateTxn = + try (Transaction updateTxn = new Transaction.Builder() .readVersion(dataset.version()) .operation( @@ -166,53 +155,52 @@ void testUpdateColumns(@TempDir Path tempDir) throws Exception { Collections.singletonList(updateResult.getUpdatedFragment())) .fieldsModified(updateResult.getFieldsModified()) .build()) - .build(); - try (Dataset dataset = new CommitBuilder(this.dataset).execute(updateTxn)) { - assertEquals(3, dataset.version()); - assertEquals(3, dataset.latestVersion()); - Fragment fragment = dataset.getFragments().get(0); - try (LanceScanner scanner = fragment.newScan(rowCount)) { - List actualIds = new ArrayList<>(rowCount); - List actualNames = new ArrayList<>(rowCount); - List actualTimeStamps = new ArrayList<>(rowCount); - try (ArrowReader reader = scanner.scanBatches()) { - while (reader.loadNextBatch()) { - VectorSchemaRoot root = reader.getVectorSchemaRoot(); - IntVector idVector = (IntVector) root.getVector("id"); - for (int i = 0; i < idVector.getValueCount(); i++) { - actualIds.add(idVector.isNull(i) ? null : idVector.getObject(i)); - } - VarCharVector nameVector = (VarCharVector) root.getVector("name"); - for (int i = 0; i < nameVector.getValueCount(); i++) { - actualNames.add(nameVector.isNull(i) ? null : nameVector.getObject(i).toString()); - } - TimeStampSecTZVector timeStampVector = - (TimeStampSecTZVector) root.getVector("timeStamp"); - for (int i = 0; i < timeStampVector.getValueCount(); i++) { - actualTimeStamps.add( - timeStampVector.isNull(i) ? null : timeStampVector.getObject(i)); + .build()) { + try (Dataset dataset = new CommitBuilder(this.dataset).execute(updateTxn)) { + assertEquals(3, dataset.version()); + assertEquals(3, dataset.latestVersion()); + Fragment fragment = dataset.getFragments().get(0); + try (LanceScanner scanner = fragment.newScan(rowCount)) { + List actualIds = new ArrayList<>(rowCount); + List actualNames = new ArrayList<>(rowCount); + List actualTimeStamps = new ArrayList<>(rowCount); + try (ArrowReader reader = scanner.scanBatches()) { + while (reader.loadNextBatch()) { + VectorSchemaRoot root = reader.getVectorSchemaRoot(); + IntVector idVector = (IntVector) root.getVector("id"); + for (int i = 0; i < idVector.getValueCount(); i++) { + actualIds.add(idVector.isNull(i) ? null : idVector.getObject(i)); + } + VarCharVector nameVector = (VarCharVector) root.getVector("name"); + for (int i = 0; i < nameVector.getValueCount(); i++) { + actualNames.add(nameVector.isNull(i) ? null : nameVector.getObject(i).toString()); + } + TimeStampSecTZVector timeStampVector = + (TimeStampSecTZVector) root.getVector("timeStamp"); + for (int i = 0; i < timeStampVector.getValueCount(); i++) { + actualTimeStamps.add( + timeStampVector.isNull(i) ? null : timeStampVector.getObject(i)); + } } } + /* result dataset content + * _rowid | id | name | timeStamp | + * 0: | 100 | "Update 0" | 0 | + * 1: | null | null | null | + * 2: | 2 | "Update 2" | 2 | + * 3: | null | null | null | + * 4: | 4 | "Person 4" | 4 | + * 5: | null | null | null | + */ + List expectIds = Arrays.asList(100, null, 2, null, 4, null); + List expectNames = + Arrays.asList("Update 0", null, "Update 2", null, "Person 4", null); + List expectTimeStamps = Arrays.asList(0L, null, 2L, null, 4L, null); + assertEquals(expectIds, actualIds); + assertEquals(expectNames, actualNames); + assertEquals(expectTimeStamps, actualTimeStamps); } - /* result dataset content - * _rowid | id | name | timeStamp | - * 0: | 100 | "Update 0" | 0 | - * 1: | null | null | null | - * 2: | 2 | "Update 2" | 2 | - * 3: | null | null | null | - * 4: | 4 | "Person 4" | 4 | - * 5: | null | null | null | - */ - List expectIds = Arrays.asList(100, null, 2, null, 4, null); - List expectNames = - Arrays.asList("Update 0", null, "Update 2", null, "Person 4", null); - List expectTimeStamps = Arrays.asList(0L, null, 2L, null, 4L, null); - assertEquals(expectIds, actualIds); - assertEquals(expectNames, actualNames); - assertEquals(expectTimeStamps, actualTimeStamps); } - } finally { - updateTxn.release(); } } } From 0de372da436be5b0b3aca939dae987cba9ff505e Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Tue, 24 Feb 2026 15:31:07 -0600 Subject: [PATCH 6/7] updated all unit tests to use CommitBuilder Signed-off-by: Daniel Rammer --- .../main/java/org/lance/CommitBuilder.java | 5 +- .../src/test/java/org/lance/FragmentTest.java | 26 +++- .../org/lance/NamespaceIntegrationTest.java | 15 ++- .../org/lance/SourcedTransactionTest.java | 124 +++++++++++++++++ java/src/test/java/org/lance/TestUtils.java | 16 ++- .../test/java/org/lance/TransactionTest.java | 125 +++++++++++++----- .../java/org/lance/index/ScalarIndexTest.java | 67 +++++----- .../java/org/lance/index/VectorIndexTest.java | 48 ++++--- 8 files changed, 319 insertions(+), 107 deletions(-) create mode 100644 java/src/test/java/org/lance/SourcedTransactionTest.java diff --git a/java/src/main/java/org/lance/CommitBuilder.java b/java/src/main/java/org/lance/CommitBuilder.java index a7c7fd18bf7..2310ec6b8e9 100644 --- a/java/src/main/java/org/lance/CommitBuilder.java +++ b/java/src/main/java/org/lance/CommitBuilder.java @@ -14,6 +14,7 @@ package org.lance; import org.lance.io.StorageOptionsProvider; +import org.lance.namespace.LanceNamespace; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; @@ -65,7 +66,7 @@ public class CommitBuilder { private Map writeParams; private StorageOptionsProvider storageOptionsProvider; - private Object namespace; + private LanceNamespace namespace; private List tableId; private boolean enableV2ManifestPaths = true; private boolean detached = false; @@ -128,7 +129,7 @@ public CommitBuilder storageOptionsProvider(StorageOptionsProvider provider) { * @param namespace the LanceNamespace instance * @return this builder instance */ - public CommitBuilder namespace(Object namespace) { + public CommitBuilder namespace(LanceNamespace namespace) { this.namespace = namespace; return this; } diff --git a/java/src/test/java/org/lance/FragmentTest.java b/java/src/test/java/org/lance/FragmentTest.java index 078af0ac1de..61bfc439290 100644 --- a/java/src/test/java/org/lance/FragmentTest.java +++ b/java/src/test/java/org/lance/FragmentTest.java @@ -210,7 +210,11 @@ void testDeleteRows(@TempDir Path tempDir) throws IOException { Update update = Update.builder().updatedFragments(Collections.singletonList(updateFragment)).build(); - Dataset dataset3 = dataset2.newTransactionBuilder().operation(update).build().commit(); + Dataset dataset3; + try (Transaction txn = + new Transaction.Builder().readVersion(dataset2.version()).operation(update).build()) { + dataset3 = new CommitBuilder(dataset2).execute(txn); + } assertEquals(totalRows - deleteCount, dataset3.countRows()); @@ -226,7 +230,11 @@ void testDeleteRows(@TempDir Path tempDir) throws IOException { update = Update.builder().updatedFragments(Collections.singletonList(updateFragment)).build(); - Dataset dataset4 = dataset3.newTransactionBuilder().operation(update).build().commit(); + Dataset dataset4; + try (Transaction txn = + new Transaction.Builder().readVersion(dataset3.version()).operation(update).build()) { + dataset4 = new CommitBuilder(dataset3).execute(txn); + } assertEquals(totalRows - deleteCount - deleteCount2, dataset4.countRows()); // Case 3. Test delete all rows @@ -242,7 +250,11 @@ void testDeleteRows(@TempDir Path tempDir) throws IOException { Update.builder() .removedFragmentIds(Collections.singletonList(Long.valueOf(fragment.getId()))) .build(); - Dataset dataset5 = dataset4.newTransactionBuilder().operation(update).build().commit(); + Dataset dataset5; + try (Transaction txn = + new Transaction.Builder().readVersion(dataset4.version()).operation(update).build()) { + dataset5 = new CommitBuilder(dataset4).execute(txn); + } assertEquals(0, dataset5.countRows()); } @@ -292,16 +304,16 @@ void testMergeColumns(@TempDir Path tempDir) throws Exception { FragmentMergeResult mergeResult = testDataset.mergeColumn(fragment, 10); - try (SourcedTransaction transaction = - new SourcedTransaction.Builder(dataset) + try (Transaction transaction = + new Transaction.Builder() + .readVersion(dataset.version()) .operation( Merge.builder() .fragments(Collections.singletonList(mergeResult.getFragmentMetadata())) .schema(mergeResult.getSchema().asArrowSchema()) .build()) - .readVersion(dataset.version()) .build()) { - try (Dataset newDs = transaction.commit()) { + try (Dataset newDs = new CommitBuilder(dataset).execute(transaction)) { assertEquals(3, newDs.version()); assertEquals(3, newDs.latestVersion()); Fragment newFrag = newDs.getFragments().get(0); diff --git a/java/src/test/java/org/lance/NamespaceIntegrationTest.java b/java/src/test/java/org/lance/NamespaceIntegrationTest.java index dc67d24ca3f..472848a6151 100644 --- a/java/src/test/java/org/lance/NamespaceIntegrationTest.java +++ b/java/src/test/java/org/lance/NamespaceIntegrationTest.java @@ -1414,15 +1414,16 @@ void testTransactionCommitWithNamespace() throws Exception { // Create and commit transaction Append appendOp = Append.builder().fragments(newFragments).build(); - SourcedTransaction transaction = - new SourcedTransaction.Builder(datasetWithProvider) + try (Transaction transaction = + new Transaction.Builder() .readVersion(datasetWithProvider.version()) .operation(appendOp) - .build(); - - try (Dataset committedDataset = transaction.commit()) { - assertEquals(2, committedDataset.version()); - assertEquals(4, committedDataset.countRows()); + .build()) { + try (Dataset committedDataset = + new CommitBuilder(datasetWithProvider).execute(transaction)) { + assertEquals(2, committedDataset.version()); + assertEquals(4, committedDataset.countRows()); + } } } diff --git a/java/src/test/java/org/lance/SourcedTransactionTest.java b/java/src/test/java/org/lance/SourcedTransactionTest.java new file mode 100644 index 00000000000..ae2be91dd33 --- /dev/null +++ b/java/src/test/java/org/lance/SourcedTransactionTest.java @@ -0,0 +1,124 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance; + +import org.lance.operation.Append; + +import org.apache.arrow.memory.RootAllocator; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.nio.file.Path; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class SourcedTransactionTest { + + @Test + public void testSourcedTransaction(@TempDir Path tempDir) { + String datasetPath = tempDir.resolve("testSourcedTransaction").toString(); + try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) { + TestUtils.SimpleTestDataset testDataset = + new TestUtils.SimpleTestDataset(allocator, datasetPath); + try (Dataset dataset = testDataset.createEmptyDataset()) { + FragmentMetadata fragmentMeta = testDataset.createNewFragment(20); + + Map properties = new HashMap<>(); + properties.put("transactionType", "APPEND"); + properties.put("createdBy", "testUser"); + try (SourcedTransaction appendTxn = + dataset + .newTransactionBuilder() + .operation( + Append.builder().fragments(Collections.singletonList(fragmentMeta)).build()) + .transactionProperties(properties) + .build()) { + try (Dataset committedDataset = appendTxn.commit()) { + assertEquals(2, committedDataset.version()); + assertEquals(2, committedDataset.latestVersion()); + assertEquals(20, committedDataset.countRows()); + assertEquals(dataset.version(), appendTxn.readVersion()); + assertNotNull(appendTxn.uuid()); + + // Verify transaction properties + Map txnProps = + appendTxn.transactionProperties().orElse(new HashMap<>()); + assertEquals("APPEND", txnProps.get("transactionType")); + assertEquals("testUser", txnProps.get("createdBy")); + } + } + } + } + } + + @Test + public void testTag(@TempDir Path tempDir) { + String datasetPath = tempDir.resolve("testTag").toString(); + try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) { + TestUtils.SimpleTestDataset testDataset = + new TestUtils.SimpleTestDataset(allocator, datasetPath); + try (Dataset dataset = testDataset.createEmptyDataset()) { + FragmentMetadata fragmentMeta = testDataset.createNewFragment(10); + + try (SourcedTransaction txn = + dataset + .newTransactionBuilder() + .tag("release-v2") + .operation( + Append.builder().fragments(Collections.singletonList(fragmentMeta)).build()) + .build()) { + assertEquals("release-v2", txn.tag().orElse(null)); + assertEquals("release-v2", txn.transaction().tag().orElse(null)); + + try (Dataset committed = txn.commit()) { + Transaction readTx = committed.readTransaction().orElse(null); + assertNotNull(readTx); + assertEquals("release-v2", readTx.tag().orElse(null)); + } + } + } + } + } + + @Test + public void testReadVersionDefaultsToDatasetVersion(@TempDir Path tempDir) { + String datasetPath = tempDir.resolve("testReadVersionDefault").toString(); + try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) { + TestUtils.SimpleTestDataset testDataset = + new TestUtils.SimpleTestDataset(allocator, datasetPath); + try (Dataset dataset = testDataset.createEmptyDataset()) { + FragmentMetadata fragmentMeta = testDataset.createNewFragment(10); + + // Do not set readVersion explicitly — it should default to dataset.version() + try (SourcedTransaction txn = + dataset + .newTransactionBuilder() + .operation( + Append.builder().fragments(Collections.singletonList(fragmentMeta)).build()) + .build()) { + assertEquals(dataset.version(), txn.readVersion()); + + try (Dataset committed = txn.commit()) { + assertTrue(committed.version() > dataset.version()); + } + } + } + } + } +} diff --git a/java/src/test/java/org/lance/TestUtils.java b/java/src/test/java/org/lance/TestUtils.java index 9e4764b4b73..c9033361103 100644 --- a/java/src/test/java/org/lance/TestUtils.java +++ b/java/src/test/java/org/lance/TestUtils.java @@ -760,14 +760,16 @@ public Dataset createAndAppendRows(int totalRows, int batches) { fragments.add(createBlobFragment(batchRows, Integer.MAX_VALUE)); } - SourcedTransaction txn = - ds.newTransactionBuilder() + try (Transaction txn = + new Transaction.Builder() + .readVersion(ds.version()) .operation(org.lance.operation.Append.builder().fragments(fragments).build()) - .build(); - Dataset newDs = txn.commit(); - Preconditions.checkArgument( - newDs.countRows() == totalRows, "dataset row count mismatch after append"); - return newDs; + .build()) { + Dataset newDs = new CommitBuilder(ds).execute(txn); + Preconditions.checkArgument( + newDs.countRows() == totalRows, "dataset row count mismatch after append"); + return newDs; + } } } } diff --git a/java/src/test/java/org/lance/TransactionTest.java b/java/src/test/java/org/lance/TransactionTest.java index 1767bc7a7d5..cdd95d94165 100644 --- a/java/src/test/java/org/lance/TransactionTest.java +++ b/java/src/test/java/org/lance/TransactionTest.java @@ -39,41 +39,6 @@ public class TransactionTest { - @Test - public void testTransaction(@TempDir Path tempDir) { - String datasetPath = tempDir.resolve("testTransaction").toString(); - try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) { - TestUtils.SimpleTestDataset testDataset = - new TestUtils.SimpleTestDataset(allocator, datasetPath); - try (Dataset dataset = testDataset.createEmptyDataset()) { - FragmentMetadata fragmentMeta = testDataset.createNewFragment(20); - - Map properties = new HashMap<>(); - properties.put("transactionType", "APPEND"); - properties.put("createdBy", "testUser"); - SourcedTransaction appendTxn = - dataset - .newTransactionBuilder() - .operation( - Append.builder().fragments(Collections.singletonList(fragmentMeta)).build()) - .transactionProperties(properties) - .build(); - try (Dataset committedDataset = appendTxn.commit()) { - assertEquals(2, committedDataset.version()); - assertEquals(2, committedDataset.latestVersion()); - assertEquals(20, committedDataset.countRows()); - assertEquals(dataset.version(), appendTxn.readVersion()); - assertNotNull(appendTxn.uuid()); - - // Verify transaction properties - Map txnProps = appendTxn.transactionProperties().orElse(new HashMap<>()); - assertEquals("APPEND", txnProps.get("transactionType")); - assertEquals("testUser", txnProps.get("createdBy")); - } - } - } - } - @Test public void testReadTransactionCreateIndex(@TempDir Path tempDir) { String datasetPath = tempDir.resolve("read_transaction_create_index").toString(); @@ -139,4 +104,94 @@ public void testCommitToUri(@TempDir Path tempDir) { } } } + + @Test + public void testTagRoundTrip(@TempDir Path tempDir) { + String datasetPath = tempDir.resolve("testTagRoundTrip").toString(); + try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) { + TestUtils.SimpleTestDataset testDataset = + new TestUtils.SimpleTestDataset(allocator, datasetPath); + try (Dataset dataset = testDataset.createEmptyDataset()) { + FragmentMetadata fragmentMeta = testDataset.createNewFragment(10); + + try (Transaction txn = + new Transaction.Builder() + .readVersion(dataset.version()) + .tag("v1.0") + .operation( + Append.builder().fragments(Collections.singletonList(fragmentMeta)).build()) + .build()) { + assertEquals("v1.0", txn.tag().orElse(null)); + + try (Dataset committed = new CommitBuilder(dataset).execute(txn)) { + Transaction readTx = committed.readTransaction().orElse(null); + assertNotNull(readTx); + assertEquals("v1.0", readTx.tag().orElse(null)); + } + } + } + } + } + + @Test + public void testTransactionPropertiesRoundTrip(@TempDir Path tempDir) { + String datasetPath = tempDir.resolve("testTransactionPropertiesRoundTrip").toString(); + try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) { + TestUtils.SimpleTestDataset testDataset = + new TestUtils.SimpleTestDataset(allocator, datasetPath); + try (Dataset dataset = testDataset.createEmptyDataset()) { + FragmentMetadata fragmentMeta = testDataset.createNewFragment(10); + + Map properties = new HashMap<>(); + properties.put("source", "ingestion-pipeline"); + properties.put("batchId", "42"); + + try (Transaction txn = + new Transaction.Builder() + .readVersion(dataset.version()) + .transactionProperties(properties) + .operation( + Append.builder().fragments(Collections.singletonList(fragmentMeta)).build()) + .build()) { + try (Dataset committed = new CommitBuilder(dataset).execute(txn)) { + Transaction readTx = committed.readTransaction().orElse(null); + assertNotNull(readTx); + Map readProps = readTx.transactionProperties().orElse(null); + assertNotNull(readProps); + assertEquals("ingestion-pipeline", readProps.get("source")); + assertEquals("42", readProps.get("batchId")); + } + } + } + } + } + + @Test + public void testCustomUuid(@TempDir Path tempDir) { + String datasetPath = tempDir.resolve("testCustomUuid").toString(); + try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) { + TestUtils.SimpleTestDataset testDataset = + new TestUtils.SimpleTestDataset(allocator, datasetPath); + try (Dataset dataset = testDataset.createEmptyDataset()) { + FragmentMetadata fragmentMeta = testDataset.createNewFragment(10); + + String customUuid = "custom-uuid-12345"; + try (Transaction txn = + new Transaction.Builder() + .readVersion(dataset.version()) + .uuid(customUuid) + .operation( + Append.builder().fragments(Collections.singletonList(fragmentMeta)).build()) + .build()) { + assertEquals(customUuid, txn.uuid()); + + try (Dataset committed = new CommitBuilder(dataset).execute(txn)) { + Transaction readTx = committed.readTransaction().orElse(null); + assertNotNull(readTx); + assertEquals(customUuid, readTx.uuid()); + } + } + } + } + } } diff --git a/java/src/test/java/org/lance/index/ScalarIndexTest.java b/java/src/test/java/org/lance/index/ScalarIndexTest.java index 3c84d692b38..70ef43c853c 100644 --- a/java/src/test/java/org/lance/index/ScalarIndexTest.java +++ b/java/src/test/java/org/lance/index/ScalarIndexTest.java @@ -13,10 +13,11 @@ */ package org.lance.index; +import org.lance.CommitBuilder; import org.lance.Dataset; import org.lance.Fragment; -import org.lance.SourcedTransaction; import org.lance.TestUtils; +import org.lance.Transaction; import org.lance.WriteParams; import org.lance.index.scalar.ScalarIndexParams; import org.lance.ipc.LanceScanner; @@ -167,13 +168,16 @@ public void testCreateBTreeIndexDistributively(@TempDir Path tempDir) throws Exc CreateIndex createIndexOp = CreateIndex.builder().withNewIndices(Collections.singletonList(index)).build(); - SourcedTransaction createIndexTx = - dataset.newTransactionBuilder().operation(createIndexOp).build(); - - try (Dataset newDataset = createIndexTx.commit()) { - // new dataset should contain that index - assertEquals(datasetVersion + 1, newDataset.version()); - assertTrue(newDataset.listIndexes().contains("test_index")); + try (Transaction createIndexTx = + new Transaction.Builder() + .readVersion(datasetVersion) + .operation(createIndexOp) + .build()) { + try (Dataset newDataset = new CommitBuilder(dataset).execute(createIndexTx)) { + // new dataset should contain that index + assertEquals(datasetVersion + 1, newDataset.version()); + assertTrue(newDataset.listIndexes().contains("test_index")); + } } } } @@ -244,30 +248,33 @@ public void testRangedBTreeIndex(@TempDir Path tempDir) throws Exception { CreateIndex createIndexOp = CreateIndex.builder().withNewIndices(Collections.singletonList(index)).build(); - SourcedTransaction createIndexTx = - dataset.newTransactionBuilder().operation(createIndexOp).build(); - - try (Dataset newDataset = createIndexTx.commit()) { - // new dataset should contain that index - assertEquals(datasetVersion + 1, newDataset.version()); - assertTrue(newDataset.listIndexes().contains("test_index")); - - // 7. compare results - // force use index should get the right value - ScanOptions scanOptions = - new ScanOptions.Builder().withRowId(true).filter("id in (10, 20, 30)").build(); - try (LanceScanner scanner = newDataset.newScan(scanOptions); - ArrowReader arrowReader = scanner.scanBatches(); ) { - List ids = new ArrayList<>(); - while (arrowReader.loadNextBatch()) { - VectorSchemaRoot root = arrowReader.getVectorSchemaRoot(); - IntVector idVec = (IntVector) root.getVector("id"); - for (int i = 0; i < idVec.getValueCount(); i++) { - ids.add(idVec.get(i)); + try (Transaction createIndexTx = + new Transaction.Builder() + .readVersion(datasetVersion) + .operation(createIndexOp) + .build()) { + try (Dataset newDataset = new CommitBuilder(dataset).execute(createIndexTx)) { + // new dataset should contain that index + assertEquals(datasetVersion + 1, newDataset.version()); + assertTrue(newDataset.listIndexes().contains("test_index")); + + // 7. compare results + // force use index should get the right value + ScanOptions scanOptions = + new ScanOptions.Builder().withRowId(true).filter("id in (10, 20, 30)").build(); + try (LanceScanner scanner = newDataset.newScan(scanOptions); + ArrowReader arrowReader = scanner.scanBatches(); ) { + List ids = new ArrayList<>(); + while (arrowReader.loadNextBatch()) { + VectorSchemaRoot root = arrowReader.getVectorSchemaRoot(); + IntVector idVec = (IntVector) root.getVector("id"); + for (int i = 0; i < idVec.getValueCount(); i++) { + ids.add(idVec.get(i)); + } } + Collections.sort(ids); + Assertions.assertIterableEquals(Arrays.asList(10, 20, 30), ids); } - Collections.sort(ids); - Assertions.assertIterableEquals(Arrays.asList(10, 20, 30), ids); } } } diff --git a/java/src/test/java/org/lance/index/VectorIndexTest.java b/java/src/test/java/org/lance/index/VectorIndexTest.java index 912973b04e1..4a5902044da 100755 --- a/java/src/test/java/org/lance/index/VectorIndexTest.java +++ b/java/src/test/java/org/lance/index/VectorIndexTest.java @@ -13,10 +13,11 @@ */ package org.lance.index; +import org.lance.CommitBuilder; import org.lance.Dataset; import org.lance.Fragment; -import org.lance.SourcedTransaction; import org.lance.TestVectorDataset; +import org.lance.Transaction; import org.lance.index.vector.IvfBuildParams; import org.lance.index.vector.PQBuildParams; import org.lance.index.vector.RQBuildParams; @@ -130,12 +131,15 @@ public void testCreateIvfFlatIndexDistributively(@TempDir Path tempDir) throws E CreateIndex createIndexOp = CreateIndex.builder().withNewIndices(Collections.singletonList(index)).build(); - SourcedTransaction createIndexTx = - dataset.newTransactionBuilder().operation(createIndexOp).build(); - - try (Dataset newDataset = createIndexTx.commit()) { - assertEquals(datasetVersion + 1, newDataset.version()); - assertTrue(newDataset.listIndexes().contains(TestVectorDataset.indexName)); + try (Transaction createIndexTx = + new Transaction.Builder() + .readVersion(dataset.version()) + .operation(createIndexOp) + .build()) { + try (Dataset newDataset = new CommitBuilder(dataset).execute(createIndexTx)) { + assertEquals(datasetVersion + 1, newDataset.version()); + assertTrue(newDataset.listIndexes().contains(TestVectorDataset.indexName)); + } } } } @@ -248,12 +252,15 @@ public void testCreateIvfPqIndexDistributively(@TempDir Path tempDir) throws Exc CreateIndex createIndexOp = CreateIndex.builder().withNewIndices(Collections.singletonList(index)).build(); - SourcedTransaction createIndexTx = - dataset.newTransactionBuilder().operation(createIndexOp).build(); - - try (Dataset newDataset = createIndexTx.commit()) { - assertEquals(datasetVersion + 1, newDataset.version()); - assertTrue(newDataset.listIndexes().contains(TestVectorDataset.indexName)); + try (Transaction createIndexTx = + new Transaction.Builder() + .readVersion(dataset.version()) + .operation(createIndexOp) + .build()) { + try (Dataset newDataset = new CommitBuilder(dataset).execute(createIndexTx)) { + assertEquals(datasetVersion + 1, newDataset.version()); + assertTrue(newDataset.listIndexes().contains(TestVectorDataset.indexName)); + } } } } @@ -350,12 +357,15 @@ public void testCreateIvfSqIndexDistributively(@TempDir Path tempDir) throws Exc CreateIndex createIndexOp = CreateIndex.builder().withNewIndices(Collections.singletonList(index)).build(); - SourcedTransaction createIndexTx = - dataset.newTransactionBuilder().operation(createIndexOp).build(); - - try (Dataset newDataset = createIndexTx.commit()) { - assertEquals(datasetVersion + 1, newDataset.version()); - assertTrue(newDataset.listIndexes().contains(TestVectorDataset.indexName)); + try (Transaction createIndexTx = + new Transaction.Builder() + .readVersion(dataset.version()) + .operation(createIndexOp) + .build()) { + try (Dataset newDataset = new CommitBuilder(dataset).execute(createIndexTx)) { + assertEquals(datasetVersion + 1, newDataset.version()); + assertTrue(newDataset.listIndexes().contains(TestVectorDataset.indexName)); + } } } } From 1d7ee2210bf855d988a35bcb6d8673b6d2a7ad2f Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Wed, 25 Feb 2026 14:50:18 -0600 Subject: [PATCH 7/7] addressed claude review issues Signed-off-by: Daniel Rammer --- java/lance-jni/src/transaction.rs | 44 ++++++++++++++++--- .../main/java/org/lance/CommitBuilder.java | 2 + .../java/org/lance/SourcedTransaction.java | 2 +- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/java/lance-jni/src/transaction.rs b/java/lance-jni/src/transaction.rs index 678d5568db6..60621696a91 100644 --- a/java/lance-jni/src/transaction.rs +++ b/java/lance-jni/src/transaction.rs @@ -799,10 +799,12 @@ fn convert_schema_from_operation( let c_schema_ptr = schema_ptr as *mut FFI_ArrowSchema; let c_schema = unsafe { FFI_ArrowSchema::from_raw(c_schema_ptr) }; let schema = Schema::try_from(&c_schema)?; - Ok( - LanceSchema::try_from(&schema) - .expect("Failed to convert from arrow schema to lance schema"), - ) + LanceSchema::try_from(&schema).map_err(|e| { + Error::input_error(format!( + "Failed to convert Arrow schema to Lance schema: {}", + e + )) + }) } fn convert_to_rust_operation( @@ -813,7 +815,15 @@ fn convert_to_rust_operation( let op_name = env.get_string_from_method(java_operation, "name")?; let op = match op_name.as_str() { "Project" => Operation::Project { - schema: convert_schema_from_operation(env, java_operation, allocator.unwrap())?, + schema: convert_schema_from_operation( + env, + java_operation, + allocator.ok_or_else(|| { + Error::input_error( + "BufferAllocator is required for Project operations".to_string(), + ) + })?, + )?, }, "UpdateConfig" => { let config_updates_obj = env @@ -934,7 +944,15 @@ fn convert_to_rust_operation( to_rust_map(env, &config_upsert_values) }, )?; - let schema = convert_schema_from_operation(env, java_operation, allocator.unwrap())?; + let schema = convert_schema_from_operation( + env, + java_operation, + allocator.ok_or_else(|| { + Error::input_error( + "BufferAllocator is required for Overwrite operations".to_string(), + ) + })?, + )?; Operation::Overwrite { fragments, schema, @@ -1028,7 +1046,15 @@ fn convert_to_rust_operation( })?; Operation::Merge { fragments, - schema: convert_schema_from_operation(env, java_operation, allocator.unwrap())?, + schema: convert_schema_from_operation( + env, + java_operation, + allocator.ok_or_else(|| { + Error::input_error( + "BufferAllocator is required for Merge operations".to_string(), + ) + })?, + )?, } } "Restore" => { @@ -1150,6 +1176,7 @@ pub extern "system" fn Java_org_lance_CommitBuilder_nativeCommitToUri<'local>( _cls: JObject, uri: JString, java_transaction: JObject, + detached_jbool: jboolean, enable_v2_manifest_paths: jboolean, storage_options_provider_obj: JObject, namespace_obj: JObject, @@ -1167,6 +1194,7 @@ pub extern "system" fn Java_org_lance_CommitBuilder_nativeCommitToUri<'local>( &mut env, uri, java_transaction, + detached_jbool != 0, enable_v2_manifest_paths != 0, storage_options_provider_obj, namespace_obj, @@ -1186,6 +1214,7 @@ fn inner_commit_to_uri<'local>( env: &mut JNIEnv<'local>, uri: JString, java_transaction: JObject, + detached: bool, enable_v2_manifest_paths: bool, storage_options_provider_obj: JObject, namespace_obj: JObject, @@ -1263,6 +1292,7 @@ fn inner_commit_to_uri<'local>( // Build CommitBuilder with URI let mut builder = CommitBuilder::new(&*uri_str) .with_store_params(store_params) + .with_detached(detached) .enable_v2_manifest_paths(enable_v2_manifest_paths); if let Some(use_stable) = use_stable_row_ids { diff --git a/java/src/main/java/org/lance/CommitBuilder.java b/java/src/main/java/org/lance/CommitBuilder.java index 2310ec6b8e9..0b2752aae89 100644 --- a/java/src/main/java/org/lance/CommitBuilder.java +++ b/java/src/main/java/org/lance/CommitBuilder.java @@ -259,6 +259,7 @@ public Dataset execute(Transaction transaction) { nativeCommitToUri( uri, transaction, + detached, enableV2ManifestPaths, storageOptionsProvider, namespace, @@ -289,6 +290,7 @@ private static native Dataset nativeCommitToDataset( private static native Dataset nativeCommitToUri( String uri, Transaction transaction, + boolean detached, boolean enableV2ManifestPaths, Object storageOptionsProvider, Object namespace, diff --git a/java/src/main/java/org/lance/SourcedTransaction.java b/java/src/main/java/org/lance/SourcedTransaction.java index 2630c263b43..d833c417752 100644 --- a/java/src/main/java/org/lance/SourcedTransaction.java +++ b/java/src/main/java/org/lance/SourcedTransaction.java @@ -30,7 +30,7 @@ *

Example usage: * *

{@code
- * try (SourcedTransaction txn = dataset.newSourcedTransactionBuilder()
+ * try (SourcedTransaction txn = dataset.newTransactionBuilder()
  *     .operation(Append.builder().fragments(fragments).build())
  *     .build();
  *     Dataset committed = txn.commit()) {