From 1b65ffd44cde5f2d87b832294e5422154aaab986 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 19 Feb 2026 09:00:25 -0800 Subject: [PATCH 01/23] commit --- Cargo.lock | 5 +- Cargo.toml | 2 +- java/lance-jni/Cargo.lock | 5 +- java/lance-jni/Cargo.toml | 1 + java/lance-jni/src/blocking_dataset.rs | 56 + java/lance-jni/src/namespace.rs | 118 +- java/pom.xml | 9 +- java/src/main/java/org/lance/Dataset.java | 32 +- .../java/org/lance/OpenDatasetBuilder.java | 54 +- .../lance/namespace/DirectoryNamespace.java | 57 +- .../org/lance/namespace/RestNamespace.java | 14 + .../org/lance/NamespaceIntegrationTest.java | 186 +++ .../namespace/DirectoryNamespaceTest.java | 76 ++ python/Cargo.lock | 4 +- python/pyproject.toml | 2 +- python/python/lance/__init__.py | 6 + python/python/lance/namespace.py | 94 ++ .../tests/test_namespace_integration.py | 231 ++++ python/src/dataset.rs | 29 +- python/src/namespace.rs | 80 +- rust/lance-namespace-impls/Cargo.toml | 1 + rust/lance-namespace-impls/src/dir.rs | 1152 ++++++++++++++++- rust/lance-namespace-impls/src/rest.rs | 42 +- .../lance-namespace-impls/src/rest_adapter.rs | 80 +- rust/lance-namespace/src/namespace.rs | 87 +- .../src/io/commit/external_manifest.rs | 173 ++- rust/lance/src/dataset.rs | 1 + rust/lance/src/dataset/builder.rs | 14 +- rust/lance/src/io/commit.rs | 1 + .../lance/src/io/commit/namespace_manifest.rs | 131 ++ 30 files changed, 2642 insertions(+), 101 deletions(-) create mode 100644 rust/lance/src/io/commit/namespace_manifest.rs diff --git a/Cargo.lock b/Cargo.lock index 235981516c2..871a27889a5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5286,6 +5286,7 @@ dependencies = [ "lance-index", "lance-io", "lance-namespace", + "lance-table", "log", "object_store", "rand 0.9.2", @@ -5306,9 +5307,9 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.4.5" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a2acdba67f84190067532fce07b51a435dd390d7cdc1129a05003e5cb3274cf0" +checksum = "d2c20a25207bbae280c9acd16ccd1e3561ad7f79a57c0e88809cd9c026a8494c" dependencies = [ "reqwest", "serde", diff --git a/Cargo.toml b/Cargo.toml index f8b903a2c81..cf4c2aae6ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,7 +66,7 @@ lance-linalg = { version = "=3.0.0-beta.3", path = "./rust/lance-linalg" } lance-namespace = { version = "=3.0.0-beta.3", path = "./rust/lance-namespace" } lance-namespace-impls = { version = "=3.0.0-beta.3", path = "./rust/lance-namespace-impls" } lance-namespace-datafusion = { version = "=3.0.0-beta.3", path = "./rust/lance-namespace-datafusion" } -lance-namespace-reqwest-client = { version = "=0.4.5" } +lance-namespace-reqwest-client = "0.5.0" lance-table = { version = "=3.0.0-beta.3", path = "./rust/lance-table" } lance-test-macros = { version = "=3.0.0-beta.3", path = "./rust/lance-test-macros" } lance-testing = { version = "=3.0.0-beta.3", path = "./rust/lance-testing" } diff --git a/java/lance-jni/Cargo.lock b/java/lance-jni/Cargo.lock index 5d2c626bb8b..89f82a1e025 100644 --- a/java/lance-jni/Cargo.lock +++ b/java/lance-jni/Cargo.lock @@ -3751,6 +3751,7 @@ dependencies = [ "lance-linalg", "lance-namespace", "lance-namespace-impls", + "lance-table", "log", "object_store", "prost", @@ -3823,9 +3824,9 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.4.5" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2acdba67f84190067532fce07b51a435dd390d7cdc1129a05003e5cb3274cf0" +checksum = "d2c20a25207bbae280c9acd16ccd1e3561ad7f79a57c0e88809cd9c026a8494c" dependencies = [ "reqwest", "serde", diff --git a/java/lance-jni/Cargo.toml b/java/lance-jni/Cargo.toml index 8aa3d36bb6d..e3f6e546f0f 100644 --- a/java/lance-jni/Cargo.toml +++ b/java/lance-jni/Cargo.toml @@ -26,6 +26,7 @@ lance-namespace = { path = "../../rust/lance-namespace" } lance-namespace-impls = { path = 
"../../rust/lance-namespace-impls", features = ["rest", "rest-adapter"] } lance-core = { path = "../../rust/lance-core" } lance-file = { path = "../../rust/lance-file" } +lance-table = { path = "../../rust/lance-table" } arrow = { version = "57.1", features = ["ffi"] } arrow-schema = "57.1" object_store = { version = "0.12.2" } diff --git a/java/lance-jni/src/blocking_dataset.rs b/java/lance-jni/src/blocking_dataset.rs index dbb23027c9a..b84d39e2059 100644 --- a/java/lance-jni/src/blocking_dataset.rs +++ b/java/lance-jni/src/blocking_dataset.rs @@ -35,6 +35,7 @@ use lance::dataset::{ ColumnAlteration, CommitBuilder, Dataset, NewColumnTransform, ProjectionRequest, ReadParams, Version, WriteParams, }; +use lance::io::commit::namespace_manifest::LanceNamespaceExternalManifestStore; use lance::io::{ObjectStore, ObjectStoreParams}; use lance::session::Session as LanceSession; use lance::table::format::IndexMetadata; @@ -47,6 +48,9 @@ use lance_index::IndexCriteria as RustIndexCriteria; use lance_index::{IndexParams, IndexType}; use lance_io::object_store::ObjectStoreRegistry; use lance_io::object_store::StorageOptionsProvider; +use lance_namespace::LanceNamespace; +use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler; +use lance_table::io::commit::CommitHandler; use std::collections::HashMap; use std::future::IntoFuture; use std::iter::empty; @@ -135,6 +139,8 @@ impl BlockingDataset { serialized_manifest: Option<&[u8]>, storage_options_provider: Option>, session: Option>, + namespace: Option>, + table_id: Option>, ) -> Result { // Create storage options accessor from storage_options and provider let accessor = match (storage_options.is_empty(), storage_options_provider) { @@ -176,6 +182,15 @@ impl BlockingDataset { builder = builder.with_serialized_manifest(serialized_manifest)?; } + // Set up namespace commit handler if namespace and table_id are provided + if let (Some(ns), Some(tid)) = (namespace, table_id) { + let external_store = 
LanceNamespaceExternalManifestStore::new(ns, tid); + let commit_handler: Arc = Arc::new(ExternalManifestCommitHandler { + external_manifest_store: Arc::new(external_store), + }); + builder = builder.with_commit_handler(commit_handler); + } + let inner = RT.block_on(builder.load())?; Ok(Self { inner }) } @@ -1050,6 +1065,9 @@ pub extern "system" fn Java_org_lance_Dataset_openNative<'local>( serialized_manifest: JObject, // Optional storage_options_provider_obj: JObject, // Optional session_handle: jlong, // Session handle, 0 means no session + namespace_handle: jlong, // Namespace handle, 0 means no namespace + namespace_type: JString, // "directory" or "rest", null if no namespace + table_id_obj: JObject, // List, null if no namespace ) -> JObject<'local> { ok_or_throw!( env, @@ -1064,6 +1082,9 @@ pub extern "system" fn Java_org_lance_Dataset_openNative<'local>( serialized_manifest, storage_options_provider_obj, session_handle, + namespace_handle, + namespace_type, + table_id_obj, ) ) } @@ -1080,7 +1101,12 @@ fn inner_open_native<'local>( serialized_manifest: JObject, // Optional storage_options_provider_obj: JObject, // Optional session_handle: jlong, // Session handle, 0 means no session + namespace_handle: jlong, // Namespace handle, 0 means no namespace + namespace_type: JString, // "directory" or "rest", null if no namespace + table_id_obj: JObject, // List, null if no namespace ) -> Result> { + use crate::namespace::{BlockingDirectoryNamespace, BlockingRestNamespace}; + let path_str: String = path.extract(env)?; let version = env.get_u64_opt(&version_obj)?; let block_size = env.get_int_opt(&block_size_obj)?; @@ -1096,6 +1122,34 @@ fn inner_open_native<'local>( let storage_options_provider_arc = storage_options_provider.map(|v| Arc::new(v) as Arc); + // Extract namespace and table_id if provided (before get_bytes_opt which holds borrow) + let (namespace, table_id) = if namespace_handle != 0 && !namespace_type.is_null() { + let ns_type: String = 
namespace_type.extract(env)?; + let ns_arc: Arc = if ns_type == "directory" { + let ns = unsafe { &*(namespace_handle as *const BlockingDirectoryNamespace) }; + ns.inner.clone() + } else if ns_type == "rest" { + let ns = unsafe { &*(namespace_handle as *const BlockingRestNamespace) }; + ns.inner.clone() + } else { + return Err(Error::input_error(format!( + "Unknown namespace type: {}", + ns_type + ))); + }; + + // Extract table_id from List + let table_id = if !table_id_obj.is_null() { + env.get_strings_opt(&table_id_obj)? + } else { + None + }; + + (Some(ns_arc), table_id) + } else { + (None, None) + }; + let serialized_manifest = env.get_bytes_opt(&serialized_manifest)?; // Convert session handle to Arc if provided @@ -1111,6 +1165,8 @@ fn inner_open_native<'local>( serialized_manifest, storage_options_provider_arc, session, + namespace, + table_id, )?; dataset.into_java(env) } diff --git a/java/lance-jni/src/namespace.rs b/java/lance-jni/src/namespace.rs index 148171f8c7a..72d1b0a9140 100644 --- a/java/lance-jni/src/namespace.rs +++ b/java/lance-jni/src/namespace.rs @@ -11,8 +11,8 @@ use jni::JNIEnv; use lance_namespace::models::*; use lance_namespace::LanceNamespace as LanceNamespaceTrait; use lance_namespace_impls::{ - ConnectBuilder, DirectoryNamespace, DirectoryNamespaceBuilder, DynamicContextProvider, - OperationInfo, RestAdapter, RestAdapterConfig, RestNamespace, RestNamespaceBuilder, + ConnectBuilder, DirectoryNamespaceBuilder, DynamicContextProvider, OperationInfo, RestAdapter, + RestAdapterConfig, RestNamespaceBuilder, }; use serde::{Deserialize, Serialize}; @@ -118,12 +118,12 @@ fn convert_java_map_to_hashmap( /// Blocking wrapper for DirectoryNamespace pub struct BlockingDirectoryNamespace { - pub(crate) inner: DirectoryNamespace, + pub(crate) inner: Arc, } /// Blocking wrapper for RestNamespace pub struct BlockingRestNamespace { - pub(crate) inner: RestNamespace, + pub(crate) inner: Arc, } // 
============================================================================ @@ -184,7 +184,9 @@ fn create_directory_namespace_internal( .block_on(builder.build()) .map_err(|e| Error::runtime_error(format!("Failed to build DirectoryNamespace: {}", e)))?; - let blocking_namespace = BlockingDirectoryNamespace { inner: namespace }; + let blocking_namespace = BlockingDirectoryNamespace { + inner: Arc::new(namespace), + }; let handle = Box::into_raw(Box::new(blocking_namespace)) as jlong; Ok(handle) } @@ -650,6 +652,57 @@ pub extern "system" fn Java_org_lance_namespace_DirectoryNamespace_alterTransact .into_raw() } +#[no_mangle] +pub extern "system" fn Java_org_lance_namespace_DirectoryNamespace_listTableVersionsNative( + mut env: JNIEnv, + _obj: JObject, + handle: jlong, + request_json: JString, +) -> jstring { + ok_or_throw_with_return!( + env, + call_namespace_method(&mut env, handle, request_json, |ns, req| { + RT.block_on(ns.inner.list_table_versions(req)) + }), + std::ptr::null_mut() + ) + .into_raw() +} + +#[no_mangle] +pub extern "system" fn Java_org_lance_namespace_DirectoryNamespace_createTableVersionNative( + mut env: JNIEnv, + _obj: JObject, + handle: jlong, + request_json: JString, +) -> jstring { + ok_or_throw_with_return!( + env, + call_namespace_method(&mut env, handle, request_json, |ns, req| { + RT.block_on(ns.inner.create_table_version(req)) + }), + std::ptr::null_mut() + ) + .into_raw() +} + +#[no_mangle] +pub extern "system" fn Java_org_lance_namespace_DirectoryNamespace_describeTableVersionNative( + mut env: JNIEnv, + _obj: JObject, + handle: jlong, + request_json: JString, +) -> jstring { + ok_or_throw_with_return!( + env, + call_namespace_method(&mut env, handle, request_json, |ns, req| { + RT.block_on(ns.inner.describe_table_version(req)) + }), + std::ptr::null_mut() + ) + .into_raw() +} + // ============================================================================ // RestNamespace JNI Functions // 
============================================================================ @@ -705,7 +758,9 @@ fn create_rest_namespace_internal( let namespace = builder.build(); - let blocking_namespace = BlockingRestNamespace { inner: namespace }; + let blocking_namespace = BlockingRestNamespace { + inner: Arc::new(namespace), + }; let handle = Box::into_raw(Box::new(blocking_namespace)) as jlong; Ok(handle) } @@ -1188,6 +1243,57 @@ pub extern "system" fn Java_org_lance_namespace_RestNamespace_alterTransactionNa .into_raw() } +#[no_mangle] +pub extern "system" fn Java_org_lance_namespace_RestNamespace_listTableVersionsNative( + mut env: JNIEnv, + _obj: JObject, + handle: jlong, + request_json: JString, +) -> jstring { + ok_or_throw_with_return!( + env, + call_rest_namespace_method(&mut env, handle, request_json, |ns, req| { + RT.block_on(ns.inner.list_table_versions(req)) + }), + std::ptr::null_mut() + ) + .into_raw() +} + +#[no_mangle] +pub extern "system" fn Java_org_lance_namespace_RestNamespace_createTableVersionNative( + mut env: JNIEnv, + _obj: JObject, + handle: jlong, + request_json: JString, +) -> jstring { + ok_or_throw_with_return!( + env, + call_rest_namespace_method(&mut env, handle, request_json, |ns, req| { + RT.block_on(ns.inner.create_table_version(req)) + }), + std::ptr::null_mut() + ) + .into_raw() +} + +#[no_mangle] +pub extern "system" fn Java_org_lance_namespace_RestNamespace_describeTableVersionNative( + mut env: JNIEnv, + _obj: JObject, + handle: jlong, + request_json: JString, +) -> jstring { + ok_or_throw_with_return!( + env, + call_rest_namespace_method(&mut env, handle, request_json, |ns, req| { + RT.block_on(ns.inner.describe_table_version(req)) + }), + std::ptr::null_mut() + ) + .into_raw() +} + // ============================================================================ // Helper Functions // ============================================================================ diff --git a/java/pom.xml b/java/pom.xml index 675d9bac8e9..173e234e9f4 
100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -109,18 +109,23 @@ org.lance lance-namespace-core - 0.4.5 + 0.5.0 org.lance lance-namespace-apache-client - 0.4.5 + 0.5.0 com.fasterxml.jackson.core jackson-databind 2.15.2 + + com.fasterxml.jackson.datatype + jackson-datatype-jsr310 + 2.15.2 + software.amazon.awssdk diff --git a/java/src/main/java/org/lance/Dataset.java b/java/src/main/java/org/lance/Dataset.java index 8f6053d2b91..f971af3ad81 100644 --- a/java/src/main/java/org/lance/Dataset.java +++ b/java/src/main/java/org/lance/Dataset.java @@ -328,6 +328,28 @@ static Dataset open( String path, ReadOptions options, Session session) { + return open(allocator, selfManagedAllocator, path, options, session, 0, null, null); + } + + /** + * Open a dataset from the specified path with additional options and namespace commit handler. + * + * @param path file path + * @param options the open options + * @param namespaceHandle native namespace handle (0 if not using namespace) + * @param namespaceType "directory" or "rest" (null if not using namespace) + * @param tableId table identifier (null if not using namespace) + * @return Dataset + */ + static Dataset open( + BufferAllocator allocator, + boolean selfManagedAllocator, + String path, + ReadOptions options, + Session session, + long namespaceHandle, + String namespaceType, + List tableId) { Preconditions.checkNotNull(path); Preconditions.checkNotNull(allocator); Preconditions.checkNotNull(options); @@ -348,7 +370,10 @@ static Dataset open( options.getStorageOptions(), options.getSerializedManifest(), options.getStorageOptionsProvider(), - sessionHandle); + sessionHandle, + namespaceHandle, + namespaceType, + tableId); dataset.allocator = allocator; dataset.selfManagedAllocator = selfManagedAllocator; if (effectiveSession != null) { @@ -369,7 +394,10 @@ private static native Dataset openNative( Map storageOptions, Optional serializedManifest, Optional storageOptionsProvider, - long sessionHandle); + long sessionHandle, 
+ long namespaceHandle, + String namespaceType, + List tableId); /** * Creates a builder for opening a dataset. diff --git a/java/src/main/java/org/lance/OpenDatasetBuilder.java b/java/src/main/java/org/lance/OpenDatasetBuilder.java index bce72551a40..0d972d3f905 100644 --- a/java/src/main/java/org/lance/OpenDatasetBuilder.java +++ b/java/src/main/java/org/lance/OpenDatasetBuilder.java @@ -13,8 +13,10 @@ */ package org.lance; +import org.lance.namespace.DirectoryNamespace; import org.lance.namespace.LanceNamespace; import org.lance.namespace.LanceNamespaceStorageOptionsProvider; +import org.lance.namespace.RestNamespace; import org.lance.namespace.model.DescribeTableRequest; import org.lance.namespace.model.DescribeTableResponse; @@ -209,6 +211,9 @@ private Dataset buildFromNamespace() { throw new IllegalArgumentException("Namespace did not return a table location"); } + // Check if namespace manages versioning (commits go through namespace API) + Boolean managedVersioning = response.getManagedVersioning(); + Map namespaceStorageOptions = response.getStorageOptions(); ReadOptions.Builder optionsBuilder = @@ -232,7 +237,54 @@ private Dataset buildFromNamespace() { } optionsBuilder.setStorageOptions(storageOptions); - // Open dataset with regular open method + // If managed_versioning is true, pass namespace info for commit handler setup + if (Boolean.TRUE.equals(managedVersioning)) { + long namespaceHandle = getNamespaceHandle(namespace); + String namespaceType = getNamespaceType(namespace); + return Dataset.open( + allocator, + selfManagedAllocator, + location, + optionsBuilder.build(), + session, + namespaceHandle, + namespaceType, + tableId); + } + + // Open dataset with regular open method (no namespace commit handler) return Dataset.open(allocator, selfManagedAllocator, location, optionsBuilder.build(), session); } + + private static long getNamespaceHandle(LanceNamespace namespace) { + if (namespace instanceof DirectoryNamespace) { + return 
((DirectoryNamespace) namespace).getNativeHandle(); + } else if (namespace instanceof RestNamespace) { + return ((RestNamespace) namespace).getNativeHandle(); + } + // Try reflection for custom namespace implementations that have getNativeHandle + try { + java.lang.reflect.Method method = namespace.getClass().getMethod("getNativeHandle"); + return (long) method.invoke(namespace); + } catch (Exception e) { + throw new IllegalArgumentException( + "Unknown namespace type: " + namespace.getClass().getName(), e); + } + } + + private static String getNamespaceType(LanceNamespace namespace) { + if (namespace instanceof DirectoryNamespace) { + return ((DirectoryNamespace) namespace).getNamespaceType(); + } else if (namespace instanceof RestNamespace) { + return ((RestNamespace) namespace).getNamespaceType(); + } + // Try reflection for custom namespace implementations that have getNamespaceType + try { + java.lang.reflect.Method method = namespace.getClass().getMethod("getNamespaceType"); + return (String) method.invoke(namespace); + } catch (Exception e) { + throw new IllegalArgumentException( + "Unknown namespace type: " + namespace.getClass().getName(), e); + } + } } diff --git a/java/src/main/java/org/lance/namespace/DirectoryNamespace.java b/java/src/main/java/org/lance/namespace/DirectoryNamespace.java index 3ffe2b82f01..5788035246e 100644 --- a/java/src/main/java/org/lance/namespace/DirectoryNamespace.java +++ b/java/src/main/java/org/lance/namespace/DirectoryNamespace.java @@ -17,7 +17,9 @@ import org.lance.namespace.model.*; import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; import org.apache.arrow.memory.BufferAllocator; import java.io.Closeable; @@ -142,7 +144,14 @@ public class DirectoryNamespace implements LanceNamespace, Closeable { JniLoader.ensureLoaded(); } - private static 
final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = createObjectMapper(); + + private static ObjectMapper createObjectMapper() { + ObjectMapper mapper = new ObjectMapper(); + mapper.registerModule(new JavaTimeModule()); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + return mapper; + } private long nativeDirectoryNamespaceHandle; private BufferAllocator allocator; @@ -400,6 +409,32 @@ public AlterTransactionResponse alterTransaction(AlterTransactionRequest request return fromJson(responseJson, AlterTransactionResponse.class); } + // Table version operations + + @Override + public ListTableVersionsResponse listTableVersions(ListTableVersionsRequest request) { + ensureInitialized(); + String requestJson = toJson(request); + String responseJson = listTableVersionsNative(nativeDirectoryNamespaceHandle, requestJson); + return fromJson(responseJson, ListTableVersionsResponse.class); + } + + @Override + public CreateTableVersionResponse createTableVersion(CreateTableVersionRequest request) { + ensureInitialized(); + String requestJson = toJson(request); + String responseJson = createTableVersionNative(nativeDirectoryNamespaceHandle, requestJson); + return fromJson(responseJson, CreateTableVersionResponse.class); + } + + @Override + public DescribeTableVersionResponse describeTableVersion(DescribeTableVersionRequest request) { + ensureInitialized(); + String requestJson = toJson(request); + String responseJson = describeTableVersionNative(nativeDirectoryNamespaceHandle, requestJson); + return fromJson(responseJson, DescribeTableVersionResponse.class); + } + @Override public void close() { if (nativeDirectoryNamespaceHandle != 0) { @@ -408,6 +443,20 @@ public void close() { } } + /** + * Returns the native handle for this namespace. Used internally for passing to Dataset.open() for + * namespace commit handler support. 
+ */ + public long getNativeHandle() { + ensureInitialized(); + return nativeDirectoryNamespaceHandle; + } + + /** Returns the namespace type identifier. */ + public String getNamespaceType() { + return "directory"; + } + private void ensureInitialized() { if (nativeDirectoryNamespaceHandle == 0) { throw new IllegalStateException( @@ -492,6 +541,12 @@ private native String mergeInsertIntoTableNative( private native String alterTransactionNative(long handle, String requestJson); + private native String listTableVersionsNative(long handle, String requestJson); + + private native String createTableVersionNative(long handle, String requestJson); + + private native String describeTableVersionNative(long handle, String requestJson); + // ========================================================================== // Provider loading helpers // ========================================================================== diff --git a/java/src/main/java/org/lance/namespace/RestNamespace.java b/java/src/main/java/org/lance/namespace/RestNamespace.java index 63dfe28dea7..fa628ce5709 100644 --- a/java/src/main/java/org/lance/namespace/RestNamespace.java +++ b/java/src/main/java/org/lance/namespace/RestNamespace.java @@ -343,6 +343,20 @@ public void close() { } } + /** + * Returns the native handle for this namespace. Used internally for passing to Dataset.open() for + * namespace commit handler support. + */ + public long getNativeHandle() { + ensureInitialized(); + return nativeRestNamespaceHandle; + } + + /** Returns the namespace type identifier. */ + public String getNamespaceType() { + return "rest"; + } + private void ensureInitialized() { if (nativeRestNamespaceHandle == 0) { throw new IllegalStateException("RestNamespace not initialized. 
Call initialize() first."); diff --git a/java/src/test/java/org/lance/NamespaceIntegrationTest.java b/java/src/test/java/org/lance/NamespaceIntegrationTest.java index 2d6f8ab1443..036ea6bb6e5 100644 --- a/java/src/test/java/org/lance/NamespaceIntegrationTest.java +++ b/java/src/test/java/org/lance/NamespaceIntegrationTest.java @@ -1309,6 +1309,192 @@ void testFragmentCreateAndCommitWithNamespace() throws Exception { } } + /** + * Table version tracking namespace for managed versioning tests. + * + *

This namespace wraps DirectoryNamespace with table_version_tracking_enabled and + * manifest_enabled flags, and tracks create_table_version and describe_table_version calls. + */ + static class TableVersionTrackingNamespace implements LanceNamespace { + private final DirectoryNamespace inner; + private final AtomicInteger createTableVersionCount = new AtomicInteger(0); + private final AtomicInteger describeTableVersionCount = new AtomicInteger(0); + private final Map baseStorageOptions; + + public TableVersionTrackingNamespace(String root, Map storageOptions) { + this.baseStorageOptions = + storageOptions != null ? new HashMap<>(storageOptions) : new HashMap<>(); + + Map dirProps = new HashMap<>(); + if (storageOptions != null) { + for (Map.Entry entry : storageOptions.entrySet()) { + dirProps.put("storage." + entry.getKey(), entry.getValue()); + } + } + dirProps.put("root", root); + dirProps.put("table_version_tracking_enabled", "true"); + dirProps.put("manifest_enabled", "true"); + + this.inner = new DirectoryNamespace(); + try (BufferAllocator allocator = new RootAllocator()) { + this.inner.initialize(dirProps, allocator); + } + } + + public int getCreateTableVersionCount() { + return createTableVersionCount.get(); + } + + public int getDescribeTableVersionCount() { + return describeTableVersionCount.get(); + } + + public long getNativeHandle() { + return inner.getNativeHandle(); + } + + public String getNamespaceType() { + return inner.getNamespaceType(); + } + + @Override + public void initialize(Map configProperties, BufferAllocator allocator) { + // Already initialized in constructor + } + + @Override + public String namespaceId() { + return "TableVersionTrackingNamespace { inner: " + inner.namespaceId() + " }"; + } + + @Override + public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request) { + return inner.createEmptyTable(request); + } + + @Override + public DeclareTableResponse declareTable(DeclareTableRequest request) { + return 
inner.declareTable(request); + } + + @Override + public DescribeTableResponse describeTable(DescribeTableRequest request) { + return inner.describeTable(request); + } + } + + @Test + void testManagedVersioningWithDirectoryNamespace() throws Exception { + try (BufferAllocator allocator = new RootAllocator()) { + // Set up storage options + Map storageOptions = new HashMap<>(); + storageOptions.put("allow_http", "true"); + storageOptions.put("aws_access_key_id", ACCESS_KEY); + storageOptions.put("aws_secret_access_key", SECRET_KEY); + storageOptions.put("aws_endpoint", ENDPOINT_URL); + storageOptions.put("aws_region", REGION); + + // Create namespace with table_version_tracking_enabled + TableVersionTrackingNamespace namespace = + new TableVersionTrackingNamespace( + "s3://" + BUCKET_NAME + "/managed_versioning_test", storageOptions); + String tableName = UUID.randomUUID().toString(); + + // Create schema and data + Schema schema = + new Schema( + Arrays.asList( + new Field("a", FieldType.nullable(new ArrowType.Int(32, true)), null), + new Field("b", FieldType.nullable(new ArrowType.Int(32, true)), null))); + + try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + IntVector aVector = (IntVector) root.getVector("a"); + IntVector bVector = (IntVector) root.getVector("b"); + + aVector.allocateNew(2); + bVector.allocateNew(2); + + aVector.set(0, 1); + bVector.set(0, 2); + aVector.set(1, 10); + bVector.set(1, 20); + + aVector.setValueCount(2); + bVector.setValueCount(2); + root.setRowCount(2); + + ArrowReader testReader = + new ArrowReader(allocator) { + boolean firstRead = true; + + @Override + public boolean loadNextBatch() { + if (firstRead) { + firstRead = false; + return true; + } + return false; + } + + @Override + public long bytesRead() { + return 0; + } + + @Override + protected void closeReadSource() {} + + @Override + protected Schema readSchema() { + return schema; + } + + @Override + public VectorSchemaRoot getVectorSchemaRoot() { + 
return root; + } + }; + + // Create dataset through namespace + try (Dataset dataset = + Dataset.write() + .allocator(allocator) + .reader(testReader) + .namespace(namespace) + .tableId(Arrays.asList(tableName)) + .mode(WriteParams.WriteMode.CREATE) + .execute()) { + assertEquals(2, dataset.countRows()); + } + } + + // Verify describe_table returns managed_versioning=true + DescribeTableRequest descReq = new DescribeTableRequest(); + descReq.setId(Arrays.asList(tableName)); + DescribeTableResponse descResp = namespace.describeTable(descReq); + + assertEquals( + Boolean.TRUE, + descResp.getManagedVersioning(), + "Expected managedVersioning=true when table_version_tracking_enabled"); + + // Open dataset through namespace with managed_versioning support + try (Dataset dsFromNamespace = + Dataset.open() + .allocator(allocator) + .namespace(namespace) + .tableId(Arrays.asList(tableName)) + .build()) { + + assertEquals(2, dsFromNamespace.countRows()); + + // Verify we can read the data + List versions = dsFromNamespace.listVersions(); + assertEquals(1, versions.size(), "Should have 1 version after create"); + } + } + } + @Test void testTransactionCommitWithNamespace() throws Exception { try (BufferAllocator allocator = new RootAllocator()) { diff --git a/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java b/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java index 7d6c4741ad8..49117a26439 100644 --- a/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java +++ b/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java @@ -14,6 +14,8 @@ package org.lance.namespace; import org.lance.namespace.model.*; +import org.lance.namespace.model.DescribeTableVersionRequest; +import org.lance.namespace.model.DescribeTableVersionResponse; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -305,4 +307,78 @@ void testCreateEmptyTable() { assertNotNull(createResp); 
assertNotNull(createResp.getLocation()); } + + @Test + void testDescribeTableReturnsManagedVersioningWhenTrackingEnabled() throws Exception { + // Create namespace with table_version_tracking_enabled and manifest_enabled + DirectoryNamespace trackingNs = new DirectoryNamespace(); + Map config = new HashMap<>(); + config.put("root", tempDir.toString()); + config.put("table_version_tracking_enabled", "true"); + config.put("manifest_enabled", "true"); + trackingNs.initialize(config, allocator); + + try { + // Create parent namespace + CreateNamespaceRequest createNsReq = + new CreateNamespaceRequest().id(Arrays.asList("workspace")); + trackingNs.createNamespace(createNsReq); + + // Create a table + byte[] tableData = createTestTableData(); + CreateTableRequest createReq = + new CreateTableRequest().id(Arrays.asList("workspace", "test_table")); + trackingNs.createTable(createReq, tableData); + + // Describe table should return managedVersioning=true + DescribeTableRequest descReq = + new DescribeTableRequest().id(Arrays.asList("workspace", "test_table")); + DescribeTableResponse descResp = trackingNs.describeTable(descReq); + + assertNotNull(descResp); + assertNotNull(descResp.getLocation()); + assertTrue( + Boolean.TRUE.equals(descResp.getManagedVersioning()), + "Expected managedVersioning=true, got " + descResp.getManagedVersioning()); + } finally { + trackingNs.close(); + } + } + + @Test + void testDescribeTableVersion() throws Exception { + // Use multi-level table ID with manifest_enabled + DirectoryNamespace trackingNs = new DirectoryNamespace(); + Map config = new HashMap<>(); + config.put("root", tempDir.toString()); + config.put("manifest_enabled", "true"); + trackingNs.initialize(config, allocator); + + try { + // Create parent namespace + CreateNamespaceRequest createNsReq = + new CreateNamespaceRequest().id(Arrays.asList("workspace")); + trackingNs.createNamespace(createNsReq); + + // Create a table with multi-level ID + byte[] tableData = 
createTestTableData(); + CreateTableRequest createReq = + new CreateTableRequest().id(Arrays.asList("workspace", "test_table")); + trackingNs.createTable(createReq, tableData); + + // Describe table version + DescribeTableVersionRequest descReq = + new DescribeTableVersionRequest() + .id(Arrays.asList("workspace", "test_table")) + .version(1L); + DescribeTableVersionResponse descResp = trackingNs.describeTableVersion(descReq); + + assertNotNull(descResp); + assertNotNull(descResp.getVersion()); + assertEquals(Long.valueOf(1), descResp.getVersion().getVersion()); + assertNotNull(descResp.getVersion().getManifestPath()); + } finally { + trackingNs.close(); + } + } } diff --git a/python/Cargo.lock b/python/Cargo.lock index f2c5eb504bf..9ce642cb770 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -4347,9 +4347,9 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.4.5" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2acdba67f84190067532fce07b51a435dd390d7cdc1129a05003e5cb3274cf0" +checksum = "d2c20a25207bbae280c9acd16ccd1e3561ad7f79a57c0e88809cd9c026a8494c" dependencies = [ "reqwest", "serde", diff --git a/python/pyproject.toml b/python/pyproject.toml index 4b6814aa070..7cd0cdb500d 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "pylance" dynamic = ["version"] -dependencies = ["pyarrow>=14", "numpy>=1.22", "lance-namespace>=0.4.5"] +dependencies = ["pyarrow>=14", "numpy>=1.22", "lance-namespace>=0.5.0"] description = "python wrapper for Lance columnar format" authors = [{ name = "Lance Devs", email = "dev@lance.org" }] license = { file = "LICENSE" } diff --git a/python/python/lance/__init__.py b/python/python/lance/__init__.py index 92b32f6dc6a..d3e008019d1 100644 --- a/python/python/lance/__init__.py +++ b/python/python/lance/__init__.py @@ -197,6 +197,7 @@ def dataset( ) # Handle namespace resolution in Python + 
managed_versioning = False if namespace is not None: if table_id is None: raise ValueError( @@ -210,6 +211,9 @@ def dataset( if uri is None: raise ValueError("Namespace did not return a 'location' for the table") + # Check if namespace manages versioning (commits go through namespace API) + managed_versioning = getattr(response, "managed_versioning", None) is True + namespace_storage_options = response.storage_options if namespace_storage_options: @@ -239,6 +243,8 @@ def dataset( read_params=read_params, session=session, storage_options_provider=storage_options_provider, + namespace=namespace._inner if managed_versioning else None, + table_id=table_id if managed_versioning else None, ) if version is None and asof is not None: ts_cutoff = sanitize_ts(asof) diff --git a/python/python/lance/namespace.py b/python/python/lance/namespace.py index bccb602f169..98fe0b8ebe7 100644 --- a/python/python/lance/namespace.py +++ b/python/python/lance/namespace.py @@ -39,6 +39,8 @@ ListNamespacesResponse, ListTablesRequest, ListTablesResponse, + ListTableVersionsRequest, + ListTableVersionsResponse, NamespaceExistsRequest, RegisterTableRequest, RegisterTableResponse, @@ -393,6 +395,52 @@ def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: response_dict = self._inner.declare_table(request.model_dump()) return DeclareTableResponse.from_dict(response_dict) + # Table version operations + + def list_table_versions( + self, request: ListTableVersionsRequest + ) -> ListTableVersionsResponse: + response_dict = self._inner.list_table_versions(request.model_dump()) + return ListTableVersionsResponse.from_dict(response_dict) + + def create_table_version(self, request: dict) -> dict: + """Create a table version (for external manifest store integration). 
+ + Parameters + ---------- + request : dict + Request dictionary with keys: + - id: List[str] - Table identifier + - version: int - Version number to create + - manifest_path: str - Path to staging manifest + - manifest_size: int (optional) - Size in bytes + - e_tag: str (optional) - ETag for optimistic concurrency + + Returns + ------- + dict + Response dictionary with optional transaction_id + """ + return self._inner.create_table_version(request) + + def describe_table_version(self, request: dict) -> dict: + """Describe a specific table version. + + Parameters + ---------- + request : dict + Request dictionary with keys: + - id: List[str] - Table identifier + - version: int (optional) - Version to describe (None = latest) + + Returns + ------- + dict + Response dictionary with version info: + - version: dict with version, manifest_path, manifest_size, e_tag, timestamp + """ + return self._inner.describe_table_version(request) + class RestNamespace(LanceNamespace): """REST-based Lance Namespace implementation backed by Rust. @@ -542,6 +590,52 @@ def rename_table(self, request: RenameTableRequest) -> RenameTableResponse: response_dict = self._inner.rename_table(request.model_dump()) return RenameTableResponse.from_dict(response_dict) + # Table version operations + + def list_table_versions( + self, request: ListTableVersionsRequest + ) -> ListTableVersionsResponse: + response_dict = self._inner.list_table_versions(request.model_dump()) + return ListTableVersionsResponse.from_dict(response_dict) + + def create_table_version(self, request: dict) -> dict: + """Create a table version (for external manifest store integration). 
+ + Parameters + ---------- + request : dict + Request dictionary with keys: + - id: List[str] - Table identifier + - version: int - Version number to create + - manifest_path: str - Path to staging manifest + - manifest_size: int (optional) - Size in bytes + - e_tag: str (optional) - ETag for optimistic concurrency + + Returns + ------- + dict + Response dictionary with optional transaction_id + """ + return self._inner.create_table_version(request) + + def describe_table_version(self, request: dict) -> dict: + """Describe a specific table version. + + Parameters + ---------- + request : dict + Request dictionary with keys: + - id: List[str] - Table identifier + - version: int (optional) - Version to describe (None = latest) + + Returns + ------- + dict + Response dictionary with version info: + - version: dict with version, manifest_path, manifest_size, e_tag, timestamp + """ + return self._inner.describe_table_version(request) + class RestAdapter: """REST adapter server that creates a namespace backend and exposes it via REST. 
diff --git a/python/python/tests/test_namespace_integration.py b/python/python/tests/test_namespace_integration.py index 30489496e38..aa6202bcf28 100644 --- a/python/python/tests/test_namespace_integration.py +++ b/python/python/tests/test_namespace_integration.py @@ -838,3 +838,234 @@ def test_file_session_with_storage_options_provider(s3_bucket: str): final_describe_count = namespace.get_describe_call_count() assert final_describe_count == describe_count_after_second_write + + +class TableVersionTrackingNamespace(LanceNamespace): + """Namespace wrapper that tracks table version API calls.""" + + def __init__(self, root: str, storage_options: Dict[str, str] = None): + from lance.namespace import DirectoryNamespace + + self.create_table_version_count = 0 + self.describe_table_version_count = 0 + self.list_table_versions_count = 0 + self.lock = Lock() + + dir_props = { + "root": root, + "table_version_tracking_enabled": "true", + } + if storage_options: + for k, v in storage_options.items(): + dir_props[f"storage.{k}"] = v + + self.inner = DirectoryNamespace(**dir_props) + + def namespace_id(self) -> str: + return f"TableVersionTrackingNamespace {{ inner: {self.inner.namespace_id()} }}" + + def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse: + return self.inner.describe_table(request) + + def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: + return self.inner.declare_table(request) + + def create_table_version(self, request: dict) -> dict: + with self.lock: + self.create_table_version_count += 1 + return self.inner.create_table_version(request) + + def describe_table_version(self, request: dict) -> dict: + with self.lock: + self.describe_table_version_count += 1 + return self.inner.describe_table_version(request) + + def list_table_versions(self, request): + with self.lock: + self.list_table_versions_count += 1 + return self.inner.list_table_versions(request) + + +def 
test_e2e_describe_table_returns_managed_versioning(): + """Test that describe_table returns managed_versioning=True.""" + import tempfile + + from lance.namespace import CreateTableRequest, DirectoryNamespace + from lance_namespace import CreateNamespaceRequest + + with tempfile.TemporaryDirectory() as tmpdir: + ns = DirectoryNamespace( + root=f"file://{tmpdir}", + table_version_tracking_enabled="true", + manifest_enabled="true", + ) + + # Create parent namespace + create_ns_req = CreateNamespaceRequest(id=["workspace"]) + ns.create_namespace(create_ns_req) + + table1 = pa.Table.from_pylist([{"a": 1, "b": 2}]) + + import io + + sink = io.BytesIO() + with pa.ipc.RecordBatchStreamWriter(sink, table1.schema) as writer: + writer.write_table(table1) + ipc_data = sink.getvalue() + + # Use multi-level table ID (namespace + table) + create_req = CreateTableRequest(id=["workspace", "test_table"]) + ns.create_table(create_req, ipc_data) + + describe_req = DescribeTableRequest(id=["workspace", "test_table"]) + response = ns.describe_table(describe_req) + + assert response.location is not None + assert response.managed_versioning is True, ( + f"Expected managed_versioning=True, got {response.managed_versioning}" + ) + + +def test_e2e_table_version_apis(): + """Test that table version APIs work correctly.""" + import tempfile + + from lance.namespace import CreateTableRequest, DirectoryNamespace + from lance_namespace import CreateNamespaceRequest + + with tempfile.TemporaryDirectory() as tmpdir: + ns = DirectoryNamespace( + root=f"file://{tmpdir}", + table_version_tracking_enabled="true", + manifest_enabled="true", + ) + + # Create parent namespace + create_ns_req = CreateNamespaceRequest(id=["workspace"]) + ns.create_namespace(create_ns_req) + + table1 = pa.Table.from_pylist([{"a": 1, "b": 2}]) + + import io + + sink = io.BytesIO() + with pa.ipc.RecordBatchStreamWriter(sink, table1.schema) as writer: + writer.write_table(table1) + ipc_data = sink.getvalue() + + # Use 
multi-level table ID (namespace + table) + create_req = CreateTableRequest(id=["workspace", "test_table"]) + ns.create_table(create_req, ipc_data) + + # describe_table_version reads directly from the dataset + describe_req = {"id": ["workspace", "test_table"], "version": 1} + describe_response = ns.describe_table_version(describe_req) + + assert "version" in describe_response + assert describe_response["version"]["version"] == 1 + assert describe_response["version"]["manifest_path"] is not None + + # Get latest version (version=None) + describe_latest_req = {"id": ["workspace", "test_table"], "version": None} + describe_latest_response = ns.describe_table_version(describe_latest_req) + + assert "version" in describe_latest_response + assert describe_latest_response["version"]["version"] == 1 + + +@pytest.mark.integration +def test_managed_versioning_with_commit_handler(s3_bucket: str): + """Test that managed_versioning enables namespace commit handler for writes.""" + from lance.namespace import DirectoryNamespace + + storage_options = copy.deepcopy(CONFIG) + + # Create namespace with table_version_tracking_enabled + dir_props = {f"storage.{k}": v for k, v in storage_options.items()} + dir_props["root"] = f"s3://{s3_bucket}/managed_versioning_test" + dir_props["table_version_tracking_enabled"] = "true" + dir_props["manifest_enabled"] = "true" + + namespace = DirectoryNamespace(**dir_props) + table_name = uuid.uuid4().hex + table_id = ["test_ns", table_name] + + # Create table + table1 = pa.Table.from_pylist([{"a": 1, "b": 2}]) + ds = lance.write_dataset( + table1, namespace=namespace, table_id=table_id, mode="create" + ) + assert ds.count_rows() == 1 + assert len(ds.versions()) == 1 + + # Verify managed_versioning=true is returned + from lance.namespace import DescribeTableRequest + + describe_req = DescribeTableRequest(id=table_id) + describe_resp = namespace.describe_table(describe_req) + assert describe_resp.managed_versioning is True, ( + f"Expected 
managed_versioning=True, got {describe_resp.managed_versioning}" + ) + + # Open dataset through namespace - this should set up commit handler + ds_from_namespace = lance.dataset( + namespace=namespace, + table_id=table_id, + ) + assert ds_from_namespace.count_rows() == 1 + + # Append data - this should go through the namespace commit handler + table2 = pa.Table.from_pylist([{"a": 10, "b": 20}]) + ds = lance.write_dataset( + table2, namespace=namespace, table_id=table_id, mode="append" + ) + assert ds.count_rows() == 2 + assert len(ds.versions()) == 2 + + # Verify the data through namespace + ds_final = lance.dataset( + namespace=namespace, + table_id=table_id, + ) + assert ds_final.count_rows() == 2 + + +@pytest.mark.integration +def test_e2e_table_version_tracking_with_s3(s3_bucket: str): + """Test end-to-end table version tracking with S3 storage.""" + storage_options = copy.deepcopy(CONFIG) + + namespace = TableVersionTrackingNamespace( + root=f"s3://{s3_bucket}/version_tracking_test", + storage_options=storage_options, + ) + + table_name = uuid.uuid4().hex + table_id = ["test_ns", table_name] + + request = DeclareTableRequest(id=table_id, location=None) + response = namespace.declare_table(request) + + table_uri = response.location + assert table_uri is not None + # managed_versioning indicates namespace-managed commits + assert response.managed_versioning is True + + describe_response = namespace.describe_table( + DescribeTableRequest(id=table_id, version=None) + ) + assert describe_response.location is not None + assert describe_response.managed_versioning is True + + from lance_namespace import ListTableVersionsRequest + + _list_response = namespace.list_table_versions( + ListTableVersionsRequest(id=table_id) + ) + assert namespace.list_table_versions_count == 1 + + describe_version_response = namespace.describe_table_version( + {"id": table_id, "version": None} + ) + assert namespace.describe_table_version_count == 1 + assert "version" in 
describe_version_response diff --git a/python/src/dataset.rs b/python/src/dataset.rs index f180a5dd145..e5a60f9d9b3 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -81,13 +81,16 @@ use lance_index::{ }; use lance_io::object_store::ObjectStoreParams; use lance_linalg::distance::MetricType; +use lance_namespace::LanceNamespace; use lance_table::format::{BasePath, Fragment, IndexMetadata}; +use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler; use lance_table::io::commit::CommitHandler; use crate::error::PythonErrorExt; use crate::file::object_store_from_uri_or_path; use crate::fragment::FileFragment; use crate::indices::{PyIndexConfig, PyIndexDescription}; +use crate::namespace::{PyDirectoryNamespace, PyRestNamespace}; use crate::rt; use crate::scanner::ScanStatistics; use crate::schema::{logical_schema_from_lance, LanceSchema}; @@ -95,6 +98,7 @@ use crate::session::Session; use crate::storage_options::PyStorageOptionsAccessor; use crate::utils::PyLance; use crate::{LanceReader, Scanner}; +use lance::io::commit::namespace_manifest::LanceNamespaceExternalManifestStore; use self::cleanup::CleanupStats; use self::commit::PyCommitLock; @@ -483,7 +487,7 @@ impl Dataset { #[allow(clippy::too_many_arguments)] #[allow(deprecated)] #[new] - #[pyo3(signature=(uri, version=None, block_size=None, index_cache_size=None, metadata_cache_size=None, commit_handler=None, storage_options=None, manifest=None, metadata_cache_size_bytes=None, index_cache_size_bytes=None, read_params=None, session=None, storage_options_provider=None))] + #[pyo3(signature=(uri, version=None, block_size=None, index_cache_size=None, metadata_cache_size=None, commit_handler=None, storage_options=None, manifest=None, metadata_cache_size_bytes=None, index_cache_size_bytes=None, read_params=None, session=None, storage_options_provider=None, namespace=None, table_id=None))] fn new( py: Python, uri: String, @@ -499,6 +503,8 @@ impl Dataset { read_params: Option<&Bound>, 
session: Option, storage_options_provider: Option<&Bound<'_, PyAny>>, + namespace: Option<&Bound<'_, PyAny>>, + table_id: Option>, ) -> PyResult { let mut params = ReadParams::default(); if let Some(metadata_cache_size_bytes) = metadata_cache_size_bytes { @@ -593,6 +599,27 @@ impl Dataset { builder = builder.with_storage_options_provider(provider); } + // Set up namespace commit handler if namespace and table_id are provided + if let (Some(ns), Some(tid)) = (namespace, table_id) { + // Extract the inner namespace Arc from either PyDirectoryNamespace or PyRestNamespace + let ns_arc: Arc = + if let Ok(dir_ns) = ns.downcast::() { + dir_ns.borrow().inner.clone() + } else if let Ok(rest_ns) = ns.downcast::() { + rest_ns.borrow().inner.clone() + } else { + return Err(PyValueError::new_err( + "namespace must be either PyDirectoryNamespace or PyRestNamespace", + )); + }; + + let external_store = LanceNamespaceExternalManifestStore::new(ns_arc, tid); + let commit_handler: Arc = Arc::new(ExternalManifestCommitHandler { + external_manifest_store: Arc::new(external_store), + }); + builder = builder.with_commit_handler(commit_handler); + } + let dataset = rt().block_on(Some(py), builder.load())?; match dataset { diff --git a/python/src/namespace.rs b/python/src/namespace.rs index fb2769f66c2..57a70a49753 100644 --- a/python/src/namespace.rs +++ b/python/src/namespace.rs @@ -98,7 +98,7 @@ fn dict_to_hashmap(dict: &Bound<'_, PyDict>) -> PyResult /// Python wrapper for DirectoryNamespace #[pyclass(name = "PyDirectoryNamespace", module = "lance.lance")] pub struct PyDirectoryNamespace { - inner: Arc, + pub(crate) inner: Arc, } #[pymethods] @@ -322,12 +322,50 @@ impl PyDirectoryNamespace { .infer_error()?; Ok(pythonize(py, &response)?.into()) } + + // Table version operations + + fn list_table_versions<'py>( + &self, + py: Python<'py>, + request: &Bound<'_, PyAny>, + ) -> PyResult> { + let request = depythonize(request)?; + let response = crate::rt() + .block_on(Some(py), 
self.inner.list_table_versions(request))? + .infer_error()?; + pythonize(py, &response).map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) + } + + fn create_table_version<'py>( + &self, + py: Python<'py>, + request: &Bound<'_, PyAny>, + ) -> PyResult> { + let request = depythonize(request)?; + let response = crate::rt() + .block_on(Some(py), self.inner.create_table_version(request))? + .infer_error()?; + pythonize(py, &response).map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) + } + + fn describe_table_version<'py>( + &self, + py: Python<'py>, + request: &Bound<'_, PyAny>, + ) -> PyResult> { + let request = depythonize(request)?; + let response = crate::rt() + .block_on(Some(py), self.inner.describe_table_version(request))? + .infer_error()?; + pythonize(py, &response).map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) + } } /// Python wrapper for RestNamespace #[pyclass(name = "PyRestNamespace", module = "lance.lance")] pub struct PyRestNamespace { - inner: Arc, + pub(crate) inner: Arc, } #[pymethods] @@ -560,6 +598,44 @@ impl PyRestNamespace { .infer_error()?; Ok(pythonize(py, &response)?.into()) } + + // Table version operations + + fn list_table_versions<'py>( + &self, + py: Python<'py>, + request: &Bound<'_, PyAny>, + ) -> PyResult> { + let request = depythonize(request)?; + let response = crate::rt() + .block_on(Some(py), self.inner.list_table_versions(request))? + .infer_error()?; + pythonize(py, &response).map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) + } + + fn create_table_version<'py>( + &self, + py: Python<'py>, + request: &Bound<'_, PyAny>, + ) -> PyResult> { + let request = depythonize(request)?; + let response = crate::rt() + .block_on(Some(py), self.inner.create_table_version(request))? 
+ .infer_error()?; + pythonize(py, &response).map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) + } + + fn describe_table_version<'py>( + &self, + py: Python<'py>, + request: &Bound<'_, PyAny>, + ) -> PyResult> { + let request = depythonize(request)?; + let response = crate::rt() + .block_on(Some(py), self.inner.describe_table_version(request))? + .infer_error()?; + pythonize(py, &response).map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) + } } /// Python wrapper for REST adapter server diff --git a/rust/lance-namespace-impls/Cargo.toml b/rust/lance-namespace-impls/Cargo.toml index b41e7f44e01..64355ce52d1 100644 --- a/rust/lance-namespace-impls/Cargo.toml +++ b/rust/lance-namespace-impls/Cargo.toml @@ -90,6 +90,7 @@ wiremock.workspace = true arrow = { workspace = true } arrow-ipc = { workspace = true } rstest.workspace = true +lance-table.workspace = true [lints] workspace = true diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index 875df33e580..bd83f9bd630 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -12,6 +12,7 @@ use arrow::record_batch::RecordBatchIterator; use arrow_ipc::reader::StreamReader; use async_trait::async_trait; use bytes::Bytes; +use futures::TryStreamExt; use lance::dataset::{Dataset, WriteParams}; use lance::session::Session; use lance_io::object_store::{ObjectStore, ObjectStoreParams, ObjectStoreRegistry}; @@ -23,12 +24,15 @@ use std::sync::Arc; use crate::context::DynamicContextProvider; use lance_namespace::models::{ - CreateEmptyTableRequest, CreateEmptyTableResponse, CreateNamespaceRequest, - CreateNamespaceResponse, CreateTableRequest, CreateTableResponse, DeclareTableRequest, - DeclareTableResponse, DescribeNamespaceRequest, DescribeNamespaceResponse, - DescribeTableRequest, DescribeTableResponse, DropNamespaceRequest, DropNamespaceResponse, - DropTableRequest, DropTableResponse, Identity, 
ListNamespacesRequest, ListNamespacesResponse, - ListTablesRequest, ListTablesResponse, NamespaceExistsRequest, TableExistsRequest, + BatchDeleteTableVersionsRequest, BatchDeleteTableVersionsResponse, CreateEmptyTableRequest, + CreateEmptyTableResponse, CreateNamespaceRequest, CreateNamespaceResponse, CreateTableRequest, + CreateTableResponse, CreateTableVersionRequest, CreateTableVersionResponse, + DeclareTableRequest, DeclareTableResponse, DescribeNamespaceRequest, DescribeNamespaceResponse, + DescribeTableRequest, DescribeTableResponse, DescribeTableVersionRequest, + DescribeTableVersionResponse, DropNamespaceRequest, DropNamespaceResponse, DropTableRequest, + DropTableResponse, Identity, ListNamespacesRequest, ListNamespacesResponse, + ListTableVersionsRequest, ListTableVersionsResponse, ListTablesRequest, ListTablesResponse, + NamespaceExistsRequest, TableExistsRequest, TableVersion, }; use lance_core::{box_error, Error, Result}; @@ -94,6 +98,7 @@ pub struct DirectoryNamespaceBuilder { manifest_enabled: bool, dir_listing_enabled: bool, inline_optimization_enabled: bool, + table_version_tracking_enabled: bool, credential_vendor_properties: HashMap, context_provider: Option>, } @@ -109,6 +114,10 @@ impl std::fmt::Debug for DirectoryNamespaceBuilder { "inline_optimization_enabled", &self.inline_optimization_enabled, ) + .field( + "table_version_tracking_enabled", + &self.table_version_tracking_enabled, + ) .field( "context_provider", &self.context_provider.as_ref().map(|_| "Some(...)"), @@ -131,6 +140,7 @@ impl DirectoryNamespaceBuilder { manifest_enabled: true, dir_listing_enabled: true, // Default to enabled for backwards compatibility inline_optimization_enabled: true, + table_version_tracking_enabled: false, // Default to disabled credential_vendor_properties: HashMap::new(), context_provider: None, } @@ -164,6 +174,18 @@ impl DirectoryNamespaceBuilder { self } + /// Enable or disable table version tracking through the namespace. 
+ /// + /// When enabled, `describe_table` returns `managed_versioning: true` to indicate + /// that commits should go through the namespace's table version APIs rather than + /// direct object store operations. + /// + /// When disabled (default), `managed_versioning` is not set. + pub fn table_version_tracking_enabled(mut self, enabled: bool) -> Self { + self.table_version_tracking_enabled = enabled; + self + } + /// Create a DirectoryNamespaceBuilder from properties HashMap. /// /// This method parses a properties map into builder configuration. @@ -274,6 +296,12 @@ impl DirectoryNamespaceBuilder { .and_then(|v| v.parse::().ok()) .unwrap_or(true); + // Extract table_version_tracking_enabled (default: false) + let table_version_tracking_enabled = properties + .get("table_version_tracking_enabled") + .and_then(|v| v.parse::().ok()) + .unwrap_or(false); + // Extract credential vendor properties (properties prefixed with "credential_vendor.") // The prefix is stripped to get short property names // The build() method will check if enabled=true before creating the vendor @@ -292,6 +320,7 @@ impl DirectoryNamespaceBuilder { manifest_enabled, dir_listing_enabled, inline_optimization_enabled, + table_version_tracking_enabled, credential_vendor_properties, context_provider: None, }) @@ -459,6 +488,7 @@ impl DirectoryNamespaceBuilder { base_path, manifest_ns, dir_listing_enabled: self.dir_listing_enabled, + table_version_tracking_enabled: self.table_version_tracking_enabled, credential_vendor, context_provider: self.context_provider, }) @@ -530,6 +560,9 @@ pub struct DirectoryNamespace { base_path: Path, manifest_ns: Option>, dir_listing_enabled: bool, + /// When true, `describe_table` returns `managed_versioning: true` to indicate + /// commits should go through namespace table version APIs. + table_version_tracking_enabled: bool, /// Credential vendor created once during initialization. /// Used to vend temporary credentials for table access. 
credential_vendor: Option>, @@ -656,11 +689,38 @@ impl DirectoryNamespace { Ok(id[0].clone()) } - /// Get the full URI path for a table (for returning in responses) + async fn resolve_table_location(&self, id: &Option>) -> Result { + let mut describe_req = DescribeTableRequest::new(); + describe_req.id = id.clone(); + describe_req.load_detailed_metadata = Some(false); + + let describe_resp = self.describe_table(describe_req).await?; + + describe_resp.location.ok_or_else(|| Error::Namespace { + source: format!("Table location not found for: {:?}", id).into(), + location: snafu::location!(), + }) + } + fn table_full_uri(&self, table_name: &str) -> String { format!("{}/{}.lance", &self.root, table_name) } + fn uri_to_object_store_path(uri: &str) -> Path { + let path_str = if let Some(rest) = uri.strip_prefix("file://") { + rest + } else if let Some(rest) = uri.strip_prefix("s3://") { + rest.split_once('/').map(|(_, p)| p).unwrap_or(rest) + } else if let Some(rest) = uri.strip_prefix("gs://") { + rest.split_once('/').map(|(_, p)| p).unwrap_or(rest) + } else if let Some(rest) = uri.strip_prefix("az://") { + rest.split_once('/').map(|(_, p)| p).unwrap_or(rest) + } else { + uri + }; + Path::from(path_str) + } + /// Get the object store path for a table (relative to base_path) fn table_path(&self, table_name: &str) -> Path { self.base_path @@ -707,13 +767,6 @@ impl DirectoryNamespace { } } - /// Atomically create a marker file using put_if_not_exists semantics. - /// - /// This uses `PutMode::Create` which will fail if the file already exists, - /// providing atomic creation semantics to avoid race conditions. - /// - /// Returns Ok(()) if the file was created successfully. - /// Returns Err with appropriate message if the file already exists or other error. 
async fn put_marker_file_atomic( &self, path: &Path, @@ -1023,6 +1076,10 @@ impl LanceNamespace for DirectoryNamespace { } else if request.vend_credentials == Some(false) { response.storage_options = None; } + // Set managed_versioning flag when table_version_tracking_enabled + if self.table_version_tracking_enabled { + response.managed_versioning = Some(true); + } return Ok(response); } Err(_) @@ -1080,6 +1137,11 @@ impl LanceNamespace for DirectoryNamespace { location: Some(table_uri.clone()), table_uri: Some(table_uri), storage_options, + managed_versioning: if self.table_version_tracking_enabled { + Some(true) + } else { + None + }, ..Default::default() }); } @@ -1122,6 +1184,11 @@ impl LanceNamespace for DirectoryNamespace { schema: Some(Box::new(json_schema)), storage_options, metadata: Some(metadata), + managed_versioning: if self.table_version_tracking_enabled { + Some(true) + } else { + None + }, ..Default::default() }) } @@ -1146,6 +1213,11 @@ impl LanceNamespace for DirectoryNamespace { location: Some(table_uri.clone()), table_uri: Some(table_uri), storage_options, + managed_versioning: if self.table_version_tracking_enabled { + Some(true) + } else { + None + }, ..Default::default() }) } else { @@ -1514,6 +1586,262 @@ impl LanceNamespace for DirectoryNamespace { }) } + async fn list_table_versions( + &self, + request: ListTableVersionsRequest, + ) -> Result { + let table_uri = self.resolve_table_location(&request.id).await?; + + let table_path = Self::uri_to_object_store_path(&table_uri); + let versions_dir = table_path.child("_versions"); + let manifest_metas: Vec<_> = self + .object_store + .read_dir_all(&versions_dir, None) + .try_collect() + .await + .map_err(|e| Error::Namespace { + source: format!( + "Failed to list manifest files for table at '{}': {}", + table_uri, e + ) + .into(), + location: snafu::location!(), + })?; + + let is_v2_naming = manifest_metas + .first() + .is_some_and(|meta| meta.location.filename().is_some_and(|f| f.len() == 29)); 
+ + let mut table_versions: Vec = manifest_metas + .into_iter() + .filter_map(|meta| { + let filename = meta.location.filename()?; + let version_str = filename.strip_suffix(".manifest")?; + if version_str.starts_with('d') { + return None; + } + let file_version: u64 = version_str.parse().ok()?; + + let actual_version = if file_version > u64::MAX / 2 { + u64::MAX - file_version + } else { + file_version + }; + + // Use full path from object_store (relative to object store root) + Some(TableVersion { + version: actual_version as i64, + manifest_path: meta.location.to_string(), + manifest_size: Some(meta.size as i64), + e_tag: meta.e_tag, + timestamp: Some(meta.last_modified.to_rfc3339()), + metadata: None, + }) + }) + .collect(); + + let list_is_ordered = self.object_store.list_is_lexically_ordered; + let want_descending = request.descending == Some(true); + + let needs_sort = if list_is_ordered { + if is_v2_naming { + !want_descending + } else { + want_descending + } + } else { + true + }; + + if needs_sort { + if want_descending { + table_versions.sort_by(|a, b| b.version.cmp(&a.version)); + } else { + table_versions.sort_by(|a, b| a.version.cmp(&b.version)); + } + } + + if let Some(limit) = request.limit { + table_versions.truncate(limit as usize); + } + + Ok(ListTableVersionsResponse { + versions: table_versions, + page_token: None, + }) + } + + async fn create_table_version( + &self, + request: CreateTableVersionRequest, + ) -> Result { + let table_uri = self.resolve_table_location(&request.id).await?; + + let staging_manifest_path = &request.manifest_path; + let version = request.version as u64; + + let table_path = Self::uri_to_object_store_path(&table_uri); + let versions_dir_path = table_path.child("_versions"); + let final_path = versions_dir_path.child(format!("{}.manifest", version)); + + let staging_path = Self::uri_to_object_store_path(staging_manifest_path); + let manifest_data = self + .object_store + .inner + .get(&staging_path) + .await + 
.map_err(|e| Error::Namespace { + source: format!( + "Failed to read staging manifest at '{}': {}", + staging_manifest_path, e + ) + .into(), + location: snafu::location!(), + })? + .bytes() + .await + .map_err(|e| Error::Namespace { + source: format!( + "Failed to read staging manifest bytes at '{}': {}", + staging_manifest_path, e + ) + .into(), + location: snafu::location!(), + })?; + + self.object_store + .inner + .put_opts( + &final_path, + manifest_data.into(), + PutOptions { + mode: PutMode::Create, + ..Default::default() + }, + ) + .await + .map_err(|e| match e { + object_store::Error::AlreadyExists { .. } + | object_store::Error::Precondition { .. } => Error::Namespace { + source: format!( + "Version {} already exists for table at '{}'", + version, table_uri + ) + .into(), + location: snafu::location!(), + }, + _ => Error::Namespace { + source: format!( + "Failed to create version {} for table at '{}': {}", + version, table_uri, e + ) + .into(), + location: snafu::location!(), + }, + })?; + + Ok(CreateTableVersionResponse { + transaction_id: None, + }) + } + + async fn describe_table_version( + &self, + request: DescribeTableVersionRequest, + ) -> Result { + let table_uri = self.resolve_table_location(&request.id).await?; + + let mut dataset = Dataset::open(&table_uri) + .await + .map_err(|e| Error::Namespace { + source: format!("Failed to open table at '{}': {}", table_uri, e).into(), + location: snafu::location!(), + })?; + + if let Some(version) = request.version { + dataset = dataset + .checkout_version(version as u64) + .await + .map_err(|e| Error::Namespace { + source: format!( + "Failed to checkout version {} for table at '{}': {}", + version, table_uri, e + ) + .into(), + location: snafu::location!(), + })?; + } + + let version_info = dataset.version(); + let manifest_location = dataset.manifest_location(); + let metadata: std::collections::HashMap = + version_info.metadata.into_iter().collect(); + + let table_version = TableVersion { + version: 
version_info.version as i64, + manifest_path: manifest_location.path.to_string(), + manifest_size: manifest_location.size.map(|s| s as i64), + e_tag: manifest_location.e_tag.clone(), + timestamp: Some(version_info.timestamp.to_rfc3339()), + metadata: if metadata.is_empty() { + None + } else { + Some(metadata) + }, + }; + + Ok(DescribeTableVersionResponse { + version: Box::new(table_version), + }) + } + + async fn batch_delete_table_versions( + &self, + request: BatchDeleteTableVersionsRequest, + ) -> Result { + let table_uri = self.resolve_table_location(&request.id).await?; + + let table_path = Self::uri_to_object_store_path(&table_uri); + // Manifests live in the `_versions` child directory of the table root, the same layout list_table_versions and create_table_version use; string concatenation would target a sibling `<table>_versions` path and delete nothing. + let versions_dir_path = table_path.child("_versions"); + + let mut deleted_count = 0i64; + + for range in &request.ranges { + let start = range.start_version as u64; + let end = if range.end_version > 0 { + range.end_version as u64 + } else { + start + }; + + for version in start..=end { + let version_path = versions_dir_path.child(format!("{}.manifest", version)); + match self.object_store.inner.delete(&version_path).await { + Ok(_) => { + deleted_count += 1; + } + Err(object_store::Error::NotFound { ..
}) => {} + Err(e) => { + return Err(Error::Namespace { + source: format!( + "Failed to delete version {} for table at '{}': {}", + version, table_uri, e + ) + .into(), + location: snafu::location!(), + }); + } + } + } + } + + Ok(BatchDeleteTableVersionsResponse { + deleted_count: Some(deleted_count), + transaction_id: None, + }) + } + fn namespace_id(&self) -> String { format!("DirectoryNamespace {{ root: {:?} }}", self.root) } @@ -3553,4 +3881,800 @@ mod tests { assert!(!status.is_deregistered); assert!(!status.has_reserved_file); } + + #[tokio::test] + async fn test_table_version_tracking_enabled_managed_versioning() { + use lance_namespace::models::DescribeTableRequest; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + // Create namespace with table_version_tracking_enabled=true + let namespace = DirectoryNamespaceBuilder::new(temp_path) + .table_version_tracking_enabled(true) + .build() + .await + .unwrap(); + + // Create a table + let schema = create_test_schema(); + let ipc_data = create_test_ipc_data(&schema); + let mut create_req = CreateTableRequest::new(); + create_req.id = Some(vec!["test_table".to_string()]); + namespace + .create_table(create_req, bytes::Bytes::from(ipc_data)) + .await + .unwrap(); + + // Describe table should return managed_versioning=true + let mut describe_req = DescribeTableRequest::new(); + describe_req.id = Some(vec!["test_table".to_string()]); + let describe_resp = namespace.describe_table(describe_req).await.unwrap(); + + // managed_versioning should be true + assert_eq!( + describe_resp.managed_versioning, + Some(true), + "managed_versioning should be true when table_version_tracking_enabled=true" + ); + } + + #[tokio::test] + async fn test_table_version_tracking_disabled_no_managed_versioning() { + use lance_namespace::models::DescribeTableRequest; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + // Create namespace with 
table_version_tracking_enabled=false (default) + let namespace = DirectoryNamespaceBuilder::new(temp_path) + .table_version_tracking_enabled(false) + .build() + .await + .unwrap(); + + // Create a table + let schema = create_test_schema(); + let ipc_data = create_test_ipc_data(&schema); + let mut create_req = CreateTableRequest::new(); + create_req.id = Some(vec!["test_table".to_string()]); + namespace + .create_table(create_req, bytes::Bytes::from(ipc_data)) + .await + .unwrap(); + + // Describe table should not have managed_versioning set + let mut describe_req = DescribeTableRequest::new(); + describe_req.id = Some(vec!["test_table".to_string()]); + let describe_resp = namespace.describe_table(describe_req).await.unwrap(); + + // managed_versioning should be None when table_version_tracking_enabled=false + assert!( + describe_resp.managed_versioning.is_none(), + "managed_versioning should be None when table_version_tracking_enabled=false, got: {:?}", + describe_resp.managed_versioning + ); + } + + #[tokio::test] + async fn test_list_table_versions() { + use lance_namespace::models::ListTableVersionsRequest; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + let namespace = DirectoryNamespaceBuilder::new(temp_path) + .table_version_tracking_enabled(true) + .build() + .await + .unwrap(); + + // Create a table + let schema = create_test_schema(); + let ipc_data = create_test_ipc_data(&schema); + let mut create_req = CreateTableRequest::new(); + create_req.id = Some(vec!["test_table".to_string()]); + namespace + .create_table(create_req, bytes::Bytes::from(ipc_data)) + .await + .unwrap(); + + // List versions - should have version 1 from table creation + let mut list_req = ListTableVersionsRequest::new(); + list_req.id = Some(vec!["test_table".to_string()]); + let list_resp = namespace.list_table_versions(list_req).await.unwrap(); + + assert!(!list_resp.versions.is_empty()); + let version = list_resp + .versions + .iter() + 
.find(|v| v.version == 1) + .expect("Expected version 1"); + + // Verify manifest metadata is populated + assert!( + !version.manifest_path.is_empty(), + "manifest_path should be set" + ); + assert!( + version.manifest_path.contains(".manifest"), + "manifest_path should contain .manifest" + ); + assert!( + version.manifest_size.is_some(), + "manifest_size should be set" + ); + assert!( + version.manifest_size.unwrap() > 0, + "manifest_size should be > 0" + ); + assert!(version.timestamp.is_some(), "timestamp should be set"); + } + + #[tokio::test] + async fn test_describe_table_version() { + use lance_namespace::models::DescribeTableVersionRequest; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + let namespace = DirectoryNamespaceBuilder::new(temp_path) + .table_version_tracking_enabled(true) + .build() + .await + .unwrap(); + + // Create a table + let schema = create_test_schema(); + let ipc_data = create_test_ipc_data(&schema); + let mut create_req = CreateTableRequest::new(); + create_req.id = Some(vec!["test_table".to_string()]); + namespace + .create_table(create_req, bytes::Bytes::from(ipc_data)) + .await + .unwrap(); + + // Describe version 1 + let mut describe_req = DescribeTableVersionRequest::new(); + describe_req.id = Some(vec!["test_table".to_string()]); + describe_req.version = Some(1); + let describe_resp = namespace + .describe_table_version(describe_req) + .await + .unwrap(); + + let version = &describe_resp.version; + assert_eq!(version.version, 1); + assert!(version.timestamp.is_some()); + + // Verify manifest metadata is populated + assert!( + !version.manifest_path.is_empty(), + "manifest_path should be set" + ); + assert!( + version.manifest_path.contains(".manifest"), + "manifest_path should contain .manifest" + ); + assert!( + version.manifest_size.is_some(), + "manifest_size should be set" + ); + assert!( + version.manifest_size.unwrap() > 0, + "manifest_size should be > 0" + ); + } + + 
#[tokio::test] + async fn test_describe_table_version_latest() { + use lance_namespace::models::DescribeTableVersionRequest; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + let namespace = DirectoryNamespaceBuilder::new(temp_path) + .table_version_tracking_enabled(true) + .build() + .await + .unwrap(); + + // Create a table + let schema = create_test_schema(); + let ipc_data = create_test_ipc_data(&schema); + let mut create_req = CreateTableRequest::new(); + create_req.id = Some(vec!["test_table".to_string()]); + namespace + .create_table(create_req, bytes::Bytes::from(ipc_data)) + .await + .unwrap(); + + // Describe latest version (no version specified) + let mut describe_req = DescribeTableVersionRequest::new(); + describe_req.id = Some(vec!["test_table".to_string()]); + describe_req.version = None; + let describe_resp = namespace + .describe_table_version(describe_req) + .await + .unwrap(); + + // Should return version 1 as it's the only version + assert_eq!(describe_resp.version.version, 1); + } + + #[tokio::test] + async fn test_create_table_version() { + use lance_namespace::models::CreateTableVersionRequest; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + let namespace = DirectoryNamespaceBuilder::new(temp_path) + .table_version_tracking_enabled(true) + .build() + .await + .unwrap(); + + // Create a table + let schema = create_test_schema(); + let ipc_data = create_test_ipc_data(&schema); + let mut create_req = CreateTableRequest::new(); + create_req.id = Some(vec!["test_table".to_string()]); + namespace + .create_table(create_req, bytes::Bytes::from(ipc_data)) + .await + .unwrap(); + + // Create a staging manifest by finding the actual manifest file + // Lance may use different naming schemes (V1 or V2) + let table_path = format!("{}/test_table.lance", temp_path); + let versions_dir = format!("{}/_versions", table_path); + let staging_path = 
format!("{}/staging_manifest", temp_path); + + // Find the first manifest file in the versions directory + let manifest_file = std::fs::read_dir(&versions_dir) + .expect("Failed to read versions directory") + .filter_map(|entry| entry.ok()) + .find(|entry| entry.file_name().to_string_lossy().ends_with(".manifest")) + .expect("No manifest file found"); + + let internal_manifest_path = manifest_file.path(); + std::fs::copy(&internal_manifest_path, &staging_path).unwrap(); + + // Create version 2 from staging manifest + let mut create_version_req = CreateTableVersionRequest::new(2, staging_path); + create_version_req.id = Some(vec!["test_table".to_string()]); + + let result = namespace.create_table_version(create_version_req).await; + assert!( + result.is_ok(), + "create_table_version should succeed: {:?}", + result + ); + + // Verify version 2 was created in the internal versions directory (_versions) + let version_2_path = format!("{}/_versions/2.manifest", table_path); + assert!( + std::path::Path::new(&version_2_path).exists(), + "Version 2 manifest should exist at {}", + version_2_path + ); + } + + #[tokio::test] + async fn test_create_table_version_conflict() { + // create_table_version should fail if the version already exists. + // Each version always writes to a new file location. 
+ use lance_namespace::models::CreateTableVersionRequest; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + let namespace = DirectoryNamespaceBuilder::new(temp_path) + .table_version_tracking_enabled(true) + .build() + .await + .unwrap(); + + // Create a table + let schema = create_test_schema(); + let ipc_data = create_test_ipc_data(&schema); + let mut create_req = CreateTableRequest::new(); + create_req.id = Some(vec!["test_table".to_string()]); + namespace + .create_table(create_req, bytes::Bytes::from(ipc_data)) + .await + .unwrap(); + + // Create a staging manifest by finding the actual manifest file + let table_path = format!("{}/test_table.lance", temp_path); + let versions_dir = format!("{}/_versions", table_path); + let staging_path = format!("{}/staging_manifest", temp_path); + + let manifest_file = std::fs::read_dir(&versions_dir) + .expect("Failed to read versions directory") + .filter_map(|entry| entry.ok()) + .find(|entry| entry.file_name().to_string_lossy().ends_with(".manifest")) + .expect("No manifest file found"); + + let internal_manifest_path = manifest_file.path(); + std::fs::copy(&internal_manifest_path, &staging_path).unwrap(); + + // First create external version 1 + let mut create_version_req = CreateTableVersionRequest::new(1, staging_path.clone()); + create_version_req.id = Some(vec!["test_table".to_string()]); + namespace + .create_table_version(create_version_req) + .await + .unwrap(); + + // Create version 1 again (should fail - conflict) + let mut create_version_req = CreateTableVersionRequest::new(1, staging_path); + create_version_req.id = Some(vec!["test_table".to_string()]); + + let result = namespace.create_table_version(create_version_req).await; + assert!( + result.is_err(), + "create_table_version should fail for existing version" + ); + + // Verify version 1 still exists in internal versions directory (_versions) + let version_1_path = format!("{}/_versions/1.manifest", table_path); + 
assert!( + std::path::Path::new(&version_1_path).exists(), + "Version 1 manifest should still exist at {}", + version_1_path + ); + } + + #[tokio::test] + async fn test_create_table_version_table_not_found() { + use lance_namespace::models::CreateTableVersionRequest; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + let namespace = DirectoryNamespaceBuilder::new(temp_path) + .table_version_tracking_enabled(true) + .build() + .await + .unwrap(); + + // Try to create version for non-existent table + let mut create_version_req = + CreateTableVersionRequest::new(1, "/some/staging/path".to_string()); + create_version_req.id = Some(vec!["non_existent_table".to_string()]); + + let result = namespace.create_table_version(create_version_req).await; + assert!( + result.is_err(), + "create_table_version should fail for non-existent table" + ); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("does not exist"), + "Error should mention table does not exist, got: {}", + err_msg + ); + } + + /// End-to-end integration test module for table version tracking. + mod e2e_table_version_tracking { + use super::*; + use lance::io::commit::namespace_manifest::LanceNamespaceExternalManifestStore; + use lance_table::io::commit::external_manifest::{ + ExternalManifestCommitHandler, ExternalManifestStore, + }; + use lance_table::io::commit::CommitHandler; + use std::sync::atomic::{AtomicUsize, Ordering}; + + /// Tracking wrapper around a namespace that counts method invocations. 
+ struct TrackingNamespace { + inner: DirectoryNamespace, + create_table_version_count: AtomicUsize, + describe_table_version_count: AtomicUsize, + list_table_versions_count: AtomicUsize, + } + + impl TrackingNamespace { + fn new(inner: DirectoryNamespace) -> Self { + Self { + inner, + create_table_version_count: AtomicUsize::new(0), + describe_table_version_count: AtomicUsize::new(0), + list_table_versions_count: AtomicUsize::new(0), + } + } + + fn create_table_version_calls(&self) -> usize { + self.create_table_version_count.load(Ordering::SeqCst) + } + + fn list_table_versions_calls(&self) -> usize { + self.list_table_versions_count.load(Ordering::SeqCst) + } + } + + impl std::fmt::Debug for TrackingNamespace { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TrackingNamespace") + .field( + "create_table_version_calls", + &self.create_table_version_calls(), + ) + .finish() + } + } + + #[async_trait] + impl LanceNamespace for TrackingNamespace { + async fn create_namespace( + &self, + request: CreateNamespaceRequest, + ) -> Result { + self.inner.create_namespace(request).await + } + + async fn describe_namespace( + &self, + request: DescribeNamespaceRequest, + ) -> Result { + self.inner.describe_namespace(request).await + } + + async fn namespace_exists(&self, request: NamespaceExistsRequest) -> Result<()> { + self.inner.namespace_exists(request).await + } + + async fn list_namespaces( + &self, + request: ListNamespacesRequest, + ) -> Result { + self.inner.list_namespaces(request).await + } + + async fn drop_namespace( + &self, + request: DropNamespaceRequest, + ) -> Result { + self.inner.drop_namespace(request).await + } + + async fn list_tables(&self, request: ListTablesRequest) -> Result { + self.inner.list_tables(request).await + } + + async fn describe_table( + &self, + request: DescribeTableRequest, + ) -> Result { + self.inner.describe_table(request).await + } + + async fn table_exists(&self, request: 
TableExistsRequest) -> Result<()> { + self.inner.table_exists(request).await + } + + async fn drop_table(&self, request: DropTableRequest) -> Result { + self.inner.drop_table(request).await + } + + async fn create_table( + &self, + request: CreateTableRequest, + request_data: Bytes, + ) -> Result { + self.inner.create_table(request, request_data).await + } + + #[allow(deprecated)] + async fn create_empty_table( + &self, + request: CreateEmptyTableRequest, + ) -> Result { + self.inner.create_empty_table(request).await + } + + async fn declare_table( + &self, + request: DeclareTableRequest, + ) -> Result { + self.inner.declare_table(request).await + } + + async fn list_table_versions( + &self, + request: ListTableVersionsRequest, + ) -> Result { + self.list_table_versions_count + .fetch_add(1, Ordering::SeqCst); + self.inner.list_table_versions(request).await + } + + async fn create_table_version( + &self, + request: CreateTableVersionRequest, + ) -> Result { + self.create_table_version_count + .fetch_add(1, Ordering::SeqCst); + self.inner.create_table_version(request).await + } + + async fn describe_table_version( + &self, + request: DescribeTableVersionRequest, + ) -> Result { + self.describe_table_version_count + .fetch_add(1, Ordering::SeqCst); + self.inner.describe_table_version(request).await + } + + async fn batch_delete_table_versions( + &self, + request: BatchDeleteTableVersionsRequest, + ) -> Result { + self.inner.batch_delete_table_versions(request).await + } + + fn namespace_id(&self) -> String { + self.inner.namespace_id() + } + } + + #[tokio::test] + async fn test_e2e_describe_table_returns_managed_versioning() { + use lance_namespace::models::{CreateNamespaceRequest, DescribeTableRequest}; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + // Create namespace with table_version_tracking_enabled and manifest_enabled + let inner_ns = DirectoryNamespaceBuilder::new(temp_path) + .table_version_tracking_enabled(true) 
+ .manifest_enabled(true) + .build() + .await + .unwrap(); + + let tracking_ns = Arc::new(TrackingNamespace::new(inner_ns)); + + // Create parent namespace + let mut create_ns_req = CreateNamespaceRequest::new(); + create_ns_req.id = Some(vec!["workspace".to_string()]); + tracking_ns.create_namespace(create_ns_req).await.unwrap(); + + // Create a table with multi-level ID (namespace + table) + let schema = create_test_schema(); + let ipc_data = create_test_ipc_data(&schema); + let mut create_req = CreateTableRequest::new(); + create_req.id = Some(vec!["workspace".to_string(), "test_table".to_string()]); + tracking_ns + .create_table(create_req, bytes::Bytes::from(ipc_data)) + .await + .unwrap(); + + // Describe table should return managed_versioning=true + let mut describe_req = DescribeTableRequest::new(); + describe_req.id = Some(vec!["workspace".to_string(), "test_table".to_string()]); + let describe_resp = tracking_ns.describe_table(describe_req).await.unwrap(); + + // managed_versioning should be true + assert_eq!( + describe_resp.managed_versioning, + Some(true), + "managed_versioning should be true when table_version_tracking_enabled=true" + ); + } + + #[tokio::test] + async fn test_e2e_external_manifest_store_invokes_namespace_apis() { + use lance_namespace::models::{CreateNamespaceRequest, DescribeTableRequest}; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + // Create namespace with table_version_tracking_enabled and manifest_enabled + let inner_ns = DirectoryNamespaceBuilder::new(temp_path) + .table_version_tracking_enabled(true) + .manifest_enabled(true) + .build() + .await + .unwrap(); + + let tracking_ns = Arc::new(TrackingNamespace::new(inner_ns)); + + // Create parent namespace + let mut create_ns_req = CreateNamespaceRequest::new(); + create_ns_req.id = Some(vec!["workspace".to_string()]); + tracking_ns.create_namespace(create_ns_req).await.unwrap(); + + // Create a table with multi-level ID (namespace + 
table) + let table_id = vec!["workspace".to_string(), "test_table".to_string()]; + let schema = create_test_schema(); + let ipc_data = create_test_ipc_data(&schema); + let mut create_req = CreateTableRequest::new(); + create_req.id = Some(table_id.clone()); + tracking_ns + .create_table(create_req, bytes::Bytes::from(ipc_data)) + .await + .unwrap(); + + // Get the table location from describe_table + let mut describe_req = DescribeTableRequest::new(); + describe_req.id = Some(table_id.clone()); + let describe_resp = tracking_ns.describe_table(describe_req).await.unwrap(); + let table_location = describe_resp.location.unwrap(); + + // Create the external manifest store using our tracking namespace + let external_store = + LanceNamespaceExternalManifestStore::new(tracking_ns.clone(), table_id.clone()); + + // Test get_latest_version - should invoke list_table_versions + let initial_list_calls = tracking_ns.list_table_versions_calls(); + let latest = external_store + .get_latest_version(&table_location) + .await + .unwrap(); + assert!(latest.is_some(), "Should have at least version 1"); + let (version, _manifest_path) = latest.unwrap(); + assert_eq!(version, 1, "Initial version should be 1"); + assert!( + tracking_ns.list_table_versions_calls() > initial_list_calls, + "list_table_versions should have been called" + ); + + // Test commit - should invoke create_table_version + // Get the table path from describe_table location (strip file:// prefix if present) + let table_path = table_location + .strip_prefix("file://") + .unwrap_or(&table_location); + let versions_dir = format!("{}/_versions", table_path); + let staging_path = format!("{}/staging_manifest_v2", temp_path); + + let manifest_file = std::fs::read_dir(&versions_dir) + .expect("Failed to read versions directory") + .filter_map(|entry| entry.ok()) + .find(|entry| entry.file_name().to_string_lossy().ends_with(".manifest")) + .expect("No manifest file found"); + + let internal_manifest_path = 
manifest_file.path(); + std::fs::copy(&internal_manifest_path, &staging_path).unwrap(); + let staging_size = std::fs::metadata(&staging_path).unwrap().len(); + + // Create object store for commit method + let object_store = object_store::local::LocalFileSystem::new(); + let base_path = object_store::path::Path::from(table_path); + let staging_obj_path = object_store::path::Path::from(staging_path.as_str()); + + let initial_create_calls = tracking_ns.create_table_version_calls(); + external_store + .commit( + &base_path, + 2, + &staging_obj_path, + staging_size, + None, + &object_store, + lance_table::io::commit::ManifestNamingScheme::V1, + ) + .await + .unwrap(); + assert!( + tracking_ns.create_table_version_calls() > initial_create_calls, + "create_table_version should have been called" + ); + + // Verify version 2 was created in internal versions directory (_versions) + let version_2_path = format!("{}/_versions/2.manifest", table_path); + assert!( + std::path::Path::new(&version_2_path).exists(), + "Version 2 manifest should exist at {}", + version_2_path + ); + } + + #[tokio::test] + async fn test_e2e_dataset_commit_with_external_manifest_store() { + use arrow::array::{Int32Array, StringArray}; + use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; + use arrow::record_batch::RecordBatch; + use lance::dataset::{Dataset, WriteMode, WriteParams}; + use lance_namespace::models::{CreateNamespaceRequest, DescribeTableRequest}; + + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); + + // Create namespace with table_version_tracking_enabled and manifest_enabled + let inner_ns = DirectoryNamespaceBuilder::new(temp_path) + .table_version_tracking_enabled(true) + .manifest_enabled(true) + .build() + .await + .unwrap(); + + let tracking_ns = Arc::new(TrackingNamespace::new(inner_ns)); + + // Create parent namespace + let mut create_ns_req = CreateNamespaceRequest::new(); + create_ns_req.id = Some(vec!["workspace".to_string()]); 
+ tracking_ns.create_namespace(create_ns_req).await.unwrap(); + + // Create a table with multi-level ID (namespace + table) + let table_id = vec!["workspace".to_string(), "test_table".to_string()]; + let schema = create_test_schema(); + let ipc_data = create_test_ipc_data(&schema); + let mut create_req = CreateTableRequest::new(); + create_req.id = Some(table_id.clone()); + tracking_ns + .create_table(create_req, bytes::Bytes::from(ipc_data)) + .await + .unwrap(); + + // Get the table location from describe_table + let mut describe_req = DescribeTableRequest::new(); + describe_req.id = Some(table_id.clone()); + let describe_resp = tracking_ns.describe_table(describe_req).await.unwrap(); + let table_location = describe_resp.location.unwrap(); + + // Create the external manifest store commit handler + let external_store = Arc::new(LanceNamespaceExternalManifestStore::new( + tracking_ns.clone(), + table_id.clone(), + )); + let commit_handler: Arc = Arc::new(ExternalManifestCommitHandler { + external_manifest_store: external_store, + }); + + // Open the dataset with the external manifest commit handler + let dataset = Dataset::open(&table_location).await.unwrap(); + assert_eq!(dataset.version().version, 1); + + // Create some data to append + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, true), + ])); + let batch = RecordBatch::try_new( + arrow_schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["a", "b", "c"])), + ], + ) + .unwrap(); + + // Write data using the external manifest commit handler + let initial_create_calls = tracking_ns.create_table_version_calls(); + let write_params = WriteParams { + mode: WriteMode::Append, + commit_handler: Some(commit_handler), + ..Default::default() + }; + + let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema); + Dataset::write(batches, &table_location, Some(write_params)) + 
.await + .unwrap(); + + // Verify create_table_version was called during commit + assert!( + tracking_ns.create_table_version_calls() > initial_create_calls, + "create_table_version should have been called during dataset write. \ + Initial: {}, Current: {}", + initial_create_calls, + tracking_ns.create_table_version_calls() + ); + + // Verify version 2 was created in internal versions directory (_versions) + let table_path = table_location + .strip_prefix("file://") + .unwrap_or(&table_location); + let version_2_path = format!("{}/_versions/2.manifest", table_path); + assert!( + std::path::Path::new(&version_2_path).exists(), + "Version 2 manifest should exist at {}", + version_2_path + ); + } + } } diff --git a/rust/lance-namespace-impls/src/rest.rs b/rust/lance-namespace-impls/src/rest.rs index 0eae07e4ce2..37493341e05 100644 --- a/rust/lance-namespace-impls/src/rest.rs +++ b/rust/lance-namespace-impls/src/rest.rs @@ -22,13 +22,15 @@ use lance_namespace::models::{ CreateNamespaceRequest, CreateNamespaceResponse, CreateTableIndexRequest, CreateTableIndexResponse, CreateTableRequest, CreateTableResponse, CreateTableScalarIndexResponse, CreateTableTagRequest, CreateTableTagResponse, - DeclareTableRequest, DeclareTableResponse, DeleteFromTableRequest, DeleteFromTableResponse, - DeleteTableTagRequest, DeleteTableTagResponse, DeregisterTableRequest, DeregisterTableResponse, + CreateTableVersionRequest, CreateTableVersionResponse, DeclareTableRequest, + DeclareTableResponse, DeleteFromTableRequest, DeleteFromTableResponse, DeleteTableTagRequest, + DeleteTableTagResponse, DeregisterTableRequest, DeregisterTableResponse, DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableIndexStatsRequest, DescribeTableIndexStatsResponse, DescribeTableRequest, DescribeTableResponse, - DescribeTransactionRequest, DescribeTransactionResponse, DropNamespaceRequest, - DropNamespaceResponse, DropTableIndexRequest, DropTableIndexResponse, DropTableRequest, - DropTableResponse, 
ExplainTableQueryPlanRequest, GetTableStatsRequest, GetTableStatsResponse, + DescribeTableVersionRequest, DescribeTableVersionResponse, DescribeTransactionRequest, + DescribeTransactionResponse, DropNamespaceRequest, DropNamespaceResponse, + DropTableIndexRequest, DropTableIndexResponse, DropTableRequest, DropTableResponse, + ExplainTableQueryPlanRequest, GetTableStatsRequest, GetTableStatsResponse, GetTableTagVersionRequest, GetTableTagVersionResponse, InsertIntoTableRequest, InsertIntoTableResponse, ListNamespacesRequest, ListNamespacesResponse, ListTableIndicesRequest, ListTableIndicesResponse, ListTableTagsRequest, ListTableTagsResponse, @@ -1188,6 +1190,36 @@ impl LanceNamespace for RestNamespace { .await } + async fn create_table_version( + &self, + request: CreateTableVersionRequest, + ) -> Result { + let id = object_id_str(&request.id, &self.delimiter)?; + let encoded_id = urlencode(&id); + let path = format!("/v1/table/{}/version/create", encoded_id); + let query = [("delimiter", self.delimiter.as_str())]; + self.post_json(&path, &query, &request, "create_table_version", &id) + .await + } + + async fn describe_table_version( + &self, + request: DescribeTableVersionRequest, + ) -> Result { + let id = object_id_str(&request.id, &self.delimiter)?; + let encoded_id = urlencode(&id); + let version_str; + let path = if let Some(version) = request.version { + version_str = version.to_string(); + format!("/v1/table/{}/version/{}", encoded_id, version_str) + } else { + format!("/v1/table/{}/version/latest", encoded_id) + }; + let query = [("delimiter", self.delimiter.as_str())]; + self.get_json(&path, &query, "describe_table_version", &id) + .await + } + async fn update_table_schema_metadata( &self, request: UpdateTableSchemaMetadataRequest, diff --git a/rust/lance-namespace-impls/src/rest_adapter.rs b/rust/lance-namespace-impls/src/rest_adapter.rs index b63331c8a66..2b357476814 100644 --- a/rust/lance-namespace-impls/src/rest_adapter.rs +++ 
b/rust/lance-namespace-impls/src/rest_adapter.rs @@ -76,6 +76,15 @@ impl RestAdapter { .route("/v1/table/:id/rename", post(rename_table)) .route("/v1/table/:id/restore", post(restore_table)) .route("/v1/table/:id/version/list", get(list_table_versions)) + .route("/v1/table/:id/version/create", post(create_table_version)) + .route( + "/v1/table/:id/version/latest", + get(describe_table_version_latest), + ) + .route( + "/v1/table/:id/version/:version", + get(describe_table_version), + ) .route("/v1/table/:id/stats", get(get_table_stats)) // Table data operations .route("/v1/table/:id/create", post(create_table)) @@ -753,6 +762,74 @@ async fn list_table_versions( } } +async fn create_table_version( + State(backend): State>, + headers: HeaderMap, + Path(id): Path, + Query(params): Query, + Json(body): Json, +) -> Response { + let request = CreateTableVersionRequest { + id: Some(parse_id(&id, params.delimiter.as_deref())), + identity: extract_identity(&headers), + version: body.version, + manifest_path: body.manifest_path, + manifest_size: body.manifest_size, + e_tag: body.e_tag, + metadata: body.metadata, + ..Default::default() + }; + + match backend.create_table_version(request).await { + Ok(response) => (StatusCode::OK, Json(response)).into_response(), + Err(e) => error_to_response(e), + } +} + +#[derive(Debug, Deserialize)] +struct VersionPathParams { + id: String, + version: i64, +} + +async fn describe_table_version( + State(backend): State>, + headers: HeaderMap, + Path(params): Path, + Query(query): Query, +) -> Response { + let request = DescribeTableVersionRequest { + id: Some(parse_id(¶ms.id, query.delimiter.as_deref())), + version: Some(params.version), + identity: extract_identity(&headers), + ..Default::default() + }; + + match backend.describe_table_version(request).await { + Ok(response) => (StatusCode::OK, Json(response)).into_response(), + Err(e) => error_to_response(e), + } +} + +async fn describe_table_version_latest( + State(backend): State>, + 
headers: HeaderMap, + Path(id): Path, + Query(params): Query, +) -> Response { + let request = DescribeTableVersionRequest { + id: Some(parse_id(&id, params.delimiter.as_deref())), + version: None, + identity: extract_identity(&headers), + ..Default::default() + }; + + match backend.describe_table_version(request).await { + Ok(response) => (StatusCode::OK, Json(response)).into_response(), + Err(e) => error_to_response(e), + } +} + async fn get_table_stats( State(backend): State>, headers: HeaderMap, @@ -2870,8 +2947,7 @@ mod tests { "test_table".to_string(), ]), mode: Some("create".to_string()), - identity: None, - context: None, + ..Default::default() }; let result = namespace.create_table(create_table_req, table_data).await; assert!(result.is_ok(), "Failed to create table: {:?}", result); diff --git a/rust/lance-namespace/src/namespace.rs b/rust/lance-namespace/src/namespace.rs index 3e27df15ba7..7543a7f3200 100644 --- a/rust/lance-namespace/src/namespace.rs +++ b/rust/lance-namespace/src/namespace.rs @@ -12,17 +12,19 @@ use lance_namespace_reqwest_client::models::{ AlterTableAddColumnsRequest, AlterTableAddColumnsResponse, AlterTableAlterColumnsRequest, AlterTableAlterColumnsResponse, AlterTableDropColumnsRequest, AlterTableDropColumnsResponse, AlterTransactionRequest, AlterTransactionResponse, AnalyzeTableQueryPlanRequest, - CountTableRowsRequest, CreateEmptyTableRequest, CreateEmptyTableResponse, - CreateNamespaceRequest, CreateNamespaceResponse, CreateTableIndexRequest, - CreateTableIndexResponse, CreateTableRequest, CreateTableResponse, - CreateTableScalarIndexResponse, CreateTableTagRequest, CreateTableTagResponse, + BatchDeleteTableVersionsRequest, BatchDeleteTableVersionsResponse, CountTableRowsRequest, + CreateEmptyTableRequest, CreateEmptyTableResponse, CreateNamespaceRequest, + CreateNamespaceResponse, CreateTableIndexRequest, CreateTableIndexResponse, CreateTableRequest, + CreateTableResponse, CreateTableScalarIndexResponse, CreateTableTagRequest, + 
CreateTableTagResponse, CreateTableVersionRequest, CreateTableVersionResponse, DeclareTableRequest, DeclareTableResponse, DeleteFromTableRequest, DeleteFromTableResponse, DeleteTableTagRequest, DeleteTableTagResponse, DeregisterTableRequest, DeregisterTableResponse, DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableIndexStatsRequest, DescribeTableIndexStatsResponse, DescribeTableRequest, DescribeTableResponse, - DescribeTransactionRequest, DescribeTransactionResponse, DropNamespaceRequest, - DropNamespaceResponse, DropTableIndexRequest, DropTableIndexResponse, DropTableRequest, - DropTableResponse, ExplainTableQueryPlanRequest, GetTableStatsRequest, GetTableStatsResponse, + DescribeTableVersionRequest, DescribeTableVersionResponse, DescribeTransactionRequest, + DescribeTransactionResponse, DropNamespaceRequest, DropNamespaceResponse, + DropTableIndexRequest, DropTableIndexResponse, DropTableRequest, DropTableResponse, + ExplainTableQueryPlanRequest, GetTableStatsRequest, GetTableStatsResponse, GetTableTagVersionRequest, GetTableTagVersionResponse, InsertIntoTableRequest, InsertIntoTableResponse, ListNamespacesRequest, ListNamespacesResponse, ListTableIndicesRequest, ListTableIndicesResponse, ListTableTagsRequest, ListTableTagsResponse, @@ -394,6 +396,77 @@ pub trait LanceNamespace: Send + Sync + std::fmt::Debug { }) } + /// Create a new table version entry. + /// + /// This operation supports `put_if_not_exists` semantics, where the operation + /// fails if the version already exists. This is used to coordinate concurrent + /// writes to a table through an external manifest store. + /// + /// # Arguments + /// + /// * `request` - Contains the table identifier, version number, manifest path, + /// and optional metadata like size and ETag. + /// + /// # Errors + /// + /// - Returns an error if the version already exists (conflict). + /// - Returns [`crate::ErrorCode::TableNotFound`] if the table does not exist. 
+ async fn create_table_version( + &self, + _request: CreateTableVersionRequest, + ) -> Result { + Err(Error::NotSupported { + source: "create_table_version not implemented".into(), + location: Location::new(file!(), line!(), column!()), + }) + } + + /// Describe a specific table version. + /// + /// Returns metadata about a specific version of a table, including the + /// manifest path, size, ETag, and timestamp. + /// + /// # Arguments + /// + /// * `request` - Contains the table identifier and optionally the version + /// number. If version is not specified, returns the latest version. + /// + /// # Errors + /// + /// - Returns [`crate::ErrorCode::TableNotFound`] if the table does not exist. + /// - Returns an error if the specified version does not exist. + async fn describe_table_version( + &self, + _request: DescribeTableVersionRequest, + ) -> Result { + Err(Error::NotSupported { + source: "describe_table_version not implemented".into(), + location: Location::new(file!(), line!(), column!()), + }) + } + + /// Batch delete table versions. + /// + /// Deletes multiple version records from a table. This operation supports + /// deleting ranges of versions for efficient bulk cleanup. + /// + /// # Arguments + /// + /// * `request` - Contains the table identifier and version ranges to delete. + /// + /// # Errors + /// + /// - Returns [`crate::ErrorCode::TableNotFound`] if the table does not exist. + async fn batch_delete_table_versions( + &self, + _request: BatchDeleteTableVersionsRequest, + ) -> Result { + Err(Error::NotSupported { + source: "batch_delete_table_versions not implemented".into(), + location: Location::new(file!(), line!(), column!()), + }) + } + /// Update table schema metadata. 
async fn update_table_schema_metadata( &self, diff --git a/rust/lance-table/src/io/commit/external_manifest.rs b/rust/lance-table/src/io/commit/external_manifest.rs index 3d08809dfe8..11d094b888b 100644 --- a/rust/lance-table/src/io/commit/external_manifest.rs +++ b/rust/lance-table/src/io/commit/external_manifest.rs @@ -90,7 +90,102 @@ pub trait ExternalManifestStore: std::fmt::Debug + Send + Sync { }) } - /// Put the manifest path for a given base_uri and version, should fail if the version already exists + /// Commit a manifest version to the external store. + /// + /// The staging manifest has been written to `staging_path` on the object store. + /// This method should atomically claim the version and return the final manifest location. + /// + /// # For staging-based stores (e.g., DynamoDB) + /// The default implementation: + /// 1. Records the staging path atomically (fails if version exists) + /// 2. Copies staging to final path on object store + /// 3. Updates external store to point to final path + /// 4. Deletes staging manifest + /// + /// # For direct-write stores (e.g., Namespace) + /// Override this method to: + /// 1. Read staging manifest data from object store + /// 2. Write directly to final location with conditional put + /// 3. Delete staging manifest + /// + /// Returns the final manifest location after successful commit. 
+ #[allow(clippy::too_many_arguments)] + async fn commit( + &self, + base_path: &Path, + version: u64, + staging_path: &Path, + size: u64, + e_tag: Option, + object_store: &dyn OSObjectStore, + naming_scheme: ManifestNamingScheme, + ) -> Result { + // Default implementation: staging-based workflow + + // Step 1: Record staging path atomically + self.put_if_not_exists( + base_path.as_ref(), + version, + staging_path.as_ref(), + size, + e_tag.clone(), + ) + .await?; + + // Step 2: Copy staging to final path + let final_path = naming_scheme.manifest_path(base_path, version); + let copied = match object_store.copy(staging_path, &final_path).await { + Ok(_) => true, + Err(ObjectStoreError::NotFound { .. }) => false, + Err(e) => return Err(e.into()), + }; + if copied { + info!(target: TRACE_FILE_AUDIT, mode=AUDIT_MODE_CREATE, r#type=AUDIT_TYPE_MANIFEST, path = final_path.as_ref()); + } + + // Get final e_tag (may change after copy for large files) + let e_tag = if copied && size < 5 * 1024 * 1024 { + e_tag + } else { + let meta = object_store.head(&final_path).await?; + meta.e_tag + }; + + let location = ManifestLocation { + version, + path: final_path.clone(), + size: Some(size), + naming_scheme, + e_tag: e_tag.clone(), + }; + + if !copied { + return Ok(location); + } + + // Step 3: Update external store to final path + self.put_if_exists( + base_path.as_ref(), + version, + final_path.as_ref(), + size, + e_tag, + ) + .await?; + + // Step 4: Delete staging manifest + match object_store.delete(staging_path).await { + Ok(_) => {} + Err(ObjectStoreError::NotFound { .. }) => {} + Err(e) => return Err(e.into()), + } + info!(target: TRACE_FILE_AUDIT, mode=AUDIT_MODE_DELETE, r#type=AUDIT_TYPE_MANIFEST, path = staging_path.as_ref()); + + Ok(location) + } + + /// Record staging manifest path. Used by default commit implementation. + /// Should fail if the version already exists. 
async fn put_if_not_exists( &self, base_uri: &str, @@ -100,7 +195,8 @@ pub trait ExternalManifestStore: std::fmt::Debug + Send + Sync { e_tag: Option, ) -> Result<()>; - /// Put the manifest path for a given base_uri and version, should fail if the version **does not** already exist + /// Update to final manifest path. Used by default commit implementation. + /// Should fail if the version does not already exist. async fn put_if_exists( &self, base_uri: &str, @@ -140,15 +236,10 @@ pub struct ExternalManifestCommitHandler { } impl ExternalManifestCommitHandler { - /// The manifest is considered committed once the staging manifest is written - /// to object store and that path is committed to the external store. + /// Finalize a manifest that may be in staging state. /// - /// However, to fully complete this, the staging manifest should be materialized - /// into the final path, the final path should be committed to the external store - /// and the staging manifest should be deleted. These steps may be completed - /// by any number of readers or writers, so care should be taken to ensure - /// that the manifest is not lost nor any errors occur due to duplicate - /// operations. + /// This is used by read paths when they encounter a staging manifest. + /// Write paths use `ExternalManifestStore::commit` directly. 
#[allow(clippy::too_many_arguments)] async fn finalize_manifest( &self, @@ -160,7 +251,7 @@ impl ExternalManifestCommitHandler { store: &dyn OSObjectStore, naming_scheme: ManifestNamingScheme, ) -> std::result::Result { - // step 1: copy the manifest to the final location + // Copy the manifest to the final location let final_manifest_path = naming_scheme.manifest_path(base_path, version); let copied = match store @@ -176,11 +267,6 @@ impl ExternalManifestCommitHandler { } // On S3, the etag can change if originally was MultipartUpload and later was Copy - // https://docs.aws.amazon.com/AmazonS3/latest/API/API_Object.html#AmazonS3-Type-Object-ETag - // We only do MultipartUpload for > 5MB files, so we can skip this check - // if size < 5MB. However, we need to double check the final_manifest_path - // exists before we change the external store, otherwise we may point to a - // non-existing manifest. let e_tag = if copied && size < 5 * 1024 * 1024 { e_tag } else { @@ -200,7 +286,7 @@ impl ExternalManifestCommitHandler { return Ok(location); } - // step 2: flip the external store to point to the final location + // Update the external store to point to the final location self.external_manifest_store .put_if_exists( base_path.as_ref(), @@ -211,7 +297,7 @@ impl ExternalManifestCommitHandler { ) .await?; - // step 3: delete the staging manifest + // Delete the staging manifest match store.delete(staging_manifest_path).await { Ok(_) => {} Err(ObjectStoreError::NotFound { .. 
}) => {} @@ -390,50 +476,39 @@ impl CommitHandler for ExternalManifestCommitHandler { naming_scheme: ManifestNamingScheme, transaction: Option, ) -> std::result::Result { - // path we get here is the path to the manifest we want to write - // use object_store.base_path.as_ref() for getting the root of the dataset - - // step 1: Write the manifest we want to commit to object store with a temporary name + // Write the manifest to object store with a temporary name let path = naming_scheme.manifest_path(base_path, manifest.version); let staging_path = make_staging_manifest_path(&path)?; let write_res = manifest_writer(object_store, manifest, indices, &staging_path, transaction).await?; - // step 2 & 3: Try to commit this version to external store, return err on failure - let res = self + // Commit via external store (handles atomic claim, finalization, and cleanup) + let result = self .external_manifest_store - .put_if_not_exists( - base_path.as_ref(), - manifest.version, - staging_path.as_ref(), - write_res.size as u64, - write_res.e_tag.clone(), - ) - .await - .map_err(|_| CommitError::CommitConflict {}); - - if let Err(err) = res { - // delete the staging manifest - match object_store.inner.delete(&staging_path).await { - Ok(_) => {} - Err(ObjectStoreError::NotFound { .. }) => {} - Err(e) => return Err(CommitError::OtherError(e.into())), - } - info!(target: TRACE_FILE_AUDIT, mode=AUDIT_MODE_DELETE, r#type=AUDIT_TYPE_MANIFEST, path = staging_path.as_ref()); - return Err(err); - } - - Ok(self - .finalize_manifest( + .commit( base_path, - &staging_path, manifest.version, + &staging_path, write_res.size as u64, write_res.e_tag, &object_store.inner, naming_scheme, ) - .await?) + .await; + + match result { + Ok(location) => Ok(location), + Err(_) => { + // On conflict, try to delete the staging manifest + match object_store.inner.delete(&staging_path).await { + Ok(_) => {} + Err(ObjectStoreError::NotFound { .. 
}) => {} + Err(e) => return Err(CommitError::OtherError(e.into())), + } + info!(target: TRACE_FILE_AUDIT, mode=AUDIT_MODE_DELETE, r#type=AUDIT_TYPE_MANIFEST, path = staging_path.as_ref()); + Err(CommitError::CommitConflict {}) + } + } } async fn delete(&self, base_path: &Path) -> Result<()> { diff --git a/rust/lance/src/dataset.rs b/rust/lance/src/dataset.rs index 3913c5b255f..d85883a787b 100644 --- a/rust/lance/src/dataset.rs +++ b/rust/lance/src/dataset.rs @@ -806,6 +806,7 @@ impl Dataset { transaction_id: fallback_resp.transaction_id, location: fallback_resp.location, storage_options: fallback_resp.storage_options, + properties: fallback_resp.properties, } } Err(e) => { diff --git a/rust/lance/src/dataset/builder.rs b/rust/lance/src/dataset/builder.rs index 9eba7009d19..0d584d98ba2 100644 --- a/rust/lance/src/dataset/builder.rs +++ b/rust/lance/src/dataset/builder.rs @@ -5,6 +5,7 @@ use std::{collections::HashMap, sync::Arc, time::Duration}; use super::refs::{Ref, Refs}; use super::{ReadParams, WriteParams, DEFAULT_INDEX_CACHE_SIZE, DEFAULT_METADATA_CACHE_SIZE}; use crate::dataset::branch_location::BranchLocation; +use crate::io::commit::namespace_manifest::LanceNamespaceExternalManifestStore; use crate::{session::Session, Dataset, Error, Result}; use futures::FutureExt; use lance_core::utils::tracing::{DATASET_LOADING_EVENT, TRACE_DATASET_EVENTS}; @@ -18,6 +19,7 @@ use lance_namespace::models::DescribeTableRequest; use lance_namespace::LanceNamespace; use lance_table::{ format::Manifest, + io::commit::external_manifest::ExternalManifestCommitHandler, io::commit::{commit_handler_from_url, CommitHandler}, }; #[cfg(feature = "aws")] @@ -141,7 +143,17 @@ impl DatasetBuilder { location: location!(), })?; - let mut builder = Self::from_uri(table_uri); + let mut builder = Self::from_uri(&table_uri); + + // Check managed_versioning flag to determine if namespace-managed commits should be used + if response.managed_versioning == Some(true) { + let external_store = + 
LanceNamespaceExternalManifestStore::new(namespace.clone(), table_id.clone()); + let commit_handler: Arc = Arc::new(ExternalManifestCommitHandler { + external_manifest_store: Arc::new(external_store), + }); + builder.commit_handler = Some(commit_handler); + } // Use namespace storage options if available let namespace_storage_options = response.storage_options; diff --git a/rust/lance/src/io/commit.rs b/rust/lance/src/io/commit.rs index 5900c92fa8d..2ce599453b8 100644 --- a/rust/lance/src/io/commit.rs +++ b/rust/lance/src/io/commit.rs @@ -68,6 +68,7 @@ pub mod conflict_resolver; mod dynamodb; #[cfg(test)] mod external_manifest; +pub mod namespace_manifest; #[cfg(all(feature = "dynamodb_tests", test))] mod s3_test; diff --git a/rust/lance/src/io/commit/namespace_manifest.rs b/rust/lance/src/io/commit/namespace_manifest.rs new file mode 100644 index 00000000000..405865fbc2d --- /dev/null +++ b/rust/lance/src/io/commit/namespace_manifest.rs @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +use std::sync::Arc; + +use async_trait::async_trait; +use lance_core::Result; +use lance_namespace::models::{ + CreateTableVersionRequest, DescribeTableVersionRequest, ListTableVersionsRequest, +}; +use lance_namespace::LanceNamespace; +use lance_table::io::commit::external_manifest::ExternalManifestStore; +use lance_table::io::commit::{ManifestLocation, ManifestNamingScheme}; +use object_store::path::Path; +use object_store::ObjectStore as OSObjectStore; + +#[derive(Debug)] +pub struct LanceNamespaceExternalManifestStore { + namespace: Arc, + table_id: Vec, +} + +impl LanceNamespaceExternalManifestStore { + pub fn new(namespace: Arc, table_id: Vec) -> Self { + Self { + namespace, + table_id, + } + } +} + +#[async_trait] +impl ExternalManifestStore for LanceNamespaceExternalManifestStore { + async fn get(&self, _base_uri: &str, version: u64) -> Result { + let request = DescribeTableVersionRequest { + id: 
Some(self.table_id.clone()), + version: Some(version as i64), + ..Default::default() + }; + + let response = self.namespace.describe_table_version(request).await?; + + // Namespace returns full path (relative to object store root) + Ok(response.version.manifest_path) + } + + async fn get_latest_version(&self, _base_uri: &str) -> Result> { + let request = ListTableVersionsRequest { + id: Some(self.table_id.clone()), + descending: Some(true), + limit: Some(1), + ..Default::default() + }; + + let response = self.namespace.list_table_versions(request).await?; + + if response.versions.is_empty() { + return Ok(None); + } + + let version = &response.versions[0]; + + // Namespace returns full path (relative to object store root) + Ok(Some(( + version.version as u64, + version.manifest_path.clone(), + ))) + } + + /// Direct-write commit: reads staging manifest and writes directly to final location. + async fn commit( + &self, + base_path: &Path, + version: u64, + staging_path: &Path, + size: u64, + e_tag: Option, + object_store: &dyn OSObjectStore, + naming_scheme: ManifestNamingScheme, + ) -> Result { + // create_table_version reads staging manifest and writes to final location + let request = CreateTableVersionRequest { + id: Some(self.table_id.clone()), + version: version as i64, + manifest_path: staging_path.to_string(), + manifest_size: Some(size as i64), + e_tag: e_tag.clone(), + ..Default::default() + }; + + self.namespace.create_table_version(request).await?; + + // Delete staging manifest (it's been copied to final location) + let _ = object_store.delete(staging_path).await; + + // Return final manifest location (full path relative to object store root) + let final_path = naming_scheme.manifest_path(base_path, version); + + Ok(ManifestLocation { + version, + path: final_path, + size: Some(size), + naming_scheme, + e_tag, + }) + } + + /// Not used when commit() is overridden. 
+ async fn put_if_not_exists( + &self, + _base_uri: &str, + _version: u64, + _path: &str, + _size: u64, + _e_tag: Option, + ) -> Result<()> { + Ok(()) + } + + /// Not used when commit() is overridden. + async fn put_if_exists( + &self, + _base_uri: &str, + _version: u64, + _path: &str, + _size: u64, + _e_tag: Option, + ) -> Result<()> { + Ok(()) + } +} From 91ea5b7f36b59e78d121acc9924d27ad5f1e5bbd Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 19 Feb 2026 09:59:15 -0800 Subject: [PATCH 02/23] fix python --- python/python/lance/dataset.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index c54ac85d6ff..a8978d11077 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -434,6 +434,8 @@ def __init__( read_params: Optional[Dict[str, Any]] = None, session: Optional[Session] = None, storage_options_provider: Optional[Any] = None, + namespace: Optional[Any] = None, + table_id: Optional[List[str]] = None, ): uri = os.fspath(uri) if isinstance(uri, Path) else uri self._uri = uri @@ -464,6 +466,8 @@ def __init__( read_params=read_params, session=session, storage_options_provider=storage_options_provider, + namespace=namespace, + table_id=table_id, ) self._default_scan_options = default_scan_options self._read_params = read_params From 8311d09f144b9b91da071de286a299f840d9bcd2 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 19 Feb 2026 10:07:03 -0800 Subject: [PATCH 03/23] fix java --- java/lance-jni/src/blocking_dataset.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/lance-jni/src/blocking_dataset.rs b/java/lance-jni/src/blocking_dataset.rs index b84d39e2059..4b64644d8a1 100644 --- a/java/lance-jni/src/blocking_dataset.rs +++ b/java/lance-jni/src/blocking_dataset.rs @@ -1140,7 +1140,7 @@ fn inner_open_native<'local>( // Extract table_id from List let table_id = if !table_id_obj.is_null() { - env.get_strings_opt(&table_id_obj)? 
+ Some(env.get_strings(&table_id_obj)?) } else { None }; From 11bd8082b1c4fb0b8379bebaf0445be0072dc78a Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 19 Feb 2026 10:22:56 -0800 Subject: [PATCH 04/23] fix windows --- rust/lance-namespace-impls/src/dir.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index bd83f9bd630..eff9f9ebc91 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -1852,7 +1852,7 @@ mod tests { use super::*; use arrow_ipc::reader::StreamReader; use lance::dataset::Dataset; - use lance_core::utils::tempfile::TempStdDir; + use lance_core::utils::tempfile::{TempStdDir, TempStrDir}; use lance_namespace::models::{ CreateTableRequest, JsonArrowDataType, JsonArrowField, JsonArrowSchema, ListTablesRequest, }; @@ -3960,10 +3960,9 @@ mod tests { async fn test_list_table_versions() { use lance_namespace::models::ListTableVersionsRequest; - let temp_dir = TempStdDir::default(); - let temp_path = temp_dir.to_str().unwrap(); + let temp_dir = TempStrDir::default(); - let namespace = DirectoryNamespaceBuilder::new(temp_path) + let namespace = DirectoryNamespaceBuilder::new(temp_dir.as_ref()) .table_version_tracking_enabled(true) .build() .await @@ -4167,8 +4166,8 @@ mod tests { // Each version always writes to a new file location. 
use lance_namespace::models::CreateTableVersionRequest; - let temp_dir = TempStdDir::default(); - let temp_path = temp_dir.to_str().unwrap(); + let temp_dir = TempStrDir::default(); + let temp_path: &str = &temp_dir; let namespace = DirectoryNamespaceBuilder::new(temp_path) .table_version_tracking_enabled(true) @@ -4581,8 +4580,8 @@ mod tests { use lance::dataset::{Dataset, WriteMode, WriteParams}; use lance_namespace::models::{CreateNamespaceRequest, DescribeTableRequest}; - let temp_dir = TempStdDir::default(); - let temp_path = temp_dir.to_str().unwrap(); + let temp_dir = TempStrDir::default(); + let temp_path: &str = &temp_dir; // Create namespace with table_version_tracking_enabled and manifest_enabled let inner_ns = DirectoryNamespaceBuilder::new(temp_path) From 2c62840212fea95ff262e74ad09112d3280743e0 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 19 Feb 2026 11:47:08 -0800 Subject: [PATCH 05/23] fix integ --- python/python/tests/test_namespace_integration.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/python/python/tests/test_namespace_integration.py b/python/python/tests/test_namespace_integration.py index aa6202bcf28..912233de782 100644 --- a/python/python/tests/test_namespace_integration.py +++ b/python/python/tests/test_namespace_integration.py @@ -1033,6 +1033,9 @@ def test_managed_versioning_with_commit_handler(s3_bucket: str): @pytest.mark.integration def test_e2e_table_version_tracking_with_s3(s3_bucket: str): """Test end-to-end table version tracking with S3 storage.""" + import pyarrow as pa + from lance import write_dataset + storage_options = copy.deepcopy(CONFIG) namespace = TableVersionTrackingNamespace( @@ -1043,14 +1046,12 @@ def test_e2e_table_version_tracking_with_s3(s3_bucket: str): table_name = uuid.uuid4().hex table_id = ["test_ns", table_name] - request = DeclareTableRequest(id=table_id, location=None) - response = namespace.declare_table(request) - - table_uri = response.location - assert 
table_uri is not None - # managed_versioning indicates namespace-managed commits - assert response.managed_versioning is True + # Create initial dataset using write_dataset (internally calls declare_table) + data = pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]}) + ds = write_dataset(data, namespace=namespace, table_id=table_id, mode="create") + assert ds.count_rows() == 3 + # Check managed_versioning via describe_table describe_response = namespace.describe_table( DescribeTableRequest(id=table_id, version=None) ) From 45b37f1f2c9e92d2d91fea34eb850903a1013a1f Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 19 Feb 2026 18:06:22 -0800 Subject: [PATCH 06/23] improve --- Cargo.lock | 2 - Cargo.toml | 2 +- python/Cargo.lock | 3 +- python/python/lance/namespace.py | 36 + .../tests/test_namespace_integration.py | 25 +- python/src/namespace.rs | 24 + rust/lance-namespace-impls/Cargo.toml | 1 + rust/lance-namespace-impls/src/dir.rs | 704 ++++++++++++------ rust/lance/src/dataset.rs | 1 + .../lance/src/io/commit/namespace_manifest.rs | 32 +- 10 files changed, 591 insertions(+), 239 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 871a27889a5..4a6a636a508 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5308,8 +5308,6 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2c20a25207bbae280c9acd16ccd1e3561ad7f79a57c0e88809cd9c026a8494c" dependencies = [ "reqwest", "serde", diff --git a/Cargo.toml b/Cargo.toml index cf4c2aae6ad..054c17e07dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,7 +66,7 @@ lance-linalg = { version = "=3.0.0-beta.3", path = "./rust/lance-linalg" } lance-namespace = { version = "=3.0.0-beta.3", path = "./rust/lance-namespace" } lance-namespace-impls = { version = "=3.0.0-beta.3", path = "./rust/lance-namespace-impls" } lance-namespace-datafusion = { version = "=3.0.0-beta.3", path = "./rust/lance-namespace-datafusion" } 
-lance-namespace-reqwest-client = "0.5.0" +lance-namespace-reqwest-client = { path = "../lance-namespace/rust/lance-namespace-reqwest-client" } lance-table = { version = "=3.0.0-beta.3", path = "./rust/lance-table" } lance-test-macros = { version = "=3.0.0-beta.3", path = "./rust/lance-test-macros" } lance-testing = { version = "=3.0.0-beta.3", path = "./rust/lance-testing" } diff --git a/python/Cargo.lock b/python/Cargo.lock index 9ce642cb770..5bd6b130144 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -4332,6 +4332,7 @@ dependencies = [ "lance-index", "lance-io", "lance-namespace", + "lance-table", "log", "object_store", "rand 0.9.2", @@ -4348,8 +4349,6 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2c20a25207bbae280c9acd16ccd1e3561ad7f79a57c0e88809cd9c026a8494c" dependencies = [ "reqwest", "serde", diff --git a/python/python/lance/namespace.py b/python/python/lance/namespace.py index 98fe0b8ebe7..3b919b4ec96 100644 --- a/python/python/lance/namespace.py +++ b/python/python/lance/namespace.py @@ -441,6 +441,24 @@ def describe_table_version(self, request: dict) -> dict: """ return self._inner.describe_table_version(request) + def batch_delete_table_versions(self, request: dict) -> dict: + """Delete multiple table versions in a single request. + + Parameters + ---------- + request : dict + Request dictionary with keys: + - id: List[str] - Table identifier + - versions: List[int] - List of version numbers to delete + + Returns + ------- + dict + Response dictionary with: + - deleted_versions: List[int] - List of successfully deleted versions + """ + return self._inner.batch_delete_table_versions(request) + class RestNamespace(LanceNamespace): """REST-based Lance Namespace implementation backed by Rust. 
@@ -636,6 +654,24 @@ def describe_table_version(self, request: dict) -> dict: """ return self._inner.describe_table_version(request) + def batch_delete_table_versions(self, request: dict) -> dict: + """Delete multiple table versions in a single request. + + Parameters + ---------- + request : dict + Request dictionary with keys: + - id: List[str] - Table identifier + - versions: List[int] - List of version numbers to delete + + Returns + ------- + dict + Response dictionary with: + - deleted_versions: List[int] - List of successfully deleted versions + """ + return self._inner.batch_delete_table_versions(request) + class RestAdapter: """REST adapter server that creates a namespace backend and exposes it via REST. diff --git a/python/python/tests/test_namespace_integration.py b/python/python/tests/test_namespace_integration.py index 912233de782..1586fcd8273 100644 --- a/python/python/tests/test_namespace_integration.py +++ b/python/python/tests/test_namespace_integration.py @@ -1033,22 +1033,28 @@ def test_managed_versioning_with_commit_handler(s3_bucket: str): @pytest.mark.integration def test_e2e_table_version_tracking_with_s3(s3_bucket: str): """Test end-to-end table version tracking with S3 storage.""" + import lance import pyarrow as pa - from lance import write_dataset + from lance.namespace import DirectoryNamespace storage_options = copy.deepcopy(CONFIG) - namespace = TableVersionTrackingNamespace( - root=f"s3://{s3_bucket}/version_tracking_test", - storage_options=storage_options, - ) + # Create namespace with table_version_tracking_enabled and manifest_enabled + dir_props = {f"storage.{k}": v for k, v in storage_options.items()} + dir_props["root"] = f"s3://{s3_bucket}/version_tracking_test" + dir_props["table_version_tracking_enabled"] = "true" + dir_props["manifest_enabled"] = "true" + + namespace = DirectoryNamespace(**dir_props) table_name = uuid.uuid4().hex table_id = ["test_ns", table_name] # Create initial dataset using write_dataset (internally 
calls declare_table) data = pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]}) - ds = write_dataset(data, namespace=namespace, table_id=table_id, mode="create") + ds = lance.write_dataset( + data, namespace=namespace, table_id=table_id, mode="create" + ) assert ds.count_rows() == 3 # Check managed_versioning via describe_table @@ -1060,13 +1066,10 @@ def test_e2e_table_version_tracking_with_s3(s3_bucket: str): from lance_namespace import ListTableVersionsRequest - _list_response = namespace.list_table_versions( - ListTableVersionsRequest(id=table_id) - ) - assert namespace.list_table_versions_count == 1 + list_response = namespace.list_table_versions(ListTableVersionsRequest(id=table_id)) + assert len(list_response.versions) >= 1 describe_version_response = namespace.describe_table_version( {"id": table_id, "version": None} ) - assert namespace.describe_table_version_count == 1 assert "version" in describe_version_response diff --git a/python/src/namespace.rs b/python/src/namespace.rs index 57a70a49753..fe87b149d3a 100644 --- a/python/src/namespace.rs +++ b/python/src/namespace.rs @@ -360,6 +360,18 @@ impl PyDirectoryNamespace { .infer_error()?; pythonize(py, &response).map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) } + + fn batch_delete_table_versions<'py>( + &self, + py: Python<'py>, + request: &Bound<'_, PyAny>, + ) -> PyResult> { + let request = depythonize(request)?; + let response = crate::rt() + .block_on(Some(py), self.inner.batch_delete_table_versions(request))? 
+ .infer_error()?; + pythonize(py, &response).map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) + } } /// Python wrapper for RestNamespace @@ -636,6 +648,18 @@ impl PyRestNamespace { .infer_error()?; pythonize(py, &response).map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) } + + fn batch_delete_table_versions<'py>( + &self, + py: Python<'py>, + request: &Bound<'_, PyAny>, + ) -> PyResult> { + let request = depythonize(request)?; + let response = crate::rt() + .block_on(Some(py), self.inner.batch_delete_table_versions(request))? + .infer_error()?; + pythonize(py, &response).map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) + } } /// Python wrapper for REST adapter server diff --git a/rust/lance-namespace-impls/Cargo.toml b/rust/lance-namespace-impls/Cargo.toml index 64355ce52d1..8c84e1bbe8b 100644 --- a/rust/lance-namespace-impls/Cargo.toml +++ b/rust/lance-namespace-impls/Cargo.toml @@ -44,6 +44,7 @@ url = { workspace = true } lance = { workspace = true } lance-index = { workspace = true } lance-io = { workspace = true } +lance-table = { workspace = true } object_store = { workspace = true } arrow = { workspace = true } arrow-ipc = { workspace = true } diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index eff9f9ebc91..caaf2ee930b 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -16,6 +16,7 @@ use futures::TryStreamExt; use lance::dataset::{Dataset, WriteParams}; use lance::session::Session; use lance_io::object_store::{ObjectStore, ObjectStoreParams, ObjectStoreRegistry}; +use lance_table::io::commit::ManifestNamingScheme; use object_store::path::Path; use object_store::{Error as ObjectStoreError, ObjectStore as OSObjectStore, PutMode, PutOptions}; use std::collections::HashMap; @@ -1681,8 +1682,15 @@ impl LanceNamespace for DirectoryNamespace { let version = request.version as u64; let table_path = 
Self::uri_to_object_store_path(&table_uri); - let versions_dir_path = table_path.child("_versions"); - let final_path = versions_dir_path.child(format!("{}.manifest", version)); + + // Determine naming scheme from request, default to V2 + let naming_scheme = match request.naming_scheme.as_deref() { + Some("V1") => ManifestNamingScheme::V1, + _ => ManifestNamingScheme::V2, + }; + + // Compute final path using the naming scheme + let final_path = naming_scheme.manifest_path(&table_path, version); let staging_path = Self::uri_to_object_store_path(staging_manifest_path); let manifest_data = self @@ -1709,7 +1717,10 @@ impl LanceNamespace for DirectoryNamespace { location: snafu::location!(), })?; - self.object_store + let manifest_size = manifest_data.len() as i64; + + let put_result = self + .object_store .inner .put_opts( &final_path, @@ -1740,8 +1751,25 @@ impl LanceNamespace for DirectoryNamespace { }, })?; + // Delete the staging manifest after successful copy + if let Err(e) = self.object_store.inner.delete(&staging_path).await { + log::warn!( + "Failed to delete staging manifest at '{}': {:?}", + staging_path, + e + ); + } + Ok(CreateTableVersionResponse { transaction_id: None, + version: Some(Box::new(TableVersion { + version: version as i64, + manifest_path: final_path.to_string(), + manifest_size: Some(manifest_size), + e_tag: put_result.e_tag, + timestamp: None, + metadata: None, + })), }) } @@ -3958,84 +3986,164 @@ mod tests { #[tokio::test] async fn test_list_table_versions() { + use arrow::array::{Int32Array, RecordBatchIterator}; + use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; + use arrow::record_batch::RecordBatch; + use lance::dataset::builder::DatasetBuilder; use lance_namespace::models::ListTableVersionsRequest; let temp_dir = TempStrDir::default(); + let temp_path: &str = &temp_dir; - let namespace = DirectoryNamespaceBuilder::new(temp_dir.as_ref()) - .table_version_tracking_enabled(true) - .build() - .await - .unwrap(); + let 
namespace: Arc = Arc::new( + DirectoryNamespaceBuilder::new(temp_path) + .table_version_tracking_enabled(true) + .build() + .await + .unwrap(), + ); - // Create a table + // Create a table (version 1) + let table_id = vec!["test_table".to_string()]; let schema = create_test_schema(); let ipc_data = create_test_ipc_data(&schema); let mut create_req = CreateTableRequest::new(); - create_req.id = Some(vec!["test_table".to_string()]); + create_req.id = Some(table_id.clone()); namespace .create_table(create_req, bytes::Bytes::from(ipc_data)) .await .unwrap(); - // List versions - should have version 1 from table creation + // Open dataset and append data to create versions 2 and 3 + let mut dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone()) + .await + .unwrap() + .load() + .await + .unwrap(); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new( + "id", + DataType::Int32, + false, + )])); + let batch = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(Int32Array::from(vec![100, 200]))], + ) + .unwrap(); + + // Append to create version 2 + let batches = RecordBatchIterator::new(vec![Ok(batch.clone())], arrow_schema.clone()); + dataset.append(batches, None).await.unwrap(); + + // Append to create version 3 + let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema); + dataset.append(batches, None).await.unwrap(); + + // List versions - should have versions 1, 2, and 3 let mut list_req = ListTableVersionsRequest::new(); - list_req.id = Some(vec!["test_table".to_string()]); + list_req.id = Some(table_id.clone()); let list_resp = namespace.list_table_versions(list_req).await.unwrap(); - assert!(!list_resp.versions.is_empty()); - let version = list_resp - .versions - .iter() - .find(|v| v.version == 1) - .expect("Expected version 1"); - - // Verify manifest metadata is populated - assert!( - !version.manifest_path.is_empty(), - "manifest_path should be set" - ); - assert!( - 
version.manifest_path.contains(".manifest"), - "manifest_path should contain .manifest" - ); - assert!( - version.manifest_size.is_some(), - "manifest_size should be set" - ); - assert!( - version.manifest_size.unwrap() > 0, - "manifest_size should be > 0" + assert_eq!( + list_resp.versions.len(), + 3, + "Should have 3 versions, got: {:?}", + list_resp.versions ); - assert!(version.timestamp.is_some(), "timestamp should be set"); + + // Verify each version + for expected_version in 1..=3 { + let version = list_resp + .versions + .iter() + .find(|v| v.version == expected_version) + .unwrap_or_else(|| panic!("Expected version {}", expected_version)); + + assert!( + !version.manifest_path.is_empty(), + "manifest_path should be set for version {}", + expected_version + ); + assert!( + version.manifest_path.contains(".manifest"), + "manifest_path should contain .manifest for version {}", + expected_version + ); + assert!( + version.manifest_size.is_some(), + "manifest_size should be set for version {}", + expected_version + ); + assert!( + version.manifest_size.unwrap() > 0, + "manifest_size should be > 0 for version {}", + expected_version + ); + assert!( + version.timestamp.is_some(), + "timestamp should be set for version {}", + expected_version + ); + } } #[tokio::test] async fn test_describe_table_version() { + use arrow::array::{Int32Array, RecordBatchIterator}; + use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; + use arrow::record_batch::RecordBatch; + use lance::dataset::builder::DatasetBuilder; use lance_namespace::models::DescribeTableVersionRequest; - let temp_dir = TempStdDir::default(); - let temp_path = temp_dir.to_str().unwrap(); + let temp_dir = TempStrDir::default(); + let temp_path: &str = &temp_dir; - let namespace = DirectoryNamespaceBuilder::new(temp_path) - .table_version_tracking_enabled(true) - .build() - .await - .unwrap(); + let namespace: Arc = Arc::new( + DirectoryNamespaceBuilder::new(temp_path) + 
.table_version_tracking_enabled(true) + .build() + .await + .unwrap(), + ); - // Create a table + // Create a table (version 1) + let table_id = vec!["test_table".to_string()]; let schema = create_test_schema(); let ipc_data = create_test_ipc_data(&schema); let mut create_req = CreateTableRequest::new(); - create_req.id = Some(vec!["test_table".to_string()]); + create_req.id = Some(table_id.clone()); namespace .create_table(create_req, bytes::Bytes::from(ipc_data)) .await .unwrap(); + // Open dataset and append data to create version 2 + let mut dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone()) + .await + .unwrap() + .load() + .await + .unwrap(); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new( + "id", + DataType::Int32, + false, + )])); + let batch = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(Int32Array::from(vec![100, 200]))], + ) + .unwrap(); + + let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema); + dataset.append(batches, None).await.unwrap(); + // Describe version 1 let mut describe_req = DescribeTableVersionRequest::new(); - describe_req.id = Some(vec!["test_table".to_string()]); + describe_req.id = Some(table_id.clone()); describe_req.version = Some(1); let describe_resp = namespace .describe_table_version(describe_req) @@ -4045,8 +4153,6 @@ mod tests { let version = &describe_resp.version; assert_eq!(version.version, 1); assert!(version.timestamp.is_some()); - - // Verify manifest metadata is populated assert!( !version.manifest_path.is_empty(), "manifest_path should be set" @@ -4063,56 +4169,119 @@ mod tests { version.manifest_size.unwrap() > 0, "manifest_size should be > 0" ); + + // Describe version 2 + let mut describe_req = DescribeTableVersionRequest::new(); + describe_req.id = Some(table_id.clone()); + describe_req.version = Some(2); + let describe_resp = namespace + .describe_table_version(describe_req) + .await + .unwrap(); + + let version = &describe_resp.version; + 
assert_eq!(version.version, 2); + assert!(version.timestamp.is_some()); + assert!( + !version.manifest_path.is_empty(), + "manifest_path should be set" + ); + assert!( + version.manifest_size.is_some(), + "manifest_size should be set" + ); + assert!( + version.manifest_size.unwrap() > 0, + "manifest_size should be > 0" + ); } #[tokio::test] async fn test_describe_table_version_latest() { + use arrow::array::{Int32Array, RecordBatchIterator}; + use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; + use arrow::record_batch::RecordBatch; + use lance::dataset::builder::DatasetBuilder; use lance_namespace::models::DescribeTableVersionRequest; - let temp_dir = TempStdDir::default(); - let temp_path = temp_dir.to_str().unwrap(); + let temp_dir = TempStrDir::default(); + let temp_path: &str = &temp_dir; - let namespace = DirectoryNamespaceBuilder::new(temp_path) - .table_version_tracking_enabled(true) - .build() - .await - .unwrap(); + let namespace: Arc = Arc::new( + DirectoryNamespaceBuilder::new(temp_path) + .table_version_tracking_enabled(true) + .build() + .await + .unwrap(), + ); - // Create a table + // Create a table (version 1) + let table_id = vec!["test_table".to_string()]; let schema = create_test_schema(); let ipc_data = create_test_ipc_data(&schema); let mut create_req = CreateTableRequest::new(); - create_req.id = Some(vec!["test_table".to_string()]); + create_req.id = Some(table_id.clone()); namespace .create_table(create_req, bytes::Bytes::from(ipc_data)) .await .unwrap(); + // Open dataset and append data to create versions 2 and 3 + let mut dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone()) + .await + .unwrap() + .load() + .await + .unwrap(); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new( + "id", + DataType::Int32, + false, + )])); + let batch = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(Int32Array::from(vec![100, 200]))], + ) + .unwrap(); + + // Append to create version 2 + let 
batches = RecordBatchIterator::new(vec![Ok(batch.clone())], arrow_schema.clone()); + dataset.append(batches, None).await.unwrap(); + + // Append to create version 3 + let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema); + dataset.append(batches, None).await.unwrap(); + // Describe latest version (no version specified) let mut describe_req = DescribeTableVersionRequest::new(); - describe_req.id = Some(vec!["test_table".to_string()]); + describe_req.id = Some(table_id.clone()); describe_req.version = None; let describe_resp = namespace .describe_table_version(describe_req) .await .unwrap(); - // Should return version 1 as it's the only version - assert_eq!(describe_resp.version.version, 1); + // Should return version 3 as it's the latest + assert_eq!(describe_resp.version.version, 3); } #[tokio::test] async fn test_create_table_version() { + use futures::TryStreamExt; + use lance::dataset::builder::DatasetBuilder; use lance_namespace::models::CreateTableVersionRequest; - let temp_dir = TempStdDir::default(); - let temp_path = temp_dir.to_str().unwrap(); + let temp_dir = TempStrDir::default(); + let temp_path: &str = &temp_dir; - let namespace = DirectoryNamespaceBuilder::new(temp_path) - .table_version_tracking_enabled(true) - .build() - .await - .unwrap(); + let namespace: Arc = Arc::new( + DirectoryNamespaceBuilder::new(temp_path) + .table_version_tracking_enabled(true) + .build() + .await + .unwrap(), + ); // Create a table let schema = create_test_schema(); @@ -4124,25 +4293,60 @@ mod tests { .await .unwrap(); - // Create a staging manifest by finding the actual manifest file - // Lance may use different naming schemes (V1 or V2) - let table_path = format!("{}/test_table.lance", temp_path); - let versions_dir = format!("{}/_versions", table_path); - let staging_path = format!("{}/staging_manifest", temp_path); - - // Find the first manifest file in the versions directory - let manifest_file = std::fs::read_dir(&versions_dir) - .expect("Failed to 
read versions directory") - .filter_map(|entry| entry.ok()) - .find(|entry| entry.file_name().to_string_lossy().ends_with(".manifest")) + // Open the dataset using from_namespace to get proper object_store and paths + let table_id = vec!["test_table".to_string()]; + let dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone()) + .await + .unwrap() + .load() + .await + .unwrap(); + + // Use dataset's object_store to find and copy the manifest + let versions_path = dataset.versions_dir(); + let manifest_metas: Vec<_> = dataset + .object_store() + .inner + .list(Some(&versions_path)) + .try_collect() + .await + .unwrap(); + + let manifest_meta = manifest_metas + .iter() + .find(|m| { + m.location + .filename() + .map(|f| f.ends_with(".manifest")) + .unwrap_or(false) + }) .expect("No manifest file found"); - let internal_manifest_path = manifest_file.path(); - std::fs::copy(&internal_manifest_path, &staging_path).unwrap(); + // Read the existing manifest data + let manifest_data = dataset + .object_store() + .inner + .get(&manifest_meta.location) + .await + .unwrap() + .bytes() + .await + .unwrap(); + + // Write to a staging location using the dataset's object_store + let staging_path = dataset.versions_dir().child("staging_manifest"); + dataset + .object_store() + .inner + .put(&staging_path, manifest_data.into()) + .await + .unwrap(); // Create version 2 from staging manifest - let mut create_version_req = CreateTableVersionRequest::new(2, staging_path); - create_version_req.id = Some(vec!["test_table".to_string()]); + // Use the same naming scheme as the existing dataset (V2) + let mut create_version_req = CreateTableVersionRequest::new(2, staging_path.to_string()); + create_version_req.id = Some(table_id.clone()); + create_version_req.naming_scheme = Some("V2".to_string()); let result = namespace.create_table_version(create_version_req).await; assert!( @@ -4151,29 +4355,45 @@ mod tests { result ); - // Verify version 2 was created in the 
internal versions directory (_versions) - let version_2_path = format!("{}/_versions/2.manifest", table_path); + // Verify version 2 was created at the path returned in the response + let response = result.unwrap(); + let version_info = response + .version + .expect("response should contain version info"); + let version_2_path = Path::from(version_info.manifest_path); + let head_result = dataset.object_store().inner.head(&version_2_path).await; assert!( - std::path::Path::new(&version_2_path).exists(), + head_result.is_ok(), "Version 2 manifest should exist at {}", version_2_path ); + + // Verify the staging file has been deleted + let staging_head_result = dataset.object_store().inner.head(&staging_path).await; + assert!( + staging_head_result.is_err(), + "Staging manifest should have been deleted after create_table_version" + ); } #[tokio::test] async fn test_create_table_version_conflict() { // create_table_version should fail if the version already exists. // Each version always writes to a new file location. 
+ use futures::TryStreamExt; + use lance::dataset::builder::DatasetBuilder; use lance_namespace::models::CreateTableVersionRequest; let temp_dir = TempStrDir::default(); let temp_path: &str = &temp_dir; - let namespace = DirectoryNamespaceBuilder::new(temp_path) - .table_version_tracking_enabled(true) - .build() - .await - .unwrap(); + let namespace: Arc = Arc::new( + DirectoryNamespaceBuilder::new(temp_path) + .table_version_tracking_enabled(true) + .build() + .await + .unwrap(), + ); // Create a table let schema = create_test_schema(); @@ -4185,31 +4405,79 @@ mod tests { .await .unwrap(); - // Create a staging manifest by finding the actual manifest file - let table_path = format!("{}/test_table.lance", temp_path); - let versions_dir = format!("{}/_versions", table_path); - let staging_path = format!("{}/staging_manifest", temp_path); + // Open the dataset using from_namespace to get proper object_store and paths + let table_id = vec!["test_table".to_string()]; + let dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone()) + .await + .unwrap() + .load() + .await + .unwrap(); - let manifest_file = std::fs::read_dir(&versions_dir) - .expect("Failed to read versions directory") - .filter_map(|entry| entry.ok()) - .find(|entry| entry.file_name().to_string_lossy().ends_with(".manifest")) + // Use dataset's object_store to find and copy the manifest + let versions_path = dataset.versions_dir(); + let manifest_metas: Vec<_> = dataset + .object_store() + .inner + .list(Some(&versions_path)) + .try_collect() + .await + .unwrap(); + + let manifest_meta = manifest_metas + .iter() + .find(|m| { + m.location + .filename() + .map(|f| f.ends_with(".manifest")) + .unwrap_or(false) + }) .expect("No manifest file found"); - let internal_manifest_path = manifest_file.path(); - std::fs::copy(&internal_manifest_path, &staging_path).unwrap(); + // Read the existing manifest data + let manifest_data = dataset + .object_store() + .inner + 
.get(&manifest_meta.location) + .await + .unwrap() + .bytes() + .await + .unwrap(); - // First create external version 1 - let mut create_version_req = CreateTableVersionRequest::new(1, staging_path.clone()); - create_version_req.id = Some(vec!["test_table".to_string()]); - namespace - .create_table_version(create_version_req) + // Write to a staging location using the dataset's object_store + let staging_path = dataset.versions_dir().child("staging_manifest"); + dataset + .object_store() + .inner + .put(&staging_path, manifest_data.into()) .await .unwrap(); - // Create version 1 again (should fail - conflict) - let mut create_version_req = CreateTableVersionRequest::new(1, staging_path); - create_version_req.id = Some(vec!["test_table".to_string()]); + // First create version 2 (should succeed) + let mut create_version_req = CreateTableVersionRequest::new(2, staging_path.to_string()); + create_version_req.id = Some(table_id.clone()); + create_version_req.naming_scheme = Some("V2".to_string()); + let first_result = namespace.create_table_version(create_version_req).await; + assert!( + first_result.is_ok(), + "First create_table_version for version 2 should succeed: {:?}", + first_result + ); + + // Get the path from the response for verification + let version_2_path = Path::from( + first_result + .unwrap() + .version + .expect("response should contain version info") + .manifest_path, + ); + + // Create version 2 again (should fail - conflict) + let mut create_version_req = CreateTableVersionRequest::new(2, staging_path.to_string()); + create_version_req.id = Some(table_id.clone()); + create_version_req.naming_scheme = Some("V2".to_string()); let result = namespace.create_table_version(create_version_req).await; assert!( @@ -4217,12 +4485,12 @@ mod tests { "create_table_version should fail for existing version" ); - // Verify version 1 still exists in internal versions directory (_versions) - let version_1_path = format!("{}/_versions/1.manifest", table_path); + // 
Verify version 2 still exists using the dataset's object_store + let head_result = dataset.object_store().inner.head(&version_2_path).await; assert!( - std::path::Path::new(&version_1_path).exists(), - "Version 1 manifest should still exist at {}", - version_1_path + head_result.is_ok(), + "Version 2 manifest should still exist at {}", + version_2_path ); } @@ -4261,9 +4529,7 @@ mod tests { mod e2e_table_version_tracking { use super::*; use lance::io::commit::namespace_manifest::LanceNamespaceExternalManifestStore; - use lance_table::io::commit::external_manifest::{ - ExternalManifestCommitHandler, ExternalManifestStore, - }; + use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler; use lance_table::io::commit::CommitHandler; use std::sync::atomic::{AtomicUsize, Ordering}; @@ -4289,6 +4555,10 @@ mod tests { self.create_table_version_count.load(Ordering::SeqCst) } + fn describe_table_version_calls(&self) -> usize { + self.describe_table_version_count.load(Ordering::SeqCst) + } + fn list_table_versions_calls(&self) -> usize { self.list_table_versions_count.load(Ordering::SeqCst) } @@ -4421,41 +4691,38 @@ mod tests { } #[tokio::test] - async fn test_e2e_describe_table_returns_managed_versioning() { + async fn test_describe_table_returns_managed_versioning() { use lance_namespace::models::{CreateNamespaceRequest, DescribeTableRequest}; let temp_dir = TempStdDir::default(); let temp_path = temp_dir.to_str().unwrap(); // Create namespace with table_version_tracking_enabled and manifest_enabled - let inner_ns = DirectoryNamespaceBuilder::new(temp_path) + let ns = DirectoryNamespaceBuilder::new(temp_path) .table_version_tracking_enabled(true) .manifest_enabled(true) .build() .await .unwrap(); - let tracking_ns = Arc::new(TrackingNamespace::new(inner_ns)); - // Create parent namespace let mut create_ns_req = CreateNamespaceRequest::new(); create_ns_req.id = Some(vec!["workspace".to_string()]); - 
tracking_ns.create_namespace(create_ns_req).await.unwrap(); + ns.create_namespace(create_ns_req).await.unwrap(); // Create a table with multi-level ID (namespace + table) let schema = create_test_schema(); let ipc_data = create_test_ipc_data(&schema); let mut create_req = CreateTableRequest::new(); create_req.id = Some(vec!["workspace".to_string(), "test_table".to_string()]); - tracking_ns - .create_table(create_req, bytes::Bytes::from(ipc_data)) + ns.create_table(create_req, bytes::Bytes::from(ipc_data)) .await .unwrap(); // Describe table should return managed_versioning=true let mut describe_req = DescribeTableRequest::new(); describe_req.id = Some(vec!["workspace".to_string(), "test_table".to_string()]); - let describe_resp = tracking_ns.describe_table(describe_req).await.unwrap(); + let describe_resp = ns.describe_table(describe_req).await.unwrap(); // managed_versioning should be true assert_eq!( @@ -4467,7 +4734,11 @@ mod tests { #[tokio::test] async fn test_e2e_external_manifest_store_invokes_namespace_apis() { - use lance_namespace::models::{CreateNamespaceRequest, DescribeTableRequest}; + use arrow::array::{Int32Array, StringArray}; + use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; + use arrow::record_batch::RecordBatch; + use lance::dataset::builder::DatasetBuilder; + use lance_namespace::models::CreateNamespaceRequest; let temp_dir = TempStdDir::default(); let temp_path = temp_dir.to_str().unwrap(); @@ -4481,11 +4752,12 @@ mod tests { .unwrap(); let tracking_ns = Arc::new(TrackingNamespace::new(inner_ns)); + let ns: Arc = tracking_ns.clone(); // Create parent namespace let mut create_ns_req = CreateNamespaceRequest::new(); create_ns_req.id = Some(vec!["workspace".to_string()]); - tracking_ns.create_namespace(create_ns_req).await.unwrap(); + ns.create_namespace(create_ns_req).await.unwrap(); // Create a table with multi-level ID (namespace + table) let table_id = vec!["workspace".to_string(), "test_table".to_string()]; @@ -4493,82 
+4765,78 @@ mod tests { let ipc_data = create_test_ipc_data(&schema); let mut create_req = CreateTableRequest::new(); create_req.id = Some(table_id.clone()); - tracking_ns - .create_table(create_req, bytes::Bytes::from(ipc_data)) + ns.create_table(create_req, bytes::Bytes::from(ipc_data)) .await .unwrap(); - // Get the table location from describe_table - let mut describe_req = DescribeTableRequest::new(); - describe_req.id = Some(table_id.clone()); - let describe_resp = tracking_ns.describe_table(describe_req).await.unwrap(); - let table_location = describe_resp.location.unwrap(); - - // Create the external manifest store using our tracking namespace - let external_store = - LanceNamespaceExternalManifestStore::new(tracking_ns.clone(), table_id.clone()); - - // Test get_latest_version - should invoke list_table_versions - let initial_list_calls = tracking_ns.list_table_versions_calls(); - let latest = external_store - .get_latest_version(&table_location) + // Open the dataset using from_namespace + let mut dataset = DatasetBuilder::from_namespace(ns.clone(), table_id.clone()) + .await + .unwrap() + .load() .await .unwrap(); - assert!(latest.is_some(), "Should have at least version 1"); - let (version, _manifest_path) = latest.unwrap(); - assert_eq!(version, 1, "Initial version should be 1"); - assert!( - tracking_ns.list_table_versions_calls() > initial_list_calls, - "list_table_versions should have been called" + assert_eq!(dataset.version().version, 1); + + // Create some data to append + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, true), + ])); + let batch = RecordBatch::try_new( + arrow_schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["a", "b", "c"])), + ], + ) + .unwrap(); + + // Append data - this should call create_table_version exactly once + assert_eq!( + tracking_ns.create_table_version_calls(), + 0, + 
"create_table_version should not have been called yet" ); - // Test commit - should invoke create_table_version - // Get the table path from describe_table location (strip file:// prefix if present) - let table_path = table_location - .strip_prefix("file://") - .unwrap_or(&table_location); - let versions_dir = format!("{}/_versions", table_path); - let staging_path = format!("{}/staging_manifest_v2", temp_path); - - let manifest_file = std::fs::read_dir(&versions_dir) - .expect("Failed to read versions directory") - .filter_map(|entry| entry.ok()) - .find(|entry| entry.file_name().to_string_lossy().ends_with(".manifest")) - .expect("No manifest file found"); - - let internal_manifest_path = manifest_file.path(); - std::fs::copy(&internal_manifest_path, &staging_path).unwrap(); - let staging_size = std::fs::metadata(&staging_path).unwrap().len(); - - // Create object store for commit method - let object_store = object_store::local::LocalFileSystem::new(); - let base_path = object_store::path::Path::from(table_path); - let staging_obj_path = object_store::path::Path::from(staging_path.as_str()); + let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema); + dataset.append(batches, None).await.unwrap(); - let initial_create_calls = tracking_ns.create_table_version_calls(); - external_store - .commit( - &base_path, - 2, - &staging_obj_path, - staging_size, - None, - &object_store, - lance_table::io::commit::ManifestNamingScheme::V1, - ) + assert_eq!( + tracking_ns.create_table_version_calls(), + 1, + "create_table_version should have been called exactly once during commit" + ); + + // checkout_latest should call list_table_versions exactly once + let initial_list_calls = tracking_ns.list_table_versions_calls(); + let latest_dataset = DatasetBuilder::from_namespace(ns.clone(), table_id.clone()) + .await + .unwrap() + .load() .await .unwrap(); - assert!( - tracking_ns.create_table_version_calls() > initial_create_calls, - "create_table_version should have 
been called" + assert_eq!(latest_dataset.version().version, 2); + assert_eq!( + tracking_ns.list_table_versions_calls(), + initial_list_calls + 1, + "list_table_versions should have been called exactly once during checkout_latest" ); - // Verify version 2 was created in internal versions directory (_versions) - let version_2_path = format!("{}/_versions/2.manifest", table_path); - assert!( - std::path::Path::new(&version_2_path).exists(), - "Version 2 manifest should exist at {}", - version_2_path + // checkout to specific version should call describe_table_version exactly once + let initial_describe_calls = tracking_ns.describe_table_version_calls(); + let v1_dataset = DatasetBuilder::from_namespace(ns.clone(), table_id.clone()) + .await + .unwrap() + .with_version(1) + .load() + .await + .unwrap(); + assert_eq!(v1_dataset.version().version, 1); + assert_eq!( + tracking_ns.describe_table_version_calls(), + initial_describe_calls + 1, + "describe_table_version should have been called exactly once during checkout to version 1" ); } @@ -4577,11 +4845,14 @@ mod tests { use arrow::array::{Int32Array, StringArray}; use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; use arrow::record_batch::RecordBatch; + use futures::TryStreamExt; + use lance::dataset::builder::DatasetBuilder; use lance::dataset::{Dataset, WriteMode, WriteParams}; - use lance_namespace::models::{CreateNamespaceRequest, DescribeTableRequest}; + use lance_namespace::models::CreateNamespaceRequest; + use lance_table::io::commit::ManifestNamingScheme; - let temp_dir = TempStrDir::default(); - let temp_path: &str = &temp_dir; + let temp_dir = TempStdDir::default(); + let temp_path = temp_dir.to_str().unwrap(); // Create namespace with table_version_tracking_enabled and manifest_enabled let inner_ns = DirectoryNamespaceBuilder::new(temp_path) @@ -4609,11 +4880,14 @@ mod tests { .await .unwrap(); - // Get the table location from describe_table - let mut describe_req = DescribeTableRequest::new(); 
- describe_req.id = Some(table_id.clone()); - let describe_resp = tracking_ns.describe_table(describe_req).await.unwrap(); - let table_location = describe_resp.location.unwrap(); + // Open the dataset using from_namespace to get proper paths + let dataset = DatasetBuilder::from_namespace(tracking_ns.clone(), table_id.clone()) + .await + .unwrap() + .load() + .await + .unwrap(); + assert_eq!(dataset.version().version, 1); // Create the external manifest store commit handler let external_store = Arc::new(LanceNamespaceExternalManifestStore::new( @@ -4624,10 +4898,6 @@ mod tests { external_manifest_store: external_store, }); - // Open the dataset with the external manifest commit handler - let dataset = Dataset::open(&table_location).await.unwrap(); - assert_eq!(dataset.version().version, 1); - // Create some data to append let arrow_schema = Arc::new(ArrowSchema::new(vec![ Field::new("id", DataType::Int32, false), @@ -4651,7 +4921,7 @@ mod tests { }; let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema); - Dataset::write(batches, &table_location, Some(write_params)) + Dataset::write(batches, dataset.uri(), Some(write_params)) .await .unwrap(); @@ -4664,15 +4934,27 @@ mod tests { tracking_ns.create_table_version_calls() ); - // Verify version 2 was created in internal versions directory (_versions) - let table_path = table_location - .strip_prefix("file://") - .unwrap_or(&table_location); - let version_2_path = format!("{}/_versions/2.manifest", table_path); + // Verify version 2 was created using the dataset's object_store + // List manifests in the versions directory to find the V2 named manifest + let manifest_metas: Vec<_> = dataset + .object_store() + .inner + .list(Some(&dataset.versions_dir())) + .try_collect() + .await + .unwrap(); + let version_2_found = manifest_metas.iter().any(|m| { + m.location + .filename() + .map(|f| { + f.ends_with(".manifest") + && ManifestNamingScheme::V2.parse_version(f) == Some(2) + }) + .unwrap_or(false) + }); 
assert!( - std::path::Path::new(&version_2_path).exists(), - "Version 2 manifest should exist at {}", - version_2_path + version_2_found, + "Version 2 manifest should exist in versions directory" ); } } diff --git a/rust/lance/src/dataset.rs b/rust/lance/src/dataset.rs index d85883a787b..9feb02615e1 100644 --- a/rust/lance/src/dataset.rs +++ b/rust/lance/src/dataset.rs @@ -807,6 +807,7 @@ impl Dataset { location: fallback_resp.location, storage_options: fallback_resp.storage_options, properties: fallback_resp.properties, + managed_versioning: None, } } Err(e) => { diff --git a/rust/lance/src/io/commit/namespace_manifest.rs b/rust/lance/src/io/commit/namespace_manifest.rs index 405865fbc2d..fe259fb9241 100644 --- a/rust/lance/src/io/commit/namespace_manifest.rs +++ b/rust/lance/src/io/commit/namespace_manifest.rs @@ -70,38 +70,46 @@ impl ExternalManifestStore for LanceNamespaceExternalManifestStore { /// Direct-write commit: reads staging manifest and writes directly to final location. async fn commit( &self, - base_path: &Path, + _base_path: &Path, version: u64, staging_path: &Path, size: u64, e_tag: Option, - object_store: &dyn OSObjectStore, + _object_store: &dyn OSObjectStore, naming_scheme: ManifestNamingScheme, ) -> Result { // create_table_version reads staging manifest and writes to final location + let naming_scheme_str = match naming_scheme { + ManifestNamingScheme::V1 => "V1", + ManifestNamingScheme::V2 => "V2", + }; + let request = CreateTableVersionRequest { id: Some(self.table_id.clone()), version: version as i64, manifest_path: staging_path.to_string(), manifest_size: Some(size as i64), e_tag: e_tag.clone(), + naming_scheme: Some(naming_scheme_str.to_string()), ..Default::default() }; - self.namespace.create_table_version(request).await?; - - // Delete staging manifest (it's been copied to final location) - let _ = object_store.delete(staging_path).await; + let response = self.namespace.create_table_version(request).await?; - // Return final manifest 
location (full path relative to object store root) - let final_path = naming_scheme.manifest_path(base_path, version); + // Get version info from response + let version_info = response + .version + .ok_or_else(|| lance_core::Error::Internal { + message: "create_table_version response missing version info".to_string(), + location: snafu::location!(), + })?; Ok(ManifestLocation { - version, - path: final_path, - size: Some(size), + version: version_info.version as u64, + path: Path::from(version_info.manifest_path), + size: version_info.manifest_size.map(|s| s as u64), naming_scheme, - e_tag, + e_tag: version_info.e_tag, }) } From 5a45e9f00c450d0de3a7c3ba206d54f51fe966a9 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 19 Feb 2026 18:44:16 -0800 Subject: [PATCH 07/23] fix jni --- java/lance-jni/Cargo.lock | 3 +- java/lance-jni/src/blocking_dataset.rs | 94 +++- java/lance-jni/src/namespace.rs | 411 ++++++++++++++++++ java/src/main/java/org/lance/Dataset.java | 15 +- .../java/org/lance/OpenDatasetBuilder.java | 41 +- .../lance/namespace/DirectoryNamespace.java | 12 + .../org/lance/namespace/RestNamespace.java | 41 ++ 7 files changed, 546 insertions(+), 71 deletions(-) diff --git a/java/lance-jni/Cargo.lock b/java/lance-jni/Cargo.lock index 89f82a1e025..df55705a473 100644 --- a/java/lance-jni/Cargo.lock +++ b/java/lance-jni/Cargo.lock @@ -3809,6 +3809,7 @@ dependencies = [ "lance-index", "lance-io", "lance-namespace", + "lance-table", "log", "object_store", "rand 0.9.2", @@ -3825,8 +3826,6 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2c20a25207bbae280c9acd16ccd1e3561ad7f79a57c0e88809cd9c026a8494c" dependencies = [ "reqwest", "serde", diff --git a/java/lance-jni/src/blocking_dataset.rs b/java/lance-jni/src/blocking_dataset.rs index 4b64644d8a1..add17a1afe5 100644 --- a/java/lance-jni/src/blocking_dataset.rs +++ 
b/java/lance-jni/src/blocking_dataset.rs @@ -1065,8 +1065,7 @@ pub extern "system" fn Java_org_lance_Dataset_openNative<'local>( serialized_manifest: JObject, // Optional storage_options_provider_obj: JObject, // Optional session_handle: jlong, // Session handle, 0 means no session - namespace_handle: jlong, // Namespace handle, 0 means no namespace - namespace_type: JString, // "directory" or "rest", null if no namespace + namespace_obj: JObject, // LanceNamespace object, null if no namespace table_id_obj: JObject, // List, null if no namespace ) -> JObject<'local> { ok_or_throw!( @@ -1082,8 +1081,7 @@ pub extern "system" fn Java_org_lance_Dataset_openNative<'local>( serialized_manifest, storage_options_provider_obj, session_handle, - namespace_handle, - namespace_type, + namespace_obj, table_id_obj, ) ) @@ -1101,11 +1099,12 @@ fn inner_open_native<'local>( serialized_manifest: JObject, // Optional storage_options_provider_obj: JObject, // Optional session_handle: jlong, // Session handle, 0 means no session - namespace_handle: jlong, // Namespace handle, 0 means no namespace - namespace_type: JString, // "directory" or "rest", null if no namespace + namespace_obj: JObject, // LanceNamespace object, null if no namespace table_id_obj: JObject, // List, null if no namespace ) -> Result> { - use crate::namespace::{BlockingDirectoryNamespace, BlockingRestNamespace}; + use crate::namespace::{ + create_java_lance_namespace, BlockingDirectoryNamespace, BlockingRestNamespace, + }; let path_str: String = path.extract(env)?; let version = env.get_u64_opt(&version_obj)?; @@ -1123,20 +1122,26 @@ fn inner_open_native<'local>( storage_options_provider.map(|v| Arc::new(v) as Arc); // Extract namespace and table_id if provided (before get_bytes_opt which holds borrow) - let (namespace, table_id) = if namespace_handle != 0 && !namespace_type.is_null() { - let ns_type: String = namespace_type.extract(env)?; - let ns_arc: Arc = if ns_type == "directory" { - let ns = unsafe { 
&*(namespace_handle as *const BlockingDirectoryNamespace) }; - ns.inner.clone() - } else if ns_type == "rest" { - let ns = unsafe { &*(namespace_handle as *const BlockingRestNamespace) }; - ns.inner.clone() - } else { - return Err(Error::input_error(format!( - "Unknown namespace type: {}", - ns_type - ))); - }; + let (namespace, table_id) = if !namespace_obj.is_null() { + // Check if it's a native implementation by trying to get getNativeHandle + let ns_arc: Arc = + if let Ok(native_handle) = get_native_namespace_handle(env, &namespace_obj) { + // Get the namespace type to determine which native implementation it is + let ns_type = get_namespace_type(env, &namespace_obj)?; + if ns_type == "directory" { + let ns = unsafe { &*(native_handle as *const BlockingDirectoryNamespace) }; + ns.inner.clone() + } else if ns_type == "rest" { + let ns = unsafe { &*(native_handle as *const BlockingRestNamespace) }; + ns.inner.clone() + } else { + // Unknown native type, fall back to Java bridge + create_java_lance_namespace(env, &namespace_obj)? + } + } else { + // Not a native implementation, create a Java bridge wrapper + create_java_lance_namespace(env, &namespace_obj)? + }; // Extract table_id from List let table_id = if !table_id_obj.is_null() { @@ -1171,6 +1176,53 @@ fn inner_open_native<'local>( dataset.into_java(env) } +/// Try to get the native handle from a Java LanceNamespace object. +/// Returns Ok(handle) if the object has a getNativeHandle method, Err otherwise. +fn get_native_namespace_handle(env: &mut JNIEnv, namespace_obj: &JObject) -> Result { + let result = env.call_method(namespace_obj, "getNativeHandle", "()J", &[]); + match result { + Ok(value) => value.j().map_err(|e| { + Error::runtime_error(format!("getNativeHandle did not return a long: {}", e)) + }), + Err(_) => Err(Error::runtime_error( + "Namespace does not have getNativeHandle method".to_string(), + )), + } +} + +/// Get the namespace type from a Java LanceNamespace object. 
+fn get_namespace_type(env: &mut JNIEnv, namespace_obj: &JObject) -> Result { + let result = env.call_method( + namespace_obj, + "getNamespaceType", + "()Ljava/lang/String;", + &[], + ); + match result { + Ok(value) => { + let jstring = value.l().map_err(|e| { + Error::runtime_error(format!("getNamespaceType did not return an object: {}", e)) + })?; + if jstring.is_null() { + return Err(Error::runtime_error( + "getNamespaceType returned null".to_string(), + )); + } + let jstring_ref = JString::from(jstring); + let java_string = env.get_string(&jstring_ref).map_err(|e| { + Error::runtime_error(format!( + "Failed to convert getNamespaceType result to string: {}", + e + )) + })?; + Ok(java_string.into()) + } + Err(_) => Err(Error::runtime_error( + "Namespace does not have getNamespaceType method".to_string(), + )), + } +} + #[no_mangle] pub extern "system" fn Java_org_lance_Dataset_getFragmentsNative<'a>( mut env: JNIEnv<'a>, diff --git a/java/lance-jni/src/namespace.rs b/java/lance-jni/src/namespace.rs index 72d1b0a9140..dccb0b1a51a 100644 --- a/java/lance-jni/src/namespace.rs +++ b/java/lance-jni/src/namespace.rs @@ -4,6 +4,7 @@ use std::collections::HashMap; use std::sync::Arc; +use async_trait::async_trait; use bytes::Bytes; use jni::objects::{GlobalRef, JByteArray, JMap, JObject, JString, JValue}; use jni::sys::{jbyteArray, jlong, jstring}; @@ -126,6 +127,382 @@ pub struct BlockingRestNamespace { pub(crate) inner: Arc, } +// ============================================================================ +// JavaLanceNamespace - Generic wrapper for any Java LanceNamespace implementation +// ============================================================================ + +/// Java-implemented LanceNamespace wrapper. +/// +/// This wraps any Java object that implements the LanceNamespace interface +/// and forwards calls to the Java implementation via JNI. 
+pub struct JavaLanceNamespace { + java_namespace: GlobalRef, + jvm: Arc, + namespace_id: String, +} + +impl std::fmt::Debug for JavaLanceNamespace { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "JavaLanceNamespace({})", self.namespace_id) + } +} + +impl JavaLanceNamespace { + /// Create a new wrapper for a Java LanceNamespace object. + pub fn new(env: &mut JNIEnv, java_namespace: &JObject) -> Result { + let java_namespace = env.new_global_ref(java_namespace)?; + let jvm = Arc::new(env.get_java_vm()?); + + // Cache namespace_id since it's called frequently and won't change + let namespace_id = Self::call_namespace_id_internal(env, &java_namespace)?; + + Ok(Self { + java_namespace, + jvm, + namespace_id, + }) + } + + fn call_namespace_id_internal(env: &mut JNIEnv, java_namespace: &GlobalRef) -> Result { + let result = env + .call_method(java_namespace, "namespaceId", "()Ljava/lang/String;", &[]) + .map_err(|e| { + Error::runtime_error(format!( + "Failed to call namespaceId on Java namespace: {}", + e + )) + })?; + + let jstring = result.l().map_err(|e| { + Error::runtime_error(format!("namespaceId did not return an object: {}", e)) + })?; + + if jstring.is_null() { + return Err(Error::runtime_error( + "namespaceId returned null".to_string(), + )); + } + + let jstring_ref = JString::from(jstring); + let java_string = env.get_string(&jstring_ref).map_err(|e| { + Error::runtime_error(format!( + "Failed to convert namespaceId to Rust string: {}", + e + )) + })?; + + Ok(java_string.into()) + } + + /// Call a namespace method that takes a JSON request and returns a JSON response. 
+ fn call_json_method( + &self, + method_name: &str, + request: Req, + ) -> lance_core::Result + where + Req: Serialize, + Resp: for<'de> Deserialize<'de>, + { + let java_namespace = self.java_namespace.clone(); + let jvm = self.jvm.clone(); + let method_name = method_name.to_string(); + let request_json = serde_json::to_string(&request).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to serialize request: {}", + e + ))), + location: snafu::location!(), + })?; + + let mut env = jvm + .attach_current_thread() + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to attach to JVM: {}", + e + ))), + location: snafu::location!(), + })?; + + // Create Java string for request + let jrequest = env + .new_string(&request_json) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to create request string: {}", + e + ))), + location: snafu::location!(), + })?; + + // Call the method + let result = env + .call_method( + &java_namespace, + &method_name, + "(Ljava/lang/String;)Ljava/lang/String;", + &[JValue::Object(&jrequest)], + ) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to call {}: {}", + method_name, e + ))), + location: snafu::location!(), + })?; + + let response_obj = result.l().map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "{} did not return an object: {}", + method_name, e + ))), + location: snafu::location!(), + })?; + + if response_obj.is_null() { + return Err(lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "{} returned null", + method_name + ))), + location: snafu::location!(), + }); + } + + let response_str: String = env + .get_string(&JString::from(response_obj)) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to convert response to string: {}", + e + 
))), + location: snafu::location!(), + })? + .into(); + + serde_json::from_str(&response_str).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to deserialize response: {}", + e + ))), + location: snafu::location!(), + }) + } +} + +#[async_trait] +impl LanceNamespaceTrait for JavaLanceNamespace { + fn namespace_id(&self) -> String { + self.namespace_id.clone() + } + + async fn describe_table_version( + &self, + request: DescribeTableVersionRequest, + ) -> lance_core::Result { + let java_namespace = self.java_namespace.clone(); + let jvm = self.jvm.clone(); + + tokio::task::spawn_blocking(move || { + let mut env = jvm + .attach_current_thread() + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to attach to JVM: {}", + e + ))), + location: snafu::location!(), + })?; + + let request_json = + serde_json::to_string(&request).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to serialize request: {}", + e + ))), + location: snafu::location!(), + })?; + + let jrequest = env + .new_string(&request_json) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to create request string: {}", + e + ))), + location: snafu::location!(), + })?; + + let result = env + .call_method( + &java_namespace, + "describeTableVersionJson", + "(Ljava/lang/String;)Ljava/lang/String;", + &[JValue::Object(&jrequest)], + ) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to call describeTableVersionJson: {}", + e + ))), + location: snafu::location!(), + })?; + + let response_obj = result.l().map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "describeTableVersionJson did not return an object: {}", + e + ))), + location: snafu::location!(), + })?; + + if response_obj.is_null() { + return Err(lance_core::Error::IO { + source: 
Box::new(std::io::Error::other( + "describeTableVersionJson returned null", + )), + location: snafu::location!(), + }); + } + + let response_str: String = env + .get_string(&JString::from(response_obj)) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to convert response to string: {}", + e + ))), + location: snafu::location!(), + })? + .into(); + + serde_json::from_str(&response_str).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to deserialize response: {}", + e + ))), + location: snafu::location!(), + }) + }) + .await + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to spawn blocking task: {}", + e + ))), + location: snafu::location!(), + })? + } + + async fn create_table_version( + &self, + request: CreateTableVersionRequest, + ) -> lance_core::Result { + let java_namespace = self.java_namespace.clone(); + let jvm = self.jvm.clone(); + + tokio::task::spawn_blocking(move || { + let mut env = jvm + .attach_current_thread() + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to attach to JVM: {}", + e + ))), + location: snafu::location!(), + })?; + + let request_json = + serde_json::to_string(&request).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to serialize request: {}", + e + ))), + location: snafu::location!(), + })?; + + let jrequest = env + .new_string(&request_json) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to create request string: {}", + e + ))), + location: snafu::location!(), + })?; + + let result = env + .call_method( + &java_namespace, + "createTableVersionJson", + "(Ljava/lang/String;)Ljava/lang/String;", + &[JValue::Object(&jrequest)], + ) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to call 
createTableVersionJson: {}", + e + ))), + location: snafu::location!(), + })?; + + let response_obj = result.l().map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "createTableVersionJson did not return an object: {}", + e + ))), + location: snafu::location!(), + })?; + + if response_obj.is_null() { + return Err(lance_core::Error::IO { + source: Box::new(std::io::Error::other( + "createTableVersionJson returned null", + )), + location: snafu::location!(), + }); + } + + let response_str: String = env + .get_string(&JString::from(response_obj)) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to convert response to string: {}", + e + ))), + location: snafu::location!(), + })? + .into(); + + serde_json::from_str(&response_str).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to deserialize response: {}", + e + ))), + location: snafu::location!(), + }) + }) + .await + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to spawn blocking task: {}", + e + ))), + location: snafu::location!(), + })? + } +} + +/// Create a JavaLanceNamespace wrapper from a JNI environment and Java object. 
+pub fn create_java_lance_namespace( + env: &mut JNIEnv, + java_namespace: &JObject, +) -> Result> { + let wrapper = JavaLanceNamespace::new(env, java_namespace)?; + Ok(Arc::new(wrapper)) +} + // ============================================================================ // DirectoryNamespace JNI Functions // ============================================================================ @@ -703,6 +1080,23 @@ pub extern "system" fn Java_org_lance_namespace_DirectoryNamespace_describeTable .into_raw() } +#[no_mangle] +pub extern "system" fn Java_org_lance_namespace_DirectoryNamespace_batchDeleteTableVersionsNative( + mut env: JNIEnv, + _obj: JObject, + handle: jlong, + request_json: JString, +) -> jstring { + ok_or_throw_with_return!( + env, + call_namespace_method(&mut env, handle, request_json, |ns, req| { + RT.block_on(ns.inner.batch_delete_table_versions(req)) + }), + std::ptr::null_mut() + ) + .into_raw() +} + // ============================================================================ // RestNamespace JNI Functions // ============================================================================ @@ -1294,6 +1688,23 @@ pub extern "system" fn Java_org_lance_namespace_RestNamespace_describeTableVersi .into_raw() } +#[no_mangle] +pub extern "system" fn Java_org_lance_namespace_RestNamespace_batchDeleteTableVersionsNative( + mut env: JNIEnv, + _obj: JObject, + handle: jlong, + request_json: JString, +) -> jstring { + ok_or_throw_with_return!( + env, + call_rest_namespace_method(&mut env, handle, request_json, |ns, req| { + RT.block_on(ns.inner.batch_delete_table_versions(req)) + }), + std::ptr::null_mut() + ) + .into_raw() +} + // ============================================================================ // Helper Functions // ============================================================================ diff --git a/java/src/main/java/org/lance/Dataset.java b/java/src/main/java/org/lance/Dataset.java index f971af3ad81..6d0778b7f51 100644 --- 
a/java/src/main/java/org/lance/Dataset.java +++ b/java/src/main/java/org/lance/Dataset.java @@ -30,6 +30,7 @@ import org.lance.ipc.ScanOptions; import org.lance.merge.MergeInsertParams; import org.lance.merge.MergeInsertResult; +import org.lance.namespace.LanceNamespace; import org.lance.operation.UpdateConfig; import org.lance.operation.UpdateMap; import org.lance.schema.ColumnAlteration; @@ -328,7 +329,7 @@ static Dataset open( String path, ReadOptions options, Session session) { - return open(allocator, selfManagedAllocator, path, options, session, 0, null, null); + return open(allocator, selfManagedAllocator, path, options, session, null, null); } /** @@ -336,8 +337,7 @@ static Dataset open( * * @param path file path * @param options the open options - * @param namespaceHandle native namespace handle (0 if not using namespace) - * @param namespaceType "directory" or "rest" (null if not using namespace) + * @param namespace the LanceNamespace to use for managed versioning (null if not using namespace) * @param tableId table identifier (null if not using namespace) * @return Dataset */ @@ -347,8 +347,7 @@ static Dataset open( String path, ReadOptions options, Session session, - long namespaceHandle, - String namespaceType, + LanceNamespace namespace, List tableId) { Preconditions.checkNotNull(path); Preconditions.checkNotNull(allocator); @@ -371,8 +370,7 @@ static Dataset open( options.getSerializedManifest(), options.getStorageOptionsProvider(), sessionHandle, - namespaceHandle, - namespaceType, + namespace, tableId); dataset.allocator = allocator; dataset.selfManagedAllocator = selfManagedAllocator; @@ -395,8 +393,7 @@ private static native Dataset openNative( Optional serializedManifest, Optional storageOptionsProvider, long sessionHandle, - long namespaceHandle, - String namespaceType, + LanceNamespace namespace, List tableId); /** diff --git a/java/src/main/java/org/lance/OpenDatasetBuilder.java b/java/src/main/java/org/lance/OpenDatasetBuilder.java index 
0d972d3f905..85bc19eac6e 100644 --- a/java/src/main/java/org/lance/OpenDatasetBuilder.java +++ b/java/src/main/java/org/lance/OpenDatasetBuilder.java @@ -13,10 +13,8 @@ */ package org.lance; -import org.lance.namespace.DirectoryNamespace; import org.lance.namespace.LanceNamespace; import org.lance.namespace.LanceNamespaceStorageOptionsProvider; -import org.lance.namespace.RestNamespace; import org.lance.namespace.model.DescribeTableRequest; import org.lance.namespace.model.DescribeTableResponse; @@ -237,54 +235,19 @@ private Dataset buildFromNamespace() { } optionsBuilder.setStorageOptions(storageOptions); - // If managed_versioning is true, pass namespace info for commit handler setup + // If managed_versioning is true, pass namespace for commit handler setup if (Boolean.TRUE.equals(managedVersioning)) { - long namespaceHandle = getNamespaceHandle(namespace); - String namespaceType = getNamespaceType(namespace); return Dataset.open( allocator, selfManagedAllocator, location, optionsBuilder.build(), session, - namespaceHandle, - namespaceType, + namespace, tableId); } // Open dataset with regular open method (no namespace commit handler) return Dataset.open(allocator, selfManagedAllocator, location, optionsBuilder.build(), session); } - - private static long getNamespaceHandle(LanceNamespace namespace) { - if (namespace instanceof DirectoryNamespace) { - return ((DirectoryNamespace) namespace).getNativeHandle(); - } else if (namespace instanceof RestNamespace) { - return ((RestNamespace) namespace).getNativeHandle(); - } - // Try reflection for custom namespace implementations that have getNativeHandle - try { - java.lang.reflect.Method method = namespace.getClass().getMethod("getNativeHandle"); - return (long) method.invoke(namespace); - } catch (Exception e) { - throw new IllegalArgumentException( - "Unknown namespace type: " + namespace.getClass().getName(), e); - } - } - - private static String getNamespaceType(LanceNamespace namespace) { - if (namespace 
instanceof DirectoryNamespace) { - return ((DirectoryNamespace) namespace).getNamespaceType(); - } else if (namespace instanceof RestNamespace) { - return ((RestNamespace) namespace).getNamespaceType(); - } - // Try reflection for custom namespace implementations that have getNamespaceType - try { - java.lang.reflect.Method method = namespace.getClass().getMethod("getNamespaceType"); - return (String) method.invoke(namespace); - } catch (Exception e) { - throw new IllegalArgumentException( - "Unknown namespace type: " + namespace.getClass().getName(), e); - } - } } diff --git a/java/src/main/java/org/lance/namespace/DirectoryNamespace.java b/java/src/main/java/org/lance/namespace/DirectoryNamespace.java index 5788035246e..ce4df88510a 100644 --- a/java/src/main/java/org/lance/namespace/DirectoryNamespace.java +++ b/java/src/main/java/org/lance/namespace/DirectoryNamespace.java @@ -435,6 +435,16 @@ public DescribeTableVersionResponse describeTableVersion(DescribeTableVersionReq return fromJson(responseJson, DescribeTableVersionResponse.class); } + @Override + public BatchDeleteTableVersionsResponse batchDeleteTableVersions( + BatchDeleteTableVersionsRequest request) { + ensureInitialized(); + String requestJson = toJson(request); + String responseJson = + batchDeleteTableVersionsNative(nativeDirectoryNamespaceHandle, requestJson); + return fromJson(responseJson, BatchDeleteTableVersionsResponse.class); + } + @Override public void close() { if (nativeDirectoryNamespaceHandle != 0) { @@ -547,6 +557,8 @@ private native String mergeInsertIntoTableNative( private native String describeTableVersionNative(long handle, String requestJson); + private native String batchDeleteTableVersionsNative(long handle, String requestJson); + // ========================================================================== // Provider loading helpers // ========================================================================== diff --git 
a/java/src/main/java/org/lance/namespace/RestNamespace.java b/java/src/main/java/org/lance/namespace/RestNamespace.java index fa628ce5709..050560632af 100644 --- a/java/src/main/java/org/lance/namespace/RestNamespace.java +++ b/java/src/main/java/org/lance/namespace/RestNamespace.java @@ -335,6 +335,39 @@ public AlterTransactionResponse alterTransaction(AlterTransactionRequest request return fromJson(responseJson, AlterTransactionResponse.class); } + @Override + public ListTableVersionsResponse listTableVersions(ListTableVersionsRequest request) { + ensureInitialized(); + String requestJson = toJson(request); + String responseJson = listTableVersionsNative(nativeRestNamespaceHandle, requestJson); + return fromJson(responseJson, ListTableVersionsResponse.class); + } + + @Override + public CreateTableVersionResponse createTableVersion(CreateTableVersionRequest request) { + ensureInitialized(); + String requestJson = toJson(request); + String responseJson = createTableVersionNative(nativeRestNamespaceHandle, requestJson); + return fromJson(responseJson, CreateTableVersionResponse.class); + } + + @Override + public DescribeTableVersionResponse describeTableVersion(DescribeTableVersionRequest request) { + ensureInitialized(); + String requestJson = toJson(request); + String responseJson = describeTableVersionNative(nativeRestNamespaceHandle, requestJson); + return fromJson(responseJson, DescribeTableVersionResponse.class); + } + + @Override + public BatchDeleteTableVersionsResponse batchDeleteTableVersions( + BatchDeleteTableVersionsRequest request) { + ensureInitialized(); + String requestJson = toJson(request); + String responseJson = batchDeleteTableVersionsNative(nativeRestNamespaceHandle, requestJson); + return fromJson(responseJson, BatchDeleteTableVersionsResponse.class); + } + @Override public void close() { if (nativeRestNamespaceHandle != 0) { @@ -442,6 +475,14 @@ private native String mergeInsertIntoTableNative( private native String 
alterTransactionNative(long handle, String requestJson); + private native String listTableVersionsNative(long handle, String requestJson); + + private native String createTableVersionNative(long handle, String requestJson); + + private native String describeTableVersionNative(long handle, String requestJson); + + private native String batchDeleteTableVersionsNative(long handle, String requestJson); + // ========================================================================== // Provider loading helpers // ========================================================================== From 3d2408e9a4296edd31758ce62a43230d5232fe75 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 19 Feb 2026 19:15:07 -0800 Subject: [PATCH 08/23] fix timestamp issue --- Cargo.lock | 2 +- java/lance-jni/Cargo.lock | 2 +- java/lance-jni/src/blocking_dataset.rs | 99 ++++------- java/lance-jni/src/namespace.rs | 96 ---------- java/pom.xml | 5 - .../lance/namespace/DirectoryNamespace.java | 7 - .../org/lance/namespace/RestNamespace.java | 5 - .../org/lance/NamespaceIntegrationTest.java | 164 ++++++++++++++++-- rust/lance-namespace-impls/src/dir.rs | 6 +- 9 files changed, 192 insertions(+), 194 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4a6a636a508..49ffe955126 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5307,7 +5307,7 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.5.0" +version = "0.5.1" dependencies = [ "reqwest", "serde", diff --git a/java/lance-jni/Cargo.lock b/java/lance-jni/Cargo.lock index df55705a473..6be61e7e78b 100644 --- a/java/lance-jni/Cargo.lock +++ b/java/lance-jni/Cargo.lock @@ -3825,7 +3825,7 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.5.0" +version = "0.5.1" dependencies = [ "reqwest", "serde", diff --git a/java/lance-jni/src/blocking_dataset.rs b/java/lance-jni/src/blocking_dataset.rs index add17a1afe5..b0211b5d0ad 100644 --- a/java/lance-jni/src/blocking_dataset.rs +++ 
b/java/lance-jni/src/blocking_dataset.rs @@ -1123,25 +1123,19 @@ fn inner_open_native<'local>( // Extract namespace and table_id if provided (before get_bytes_opt which holds borrow) let (namespace, table_id) = if !namespace_obj.is_null() { - // Check if it's a native implementation by trying to get getNativeHandle - let ns_arc: Arc = - if let Ok(native_handle) = get_native_namespace_handle(env, &namespace_obj) { - // Get the namespace type to determine which native implementation it is - let ns_type = get_namespace_type(env, &namespace_obj)?; - if ns_type == "directory" { - let ns = unsafe { &*(native_handle as *const BlockingDirectoryNamespace) }; - ns.inner.clone() - } else if ns_type == "rest" { - let ns = unsafe { &*(native_handle as *const BlockingRestNamespace) }; - ns.inner.clone() - } else { - // Unknown native type, fall back to Java bridge - create_java_lance_namespace(env, &namespace_obj)? - } - } else { - // Not a native implementation, create a Java bridge wrapper - create_java_lance_namespace(env, &namespace_obj)? - }; + // Check if it's a native implementation using instanceof checks + let ns_arc: Arc = if is_directory_namespace(env, &namespace_obj)? { + let native_handle = get_native_namespace_handle(env, &namespace_obj)?; + let ns = unsafe { &*(native_handle as *const BlockingDirectoryNamespace) }; + ns.inner.clone() + } else if is_rest_namespace(env, &namespace_obj)? { + let native_handle = get_native_namespace_handle(env, &namespace_obj)?; + let ns = unsafe { &*(native_handle as *const BlockingRestNamespace) }; + ns.inner.clone() + } else { + // Custom Java implementation, create a Java bridge wrapper + create_java_lance_namespace(env, &namespace_obj)? + }; // Extract table_id from List let table_id = if !table_id_obj.is_null() { @@ -1176,51 +1170,32 @@ fn inner_open_native<'local>( dataset.into_java(env) } -/// Try to get the native handle from a Java LanceNamespace object. 
-/// Returns Ok(handle) if the object has a getNativeHandle method, Err otherwise. -fn get_native_namespace_handle(env: &mut JNIEnv, namespace_obj: &JObject) -> Result { - let result = env.call_method(namespace_obj, "getNativeHandle", "()J", &[]); - match result { - Ok(value) => value.j().map_err(|e| { - Error::runtime_error(format!("getNativeHandle did not return a long: {}", e)) - }), - Err(_) => Err(Error::runtime_error( - "Namespace does not have getNativeHandle method".to_string(), - )), - } +/// Check if the Java object is an instance of DirectoryNamespace. +fn is_directory_namespace(env: &mut JNIEnv, namespace_obj: &JObject) -> Result { + let class = env + .find_class("org/lance/namespace/DirectoryNamespace") + .map_err(|e| { + Error::runtime_error(format!("Failed to find DirectoryNamespace class: {}", e)) + })?; + env.is_instance_of(namespace_obj, class) + .map_err(|e| Error::runtime_error(format!("Failed to check instanceof: {}", e))) } -/// Get the namespace type from a Java LanceNamespace object. -fn get_namespace_type(env: &mut JNIEnv, namespace_obj: &JObject) -> Result { - let result = env.call_method( - namespace_obj, - "getNamespaceType", - "()Ljava/lang/String;", - &[], - ); - match result { - Ok(value) => { - let jstring = value.l().map_err(|e| { - Error::runtime_error(format!("getNamespaceType did not return an object: {}", e)) - })?; - if jstring.is_null() { - return Err(Error::runtime_error( - "getNamespaceType returned null".to_string(), - )); - } - let jstring_ref = JString::from(jstring); - let java_string = env.get_string(&jstring_ref).map_err(|e| { - Error::runtime_error(format!( - "Failed to convert getNamespaceType result to string: {}", - e - )) - })?; - Ok(java_string.into()) - } - Err(_) => Err(Error::runtime_error( - "Namespace does not have getNamespaceType method".to_string(), - )), - } +/// Check if the Java object is an instance of RestNamespace. 
+fn is_rest_namespace(env: &mut JNIEnv, namespace_obj: &JObject) -> Result { + let class = env + .find_class("org/lance/namespace/RestNamespace") + .map_err(|e| Error::runtime_error(format!("Failed to find RestNamespace class: {}", e)))?; + env.is_instance_of(namespace_obj, class) + .map_err(|e| Error::runtime_error(format!("Failed to check instanceof: {}", e))) +} + +/// Get the native handle from a Java LanceNamespace object. +fn get_native_namespace_handle(env: &mut JNIEnv, namespace_obj: &JObject) -> Result { + env.call_method(namespace_obj, "getNativeHandle", "()J", &[]) + .map_err(|e| Error::runtime_error(format!("Failed to call getNativeHandle: {}", e)))? + .j() + .map_err(|e| Error::runtime_error(format!("getNativeHandle did not return a long: {}", e))) } #[no_mangle] diff --git a/java/lance-jni/src/namespace.rs b/java/lance-jni/src/namespace.rs index dccb0b1a51a..81feb67404b 100644 --- a/java/lance-jni/src/namespace.rs +++ b/java/lance-jni/src/namespace.rs @@ -193,102 +193,6 @@ impl JavaLanceNamespace { Ok(java_string.into()) } - - /// Call a namespace method that takes a JSON request and returns a JSON response. 
- fn call_json_method( - &self, - method_name: &str, - request: Req, - ) -> lance_core::Result - where - Req: Serialize, - Resp: for<'de> Deserialize<'de>, - { - let java_namespace = self.java_namespace.clone(); - let jvm = self.jvm.clone(); - let method_name = method_name.to_string(); - let request_json = serde_json::to_string(&request).map_err(|e| lance_core::Error::IO { - source: Box::new(std::io::Error::other(format!( - "Failed to serialize request: {}", - e - ))), - location: snafu::location!(), - })?; - - let mut env = jvm - .attach_current_thread() - .map_err(|e| lance_core::Error::IO { - source: Box::new(std::io::Error::other(format!( - "Failed to attach to JVM: {}", - e - ))), - location: snafu::location!(), - })?; - - // Create Java string for request - let jrequest = env - .new_string(&request_json) - .map_err(|e| lance_core::Error::IO { - source: Box::new(std::io::Error::other(format!( - "Failed to create request string: {}", - e - ))), - location: snafu::location!(), - })?; - - // Call the method - let result = env - .call_method( - &java_namespace, - &method_name, - "(Ljava/lang/String;)Ljava/lang/String;", - &[JValue::Object(&jrequest)], - ) - .map_err(|e| lance_core::Error::IO { - source: Box::new(std::io::Error::other(format!( - "Failed to call {}: {}", - method_name, e - ))), - location: snafu::location!(), - })?; - - let response_obj = result.l().map_err(|e| lance_core::Error::IO { - source: Box::new(std::io::Error::other(format!( - "{} did not return an object: {}", - method_name, e - ))), - location: snafu::location!(), - })?; - - if response_obj.is_null() { - return Err(lance_core::Error::IO { - source: Box::new(std::io::Error::other(format!( - "{} returned null", - method_name - ))), - location: snafu::location!(), - }); - } - - let response_str: String = env - .get_string(&JString::from(response_obj)) - .map_err(|e| lance_core::Error::IO { - source: Box::new(std::io::Error::other(format!( - "Failed to convert response to string: {}", - e - 
))), - location: snafu::location!(), - })? - .into(); - - serde_json::from_str(&response_str).map_err(|e| lance_core::Error::IO { - source: Box::new(std::io::Error::other(format!( - "Failed to deserialize response: {}", - e - ))), - location: snafu::location!(), - }) - } } #[async_trait] diff --git a/java/pom.xml b/java/pom.xml index 173e234e9f4..4fc3fdd78df 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -121,11 +121,6 @@ jackson-databind 2.15.2 - - com.fasterxml.jackson.datatype - jackson-datatype-jsr310 - 2.15.2 - software.amazon.awssdk diff --git a/java/src/main/java/org/lance/namespace/DirectoryNamespace.java b/java/src/main/java/org/lance/namespace/DirectoryNamespace.java index ce4df88510a..423a11a38a3 100644 --- a/java/src/main/java/org/lance/namespace/DirectoryNamespace.java +++ b/java/src/main/java/org/lance/namespace/DirectoryNamespace.java @@ -19,7 +19,6 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; import org.apache.arrow.memory.BufferAllocator; import java.io.Closeable; @@ -148,7 +147,6 @@ public class DirectoryNamespace implements LanceNamespace, Closeable { private static ObjectMapper createObjectMapper() { ObjectMapper mapper = new ObjectMapper(); - mapper.registerModule(new JavaTimeModule()); mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); return mapper; } @@ -462,11 +460,6 @@ public long getNativeHandle() { return nativeDirectoryNamespaceHandle; } - /** Returns the namespace type identifier. 
*/ - public String getNamespaceType() { - return "directory"; - } - private void ensureInitialized() { if (nativeDirectoryNamespaceHandle == 0) { throw new IllegalStateException( diff --git a/java/src/main/java/org/lance/namespace/RestNamespace.java b/java/src/main/java/org/lance/namespace/RestNamespace.java index 050560632af..e90465f6a96 100644 --- a/java/src/main/java/org/lance/namespace/RestNamespace.java +++ b/java/src/main/java/org/lance/namespace/RestNamespace.java @@ -385,11 +385,6 @@ public long getNativeHandle() { return nativeRestNamespaceHandle; } - /** Returns the namespace type identifier. */ - public String getNamespaceType() { - return "rest"; - } - private void ensureInitialized() { if (nativeRestNamespaceHandle == 0) { throw new IllegalStateException("RestNamespace not initialized. Call initialize() first."); diff --git a/java/src/test/java/org/lance/NamespaceIntegrationTest.java b/java/src/test/java/org/lance/NamespaceIntegrationTest.java index 036ea6bb6e5..7c4c66551de 100644 --- a/java/src/test/java/org/lance/NamespaceIntegrationTest.java +++ b/java/src/test/java/org/lance/NamespaceIntegrationTest.java @@ -16,12 +16,20 @@ import org.lance.namespace.DirectoryNamespace; import org.lance.namespace.LanceNamespace; import org.lance.namespace.LanceNamespaceStorageOptionsProvider; +import org.lance.namespace.model.BatchDeleteTableVersionsRequest; +import org.lance.namespace.model.BatchDeleteTableVersionsResponse; import org.lance.namespace.model.CreateEmptyTableRequest; import org.lance.namespace.model.CreateEmptyTableResponse; +import org.lance.namespace.model.CreateTableVersionRequest; +import org.lance.namespace.model.CreateTableVersionResponse; import org.lance.namespace.model.DeclareTableRequest; import org.lance.namespace.model.DeclareTableResponse; import org.lance.namespace.model.DescribeTableRequest; import org.lance.namespace.model.DescribeTableResponse; +import org.lance.namespace.model.DescribeTableVersionRequest; +import 
org.lance.namespace.model.DescribeTableVersionResponse; +import org.lance.namespace.model.ListTableVersionsRequest; +import org.lance.namespace.model.ListTableVersionsResponse; import org.lance.operation.Append; import org.apache.arrow.memory.BufferAllocator; @@ -1315,7 +1323,7 @@ void testFragmentCreateAndCommitWithNamespace() throws Exception { *

This namespace wraps DirectoryNamespace with table_version_tracking_enabled and * manifest_enabled flags, and tracks create_table_version and describe_table_version calls. */ - static class TableVersionTrackingNamespace implements LanceNamespace { + static class TableVersionTrackingNamespace implements LanceNamespace, java.io.Closeable { private final DirectoryNamespace inner; private final AtomicInteger createTableVersionCount = new AtomicInteger(0); private final AtomicInteger describeTableVersionCount = new AtomicInteger(0); @@ -1353,10 +1361,6 @@ public long getNativeHandle() { return inner.getNativeHandle(); } - public String getNamespaceType() { - return inner.getNamespaceType(); - } - @Override public void initialize(Map configProperties, BufferAllocator allocator) { // Already initialized in constructor @@ -1381,6 +1385,34 @@ public DeclareTableResponse declareTable(DeclareTableRequest request) { public DescribeTableResponse describeTable(DescribeTableRequest request) { return inner.describeTable(request); } + + @Override + public CreateTableVersionResponse createTableVersion(CreateTableVersionRequest request) { + createTableVersionCount.incrementAndGet(); + return inner.createTableVersion(request); + } + + @Override + public DescribeTableVersionResponse describeTableVersion(DescribeTableVersionRequest request) { + describeTableVersionCount.incrementAndGet(); + return inner.describeTableVersion(request); + } + + @Override + public ListTableVersionsResponse listTableVersions(ListTableVersionsRequest request) { + return inner.listTableVersions(request); + } + + @Override + public BatchDeleteTableVersionsResponse batchDeleteTableVersions( + BatchDeleteTableVersionsRequest request) { + return inner.batchDeleteTableVersions(request); + } + + @Override + public void close() { + inner.close(); + } } @Test @@ -1399,6 +1431,7 @@ void testManagedVersioningWithDirectoryNamespace() throws Exception { new TableVersionTrackingNamespace( "s3://" + BUCKET_NAME + 
"/managed_versioning_test", storageOptions); String tableName = UUID.randomUUID().toString(); + List tableId = Arrays.asList(tableName); // Create schema and data Schema schema = @@ -1461,16 +1494,17 @@ public VectorSchemaRoot getVectorSchemaRoot() { .allocator(allocator) .reader(testReader) .namespace(namespace) - .tableId(Arrays.asList(tableName)) + .tableId(tableId) .mode(WriteParams.WriteMode.CREATE) .execute()) { assertEquals(2, dataset.countRows()); + assertEquals(1, dataset.version()); } } // Verify describe_table returns managed_versioning=true DescribeTableRequest descReq = new DescribeTableRequest(); - descReq.setId(Arrays.asList(tableName)); + descReq.setId(tableId); DescribeTableResponse descResp = namespace.describeTable(descReq); assertEquals( @@ -1478,20 +1512,122 @@ public VectorSchemaRoot getVectorSchemaRoot() { descResp.getManagedVersioning(), "Expected managedVersioning=true when table_version_tracking_enabled"); - // Open dataset through namespace with managed_versioning support + // Open dataset through namespace - this should call describe_table_version for latest + int initialDescribeCount = namespace.getDescribeTableVersionCount(); try (Dataset dsFromNamespace = + Dataset.open().allocator(allocator).namespace(namespace).tableId(tableId).build()) { + + assertEquals(2, dsFromNamespace.countRows()); + assertEquals(1, dsFromNamespace.version()); + } + assertEquals( + initialDescribeCount + 1, + namespace.getDescribeTableVersionCount(), + "describe_table_version should have been called once when opening latest version"); + + // Append data - this should call create_table_version exactly once + assertEquals( + 0, + namespace.getCreateTableVersionCount(), + "create_table_version should not have been called yet"); + + try (VectorSchemaRoot appendRoot = VectorSchemaRoot.create(schema, allocator)) { + IntVector aVector = (IntVector) appendRoot.getVector("a"); + IntVector bVector = (IntVector) appendRoot.getVector("b"); + + aVector.allocateNew(2); + 
bVector.allocateNew(2); + + aVector.set(0, 100); + bVector.set(0, 200); + aVector.set(1, 1000); + bVector.set(1, 2000); + + aVector.setValueCount(2); + bVector.setValueCount(2); + appendRoot.setRowCount(2); + + ArrowReader appendReader = + new ArrowReader(allocator) { + boolean firstRead = true; + + @Override + public boolean loadNextBatch() { + if (firstRead) { + firstRead = false; + return true; + } + return false; + } + + @Override + public long bytesRead() { + return 0; + } + + @Override + protected void closeReadSource() {} + + @Override + protected Schema readSchema() { + return schema; + } + + @Override + public VectorSchemaRoot getVectorSchemaRoot() { + return appendRoot; + } + }; + + // Append through namespace + try (Dataset dataset = + Dataset.write() + .allocator(allocator) + .reader(appendReader) + .namespace(namespace) + .tableId(tableId) + .mode(WriteParams.WriteMode.APPEND) + .execute()) { + assertEquals(4, dataset.countRows()); + assertEquals(2, dataset.version()); + } + } + + assertEquals( + 1, + namespace.getCreateTableVersionCount(), + "create_table_version should have been called exactly once during append"); + + // Open latest version - should call describe_table_version + int describeCountBeforeLatest = namespace.getDescribeTableVersionCount(); + try (Dataset latestDs = + Dataset.open().allocator(allocator).namespace(namespace).tableId(tableId).build()) { + + assertEquals(4, latestDs.countRows()); + assertEquals(2, latestDs.version()); + } + assertEquals( + describeCountBeforeLatest + 1, + namespace.getDescribeTableVersionCount(), + "describe_table_version should have been called once when opening latest version"); + + // Open specific version (version 1) - should call describe_table_version + int describeCountBeforeV1 = namespace.getDescribeTableVersionCount(); + try (Dataset v1Ds = Dataset.open() .allocator(allocator) .namespace(namespace) - .tableId(Arrays.asList(tableName)) + .tableId(tableId) + .readOptions(new 
ReadOptions.Builder().setVersion(1L).build()) .build()) { - assertEquals(2, dsFromNamespace.countRows()); - - // Verify we can read the data - List versions = dsFromNamespace.listVersions(); - assertEquals(1, versions.size(), "Should have 1 version after create"); + assertEquals(2, v1Ds.countRows()); + assertEquals(1, v1Ds.version()); } + assertEquals( + describeCountBeforeV1 + 1, + namespace.getDescribeTableVersionCount(), + "describe_table_version should have been called once when opening version 1"); } } diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index caaf2ee930b..bf0b506e92c 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -1635,7 +1635,7 @@ impl LanceNamespace for DirectoryNamespace { manifest_path: meta.location.to_string(), manifest_size: Some(meta.size as i64), e_tag: meta.e_tag, - timestamp: Some(meta.last_modified.to_rfc3339()), + timestamp_millis: Some(meta.last_modified.timestamp_millis()), metadata: None, }) }) @@ -1767,7 +1767,7 @@ impl LanceNamespace for DirectoryNamespace { manifest_path: final_path.to_string(), manifest_size: Some(manifest_size), e_tag: put_result.e_tag, - timestamp: None, + timestamp_millis: None, metadata: None, })), }) @@ -1810,7 +1810,7 @@ impl LanceNamespace for DirectoryNamespace { manifest_path: manifest_location.path.to_string(), manifest_size: manifest_location.size.map(|s| s as i64), e_tag: manifest_location.e_tag.clone(), - timestamp: Some(version_info.timestamp.to_rfc3339()), + timestamp_millis: Some(version_info.timestamp.timestamp_millis()), metadata: if metadata.is_empty() { None } else { From 4e552c0f120b5e6a70ca67b37aeb312cf1431ab0 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 19 Feb 2026 21:18:49 -0800 Subject: [PATCH 09/23] bump to 0.5.2 and cleanup --- Cargo.lock | 2 +- java/lance-jni/Cargo.lock | 2 +- java/pom.xml | 4 +- .../namespace/DirectoryNamespaceTest.java | 301 ++++++++++++++++++ python/Cargo.lock | 2 +- 
python/pyproject.toml | 2 +- python/python/lance/__init__.py | 2 +- python/python/lance/dataset.py | 9 + python/python/tests/test_namespace_dir.py | 165 ++++++++++ .../tests/test_namespace_integration.py | 235 -------------- python/src/dataset.rs | 44 ++- python/src/namespace.rs | 225 +++++++++++++ python/uv.lock | 14 +- rust/lance-namespace-impls/src/dir.rs | 8 +- 14 files changed, 757 insertions(+), 258 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 49ffe955126..33593b54e05 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5307,7 +5307,7 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.5.1" +version = "0.5.2" dependencies = [ "reqwest", "serde", diff --git a/java/lance-jni/Cargo.lock b/java/lance-jni/Cargo.lock index 6be61e7e78b..d05b58c2095 100644 --- a/java/lance-jni/Cargo.lock +++ b/java/lance-jni/Cargo.lock @@ -3825,7 +3825,7 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.5.1" +version = "0.5.2" dependencies = [ "reqwest", "serde", diff --git a/java/pom.xml b/java/pom.xml index 4fc3fdd78df..000805f0899 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -109,12 +109,12 @@ org.lance lance-namespace-core - 0.5.0 + 0.5.2 org.lance lance-namespace-apache-client - 0.5.0 + 0.5.2 com.fasterxml.jackson.core diff --git a/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java b/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java index 49117a26439..be8fff4c442 100644 --- a/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java +++ b/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java @@ -13,6 +13,9 @@ */ package org.lance.namespace; +import org.lance.Dataset; +import org.lance.ReadOptions; +import org.lance.WriteParams; import org.lance.namespace.model.*; import org.lance.namespace.model.DescribeTableVersionRequest; import org.lance.namespace.model.DescribeTableVersionResponse; @@ -22,6 +25,7 @@ import org.apache.arrow.vector.IntVector; 
import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.ArrowReader; import org.apache.arrow.vector.ipc.ArrowStreamWriter; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; @@ -37,6 +41,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; import static org.junit.jupiter.api.Assertions.*; @@ -381,4 +386,300 @@ void testDescribeTableVersion() throws Exception { trackingNs.close(); } } + + /** + * Inner class that wraps DirectoryNamespace and tracks API calls for testing managed versioning. + */ + static class TableVersionTrackingNamespace implements LanceNamespace, java.io.Closeable { + private final DirectoryNamespace inner; + private final AtomicInteger createTableVersionCount = new AtomicInteger(0); + private final AtomicInteger describeTableVersionCount = new AtomicInteger(0); + + public TableVersionTrackingNamespace(Path root) { + Map dirProps = new HashMap<>(); + dirProps.put("root", root.toString()); + dirProps.put("table_version_tracking_enabled", "true"); + dirProps.put("manifest_enabled", "true"); + + this.inner = new DirectoryNamespace(); + try (BufferAllocator allocator = new RootAllocator()) { + this.inner.initialize(dirProps, allocator); + } + } + + public int getCreateTableVersionCount() { + return createTableVersionCount.get(); + } + + public int getDescribeTableVersionCount() { + return describeTableVersionCount.get(); + } + + public long getNativeHandle() { + return inner.getNativeHandle(); + } + + @Override + public void initialize(Map configProperties, BufferAllocator allocator) { + // Already initialized in constructor + } + + @Override + public String namespaceId() { + return "TableVersionTrackingNamespace { inner: " + inner.namespaceId() + " }"; + } + + @Override + public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request) 
{ + return inner.createEmptyTable(request); + } + + @Override + public DeclareTableResponse declareTable(DeclareTableRequest request) { + return inner.declareTable(request); + } + + @Override + public DescribeTableResponse describeTable(DescribeTableRequest request) { + return inner.describeTable(request); + } + + @Override + public CreateTableVersionResponse createTableVersion(CreateTableVersionRequest request) { + createTableVersionCount.incrementAndGet(); + return inner.createTableVersion(request); + } + + @Override + public DescribeTableVersionResponse describeTableVersion(DescribeTableVersionRequest request) { + describeTableVersionCount.incrementAndGet(); + return inner.describeTableVersion(request); + } + + @Override + public ListTableVersionsResponse listTableVersions(ListTableVersionsRequest request) { + return inner.listTableVersions(request); + } + + @Override + public BatchDeleteTableVersionsResponse batchDeleteTableVersions( + BatchDeleteTableVersionsRequest request) { + return inner.batchDeleteTableVersions(request); + } + + @Override + public void close() { + inner.close(); + } + } + + @Test + void testManagedVersioningWithDirectoryNamespace(@TempDir Path managedVersioningTempDir) + throws Exception { + try (BufferAllocator allocator = new RootAllocator()) { + // Create namespace with table_version_tracking_enabled + TableVersionTrackingNamespace namespace = + new TableVersionTrackingNamespace(managedVersioningTempDir); + String tableName = "test_table"; + java.util.List tableId = Arrays.asList(tableName); + + // Create schema and data + Schema schema = + new Schema( + Arrays.asList( + new Field("a", FieldType.nullable(new ArrowType.Int(32, true)), null), + new Field("b", FieldType.nullable(new ArrowType.Int(32, true)), null))); + + try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + IntVector aVector = (IntVector) root.getVector("a"); + IntVector bVector = (IntVector) root.getVector("b"); + + aVector.allocateNew(2); + 
bVector.allocateNew(2); + + aVector.set(0, 1); + bVector.set(0, 2); + aVector.set(1, 10); + bVector.set(1, 20); + + aVector.setValueCount(2); + bVector.setValueCount(2); + root.setRowCount(2); + + ArrowReader testReader = + new ArrowReader(allocator) { + boolean firstRead = true; + + @Override + public boolean loadNextBatch() { + if (firstRead) { + firstRead = false; + return true; + } + return false; + } + + @Override + public long bytesRead() { + return 0; + } + + @Override + protected void closeReadSource() {} + + @Override + protected Schema readSchema() { + return schema; + } + + @Override + public VectorSchemaRoot getVectorSchemaRoot() { + return root; + } + }; + + // Create dataset through namespace + try (Dataset dataset = + Dataset.write() + .allocator(allocator) + .reader(testReader) + .namespace(namespace) + .tableId(tableId) + .mode(WriteParams.WriteMode.CREATE) + .execute()) { + assertEquals(2, dataset.countRows()); + assertEquals(1, dataset.version()); + } + } + + // Verify describe_table returns managed_versioning=true + DescribeTableRequest descReq = new DescribeTableRequest(); + descReq.setId(tableId); + DescribeTableResponse descResp = namespace.describeTable(descReq); + + assertEquals( + Boolean.TRUE, + descResp.getManagedVersioning(), + "Expected managedVersioning=true when table_version_tracking_enabled"); + + // Open dataset through namespace - this should call describe_table_version for latest + int initialDescribeCount = namespace.getDescribeTableVersionCount(); + try (Dataset dsFromNamespace = + Dataset.open().allocator(allocator).namespace(namespace).tableId(tableId).build()) { + + assertEquals(2, dsFromNamespace.countRows()); + assertEquals(1, dsFromNamespace.version()); + } + assertEquals( + initialDescribeCount + 1, + namespace.getDescribeTableVersionCount(), + "describe_table_version should have been called once when opening latest version"); + + // Append data - this should call create_table_version exactly once + assertEquals( + 0, + 
namespace.getCreateTableVersionCount(), + "create_table_version should not have been called yet"); + + try (VectorSchemaRoot appendRoot = VectorSchemaRoot.create(schema, allocator)) { + IntVector aVector = (IntVector) appendRoot.getVector("a"); + IntVector bVector = (IntVector) appendRoot.getVector("b"); + + aVector.allocateNew(2); + bVector.allocateNew(2); + + aVector.set(0, 100); + bVector.set(0, 200); + aVector.set(1, 1000); + bVector.set(1, 2000); + + aVector.setValueCount(2); + bVector.setValueCount(2); + appendRoot.setRowCount(2); + + ArrowReader appendReader = + new ArrowReader(allocator) { + boolean firstRead = true; + + @Override + public boolean loadNextBatch() { + if (firstRead) { + firstRead = false; + return true; + } + return false; + } + + @Override + public long bytesRead() { + return 0; + } + + @Override + protected void closeReadSource() {} + + @Override + protected Schema readSchema() { + return schema; + } + + @Override + public VectorSchemaRoot getVectorSchemaRoot() { + return appendRoot; + } + }; + + // Append through namespace + try (Dataset dataset = + Dataset.write() + .allocator(allocator) + .reader(appendReader) + .namespace(namespace) + .tableId(tableId) + .mode(WriteParams.WriteMode.APPEND) + .execute()) { + assertEquals(4, dataset.countRows()); + assertEquals(2, dataset.version()); + } + } + + assertEquals( + 1, + namespace.getCreateTableVersionCount(), + "create_table_version should have been called exactly once during append"); + + // Open latest version - should call describe_table_version + int describeCountBeforeLatest = namespace.getDescribeTableVersionCount(); + try (Dataset latestDs = + Dataset.open().allocator(allocator).namespace(namespace).tableId(tableId).build()) { + + assertEquals(4, latestDs.countRows()); + assertEquals(2, latestDs.version()); + } + assertEquals( + describeCountBeforeLatest + 1, + namespace.getDescribeTableVersionCount(), + "describe_table_version should have been called once when opening latest 
version"); + + // Open specific version (version 1) - should call describe_table_version + int describeCountBeforeV1 = namespace.getDescribeTableVersionCount(); + try (Dataset v1Ds = + Dataset.open() + .allocator(allocator) + .namespace(namespace) + .tableId(tableId) + .readOptions(new ReadOptions.Builder().setVersion(1L).build()) + .build()) { + + assertEquals(2, v1Ds.countRows()); + assertEquals(1, v1Ds.version()); + } + assertEquals( + describeCountBeforeV1 + 1, + namespace.getDescribeTableVersionCount(), + "describe_table_version should have been called once when opening version 1"); + + namespace.close(); + } + } } diff --git a/python/Cargo.lock b/python/Cargo.lock index 5bd6b130144..4bc40ad5b34 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -4348,7 +4348,7 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.5.0" +version = "0.5.2" dependencies = [ "reqwest", "serde", diff --git a/python/pyproject.toml b/python/pyproject.toml index 7cd0cdb500d..a533726fe1b 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "pylance" dynamic = ["version"] -dependencies = ["pyarrow>=14", "numpy>=1.22", "lance-namespace>=0.5.0"] +dependencies = ["pyarrow>=14", "numpy>=1.22", "lance-namespace>=0.5.2"] description = "python wrapper for Lance columnar format" authors = [{ name = "Lance Devs", email = "dev@lance.org" }] license = { file = "LICENSE" } diff --git a/python/python/lance/__init__.py b/python/python/lance/__init__.py index d3e008019d1..453400f8cf2 100644 --- a/python/python/lance/__init__.py +++ b/python/python/lance/__init__.py @@ -243,7 +243,7 @@ def dataset( read_params=read_params, session=session, storage_options_provider=storage_options_provider, - namespace=namespace._inner if managed_versioning else None, + namespace=namespace if managed_versioning else None, table_id=table_id if managed_versioning else None, ) if version is None and asof is not None: diff --git 
a/python/python/lance/dataset.py b/python/python/lance/dataset.py index a8978d11077..5da8bae9e0a 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -5817,6 +5817,9 @@ def write_dataset( f"Namespace did not return a table location in {mode} response" ) + # Check if namespace manages versioning (commits go through namespace API) + managed_versioning = getattr(response, "managed_versioning", None) is True + # Use namespace storage options namespace_storage_options = response.storage_options @@ -5841,6 +5844,7 @@ def write_dataset( raise ValueError("Both 'namespace' and 'table_id' must be provided together.") else: storage_options_provider = None + managed_versioning = False if use_legacy_format is not None: warnings.warn( @@ -5881,6 +5885,11 @@ def write_dataset( if storage_options_provider is not None: params["storage_options_provider"] = storage_options_provider + # Add namespace and table_id for managed versioning (external manifest store) + if managed_versioning and namespace is not None and table_id is not None: + params["namespace"] = namespace + params["table_id"] = table_id + if commit_lock: if not callable(commit_lock): raise TypeError(f"commit_lock must be a function, got {type(commit_lock)}") diff --git a/python/python/tests/test_namespace_dir.py b/python/python/tests/test_namespace_dir.py index 74871facf6d..9630006123d 100644 --- a/python/python/tests/test_namespace_dir.py +++ b/python/python/tests/test_namespace_dir.py @@ -12,6 +12,7 @@ import tempfile import uuid +from threading import Lock import lance import lance.namespace @@ -21,13 +22,19 @@ CreateEmptyTableRequest, CreateNamespaceRequest, CreateTableRequest, + CreateTableVersionRequest, + CreateTableVersionResponse, DeregisterTableRequest, DescribeNamespaceRequest, DescribeTableRequest, + DescribeTableVersionRequest, + DescribeTableVersionResponse, DropNamespaceRequest, DropTableRequest, ListNamespacesRequest, ListTablesRequest, + ListTableVersionsRequest, + 
ListTableVersionsResponse, NamespaceExistsRequest, RegisterTableRequest, TableExistsRequest, @@ -720,3 +727,161 @@ def test_connect_with_storage_options(self): # This should work without errors ns = connect("dir", properties) assert isinstance(ns, lance.namespace.DirectoryNamespace) + + +class TableVersionTrackingNamespace(lance.namespace.DirectoryNamespace): + """Namespace wrapper that tracks table version API calls. + + Similar to the Rust TrackingNamespace and Java TableVersionTrackingNamespace, + this extends DirectoryNamespace with table_version_tracking_enabled=true and + counts create_table_version and describe_table_version calls. + + This class implements the JSON bridge methods that PyLanceNamespace calls, + allowing API call tracking to work even when the calls go through Rust. + + Unlike a wrapper approach, this extends DirectoryNamespace directly so that + Rust can detect it as a DirectoryNamespace subclass and use the native handle. + """ + + def __init__(self, root: str): + dir_props = { + "root": root, + "table_version_tracking_enabled": "true", + "manifest_enabled": "true", + } + super().__init__(**dir_props) + self.create_table_version_count = 0 + self.describe_table_version_count = 0 + self.list_table_versions_count = 0 + self._lock = Lock() + + def namespace_id(self) -> str: + return f"TableVersionTrackingNamespace {{ inner: {super().namespace_id()} }}" + + def create_table_version( + self, request: CreateTableVersionRequest + ) -> CreateTableVersionResponse: + with self._lock: + self.create_table_version_count += 1 + return super().create_table_version(request) + + def describe_table_version( + self, request: DescribeTableVersionRequest + ) -> DescribeTableVersionResponse: + with self._lock: + self.describe_table_version_count += 1 + return super().describe_table_version(request) + + def list_table_versions( + self, request: ListTableVersionsRequest + ) -> ListTableVersionsResponse: + with self._lock: + self.list_table_versions_count += 1 + 
return super().list_table_versions(request) + + # JSON bridge methods for Rust PyLanceNamespace callbacks + # These call the parent's _inner (PyDirectoryNamespace) directly with dict API + def describe_table_version_json(self, request_json: str) -> str: + """JSON bridge that increments counter before delegating.""" + import json + + with self._lock: + self.describe_table_version_count += 1 + request_dict = json.loads(request_json) + response_dict = self._inner.describe_table_version(request_dict) + return json.dumps(response_dict) + + def create_table_version_json(self, request_json: str) -> str: + """JSON bridge that increments counter before delegating.""" + import json + + with self._lock: + self.create_table_version_count += 1 + request_dict = json.loads(request_json) + response_dict = self._inner.create_table_version(request_dict) + return json.dumps(response_dict) + + def list_table_versions_json(self, request_json: str) -> str: + """JSON bridge that increments counter before delegating.""" + import json + + with self._lock: + self.list_table_versions_count += 1 + request_dict = json.loads(request_json) + response_dict = self._inner.list_table_versions(request_dict) + return json.dumps(response_dict) + + +def test_e2e_external_manifest_store_invokes_namespace_apis(): + """Test that namespace APIs are invoked correctly for managed versioning. + + This test mirrors: + - Rust: test_e2e_external_manifest_store_invokes_namespace_apis + - Java: testManagedVersioningWithDirectoryNamespace + + It verifies: + 1. list_table_versions is called when opening dataset (latest version) + 2. create_table_version is called exactly once during append + 3. 
describe_table_version is called when opening specific version + """ + with tempfile.TemporaryDirectory() as tmpdir: + namespace = TableVersionTrackingNamespace(root=f"file://{tmpdir}") + table_name = "test_table" + table_id = [table_name] + + # Create initial table + table1 = pa.Table.from_pylist([{"a": 1, "b": 2}, {"a": 10, "b": 20}]) + ds = lance.write_dataset( + table1, namespace=namespace, table_id=table_id, mode="create" + ) + assert ds.count_rows() == 2 + assert len(ds.versions()) == 1 + + # Verify describe_table returns managed_versioning=True + describe_resp = namespace.describe_table(DescribeTableRequest(id=table_id)) + assert describe_resp.managed_versioning is True, ( + f"Expected managed_versioning=True, got {describe_resp.managed_versioning}" + ) + + # Open dataset through namespace - should call list_table_versions for latest + initial_list_count = namespace.list_table_versions_count + ds_from_namespace = lance.dataset(namespace=namespace, table_id=table_id) + assert ds_from_namespace.count_rows() == 2 + assert ds_from_namespace.version == 1 + assert namespace.list_table_versions_count == initial_list_count + 1, ( + "list_table_versions should be called once when opening latest version" + ) + + # Append data - should call create_table_version exactly once + assert namespace.create_table_version_count == 0, ( + "create_table_version should not have been called yet" + ) + + table2 = pa.Table.from_pylist([{"a": 100, "b": 200}, {"a": 1000, "b": 2000}]) + ds = lance.write_dataset( + table2, namespace=namespace, table_id=table_id, mode="append" + ) + assert ds.count_rows() == 4 + assert len(ds.versions()) == 2 + + assert namespace.create_table_version_count == 1, ( + "create_table_version should be called exactly once during append" + ) + + # Open latest version - should call list_table_versions + list_count_before_latest = namespace.list_table_versions_count + latest_ds = lance.dataset(namespace=namespace, table_id=table_id) + assert 
latest_ds.count_rows() == 4 + assert latest_ds.version == 2 + assert namespace.list_table_versions_count == list_count_before_latest + 1, ( + "list_table_versions should be called once when opening latest version" + ) + + # Open specific version (v1) - should call describe_table_version + describe_count_before_v1 = namespace.describe_table_version_count + v1_ds = lance.dataset(namespace=namespace, table_id=table_id, version=1) + assert v1_ds.count_rows() == 2 + assert v1_ds.version == 1 + assert namespace.describe_table_version_count == describe_count_before_v1 + 1, ( + "describe_table_version should be called once when opening version 1" + ) diff --git a/python/python/tests/test_namespace_integration.py b/python/python/tests/test_namespace_integration.py index 1586fcd8273..30489496e38 100644 --- a/python/python/tests/test_namespace_integration.py +++ b/python/python/tests/test_namespace_integration.py @@ -838,238 +838,3 @@ def test_file_session_with_storage_options_provider(s3_bucket: str): final_describe_count = namespace.get_describe_call_count() assert final_describe_count == describe_count_after_second_write - - -class TableVersionTrackingNamespace(LanceNamespace): - """Namespace wrapper that tracks table version API calls.""" - - def __init__(self, root: str, storage_options: Dict[str, str] = None): - from lance.namespace import DirectoryNamespace - - self.create_table_version_count = 0 - self.describe_table_version_count = 0 - self.list_table_versions_count = 0 - self.lock = Lock() - - dir_props = { - "root": root, - "table_version_tracking_enabled": "true", - } - if storage_options: - for k, v in storage_options.items(): - dir_props[f"storage.{k}"] = v - - self.inner = DirectoryNamespace(**dir_props) - - def namespace_id(self) -> str: - return f"TableVersionTrackingNamespace {{ inner: {self.inner.namespace_id()} }}" - - def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse: - return self.inner.describe_table(request) - - def 
declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: - return self.inner.declare_table(request) - - def create_table_version(self, request: dict) -> dict: - with self.lock: - self.create_table_version_count += 1 - return self.inner.create_table_version(request) - - def describe_table_version(self, request: dict) -> dict: - with self.lock: - self.describe_table_version_count += 1 - return self.inner.describe_table_version(request) - - def list_table_versions(self, request): - with self.lock: - self.list_table_versions_count += 1 - return self.inner.list_table_versions(request) - - -def test_e2e_describe_table_returns_managed_versioning(): - """Test that describe_table returns managed_versioning=True.""" - import tempfile - - from lance.namespace import CreateTableRequest, DirectoryNamespace - from lance_namespace import CreateNamespaceRequest - - with tempfile.TemporaryDirectory() as tmpdir: - ns = DirectoryNamespace( - root=f"file://{tmpdir}", - table_version_tracking_enabled="true", - manifest_enabled="true", - ) - - # Create parent namespace - create_ns_req = CreateNamespaceRequest(id=["workspace"]) - ns.create_namespace(create_ns_req) - - table1 = pa.Table.from_pylist([{"a": 1, "b": 2}]) - - import io - - sink = io.BytesIO() - with pa.ipc.RecordBatchStreamWriter(sink, table1.schema) as writer: - writer.write_table(table1) - ipc_data = sink.getvalue() - - # Use multi-level table ID (namespace + table) - create_req = CreateTableRequest(id=["workspace", "test_table"]) - ns.create_table(create_req, ipc_data) - - describe_req = DescribeTableRequest(id=["workspace", "test_table"]) - response = ns.describe_table(describe_req) - - assert response.location is not None - assert response.managed_versioning is True, ( - f"Expected managed_versioning=True, got {response.managed_versioning}" - ) - - -def test_e2e_table_version_apis(): - """Test that table version APIs work correctly.""" - import tempfile - - from lance.namespace import 
CreateTableRequest, DirectoryNamespace - from lance_namespace import CreateNamespaceRequest - - with tempfile.TemporaryDirectory() as tmpdir: - ns = DirectoryNamespace( - root=f"file://{tmpdir}", - table_version_tracking_enabled="true", - manifest_enabled="true", - ) - - # Create parent namespace - create_ns_req = CreateNamespaceRequest(id=["workspace"]) - ns.create_namespace(create_ns_req) - - table1 = pa.Table.from_pylist([{"a": 1, "b": 2}]) - - import io - - sink = io.BytesIO() - with pa.ipc.RecordBatchStreamWriter(sink, table1.schema) as writer: - writer.write_table(table1) - ipc_data = sink.getvalue() - - # Use multi-level table ID (namespace + table) - create_req = CreateTableRequest(id=["workspace", "test_table"]) - ns.create_table(create_req, ipc_data) - - # describe_table_version reads directly from the dataset - describe_req = {"id": ["workspace", "test_table"], "version": 1} - describe_response = ns.describe_table_version(describe_req) - - assert "version" in describe_response - assert describe_response["version"]["version"] == 1 - assert describe_response["version"]["manifest_path"] is not None - - # Get latest version (version=None) - describe_latest_req = {"id": ["workspace", "test_table"], "version": None} - describe_latest_response = ns.describe_table_version(describe_latest_req) - - assert "version" in describe_latest_response - assert describe_latest_response["version"]["version"] == 1 - - -@pytest.mark.integration -def test_managed_versioning_with_commit_handler(s3_bucket: str): - """Test that managed_versioning enables namespace commit handler for writes.""" - from lance.namespace import DirectoryNamespace - - storage_options = copy.deepcopy(CONFIG) - - # Create namespace with table_version_tracking_enabled - dir_props = {f"storage.{k}": v for k, v in storage_options.items()} - dir_props["root"] = f"s3://{s3_bucket}/managed_versioning_test" - dir_props["table_version_tracking_enabled"] = "true" - dir_props["manifest_enabled"] = "true" - - 
namespace = DirectoryNamespace(**dir_props) - table_name = uuid.uuid4().hex - table_id = ["test_ns", table_name] - - # Create table - table1 = pa.Table.from_pylist([{"a": 1, "b": 2}]) - ds = lance.write_dataset( - table1, namespace=namespace, table_id=table_id, mode="create" - ) - assert ds.count_rows() == 1 - assert len(ds.versions()) == 1 - - # Verify managed_versioning=true is returned - from lance.namespace import DescribeTableRequest - - describe_req = DescribeTableRequest(id=table_id) - describe_resp = namespace.describe_table(describe_req) - assert describe_resp.managed_versioning is True, ( - f"Expected managed_versioning=True, got {describe_resp.managed_versioning}" - ) - - # Open dataset through namespace - this should set up commit handler - ds_from_namespace = lance.dataset( - namespace=namespace, - table_id=table_id, - ) - assert ds_from_namespace.count_rows() == 1 - - # Append data - this should go through the namespace commit handler - table2 = pa.Table.from_pylist([{"a": 10, "b": 20}]) - ds = lance.write_dataset( - table2, namespace=namespace, table_id=table_id, mode="append" - ) - assert ds.count_rows() == 2 - assert len(ds.versions()) == 2 - - # Verify the data through namespace - ds_final = lance.dataset( - namespace=namespace, - table_id=table_id, - ) - assert ds_final.count_rows() == 2 - - -@pytest.mark.integration -def test_e2e_table_version_tracking_with_s3(s3_bucket: str): - """Test end-to-end table version tracking with S3 storage.""" - import lance - import pyarrow as pa - from lance.namespace import DirectoryNamespace - - storage_options = copy.deepcopy(CONFIG) - - # Create namespace with table_version_tracking_enabled and manifest_enabled - dir_props = {f"storage.{k}": v for k, v in storage_options.items()} - dir_props["root"] = f"s3://{s3_bucket}/version_tracking_test" - dir_props["table_version_tracking_enabled"] = "true" - dir_props["manifest_enabled"] = "true" - - namespace = DirectoryNamespace(**dir_props) - - table_name = 
uuid.uuid4().hex - table_id = ["test_ns", table_name] - - # Create initial dataset using write_dataset (internally calls declare_table) - data = pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]}) - ds = lance.write_dataset( - data, namespace=namespace, table_id=table_id, mode="create" - ) - assert ds.count_rows() == 3 - - # Check managed_versioning via describe_table - describe_response = namespace.describe_table( - DescribeTableRequest(id=table_id, version=None) - ) - assert describe_response.location is not None - assert describe_response.managed_versioning is True - - from lance_namespace import ListTableVersionsRequest - - list_response = namespace.list_table_versions(ListTableVersionsRequest(id=table_id)) - assert len(list_response.versions) >= 1 - - describe_version_response = namespace.describe_table_version( - {"id": table_id, "version": None} - ) - assert "version" in describe_version_response diff --git a/python/src/dataset.rs b/python/src/dataset.rs index e5a60f9d9b3..20ee0e5e707 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -90,7 +90,7 @@ use crate::error::PythonErrorExt; use crate::file::object_store_from_uri_or_path; use crate::fragment::FileFragment; use crate::indices::{PyIndexConfig, PyIndexDescription}; -use crate::namespace::{PyDirectoryNamespace, PyRestNamespace}; +use crate::namespace::{PyDirectoryNamespace, PyLanceNamespace, PyRestNamespace}; use crate::rt; use crate::scanner::ScanStatistics; use crate::schema::{logical_schema_from_lance, LanceSchema}; @@ -601,16 +601,19 @@ impl Dataset { // Set up namespace commit handler if namespace and table_id are provided if let (Some(ns), Some(tid)) = (namespace, table_id) { - // Extract the inner namespace Arc from either PyDirectoryNamespace or PyRestNamespace + // Extract the inner namespace Arc from PyDirectoryNamespace, PyRestNamespace, + // or create a PyLanceNamespace wrapper for custom Python implementations let ns_arc: Arc = if let Ok(dir_ns) = ns.downcast::() { + // 
Native DirectoryNamespace - use inner directly (bypass Python layer) dir_ns.borrow().inner.clone() } else if let Ok(rest_ns) = ns.downcast::() { + // Native RestNamespace - use inner directly (bypass Python layer) rest_ns.borrow().inner.clone() } else { - return Err(PyValueError::new_err( - "namespace must be either PyDirectoryNamespace or PyRestNamespace", - )); + // Custom Python implementation - wrap with PyLanceNamespace + // This calls back into Python for namespace methods + PyLanceNamespace::create_arc(py, ns)? }; let external_store = LanceNamespaceExternalManifestStore::new(ns_arc, tid); @@ -3159,6 +3162,37 @@ pub fn get_write_params(options: &Bound<'_, PyDict>) -> PyResult>(options, "namespace")?; + let table_id_opt = get_dict_opt::>(options, "table_id")?; + + if let (Some(ns), Some(table_id)) = (namespace_opt, table_id_opt) { + let py = options.py(); + // Extract the inner namespace Arc from PyDirectoryNamespace, PyRestNamespace, + // or create a PyLanceNamespace wrapper for custom Python implementations + let ns_arc: Arc = + if let Ok(dir_ns) = ns.downcast::() { + // Native DirectoryNamespace - use inner directly (bypass Python layer) + dir_ns.borrow().inner.clone() + } else if let Ok(rest_ns) = ns.downcast::() { + // Native RestNamespace - use inner directly (bypass Python layer) + rest_ns.borrow().inner.clone() + } else { + // Custom Python implementation - wrap with PyLanceNamespace + PyLanceNamespace::create_arc(py, &ns)? 
+ }; + + let external_store = LanceNamespaceExternalManifestStore::new(ns_arc, table_id); + let commit_handler: Arc = + Arc::new(ExternalManifestCommitHandler { + external_manifest_store: Arc::new(external_store), + }); + p.commit_handler = Some(commit_handler); + } + } + Some(p) }; Ok(params) diff --git a/python/src/namespace.rs b/python/src/namespace.rs index fe87b149d3a..c079c6bd918 100644 --- a/python/src/namespace.rs +++ b/python/src/namespace.rs @@ -6,7 +6,13 @@ use std::collections::HashMap; use std::sync::Arc; +use async_trait::async_trait; use bytes::Bytes; +use lance_namespace::models::{ + CreateTableVersionRequest, CreateTableVersionResponse, DescribeTableVersionRequest, + DescribeTableVersionResponse, ListTableVersionsRequest, ListTableVersionsResponse, +}; +use lance_namespace::LanceNamespace as LanceNamespaceTrait; use lance_namespace_impls::RestNamespaceBuilder; use lance_namespace_impls::{ConnectBuilder, RestAdapter, RestAdapterConfig, RestAdapterHandle}; use lance_namespace_impls::{DirectoryNamespaceBuilder, DynamicContextProvider, OperationInfo}; @@ -662,6 +668,225 @@ impl PyRestNamespace { } } +/// Wrapper that allows any Python object implementing LanceNamespace protocol +/// to be used as a Rust LanceNamespace. +/// +/// This is similar to JavaLanceNamespace in the Java bindings - it wraps a Python +/// object and calls back into Python when namespace methods are invoked. +/// +/// We use `Arc>` instead of `Py` directly because cloning `Py` +/// requires the GIL, but cloning `Arc` does not. This allows us to pass the +/// namespace reference to `spawn_blocking` without holding the GIL. +pub struct PyLanceNamespace { + py_namespace: Arc>, + namespace_id: String, +} + +impl PyLanceNamespace { + /// Create a new PyLanceNamespace wrapper around a Python namespace object. 
+ pub fn new(_py: Python<'_>, py_namespace: &Bound<'_, PyAny>) -> PyResult { + // Get the namespace_id by calling the Python method + let namespace_id = py_namespace + .call_method0("namespace_id")? + .extract::()?; + + Ok(Self { + py_namespace: Arc::new(py_namespace.clone().unbind()), + namespace_id, + }) + } + + /// Create an Arc from a Python namespace object. + pub fn create_arc( + py: Python<'_>, + py_namespace: &Bound<'_, PyAny>, + ) -> PyResult> { + let wrapper = Self::new(py, py_namespace)?; + Ok(Arc::new(wrapper)) + } +} + +impl std::fmt::Debug for PyLanceNamespace { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "PyLanceNamespace {{ id: {} }}", self.namespace_id) + } +} + +#[async_trait] +impl LanceNamespaceTrait for PyLanceNamespace { + fn namespace_id(&self) -> String { + self.namespace_id.clone() + } + + async fn describe_table_version( + &self, + request: DescribeTableVersionRequest, + ) -> lance_core::Result { + // Clone the Arc (doesn't need GIL) to pass to spawn_blocking + let py_namespace = self.py_namespace.clone(); + let request_json = serde_json::to_string(&request).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to serialize request: {}", + e + ))), + location: snafu::location!(), + })?; + + let response_json = tokio::task::spawn_blocking(move || { + Python::attach(|py| { + let result = + py_namespace.call_method1(py, "describe_table_version_json", (request_json,)); + + match result { + Ok(response_py) => { + let response_str: String = + response_py.extract(py).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to extract response string: {}", + e + ))), + location: snafu::location!(), + })?; + Ok(response_str) + } + Err(e) => Err(lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to call describe_table_version_json: {}", + e + ))), + location: snafu::location!(), + }), + } + }) + }) 
+ .await + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!("Task join error: {}", e))), + location: snafu::location!(), + })??; + + serde_json::from_str(&response_json).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to deserialize response: {}", + e + ))), + location: snafu::location!(), + }) + } + + async fn create_table_version( + &self, + request: CreateTableVersionRequest, + ) -> lance_core::Result { + // Clone the Arc (doesn't need GIL) to pass to spawn_blocking + let py_namespace = self.py_namespace.clone(); + let request_json = serde_json::to_string(&request).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to serialize request: {}", + e + ))), + location: snafu::location!(), + })?; + + let response_json = tokio::task::spawn_blocking(move || { + Python::attach(|py| { + let result = + py_namespace.call_method1(py, "create_table_version_json", (request_json,)); + + match result { + Ok(response_py) => { + let response_str: String = + response_py.extract(py).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to extract response string: {}", + e + ))), + location: snafu::location!(), + })?; + Ok(response_str) + } + Err(e) => Err(lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to call create_table_version_json: {}", + e + ))), + location: snafu::location!(), + }), + } + }) + }) + .await + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!("Task join error: {}", e))), + location: snafu::location!(), + })??; + + serde_json::from_str(&response_json).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to deserialize response: {}", + e + ))), + location: snafu::location!(), + }) + } + + async fn list_table_versions( + &self, + request: ListTableVersionsRequest, + ) -> 
lance_core::Result { + // Clone the Arc (doesn't need GIL) to pass to spawn_blocking + let py_namespace = self.py_namespace.clone(); + let request_json = serde_json::to_string(&request).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to serialize request: {}", + e + ))), + location: snafu::location!(), + })?; + + let response_json = tokio::task::spawn_blocking(move || { + Python::attach(|py| { + let result = + py_namespace.call_method1(py, "list_table_versions_json", (request_json,)); + + match result { + Ok(response_py) => { + let response_str: String = + response_py.extract(py).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to extract response string: {}", + e + ))), + location: snafu::location!(), + })?; + Ok(response_str) + } + Err(e) => Err(lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to call list_table_versions_json: {}", + e + ))), + location: snafu::location!(), + }), + } + }) + }) + .await + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!("Task join error: {}", e))), + location: snafu::location!(), + })??; + + serde_json::from_str(&response_json).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to deserialize response: {}", + e + ))), + location: snafu::location!(), + }) + } +} + /// Python wrapper for REST adapter server #[pyclass(name = "PyRestAdapter", module = "lance.lance")] pub struct PyRestAdapter { diff --git a/python/uv.lock b/python/uv.lock index 8f65fa0f636..a6a5bafa918 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -990,19 +990,19 @@ wheels = [ [[package]] name = "lance-namespace" -version = "0.4.5" +version = "0.5.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "lance-namespace-urllib3-client" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/b4/b5/0c3c55cf336b1e90392c2e24ac833551659e8bb3c61644b2d94825eb31bd/lance_namespace-0.4.5.tar.gz", hash = "sha256:0aee0abed3a1fa762c2955c7d12bb3004cea5c82ba28f6fcb9fe79d0cc19e317", size = 9827, upload-time = "2026-01-07T19:20:23.005Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2b/c6/aec0d7752e15536564b50cf9a8926f0e5d7780aa3ab8ce8bca46daa55659/lance_namespace-0.5.2.tar.gz", hash = "sha256:566cc33091b5631793ab411f095d46c66391db0a62343cd6b4470265bb04d577", size = 10274, upload-time = "2026-02-20T03:14:31.777Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/34/88/173687dad72baf819223e3b506898e386bc88c26ff8da5e8013291e02daf/lance_namespace-0.4.5-py3-none-any.whl", hash = "sha256:cd1a4f789de03ba23a0c16f100b1464cca572a5d04e428917a54d09db912d548", size = 11703, upload-time = "2026-01-07T19:20:25.394Z" }, + { url = "https://files.pythonhosted.org/packages/d6/3d/737c008d8fb2861e7ce260e2ffab0d5058eae41556181f80f1a1c3b52ef5/lance_namespace-0.5.2-py3-none-any.whl", hash = "sha256:6ccaf5649bf6ee6aa92eed9c535a114b7b4eb08e89f40426f58bc1466cbcffa3", size = 12087, upload-time = "2026-02-20T03:14:35.261Z" }, ] [[package]] name = "lance-namespace-urllib3-client" -version = "0.4.5" +version = "0.5.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, @@ -1011,9 +1011,9 @@ dependencies = [ { name = "urllib3", version = "1.26.20", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "urllib3", version = "2.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/97/a9/4e527c2f05704565618b239b0965f829d1a194837f01234af3f8e2f33d92/lance_namespace_urllib3_client-0.4.5.tar.gz", hash = "sha256:184deda8cf8700926d994618187053c644eb1f2866a4479e7b80843cacc92b1c", size = 159726, upload-time = "2026-01-07T19:20:24.025Z" } +sdist = { url 
= "https://files.pythonhosted.org/packages/e9/64/51622c93ec8c164483c83b68764e5e76e52286c0137a8247bc6a7fac25f4/lance_namespace_urllib3_client-0.5.2.tar.gz", hash = "sha256:8a3a238006e6eabc01fc9d385ac3de22ba933aef0ae8987558f3c3199c9b3799", size = 172578, upload-time = "2026-02-20T03:14:33.031Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ca/86/0adee7190408a28dcc5a0562c674537457e3de59ee51d1c724ecdc4a9930/lance_namespace_urllib3_client-0.4.5-py3-none-any.whl", hash = "sha256:2ee154d616ba4721f0bfdf043d33c4fef2e79d380653e2f263058ab00fb4adf4", size = 277969, upload-time = "2026-01-07T19:20:26.597Z" }, + { url = "https://files.pythonhosted.org/packages/2a/10/f86d994498b37f7f35d0b8c2f7626a16fe4cb1949b518c1e5d5052ecf95f/lance_namespace_urllib3_client-0.5.2-py3-none-any.whl", hash = "sha256:83cefb6fd6e5df0b99b5e866ee3d46300d375b75e8af32c27bc16fbf7c1a5978", size = 300351, upload-time = "2026-02-20T03:14:34.236Z" }, ] [[package]] @@ -2459,7 +2459,7 @@ requires-dist = [ { name = "duckdb", marker = "extra == 'tests'" }, { name = "geoarrow-rust-core", marker = "extra == 'geo'" }, { name = "geoarrow-rust-io", marker = "extra == 'geo'" }, - { name = "lance-namespace", specifier = ">=0.4.5" }, + { name = "lance-namespace", specifier = ">=0.5.2" }, { name = "ml-dtypes", marker = "extra == 'tests'" }, { name = "numpy", specifier = ">=1.22" }, { name = "pandas", marker = "extra == 'tests'" }, diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index bf0b506e92c..db0043f27c3 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -4082,8 +4082,8 @@ mod tests { expected_version ); assert!( - version.timestamp.is_some(), - "timestamp should be set for version {}", + version.timestamp_millis.is_some(), + "timestamp_millis should be set for version {}", expected_version ); } @@ -4152,7 +4152,7 @@ mod tests { let version = &describe_resp.version; assert_eq!(version.version, 1); - 
assert!(version.timestamp.is_some()); + assert!(version.timestamp_millis.is_some()); assert!( !version.manifest_path.is_empty(), "manifest_path should be set" @@ -4181,7 +4181,7 @@ mod tests { let version = &describe_resp.version; assert_eq!(version.version, 2); - assert!(version.timestamp.is_some()); + assert!(version.timestamp_millis.is_some()); assert!( !version.manifest_path.is_empty(), "manifest_path should be set" From f0e39a9ea273fc17810fcdf9c11a6b98805b2555 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 19 Feb 2026 22:21:31 -0800 Subject: [PATCH 10/23] fix jni --- java/lance-jni/src/blocking_dataset.rs | 56 +- java/lance-jni/src/namespace.rs | 1344 ++++++++++++++++- java/src/main/java/org/lance/Dataset.java | 74 +- .../java/org/lance/WriteDatasetBuilder.java | 35 +- .../org/lance/NamespaceIntegrationTest.java | 322 ---- .../namespace/DirectoryNamespaceTest.java | 37 +- 6 files changed, 1441 insertions(+), 427 deletions(-) diff --git a/java/lance-jni/src/blocking_dataset.rs b/java/lance-jni/src/blocking_dataset.rs index b0211b5d0ad..3d6c1ab8e64 100644 --- a/java/lance-jni/src/blocking_dataset.rs +++ b/java/lance-jni/src/blocking_dataset.rs @@ -436,6 +436,7 @@ fn inner_create_with_ffi_schema<'local>( initial_bases, target_bases, reader, + None, // No namespace for schema-only creation ) } @@ -499,17 +500,20 @@ pub extern "system" fn Java_org_lance_Dataset_createWithFfiStream<'local>( max_bytes_per_file, mode, enable_stable_row_ids, - enable_v2_manifest_paths, data_storage_version, + enable_v2_manifest_paths, storage_options_obj, JObject::null(), initial_bases, - target_bases + target_bases, + JObject::null(), // No namespace + JObject::null(), // No table_id ) ) } #[no_mangle] +#[allow(clippy::too_many_arguments)] pub extern "system" fn Java_org_lance_Dataset_createWithFfiStreamAndProvider<'local>( mut env: JNIEnv<'local>, _obj: JObject, @@ -526,6 +530,8 @@ pub extern "system" fn Java_org_lance_Dataset_createWithFfiStreamAndProvider<'lo 
storage_options_provider_obj: JObject, // Optional initial_bases: JObject, // Optional> target_bases: JObject, // Optional> + namespace_obj: JObject, // LanceNamespace (can be null) + table_id_obj: JObject, // List (can be null) ) -> JObject<'local> { ok_or_throw!( env, @@ -543,7 +549,9 @@ pub extern "system" fn Java_org_lance_Dataset_createWithFfiStreamAndProvider<'lo storage_options_obj, storage_options_provider_obj, initial_bases, - target_bases + target_bases, + namespace_obj, + table_id_obj, ) ) } @@ -564,9 +572,38 @@ fn inner_create_with_ffi_stream<'local>( storage_options_provider_obj: JObject, // Optional initial_bases: JObject, // Optional> target_bases: JObject, // Optional> + namespace_obj: JObject, // LanceNamespace (can be null) + table_id_obj: JObject, // List (can be null) ) -> Result> { + use crate::namespace::{ + create_java_lance_namespace, BlockingDirectoryNamespace, BlockingRestNamespace, + }; + let stream_ptr = arrow_array_stream_addr as *mut FFI_ArrowArrayStream; let reader = unsafe { ArrowArrayStreamReader::from_raw(stream_ptr) }?; + + // Create the namespace wrapper for commit handling (if provided) + let namespace_info = if namespace_obj.is_null() { + None + } else { + let namespace: Arc = if is_directory_namespace(env, &namespace_obj)? { + let native_handle = get_native_namespace_handle(env, &namespace_obj)?; + let ns = unsafe { &*(native_handle as *const BlockingDirectoryNamespace) }; + ns.inner.clone() + } else if is_rest_namespace(env, &namespace_obj)? { + let native_handle = get_native_namespace_handle(env, &namespace_obj)?; + let ns = unsafe { &*(native_handle as *const BlockingRestNamespace) }; + ns.inner.clone() + } else { + // Custom Java implementation, create a Java bridge wrapper + create_java_lance_namespace(env, &namespace_obj)? 
+ }; + + // Extract table_id from Java List + let table_id = env.get_strings(&table_id_obj)?; + Some((namespace, table_id)) + }; + create_dataset( env, path, @@ -582,6 +619,7 @@ fn inner_create_with_ffi_stream<'local>( initial_bases, target_bases, reader, + namespace_info, ) } @@ -601,10 +639,11 @@ fn create_dataset<'local>( initial_bases: JObject, target_bases: JObject, reader: impl RecordBatchReader + Send + 'static, + namespace_info: Option<(Arc, Vec)>, ) -> Result> { let path_str = path.extract(env)?; - let write_params = extract_write_params( + let mut write_params = extract_write_params( env, &max_rows_per_file, &max_rows_per_group, @@ -619,6 +658,15 @@ fn create_dataset<'local>( &target_bases, )?; + // Set up namespace commit handler if provided + if let Some((namespace, table_id)) = namespace_info { + let external_store = LanceNamespaceExternalManifestStore::new(namespace, table_id); + let commit_handler: Arc = Arc::new(ExternalManifestCommitHandler { + external_manifest_store: Arc::new(external_store), + }); + write_params.commit_handler = Some(commit_handler); + } + let dataset = BlockingDataset::write(reader, &path_str, Some(write_params))?; dataset.into_java(env) } diff --git a/java/lance-jni/src/namespace.rs b/java/lance-jni/src/namespace.rs index 81feb67404b..748152082e8 100644 --- a/java/lance-jni/src/namespace.rs +++ b/java/lance-jni/src/namespace.rs @@ -195,18 +195,252 @@ impl JavaLanceNamespace { } } -#[async_trait] -impl LanceNamespaceTrait for JavaLanceNamespace { - fn namespace_id(&self) -> String { - self.namespace_id.clone() +impl JavaLanceNamespace { + /// Helper to deserialize JSON to Java object using ObjectMapper. 
+ fn deserialize_request<'a>( + env: &mut JNIEnv<'a>, + json: &str, + request_class: &str, + ) -> lance_core::Result> { + let jrequest_json = env.new_string(json).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to create request JSON string: {}", + e + ))), + location: snafu::location!(), + })?; + + // Create ObjectMapper + let object_mapper_class = env + .find_class("com/fasterxml/jackson/databind/ObjectMapper") + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to find ObjectMapper class: {}", + e + ))), + location: snafu::location!(), + })?; + + let object_mapper = env + .new_object(&object_mapper_class, "()V", &[]) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to create ObjectMapper: {}", + e + ))), + location: snafu::location!(), + })?; + + // Get request class + let request_class_obj = + env.find_class(request_class) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to find request class {}: {}", + request_class, e + ))), + location: snafu::location!(), + })?; + + // Call objectMapper.readValue(json, class) + env.call_method( + &object_mapper, + "readValue", + "(Ljava/lang/String;Ljava/lang/Class;)Ljava/lang/Object;", + &[ + JValue::Object(&jrequest_json), + JValue::Object(&request_class_obj), + ], + ) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to deserialize request via ObjectMapper: {}", + e + ))), + location: snafu::location!(), + })? + .l() + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "ObjectMapper.readValue did not return an object: {}", + e + ))), + location: snafu::location!(), + }) } - async fn describe_table_version( + /// Helper to serialize Java object to JSON using ObjectMapper. 
+ fn serialize_response(env: &mut JNIEnv, response_obj: &JObject) -> lance_core::Result { + // Create ObjectMapper + let object_mapper_class = env + .find_class("com/fasterxml/jackson/databind/ObjectMapper") + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to find ObjectMapper class: {}", + e + ))), + location: snafu::location!(), + })?; + + let object_mapper = env + .new_object(&object_mapper_class, "()V", &[]) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to create ObjectMapper: {}", + e + ))), + location: snafu::location!(), + })?; + + // Call objectMapper.writeValueAsString(obj) + let response_json_obj = env + .call_method( + &object_mapper, + "writeValueAsString", + "(Ljava/lang/Object;)Ljava/lang/String;", + &[JValue::Object(response_obj)], + ) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to serialize response via ObjectMapper: {}", + e + ))), + location: snafu::location!(), + })? + .l() + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "ObjectMapper.writeValueAsString did not return a string: {}", + e + ))), + location: snafu::location!(), + })?; + + let response_str: String = env + .get_string(&JString::from(response_json_obj)) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to convert response JSON to string: {}", + e + ))), + location: snafu::location!(), + })? + .into(); + + Ok(response_str) + } + + /// Helper to call a Java method that takes a request object and returns a response object. + /// JSON conversion is done via Jackson ObjectMapper. 
+ async fn call_json_method( &self, - request: DescribeTableVersionRequest, - ) -> lance_core::Result { + method_name: &'static str, + request_class: &str, + response_class: &str, + request: Req, + ) -> lance_core::Result + where + Req: serde::Serialize + Send + 'static, + Resp: serde::de::DeserializeOwned + Send + 'static, + { + let java_namespace = self.java_namespace.clone(); + let jvm = self.jvm.clone(); + let request_class = request_class.to_string(); + let response_class = response_class.to_string(); + + tokio::task::spawn_blocking(move || { + let mut env = jvm + .attach_current_thread() + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to attach to JVM: {}", + e + ))), + location: snafu::location!(), + })?; + + // Serialize request to JSON + let request_json = + serde_json::to_string(&request).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to serialize request: {}", + e + ))), + location: snafu::location!(), + })?; + + // Deserialize JSON to Java request object via ObjectMapper + let request_obj = Self::deserialize_request(&mut env, &request_json, &request_class)?; + + // Call the interface method with request object + let method_sig = format!("(L{};)L{};", request_class, response_class); + let response_obj = env + .call_method( + &java_namespace, + method_name, + &method_sig, + &[JValue::Object(&request_obj)], + ) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to call {}: {}", + method_name, e + ))), + location: snafu::location!(), + })? 
+ .l() + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "{} did not return an object: {}", + method_name, e + ))), + location: snafu::location!(), + })?; + + if response_obj.is_null() { + return Err(lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "{} returned null", + method_name + ))), + location: snafu::location!(), + }); + } + + // Serialize Java response to JSON via ObjectMapper + let response_str = Self::serialize_response(&mut env, &response_obj)?; + + serde_json::from_str(&response_str).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to deserialize response: {}", + e + ))), + location: snafu::location!(), + }) + }) + .await + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to spawn blocking task: {}", + e + ))), + location: snafu::location!(), + })? + } + + /// Helper for void methods (return ()). + async fn call_void_method( + &self, + method_name: &'static str, + request_class: &str, + request: Req, + ) -> lance_core::Result<()> + where + Req: serde::Serialize + Send + 'static, + { let java_namespace = self.java_namespace.clone(); let jvm = self.jvm.clone(); + let request_class = request_class.to_string(); tokio::task::spawn_blocking(move || { let mut env = jvm @@ -219,6 +453,7 @@ impl LanceNamespaceTrait for JavaLanceNamespace { location: snafu::location!(), })?; + // Serialize request to JSON let request_json = serde_json::to_string(&request).map_err(|e| lance_core::Error::IO { source: Box::new(std::io::Error::other(format!( @@ -228,44 +463,106 @@ impl LanceNamespaceTrait for JavaLanceNamespace { location: snafu::location!(), })?; - let jrequest = env - .new_string(&request_json) + // Deserialize JSON to Java request object via ObjectMapper + let request_obj = Self::deserialize_request(&mut env, &request_json, &request_class)?; + + // Call the interface method with request object + let 
method_sig = format!("(L{};)V", request_class); + env.call_method( + &java_namespace, + method_name, + &method_sig, + &[JValue::Object(&request_obj)], + ) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to call {}: {}", + method_name, e + ))), + location: snafu::location!(), + })?; + + Ok(()) + }) + .await + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to spawn blocking task: {}", + e + ))), + location: snafu::location!(), + })? + } + + /// Helper for methods returning a string directly. + async fn call_string_method( + &self, + method_name: &'static str, + request_class: &str, + request: Req, + ) -> lance_core::Result + where + Req: serde::Serialize + Send + 'static, + { + let java_namespace = self.java_namespace.clone(); + let jvm = self.jvm.clone(); + let request_class = request_class.to_string(); + + tokio::task::spawn_blocking(move || { + let mut env = jvm + .attach_current_thread() .map_err(|e| lance_core::Error::IO { source: Box::new(std::io::Error::other(format!( - "Failed to create request string: {}", + "Failed to attach to JVM: {}", + e + ))), + location: snafu::location!(), + })?; + + // Serialize request to JSON + let request_json = + serde_json::to_string(&request).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to serialize request: {}", e ))), location: snafu::location!(), })?; + // Deserialize JSON to Java request object via ObjectMapper + let request_obj = Self::deserialize_request(&mut env, &request_json, &request_class)?; + + // Call the interface method with request object + let method_sig = format!("(L{};)Ljava/lang/String;", request_class); let result = env .call_method( &java_namespace, - "describeTableVersionJson", - "(Ljava/lang/String;)Ljava/lang/String;", - &[JValue::Object(&jrequest)], + method_name, + &method_sig, + &[JValue::Object(&request_obj)], ) .map_err(|e| lance_core::Error::IO { 
source: Box::new(std::io::Error::other(format!( - "Failed to call describeTableVersionJson: {}", - e + "Failed to call {}: {}", + method_name, e ))), location: snafu::location!(), })?; let response_obj = result.l().map_err(|e| lance_core::Error::IO { source: Box::new(std::io::Error::other(format!( - "describeTableVersionJson did not return an object: {}", - e + "{} did not return an object: {}", + method_name, e ))), location: snafu::location!(), })?; if response_obj.is_null() { return Err(lance_core::Error::IO { - source: Box::new(std::io::Error::other( - "describeTableVersionJson returned null", - )), + source: Box::new(std::io::Error::other(format!( + "{} returned null", + method_name + ))), location: snafu::location!(), }); } @@ -281,13 +578,7 @@ impl LanceNamespaceTrait for JavaLanceNamespace { })? .into(); - serde_json::from_str(&response_str).map_err(|e| lance_core::Error::IO { - source: Box::new(std::io::Error::other(format!( - "Failed to deserialize response: {}", - e - ))), - location: snafu::location!(), - }) + Ok(response_str) }) .await .map_err(|e| lance_core::Error::IO { @@ -299,12 +590,19 @@ impl LanceNamespaceTrait for JavaLanceNamespace { })? } - async fn create_table_version( + /// Helper for methods returning Long (boxed). 
+ async fn call_long_method( &self, - request: CreateTableVersionRequest, - ) -> lance_core::Result { + method_name: &'static str, + request_class: &str, + request: Req, + ) -> lance_core::Result + where + Req: serde::Serialize + Send + 'static, + { let java_namespace = self.java_namespace.clone(); let jvm = self.jvm.clone(); + let request_class = request_class.to_string(); tokio::task::spawn_blocking(move || { let mut env = jvm @@ -317,6 +615,7 @@ impl LanceNamespaceTrait for JavaLanceNamespace { location: snafu::location!(), })?; + // Serialize request to JSON let request_json = serde_json::to_string(&request).map_err(|e| lance_core::Error::IO { source: Box::new(std::io::Error::other(format!( @@ -326,66 +625,64 @@ impl LanceNamespaceTrait for JavaLanceNamespace { location: snafu::location!(), })?; - let jrequest = env - .new_string(&request_json) - .map_err(|e| lance_core::Error::IO { - source: Box::new(std::io::Error::other(format!( - "Failed to create request string: {}", - e - ))), - location: snafu::location!(), - })?; + // Deserialize JSON to Java request object via ObjectMapper + let request_obj = Self::deserialize_request(&mut env, &request_json, &request_class)?; + // Call the interface method with request object - returns Long (boxed) + let method_sig = format!("(L{};)Ljava/lang/Long;", request_class); let result = env .call_method( &java_namespace, - "createTableVersionJson", - "(Ljava/lang/String;)Ljava/lang/String;", - &[JValue::Object(&jrequest)], + method_name, + &method_sig, + &[JValue::Object(&request_obj)], ) .map_err(|e| lance_core::Error::IO { source: Box::new(std::io::Error::other(format!( - "Failed to call createTableVersionJson: {}", - e + "Failed to call {}: {}", + method_name, e ))), location: snafu::location!(), })?; - let response_obj = result.l().map_err(|e| lance_core::Error::IO { + let long_obj = result.l().map_err(|e| lance_core::Error::IO { source: Box::new(std::io::Error::other(format!( - "createTableVersionJson did not return an 
object: {}", - e + "{} did not return an object: {}", + method_name, e ))), location: snafu::location!(), })?; - if response_obj.is_null() { + if long_obj.is_null() { return Err(lance_core::Error::IO { - source: Box::new(std::io::Error::other( - "createTableVersionJson returned null", - )), + source: Box::new(std::io::Error::other(format!( + "{} returned null", + method_name + ))), location: snafu::location!(), }); } - let response_str: String = env - .get_string(&JString::from(response_obj)) + // Unbox Long to long + let long_value = env + .call_method(&long_obj, "longValue", "()J", &[]) .map_err(|e| lance_core::Error::IO { source: Box::new(std::io::Error::other(format!( - "Failed to convert response to string: {}", + "Failed to call longValue: {}", e ))), location: snafu::location!(), })? - .into(); + .j() + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "longValue did not return a long: {}", + e + ))), + location: snafu::location!(), + })?; - serde_json::from_str(&response_str).map_err(|e| lance_core::Error::IO { - source: Box::new(std::io::Error::other(format!( - "Failed to deserialize response: {}", - e - ))), - location: snafu::location!(), - }) + Ok(long_value) }) .await .map_err(|e| lance_core::Error::IO { @@ -396,6 +693,929 @@ impl LanceNamespaceTrait for JavaLanceNamespace { location: snafu::location!(), })? } + + /// Helper for methods with Bytes parameter (request + byte[] data). 
+ async fn call_with_bytes_method( + &self, + method_name: &'static str, + request_class: &str, + response_class: &str, + request: Req, + data: Bytes, + ) -> lance_core::Result + where + Req: serde::Serialize + Send + 'static, + Resp: serde::de::DeserializeOwned + Send + 'static, + { + let java_namespace = self.java_namespace.clone(); + let jvm = self.jvm.clone(); + let request_class = request_class.to_string(); + let response_class = response_class.to_string(); + + tokio::task::spawn_blocking(move || { + let mut env = jvm + .attach_current_thread() + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to attach to JVM: {}", + e + ))), + location: snafu::location!(), + })?; + + // Serialize request to JSON + let request_json = + serde_json::to_string(&request).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to serialize request: {}", + e + ))), + location: snafu::location!(), + })?; + + // Deserialize JSON to Java request object via ObjectMapper + let request_obj = Self::deserialize_request(&mut env, &request_json, &request_class)?; + + let jdata = env + .byte_array_from_slice(&data) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to create byte array: {}", + e + ))), + location: snafu::location!(), + })?; + + // Call the interface method with request object and byte array + let method_sig = format!("(L{};[B)L{};", request_class, response_class); + let response_obj = env + .call_method( + &java_namespace, + method_name, + &method_sig, + &[JValue::Object(&request_obj), JValue::Object(&jdata)], + ) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to call {}: {}", + method_name, e + ))), + location: snafu::location!(), + })? 
+ .l() + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "{} did not return an object: {}", + method_name, e + ))), + location: snafu::location!(), + })?; + + if response_obj.is_null() { + return Err(lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "{} returned null", + method_name + ))), + location: snafu::location!(), + }); + } + + // Serialize Java response to JSON via ObjectMapper + let response_str = Self::serialize_response(&mut env, &response_obj)?; + + serde_json::from_str(&response_str).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to deserialize response: {}", + e + ))), + location: snafu::location!(), + }) + }) + .await + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to spawn blocking task: {}", + e + ))), + location: snafu::location!(), + })? + } + + /// Helper for methods returning Bytes (byte[]). + async fn call_bytes_method( + &self, + method_name: &'static str, + request_class: &str, + request: Req, + ) -> lance_core::Result + where + Req: serde::Serialize + Send + 'static, + { + let java_namespace = self.java_namespace.clone(); + let jvm = self.jvm.clone(); + let request_class = request_class.to_string(); + + tokio::task::spawn_blocking(move || { + let mut env = jvm + .attach_current_thread() + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to attach to JVM: {}", + e + ))), + location: snafu::location!(), + })?; + + // Serialize request to JSON + let request_json = + serde_json::to_string(&request).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to serialize request: {}", + e + ))), + location: snafu::location!(), + })?; + + // Deserialize JSON to Java request object via ObjectMapper + let request_obj = Self::deserialize_request(&mut env, &request_json, &request_class)?; + + // Call the 
interface method with request object - returns byte[] + let method_sig = format!("(L{};)[B", request_class); + let result = env + .call_method( + &java_namespace, + method_name, + &method_sig, + &[JValue::Object(&request_obj)], + ) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to call {}: {}", + method_name, e + ))), + location: snafu::location!(), + })?; + + let response_obj = result.l().map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "{} did not return an object: {}", + method_name, e + ))), + location: snafu::location!(), + })?; + + if response_obj.is_null() { + return Err(lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "{} returned null", + method_name + ))), + location: snafu::location!(), + }); + } + + let byte_array = JByteArray::from(response_obj); + let bytes = env + .convert_byte_array(byte_array) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to convert byte array: {}", + e + ))), + location: snafu::location!(), + })?; + + Ok(Bytes::from(bytes)) + }) + .await + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to spawn blocking task: {}", + e + ))), + location: snafu::location!(), + })? + } + + /// Helper for methods with request + extra String parameter (e.g., indexName). + /// Extracts the extra string via getter_method on the request object. 
+ async fn call_json_method_with_extra_string( + &self, + method_name: &'static str, + request_class: &str, + response_class: &str, + getter_method: &'static str, + request: Req, + ) -> lance_core::Result + where + Req: serde::Serialize + Send + 'static, + Resp: serde::de::DeserializeOwned + Send + 'static, + { + let java_namespace = self.java_namespace.clone(); + let jvm = self.jvm.clone(); + let request_class = request_class.to_string(); + let response_class = response_class.to_string(); + + tokio::task::spawn_blocking(move || { + let mut env = jvm + .attach_current_thread() + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to attach to JVM: {}", + e + ))), + location: snafu::location!(), + })?; + + // Serialize request to JSON + let request_json = + serde_json::to_string(&request).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to serialize request: {}", + e + ))), + location: snafu::location!(), + })?; + + // Deserialize JSON to Java request object via ObjectMapper + let request_obj = Self::deserialize_request(&mut env, &request_json, &request_class)?; + + // Call getter method to extract extra string (e.g., getIndexName) + let extra_string_obj = env + .call_method(&request_obj, getter_method, "()Ljava/lang/String;", &[]) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to call {}: {}", + getter_method, e + ))), + location: snafu::location!(), + })? 
+ .l() + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "{} did not return an object: {}", + getter_method, e + ))), + location: snafu::location!(), + })?; + + // Call the interface method with request object and extra string + let method_sig = format!( + "(L{};Ljava/lang/String;)L{};", + request_class, response_class + ); + let response_obj = env + .call_method( + &java_namespace, + method_name, + &method_sig, + &[ + JValue::Object(&request_obj), + JValue::Object(&extra_string_obj), + ], + ) + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to call {}: {}", + method_name, e + ))), + location: snafu::location!(), + })? + .l() + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "{} did not return an object: {}", + method_name, e + ))), + location: snafu::location!(), + })?; + + if response_obj.is_null() { + return Err(lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "{} returned null", + method_name + ))), + location: snafu::location!(), + }); + } + + // Serialize Java response to JSON via ObjectMapper + let response_str = Self::serialize_response(&mut env, &response_obj)?; + + serde_json::from_str(&response_str).map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to deserialize response: {}", + e + ))), + location: snafu::location!(), + }) + }) + .await + .map_err(|e| lance_core::Error::IO { + source: Box::new(std::io::Error::other(format!( + "Failed to spawn blocking task: {}", + e + ))), + location: snafu::location!(), + })? 
+ } +} + +const MODEL_PKG: &str = "org/lance/namespace/model"; + +#[async_trait] +impl LanceNamespaceTrait for JavaLanceNamespace { + fn namespace_id(&self) -> String { + self.namespace_id.clone() + } + + async fn list_namespaces( + &self, + request: ListNamespacesRequest, + ) -> lance_core::Result { + self.call_json_method( + "listNamespaces", + &format!("{}/ListNamespacesRequest", MODEL_PKG), + &format!("{}/ListNamespacesResponse", MODEL_PKG), + request, + ) + .await + } + + async fn describe_namespace( + &self, + request: DescribeNamespaceRequest, + ) -> lance_core::Result { + self.call_json_method( + "describeNamespace", + &format!("{}/DescribeNamespaceRequest", MODEL_PKG), + &format!("{}/DescribeNamespaceResponse", MODEL_PKG), + request, + ) + .await + } + + async fn create_namespace( + &self, + request: CreateNamespaceRequest, + ) -> lance_core::Result { + self.call_json_method( + "createNamespace", + &format!("{}/CreateNamespaceRequest", MODEL_PKG), + &format!("{}/CreateNamespaceResponse", MODEL_PKG), + request, + ) + .await + } + + async fn drop_namespace( + &self, + request: DropNamespaceRequest, + ) -> lance_core::Result { + self.call_json_method( + "dropNamespace", + &format!("{}/DropNamespaceRequest", MODEL_PKG), + &format!("{}/DropNamespaceResponse", MODEL_PKG), + request, + ) + .await + } + + async fn namespace_exists(&self, request: NamespaceExistsRequest) -> lance_core::Result<()> { + self.call_void_method( + "namespaceExists", + &format!("{}/NamespaceExistsRequest", MODEL_PKG), + request, + ) + .await + } + + async fn list_tables( + &self, + request: ListTablesRequest, + ) -> lance_core::Result { + self.call_json_method( + "listTables", + &format!("{}/ListTablesRequest", MODEL_PKG), + &format!("{}/ListTablesResponse", MODEL_PKG), + request, + ) + .await + } + + async fn describe_table( + &self, + request: DescribeTableRequest, + ) -> lance_core::Result { + self.call_json_method( + "describeTable", + &format!("{}/DescribeTableRequest", MODEL_PKG), + 
&format!("{}/DescribeTableResponse", MODEL_PKG), + request, + ) + .await + } + + async fn register_table( + &self, + request: RegisterTableRequest, + ) -> lance_core::Result { + self.call_json_method( + "registerTable", + &format!("{}/RegisterTableRequest", MODEL_PKG), + &format!("{}/RegisterTableResponse", MODEL_PKG), + request, + ) + .await + } + + async fn table_exists(&self, request: TableExistsRequest) -> lance_core::Result<()> { + self.call_void_method( + "tableExists", + &format!("{}/TableExistsRequest", MODEL_PKG), + request, + ) + .await + } + + async fn drop_table(&self, request: DropTableRequest) -> lance_core::Result { + self.call_json_method( + "dropTable", + &format!("{}/DropTableRequest", MODEL_PKG), + &format!("{}/DropTableResponse", MODEL_PKG), + request, + ) + .await + } + + async fn deregister_table( + &self, + request: DeregisterTableRequest, + ) -> lance_core::Result { + self.call_json_method( + "deregisterTable", + &format!("{}/DeregisterTableRequest", MODEL_PKG), + &format!("{}/DeregisterTableResponse", MODEL_PKG), + request, + ) + .await + } + + async fn count_table_rows(&self, request: CountTableRowsRequest) -> lance_core::Result { + self.call_long_method( + "countTableRows", + &format!("{}/CountTableRowsRequest", MODEL_PKG), + request, + ) + .await + } + + async fn create_table( + &self, + request: CreateTableRequest, + data: Bytes, + ) -> lance_core::Result { + self.call_with_bytes_method( + "createTable", + &format!("{}/CreateTableRequest", MODEL_PKG), + &format!("{}/CreateTableResponse", MODEL_PKG), + request, + data, + ) + .await + } + + async fn declare_table( + &self, + request: DeclareTableRequest, + ) -> lance_core::Result { + self.call_json_method( + "declareTable", + &format!("{}/DeclareTableRequest", MODEL_PKG), + &format!("{}/DeclareTableResponse", MODEL_PKG), + request, + ) + .await + } + + #[allow(deprecated)] + async fn create_empty_table( + &self, + request: CreateEmptyTableRequest, + ) -> lance_core::Result { + 
self.call_json_method( + "createEmptyTable", + &format!("{}/CreateEmptyTableRequest", MODEL_PKG), + &format!("{}/CreateEmptyTableResponse", MODEL_PKG), + request, + ) + .await + } + + async fn insert_into_table( + &self, + request: InsertIntoTableRequest, + data: Bytes, + ) -> lance_core::Result { + self.call_with_bytes_method( + "insertIntoTable", + &format!("{}/InsertIntoTableRequest", MODEL_PKG), + &format!("{}/InsertIntoTableResponse", MODEL_PKG), + request, + data, + ) + .await + } + + async fn merge_insert_into_table( + &self, + request: MergeInsertIntoTableRequest, + data: Bytes, + ) -> lance_core::Result { + self.call_with_bytes_method( + "mergeInsertIntoTable", + &format!("{}/MergeInsertIntoTableRequest", MODEL_PKG), + &format!("{}/MergeInsertIntoTableResponse", MODEL_PKG), + request, + data, + ) + .await + } + + async fn update_table( + &self, + request: UpdateTableRequest, + ) -> lance_core::Result { + self.call_json_method( + "updateTable", + &format!("{}/UpdateTableRequest", MODEL_PKG), + &format!("{}/UpdateTableResponse", MODEL_PKG), + request, + ) + .await + } + + async fn delete_from_table( + &self, + request: DeleteFromTableRequest, + ) -> lance_core::Result { + self.call_json_method( + "deleteFromTable", + &format!("{}/DeleteFromTableRequest", MODEL_PKG), + &format!("{}/DeleteFromTableResponse", MODEL_PKG), + request, + ) + .await + } + + async fn query_table(&self, request: QueryTableRequest) -> lance_core::Result { + self.call_bytes_method( + "queryTable", + &format!("{}/QueryTableRequest", MODEL_PKG), + request, + ) + .await + } + + async fn create_table_index( + &self, + request: CreateTableIndexRequest, + ) -> lance_core::Result { + self.call_json_method( + "createTableIndex", + &format!("{}/CreateTableIndexRequest", MODEL_PKG), + &format!("{}/CreateTableIndexResponse", MODEL_PKG), + request, + ) + .await + } + + async fn list_table_indices( + &self, + request: ListTableIndicesRequest, + ) -> lance_core::Result { + self.call_json_method( + 
"listTableIndices", + &format!("{}/ListTableIndicesRequest", MODEL_PKG), + &format!("{}/ListTableIndicesResponse", MODEL_PKG), + request, + ) + .await + } + + async fn describe_table_index_stats( + &self, + request: DescribeTableIndexStatsRequest, + ) -> lance_core::Result { + self.call_json_method_with_extra_string( + "describeTableIndexStats", + &format!("{}/DescribeTableIndexStatsRequest", MODEL_PKG), + &format!("{}/DescribeTableIndexStatsResponse", MODEL_PKG), + "getIndexName", + request, + ) + .await + } + + async fn describe_transaction( + &self, + request: DescribeTransactionRequest, + ) -> lance_core::Result { + self.call_json_method( + "describeTransaction", + &format!("{}/DescribeTransactionRequest", MODEL_PKG), + &format!("{}/DescribeTransactionResponse", MODEL_PKG), + request, + ) + .await + } + + async fn alter_transaction( + &self, + request: AlterTransactionRequest, + ) -> lance_core::Result { + self.call_json_method( + "alterTransaction", + &format!("{}/AlterTransactionRequest", MODEL_PKG), + &format!("{}/AlterTransactionResponse", MODEL_PKG), + request, + ) + .await + } + + async fn create_table_scalar_index( + &self, + request: CreateTableIndexRequest, + ) -> lance_core::Result { + self.call_json_method( + "createTableScalarIndex", + &format!("{}/CreateTableIndexRequest", MODEL_PKG), + &format!("{}/CreateTableScalarIndexResponse", MODEL_PKG), + request, + ) + .await + } + + async fn drop_table_index( + &self, + request: DropTableIndexRequest, + ) -> lance_core::Result { + self.call_json_method_with_extra_string( + "dropTableIndex", + &format!("{}/DropTableIndexRequest", MODEL_PKG), + &format!("{}/DropTableIndexResponse", MODEL_PKG), + "getIndexName", + request, + ) + .await + } + + async fn list_all_tables( + &self, + request: ListTablesRequest, + ) -> lance_core::Result { + self.call_json_method( + "listAllTables", + &format!("{}/ListTablesRequest", MODEL_PKG), + &format!("{}/ListTablesResponse", MODEL_PKG), + request, + ) + .await + } + + async 
fn restore_table( + &self, + request: RestoreTableRequest, + ) -> lance_core::Result { + self.call_json_method( + "restoreTable", + &format!("{}/RestoreTableRequest", MODEL_PKG), + &format!("{}/RestoreTableResponse", MODEL_PKG), + request, + ) + .await + } + + async fn rename_table( + &self, + request: RenameTableRequest, + ) -> lance_core::Result { + self.call_json_method( + "renameTable", + &format!("{}/RenameTableRequest", MODEL_PKG), + &format!("{}/RenameTableResponse", MODEL_PKG), + request, + ) + .await + } + + async fn list_table_versions( + &self, + request: ListTableVersionsRequest, + ) -> lance_core::Result { + self.call_json_method( + "listTableVersions", + &format!("{}/ListTableVersionsRequest", MODEL_PKG), + &format!("{}/ListTableVersionsResponse", MODEL_PKG), + request, + ) + .await + } + + async fn create_table_version( + &self, + request: CreateTableVersionRequest, + ) -> lance_core::Result { + self.call_json_method( + "createTableVersion", + &format!("{}/CreateTableVersionRequest", MODEL_PKG), + &format!("{}/CreateTableVersionResponse", MODEL_PKG), + request, + ) + .await + } + + async fn describe_table_version( + &self, + request: DescribeTableVersionRequest, + ) -> lance_core::Result { + self.call_json_method( + "describeTableVersion", + &format!("{}/DescribeTableVersionRequest", MODEL_PKG), + &format!("{}/DescribeTableVersionResponse", MODEL_PKG), + request, + ) + .await + } + + async fn batch_delete_table_versions( + &self, + request: BatchDeleteTableVersionsRequest, + ) -> lance_core::Result { + self.call_json_method( + "batchDeleteTableVersions", + &format!("{}/BatchDeleteTableVersionsRequest", MODEL_PKG), + &format!("{}/BatchDeleteTableVersionsResponse", MODEL_PKG), + request, + ) + .await + } + + async fn update_table_schema_metadata( + &self, + request: UpdateTableSchemaMetadataRequest, + ) -> lance_core::Result { + self.call_json_method( + "updateTableSchemaMetadata", + &format!("{}/UpdateTableSchemaMetadataRequest", MODEL_PKG), + 
&format!("{}/UpdateTableSchemaMetadataResponse", MODEL_PKG), + request, + ) + .await + } + + async fn get_table_stats( + &self, + request: GetTableStatsRequest, + ) -> lance_core::Result { + self.call_json_method( + "getTableStats", + &format!("{}/GetTableStatsRequest", MODEL_PKG), + &format!("{}/GetTableStatsResponse", MODEL_PKG), + request, + ) + .await + } + + async fn explain_table_query_plan( + &self, + request: ExplainTableQueryPlanRequest, + ) -> lance_core::Result { + self.call_string_method( + "explainTableQueryPlan", + &format!("{}/ExplainTableQueryPlanRequest", MODEL_PKG), + request, + ) + .await + } + + async fn analyze_table_query_plan( + &self, + request: AnalyzeTableQueryPlanRequest, + ) -> lance_core::Result { + self.call_string_method( + "analyzeTableQueryPlan", + &format!("{}/AnalyzeTableQueryPlanRequest", MODEL_PKG), + request, + ) + .await + } + + async fn alter_table_add_columns( + &self, + request: AlterTableAddColumnsRequest, + ) -> lance_core::Result { + self.call_json_method( + "alterTableAddColumns", + &format!("{}/AlterTableAddColumnsRequest", MODEL_PKG), + &format!("{}/AlterTableAddColumnsResponse", MODEL_PKG), + request, + ) + .await + } + + async fn alter_table_alter_columns( + &self, + request: AlterTableAlterColumnsRequest, + ) -> lance_core::Result { + self.call_json_method( + "alterTableAlterColumns", + &format!("{}/AlterTableAlterColumnsRequest", MODEL_PKG), + &format!("{}/AlterTableAlterColumnsResponse", MODEL_PKG), + request, + ) + .await + } + + async fn alter_table_drop_columns( + &self, + request: AlterTableDropColumnsRequest, + ) -> lance_core::Result { + self.call_json_method( + "alterTableDropColumns", + &format!("{}/AlterTableDropColumnsRequest", MODEL_PKG), + &format!("{}/AlterTableDropColumnsResponse", MODEL_PKG), + request, + ) + .await + } + + async fn list_table_tags( + &self, + request: ListTableTagsRequest, + ) -> lance_core::Result { + self.call_json_method( + "listTableTags", + &format!("{}/ListTableTagsRequest", 
MODEL_PKG), + &format!("{}/ListTableTagsResponse", MODEL_PKG), + request, + ) + .await + } + + async fn get_table_tag_version( + &self, + request: GetTableTagVersionRequest, + ) -> lance_core::Result { + self.call_json_method( + "getTableTagVersion", + &format!("{}/GetTableTagVersionRequest", MODEL_PKG), + &format!("{}/GetTableTagVersionResponse", MODEL_PKG), + request, + ) + .await + } + + async fn create_table_tag( + &self, + request: CreateTableTagRequest, + ) -> lance_core::Result { + self.call_json_method( + "createTableTag", + &format!("{}/CreateTableTagRequest", MODEL_PKG), + &format!("{}/CreateTableTagResponse", MODEL_PKG), + request, + ) + .await + } + + async fn delete_table_tag( + &self, + request: DeleteTableTagRequest, + ) -> lance_core::Result { + self.call_json_method( + "deleteTableTag", + &format!("{}/DeleteTableTagRequest", MODEL_PKG), + &format!("{}/DeleteTableTagResponse", MODEL_PKG), + request, + ) + .await + } + + async fn update_table_tag( + &self, + request: UpdateTableTagRequest, + ) -> lance_core::Result { + self.call_json_method( + "updateTableTag", + &format!("{}/UpdateTableTagRequest", MODEL_PKG), + &format!("{}/UpdateTableTagResponse", MODEL_PKG), + request, + ) + .await + } } /// Create a JavaLanceNamespace wrapper from a JNI environment and Java object. 
diff --git a/java/src/main/java/org/lance/Dataset.java b/java/src/main/java/org/lance/Dataset.java index 6d0778b7f51..ef5340f5744 100644 --- a/java/src/main/java/org/lance/Dataset.java +++ b/java/src/main/java/org/lance/Dataset.java @@ -194,27 +194,7 @@ static Dataset create( String path, WriteParams params, StorageOptionsProvider storageOptionsProvider) { - Preconditions.checkNotNull(allocator); - Preconditions.checkNotNull(stream); - Preconditions.checkNotNull(path); - Preconditions.checkNotNull(params); - Dataset dataset = - createWithFfiStreamAndProvider( - stream.memoryAddress(), - path, - params.getMaxRowsPerFile(), - params.getMaxRowsPerGroup(), - params.getMaxBytesPerFile(), - params.getMode(), - params.getEnableStableRowIds(), - params.getDataStorageVersion(), - params.getEnableV2ManifestPaths(), - params.getStorageOptions(), - Optional.ofNullable(storageOptionsProvider), - params.getInitialBases(), - params.getTargetBases()); - dataset.allocator = allocator; - return dataset; + return create(allocator, stream, path, params, storageOptionsProvider, null, null); } private static native Dataset createWithFfiSchema( @@ -258,7 +238,57 @@ private static native Dataset createWithFfiStreamAndProvider( Map storageOptions, Optional storageOptionsProvider, Optional> initialBases, - Optional> targetBases); + Optional> targetBases, + LanceNamespace namespace, + List tableId); + + /** + * Creates a dataset with optional namespace support for managed versioning. + * + *

When a namespace is provided, the commit handler will use the namespace's + * create_table_version method for version tracking. + * + * @param allocator buffer allocator + * @param stream arrow stream + * @param path dataset uri + * @param params write parameters + * @param storageOptionsProvider optional provider for dynamic storage options/credentials + * @param namespace optional namespace implementation for managed versioning (can be null) + * @param tableId optional table identifier within the namespace (can be null) + * @return Dataset + */ + static Dataset create( + BufferAllocator allocator, + ArrowArrayStream stream, + String path, + WriteParams params, + StorageOptionsProvider storageOptionsProvider, + LanceNamespace namespace, + List tableId) { + Preconditions.checkNotNull(allocator); + Preconditions.checkNotNull(stream); + Preconditions.checkNotNull(path); + Preconditions.checkNotNull(params); + Dataset dataset = + createWithFfiStreamAndProvider( + stream.memoryAddress(), + path, + params.getMaxRowsPerFile(), + params.getMaxRowsPerGroup(), + params.getMaxBytesPerFile(), + params.getMode(), + params.getEnableStableRowIds(), + params.getDataStorageVersion(), + params.getEnableV2ManifestPaths(), + params.getStorageOptions(), + Optional.ofNullable(storageOptionsProvider), + params.getInitialBases(), + params.getTargetBases(), + namespace, + tableId); + dataset.allocator = allocator; + return dataset; + } /** * Open a dataset from the specified path. diff --git a/java/src/main/java/org/lance/WriteDatasetBuilder.java b/java/src/main/java/org/lance/WriteDatasetBuilder.java index 2151ccae434..d904cec7277 100644 --- a/java/src/main/java/org/lance/WriteDatasetBuilder.java +++ b/java/src/main/java/org/lance/WriteDatasetBuilder.java @@ -436,8 +436,9 @@ private Dataset executeWithNamespace() { ? 
null : new LanceNamespaceStorageOptionsProvider(namespace, tableId); - // Use Dataset.create() which handles CREATE/APPEND/OVERWRITE modes - return createDatasetWithStream(tableUri, params, storageOptionsProvider); + // Use Dataset.create() with namespace for managed versioning support + return createDatasetWithStreamAndNamespace( + tableUri, params, storageOptionsProvider, namespace, tableId); } private Dataset executeWithUri() { @@ -479,4 +480,34 @@ private Dataset createDatasetWithStream( throw new IllegalStateException("No data source provided"); } + + private Dataset createDatasetWithStreamAndNamespace( + String path, + WriteParams params, + StorageOptionsProvider storageOptionsProvider, + LanceNamespace namespace, + List tableId) { + // If stream is directly provided, use it + if (stream != null) { + return Dataset.create( + allocator, stream, path, params, storageOptionsProvider, namespace, tableId); + } + + // If reader is provided, convert to stream + if (reader != null) { + try (ArrowArrayStream tempStream = ArrowArrayStream.allocateNew(allocator)) { + Data.exportArrayStream(allocator, reader, tempStream); + return Dataset.create( + allocator, tempStream, path, params, storageOptionsProvider, namespace, tableId); + } + } + + // If only schema is provided (empty table), use Dataset.create with schema + // Note: Schema-only creation doesn't support namespace-based commit handling + if (schema != null) { + return Dataset.create(allocator, path, schema, params); + } + + throw new IllegalStateException("No data source provided"); + } } diff --git a/java/src/test/java/org/lance/NamespaceIntegrationTest.java b/java/src/test/java/org/lance/NamespaceIntegrationTest.java index 7c4c66551de..2d6f8ab1443 100644 --- a/java/src/test/java/org/lance/NamespaceIntegrationTest.java +++ b/java/src/test/java/org/lance/NamespaceIntegrationTest.java @@ -16,20 +16,12 @@ import org.lance.namespace.DirectoryNamespace; import org.lance.namespace.LanceNamespace; import 
org.lance.namespace.LanceNamespaceStorageOptionsProvider; -import org.lance.namespace.model.BatchDeleteTableVersionsRequest; -import org.lance.namespace.model.BatchDeleteTableVersionsResponse; import org.lance.namespace.model.CreateEmptyTableRequest; import org.lance.namespace.model.CreateEmptyTableResponse; -import org.lance.namespace.model.CreateTableVersionRequest; -import org.lance.namespace.model.CreateTableVersionResponse; import org.lance.namespace.model.DeclareTableRequest; import org.lance.namespace.model.DeclareTableResponse; import org.lance.namespace.model.DescribeTableRequest; import org.lance.namespace.model.DescribeTableResponse; -import org.lance.namespace.model.DescribeTableVersionRequest; -import org.lance.namespace.model.DescribeTableVersionResponse; -import org.lance.namespace.model.ListTableVersionsRequest; -import org.lance.namespace.model.ListTableVersionsResponse; import org.lance.operation.Append; import org.apache.arrow.memory.BufferAllocator; @@ -1317,320 +1309,6 @@ void testFragmentCreateAndCommitWithNamespace() throws Exception { } } - /** - * Table version tracking namespace for managed versioning tests. - * - *

This namespace wraps DirectoryNamespace with table_version_tracking_enabled and - * manifest_enabled flags, and tracks create_table_version and describe_table_version calls. - */ - static class TableVersionTrackingNamespace implements LanceNamespace, java.io.Closeable { - private final DirectoryNamespace inner; - private final AtomicInteger createTableVersionCount = new AtomicInteger(0); - private final AtomicInteger describeTableVersionCount = new AtomicInteger(0); - private final Map baseStorageOptions; - - public TableVersionTrackingNamespace(String root, Map storageOptions) { - this.baseStorageOptions = - storageOptions != null ? new HashMap<>(storageOptions) : new HashMap<>(); - - Map dirProps = new HashMap<>(); - if (storageOptions != null) { - for (Map.Entry entry : storageOptions.entrySet()) { - dirProps.put("storage." + entry.getKey(), entry.getValue()); - } - } - dirProps.put("root", root); - dirProps.put("table_version_tracking_enabled", "true"); - dirProps.put("manifest_enabled", "true"); - - this.inner = new DirectoryNamespace(); - try (BufferAllocator allocator = new RootAllocator()) { - this.inner.initialize(dirProps, allocator); - } - } - - public int getCreateTableVersionCount() { - return createTableVersionCount.get(); - } - - public int getDescribeTableVersionCount() { - return describeTableVersionCount.get(); - } - - public long getNativeHandle() { - return inner.getNativeHandle(); - } - - @Override - public void initialize(Map configProperties, BufferAllocator allocator) { - // Already initialized in constructor - } - - @Override - public String namespaceId() { - return "TableVersionTrackingNamespace { inner: " + inner.namespaceId() + " }"; - } - - @Override - public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request) { - return inner.createEmptyTable(request); - } - - @Override - public DeclareTableResponse declareTable(DeclareTableRequest request) { - return inner.declareTable(request); - } - - @Override - public 
DescribeTableResponse describeTable(DescribeTableRequest request) { - return inner.describeTable(request); - } - - @Override - public CreateTableVersionResponse createTableVersion(CreateTableVersionRequest request) { - createTableVersionCount.incrementAndGet(); - return inner.createTableVersion(request); - } - - @Override - public DescribeTableVersionResponse describeTableVersion(DescribeTableVersionRequest request) { - describeTableVersionCount.incrementAndGet(); - return inner.describeTableVersion(request); - } - - @Override - public ListTableVersionsResponse listTableVersions(ListTableVersionsRequest request) { - return inner.listTableVersions(request); - } - - @Override - public BatchDeleteTableVersionsResponse batchDeleteTableVersions( - BatchDeleteTableVersionsRequest request) { - return inner.batchDeleteTableVersions(request); - } - - @Override - public void close() { - inner.close(); - } - } - - @Test - void testManagedVersioningWithDirectoryNamespace() throws Exception { - try (BufferAllocator allocator = new RootAllocator()) { - // Set up storage options - Map storageOptions = new HashMap<>(); - storageOptions.put("allow_http", "true"); - storageOptions.put("aws_access_key_id", ACCESS_KEY); - storageOptions.put("aws_secret_access_key", SECRET_KEY); - storageOptions.put("aws_endpoint", ENDPOINT_URL); - storageOptions.put("aws_region", REGION); - - // Create namespace with table_version_tracking_enabled - TableVersionTrackingNamespace namespace = - new TableVersionTrackingNamespace( - "s3://" + BUCKET_NAME + "/managed_versioning_test", storageOptions); - String tableName = UUID.randomUUID().toString(); - List tableId = Arrays.asList(tableName); - - // Create schema and data - Schema schema = - new Schema( - Arrays.asList( - new Field("a", FieldType.nullable(new ArrowType.Int(32, true)), null), - new Field("b", FieldType.nullable(new ArrowType.Int(32, true)), null))); - - try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - IntVector 
aVector = (IntVector) root.getVector("a"); - IntVector bVector = (IntVector) root.getVector("b"); - - aVector.allocateNew(2); - bVector.allocateNew(2); - - aVector.set(0, 1); - bVector.set(0, 2); - aVector.set(1, 10); - bVector.set(1, 20); - - aVector.setValueCount(2); - bVector.setValueCount(2); - root.setRowCount(2); - - ArrowReader testReader = - new ArrowReader(allocator) { - boolean firstRead = true; - - @Override - public boolean loadNextBatch() { - if (firstRead) { - firstRead = false; - return true; - } - return false; - } - - @Override - public long bytesRead() { - return 0; - } - - @Override - protected void closeReadSource() {} - - @Override - protected Schema readSchema() { - return schema; - } - - @Override - public VectorSchemaRoot getVectorSchemaRoot() { - return root; - } - }; - - // Create dataset through namespace - try (Dataset dataset = - Dataset.write() - .allocator(allocator) - .reader(testReader) - .namespace(namespace) - .tableId(tableId) - .mode(WriteParams.WriteMode.CREATE) - .execute()) { - assertEquals(2, dataset.countRows()); - assertEquals(1, dataset.version()); - } - } - - // Verify describe_table returns managed_versioning=true - DescribeTableRequest descReq = new DescribeTableRequest(); - descReq.setId(tableId); - DescribeTableResponse descResp = namespace.describeTable(descReq); - - assertEquals( - Boolean.TRUE, - descResp.getManagedVersioning(), - "Expected managedVersioning=true when table_version_tracking_enabled"); - - // Open dataset through namespace - this should call describe_table_version for latest - int initialDescribeCount = namespace.getDescribeTableVersionCount(); - try (Dataset dsFromNamespace = - Dataset.open().allocator(allocator).namespace(namespace).tableId(tableId).build()) { - - assertEquals(2, dsFromNamespace.countRows()); - assertEquals(1, dsFromNamespace.version()); - } - assertEquals( - initialDescribeCount + 1, - namespace.getDescribeTableVersionCount(), - "describe_table_version should have been called 
once when opening latest version"); - - // Append data - this should call create_table_version exactly once - assertEquals( - 0, - namespace.getCreateTableVersionCount(), - "create_table_version should not have been called yet"); - - try (VectorSchemaRoot appendRoot = VectorSchemaRoot.create(schema, allocator)) { - IntVector aVector = (IntVector) appendRoot.getVector("a"); - IntVector bVector = (IntVector) appendRoot.getVector("b"); - - aVector.allocateNew(2); - bVector.allocateNew(2); - - aVector.set(0, 100); - bVector.set(0, 200); - aVector.set(1, 1000); - bVector.set(1, 2000); - - aVector.setValueCount(2); - bVector.setValueCount(2); - appendRoot.setRowCount(2); - - ArrowReader appendReader = - new ArrowReader(allocator) { - boolean firstRead = true; - - @Override - public boolean loadNextBatch() { - if (firstRead) { - firstRead = false; - return true; - } - return false; - } - - @Override - public long bytesRead() { - return 0; - } - - @Override - protected void closeReadSource() {} - - @Override - protected Schema readSchema() { - return schema; - } - - @Override - public VectorSchemaRoot getVectorSchemaRoot() { - return appendRoot; - } - }; - - // Append through namespace - try (Dataset dataset = - Dataset.write() - .allocator(allocator) - .reader(appendReader) - .namespace(namespace) - .tableId(tableId) - .mode(WriteParams.WriteMode.APPEND) - .execute()) { - assertEquals(4, dataset.countRows()); - assertEquals(2, dataset.version()); - } - } - - assertEquals( - 1, - namespace.getCreateTableVersionCount(), - "create_table_version should have been called exactly once during append"); - - // Open latest version - should call describe_table_version - int describeCountBeforeLatest = namespace.getDescribeTableVersionCount(); - try (Dataset latestDs = - Dataset.open().allocator(allocator).namespace(namespace).tableId(tableId).build()) { - - assertEquals(4, latestDs.countRows()); - assertEquals(2, latestDs.version()); - } - assertEquals( - describeCountBeforeLatest + 
1, - namespace.getDescribeTableVersionCount(), - "describe_table_version should have been called once when opening latest version"); - - // Open specific version (version 1) - should call describe_table_version - int describeCountBeforeV1 = namespace.getDescribeTableVersionCount(); - try (Dataset v1Ds = - Dataset.open() - .allocator(allocator) - .namespace(namespace) - .tableId(tableId) - .readOptions(new ReadOptions.Builder().setVersion(1L).build()) - .build()) { - - assertEquals(2, v1Ds.countRows()); - assertEquals(1, v1Ds.version()); - } - assertEquals( - describeCountBeforeV1 + 1, - namespace.getDescribeTableVersionCount(), - "describe_table_version should have been called once when opening version 1"); - } - } - @Test void testTransactionCommitWithNamespace() throws Exception { try (BufferAllocator allocator = new RootAllocator()) { diff --git a/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java b/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java index be8fff4c442..6e262d0b826 100644 --- a/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java +++ b/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java @@ -394,6 +394,7 @@ static class TableVersionTrackingNamespace implements LanceNamespace, java.io.Cl private final DirectoryNamespace inner; private final AtomicInteger createTableVersionCount = new AtomicInteger(0); private final AtomicInteger describeTableVersionCount = new AtomicInteger(0); + private final AtomicInteger listTableVersionsCount = new AtomicInteger(0); public TableVersionTrackingNamespace(Path root) { Map dirProps = new HashMap<>(); @@ -415,6 +416,10 @@ public int getDescribeTableVersionCount() { return describeTableVersionCount.get(); } + public int getListTableVersionsCount() { + return listTableVersionsCount.get(); + } + public long getNativeHandle() { return inner.getNativeHandle(); } @@ -458,6 +463,7 @@ public DescribeTableVersionResponse describeTableVersion(DescribeTableVersionReq 
@Override public ListTableVersionsResponse listTableVersions(ListTableVersionsRequest request) { + listTableVersionsCount.incrementAndGet(); return inner.listTableVersions(request); } @@ -562,8 +568,8 @@ public VectorSchemaRoot getVectorSchemaRoot() { descResp.getManagedVersioning(), "Expected managedVersioning=true when table_version_tracking_enabled"); - // Open dataset through namespace - this should call describe_table_version for latest - int initialDescribeCount = namespace.getDescribeTableVersionCount(); + // Open dataset through namespace - this should call list_table_versions for latest + int initialListCount = namespace.getListTableVersionsCount(); try (Dataset dsFromNamespace = Dataset.open().allocator(allocator).namespace(namespace).tableId(tableId).build()) { @@ -571,15 +577,16 @@ public VectorSchemaRoot getVectorSchemaRoot() { assertEquals(1, dsFromNamespace.version()); } assertEquals( - initialDescribeCount + 1, - namespace.getDescribeTableVersionCount(), - "describe_table_version should have been called once when opening latest version"); + initialListCount + 1, + namespace.getListTableVersionsCount(), + "list_table_versions should have been called once when opening latest version"); - // Append data - this should call create_table_version exactly once + // Append data - this should call create_table_version again (once more for version 2) + // Note: create_table_version was already called once during CREATE for version 1 assertEquals( - 0, + 1, namespace.getCreateTableVersionCount(), - "create_table_version should not have been called yet"); + "create_table_version should have been called once during CREATE"); try (VectorSchemaRoot appendRoot = VectorSchemaRoot.create(schema, allocator)) { IntVector aVector = (IntVector) appendRoot.getVector("a"); @@ -644,12 +651,12 @@ public VectorSchemaRoot getVectorSchemaRoot() { } assertEquals( - 1, + 2, namespace.getCreateTableVersionCount(), - "create_table_version should have been called exactly once during 
append"); + "create_table_version should have been called twice (once for CREATE, once for APPEND)"); - // Open latest version - should call describe_table_version - int describeCountBeforeLatest = namespace.getDescribeTableVersionCount(); + // Open latest version - should call list_table_versions + int listCountBeforeLatest = namespace.getListTableVersionsCount(); try (Dataset latestDs = Dataset.open().allocator(allocator).namespace(namespace).tableId(tableId).build()) { @@ -657,9 +664,9 @@ public VectorSchemaRoot getVectorSchemaRoot() { assertEquals(2, latestDs.version()); } assertEquals( - describeCountBeforeLatest + 1, - namespace.getDescribeTableVersionCount(), - "describe_table_version should have been called once when opening latest version"); + listCountBeforeLatest + 1, + namespace.getListTableVersionsCount(), + "list_table_versions should have been called once when opening latest version"); // Open specific version (version 1) - should call describe_table_version int describeCountBeforeV1 = namespace.getDescribeTableVersionCount(); From 0948270854823a3f8824f52f0fb4355f78001eca Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 19 Feb 2026 22:33:55 -0800 Subject: [PATCH 11/23] refactor commit as put --- .../src/io/commit/external_manifest.rs | 60 +++++++++---------- .../lance/src/io/commit/namespace_manifest.rs | 16 +++-- 2 files changed, 40 insertions(+), 36 deletions(-) diff --git a/rust/lance-table/src/io/commit/external_manifest.rs b/rust/lance-table/src/io/commit/external_manifest.rs index 11d094b888b..d7a87ba739c 100644 --- a/rust/lance-table/src/io/commit/external_manifest.rs +++ b/rust/lance-table/src/io/commit/external_manifest.rs @@ -90,27 +90,16 @@ pub trait ExternalManifestStore: std::fmt::Debug + Send + Sync { }) } - /// Commit a manifest version to the external store. + /// Put the manifest to the external store. /// /// The staging manifest has been written to `staging_path` on the object store. 
/// This method should atomically claim the version and return the final manifest location. /// - /// # For staging-based stores (e.g., DynamoDB) - /// The default implementation: - /// 1. Records the staging path atomically (fails if version exists) - /// 2. Copies staging to final path on object store - /// 3. Updates external store to point to final path - /// 4. Deletes staging manifest - /// - /// # For direct-write stores (e.g., Namespace) - /// Override this method to: - /// 1. Read staging manifest data from object store - /// 2. Write directly to final location with conditional put - /// 3. Delete staging manifest - /// - /// Returns the final manifest location after successful commit. + /// The default implementation uses put_if_not_exists and put_if_exists to + /// implement a staging-based workflow. Implementations that can write directly + /// (e.g., namespace-backed stores) should override this method. #[allow(clippy::too_many_arguments)] - async fn commit( + async fn put( &self, base_path: &Path, version: u64, @@ -184,8 +173,7 @@ pub trait ExternalManifestStore: std::fmt::Debug + Send + Sync { Ok(location) } - /// Record staging manifest path. Used by default commit implementation. - /// Should fail if the version already exists. + /// Put the manifest path for a given base_uri and version, should fail if the version already exists async fn put_if_not_exists( &self, base_uri: &str, @@ -195,8 +183,7 @@ pub trait ExternalManifestStore: std::fmt::Debug + Send + Sync { e_tag: Option, ) -> Result<()>; - /// Update to final manifest path. Used by default commit implementation. - /// Should fail if the version does not already exist. + /// Put the manifest path for a given base_uri and version, should fail if the version **does not** already exist async fn put_if_exists( &self, base_uri: &str, @@ -236,10 +223,15 @@ pub struct ExternalManifestCommitHandler { } impl ExternalManifestCommitHandler { - /// Finalize a manifest that may be in staging state. 
+ /// The manifest is considered committed once the staging manifest is written + /// to object store and that path is committed to the external store. /// - /// This is used by read paths when they encounter a staging manifest. - /// Write paths use `ExternalManifestStore::commit` directly. + /// However, to fully complete this, the staging manifest should be materialized + /// into the final path, the final path should be committed to the external store + /// and the staging manifest should be deleted. These steps may be completed + /// by any number of readers or writers, so care should be taken to ensure + /// that the manifest is not lost nor any errors occur due to duplicate + /// operations. #[allow(clippy::too_many_arguments)] async fn finalize_manifest( &self, @@ -251,7 +243,7 @@ impl ExternalManifestCommitHandler { store: &dyn OSObjectStore, naming_scheme: ManifestNamingScheme, ) -> std::result::Result { - // Copy the manifest to the final location + // step 1: copy the manifest to the final location let final_manifest_path = naming_scheme.manifest_path(base_path, version); let copied = match store @@ -267,6 +259,11 @@ impl ExternalManifestCommitHandler { } // On S3, the etag can change if originally was MultipartUpload and later was Copy + // https://docs.aws.amazon.com/AmazonS3/latest/API/API_Object.html#AmazonS3-Type-Object-ETag + // We only do MultipartUpload for > 5MB files, so we can skip this check + // if size < 5MB. However, we need to double check the final_manifest_path + // exists before we change the external store, otherwise we may point to a + // non-existing manifest. 
let e_tag = if copied && size < 5 * 1024 * 1024 { e_tag } else { @@ -286,7 +283,7 @@ impl ExternalManifestCommitHandler { return Ok(location); } - // Update the external store to point to the final location + // step 2: flip the external store to point to the final location self.external_manifest_store .put_if_exists( base_path.as_ref(), @@ -297,7 +294,7 @@ impl ExternalManifestCommitHandler { ) .await?; - // Delete the staging manifest + // step 3: delete the staging manifest match store.delete(staging_manifest_path).await { Ok(_) => {} Err(ObjectStoreError::NotFound { .. }) => {} @@ -476,16 +473,19 @@ impl CommitHandler for ExternalManifestCommitHandler { naming_scheme: ManifestNamingScheme, transaction: Option, ) -> std::result::Result { - // Write the manifest to object store with a temporary name + // path we get here is the path to the manifest we want to write + // use object_store.base_path.as_ref() for getting the root of the dataset + + // step 1: Write the manifest we want to commit to object store with a temporary name let path = naming_scheme.manifest_path(base_path, manifest.version); let staging_path = make_staging_manifest_path(&path)?; let write_res = manifest_writer(object_store, manifest, indices, &staging_path, transaction).await?; - // Commit via external store (handles atomic claim, finalization, and cleanup) + // step 2 & 3: Put the manifest to external store let result = self .external_manifest_store - .commit( + .put( base_path, manifest.version, &staging_path, @@ -499,7 +499,7 @@ impl CommitHandler for ExternalManifestCommitHandler { match result { Ok(location) => Ok(location), Err(_) => { - // On conflict, try to delete the staging manifest + // delete the staging manifest match object_store.inner.delete(&staging_path).await { Ok(_) => {} Err(ObjectStoreError::NotFound { .. 
}) => {} diff --git a/rust/lance/src/io/commit/namespace_manifest.rs b/rust/lance/src/io/commit/namespace_manifest.rs index fe259fb9241..632863f39e1 100644 --- a/rust/lance/src/io/commit/namespace_manifest.rs +++ b/rust/lance/src/io/commit/namespace_manifest.rs @@ -67,8 +67,8 @@ impl ExternalManifestStore for LanceNamespaceExternalManifestStore { ))) } - /// Direct-write commit: reads staging manifest and writes directly to final location. - async fn commit( + /// Put the manifest to the namespace store. + async fn put( &self, _base_path: &Path, version: u64, @@ -113,7 +113,6 @@ impl ExternalManifestStore for LanceNamespaceExternalManifestStore { }) } - /// Not used when commit() is overridden. async fn put_if_not_exists( &self, _base_uri: &str, @@ -122,10 +121,12 @@ impl ExternalManifestStore for LanceNamespaceExternalManifestStore { _size: u64, _e_tag: Option, ) -> Result<()> { - Ok(()) + Err(lance_core::Error::NotSupported { + source: "put_if_not_exists is not supported for namespace-backed stores".into(), + location: snafu::location!(), + }) } - /// Not used when commit() is overridden. 
async fn put_if_exists( &self, _base_uri: &str, @@ -134,6 +135,9 @@ impl ExternalManifestStore for LanceNamespaceExternalManifestStore { _size: u64, _e_tag: Option, ) -> Result<()> { - Ok(()) + Err(lance_core::Error::NotSupported { + source: "put_if_exists is not supported for namespace-backed stores".into(), + location: snafu::location!(), + }) } } From 807bc5c2805d958c60cea87dc023687a784e9663 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 19 Feb 2026 22:40:55 -0800 Subject: [PATCH 12/23] update lock --- Cargo.lock | 26 ++++++++++--------- Cargo.toml | 2 +- .../namespace/DirectoryNamespaceTest.java | 2 +- python/python/tests/test_namespace_dir.py | 6 ++--- rust/lance-namespace-impls/src/dir.rs | 2 +- 5 files changed, 20 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 33593b54e05..e562bbdaee9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2991,7 +2991,7 @@ dependencies = [ "libc", "option-ext", "redox_users 0.5.2", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -3230,7 +3230,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -4213,7 +4213,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.2", + "socket2 0.5.10", "system-configuration", "tokio", "tower-service", @@ -4546,7 +4546,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -4641,7 +4641,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -5308,6 +5308,8 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3ad4c947349acd6e37e984eba0254588bd894e6128434338b9e6904e56fb4633" dependencies = [ "reqwest", "serde", @@ -6072,7 +6074,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -7122,7 +7124,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.36", - "socket2 0.6.2", + "socket2 0.5.10", "thiserror 2.0.18", "tokio", "tracing", @@ -7159,9 +7161,9 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.2", + "socket2 0.5.10", "tracing", - "windows-sys 0.60.2", + "windows-sys 0.59.0", ] [[package]] @@ -7762,7 +7764,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.11.0", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -8771,7 +8773,7 @@ dependencies = [ "getrandom 0.4.1", "once_cell", "rustix 1.1.3", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -9735,7 +9737,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.48.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 054c17e07dc..4c33ead299c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,7 +66,7 @@ lance-linalg = { version = "=3.0.0-beta.3", path = "./rust/lance-linalg" } lance-namespace = { version = "=3.0.0-beta.3", path = "./rust/lance-namespace" } lance-namespace-impls = { version = "=3.0.0-beta.3", path = "./rust/lance-namespace-impls" } lance-namespace-datafusion = { version = "=3.0.0-beta.3", path = "./rust/lance-namespace-datafusion" } -lance-namespace-reqwest-client = { path = "../lance-namespace/rust/lance-namespace-reqwest-client" } +lance-namespace-reqwest-client = "0.5.2" lance-table = { version = "=3.0.0-beta.3", path = "./rust/lance-table" } lance-test-macros = { version = 
"=3.0.0-beta.3", path = "./rust/lance-test-macros" } lance-testing = { version = "=3.0.0-beta.3", path = "./rust/lance-testing" } diff --git a/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java b/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java index 6e262d0b826..91827f8cac6 100644 --- a/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java +++ b/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java @@ -480,7 +480,7 @@ public void close() { } @Test - void testManagedVersioningWithDirectoryNamespace(@TempDir Path managedVersioningTempDir) + void testExternalManifestStoreInvokesNamespaceApis(@TempDir Path managedVersioningTempDir) throws Exception { try (BufferAllocator allocator = new RootAllocator()) { // Create namespace with table_version_tracking_enabled diff --git a/python/python/tests/test_namespace_dir.py b/python/python/tests/test_namespace_dir.py index 9630006123d..92c372a0ce4 100644 --- a/python/python/tests/test_namespace_dir.py +++ b/python/python/tests/test_namespace_dir.py @@ -812,12 +812,12 @@ def list_table_versions_json(self, request_json: str) -> str: return json.dumps(response_dict) -def test_e2e_external_manifest_store_invokes_namespace_apis(): +def test_external_manifest_store_invokes_namespace_apis(): """Test that namespace APIs are invoked correctly for managed versioning. This test mirrors: - - Rust: test_e2e_external_manifest_store_invokes_namespace_apis - - Java: testManagedVersioningWithDirectoryNamespace + - Rust: test_external_manifest_store_invokes_namespace_apis + - Java: testExternalManifestStoreInvokesNamespaceApis It verifies: 1. 
list_table_versions is called when opening dataset (latest version) diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index db0043f27c3..bbb5421e64e 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -4733,7 +4733,7 @@ mod tests { } #[tokio::test] - async fn test_e2e_external_manifest_store_invokes_namespace_apis() { + async fn test_external_manifest_store_invokes_namespace_apis() { use arrow::array::{Int32Array, StringArray}; use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; use arrow::record_batch::RecordBatch; From 2722cf950dc0e31cb1c7e922fd73280de1953f59 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 19 Feb 2026 22:49:42 -0800 Subject: [PATCH 13/23] fix lock2 --- Cargo.lock | 220 +++++++++++++++++++------------------- java/lance-jni/Cargo.lock | 150 +++++++++++++------------- python/Cargo.lock | 164 ++++++++++++++-------------- 3 files changed, 269 insertions(+), 265 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e562bbdaee9..cb4687dfe89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -166,9 +166,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.101" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "approx" @@ -475,9 +475,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.39" +version = "0.4.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68650b7df54f0293fd061972a0fb05aaf4fc0879d3b3d21a638a182c5c543b9f" +checksum = "7d67d43201f4d20c78bcda740c142ca52482d81da80681533d33bf3f0596c8e2" dependencies = [ "compression-codecs", "compression-core", @@ -540,7 +540,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", 
"quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -575,7 +575,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -640,9 +640,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.12" +version = "1.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e26bbf46abc608f2dc61fd6cb3b7b0665497cc259a21520151ed98f8b37d2c79" +checksum = "6d203b0bf2626dcba8665f5cd0871d7c2c0930223d6b6be9097592fea21242d0" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -652,9 +652,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.15.4" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b7b6141e96a8c160799cc2d5adecd5cbbe5054cb8c7c4af53da0f83bb7ad256" +checksum = "d9a7b350e3bb1767102698302bc37256cbd48422809984b98d292c40e2579aa9" dependencies = [ "aws-lc-sys", "zeroize", @@ -674,9 +674,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.7.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0f92058d22a46adf53ec57a6a96f34447daf02bff52e8fb956c66bcd5c6ac12" +checksum = "ede2ddc593e6c8acc6ce3358c28d6677a6dc49b65ba4b37a2befe14a11297e75" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -702,9 +702,9 @@ dependencies = [ [[package]] name = "aws-sdk-dynamodb" -version = "1.105.0" +version = "1.106.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82d2214c2ad3a175d3ece5a5af26916c29caa3e12e9e05b3cb8ed5e837b54b67" +checksum = "014bc689d3d71d09c3c1e29cc967bab873e1812926ce38ffb7754305a075072d" dependencies = [ "aws-credential-types", "aws-runtime", @@ -726,9 +726,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3" -version = "1.123.0" +version = "1.124.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c018f22146966fdd493a664f62ee2483dff256b42a08c125ab6a084bde7b77fe" +checksum = "744c09d75dfec039a05cf8e117c995ded3b0baffa6eb83f3ed7075a01d8d8947" dependencies = [ "aws-credential-types", "aws-runtime", @@ -761,9 +761,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.94.0" +version = "1.95.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "699da1961a289b23842d88fe2984c6ff68735fdf9bdcbc69ceaeb2491c9bf434" +checksum = "00c5ff27c6ba2cbd95e6e26e2e736676fdf6bcf96495b187733f521cfe4ce448" dependencies = [ "aws-credential-types", "aws-runtime", @@ -785,9 +785,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.96.0" +version = "1.97.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3e3a4cb3b124833eafea9afd1a6cc5f8ddf3efefffc6651ef76a03cbc6b4981" +checksum = "4d186f1e5a3694a188e5a0640b3115ccc6e084d104e16fd6ba968dca072ffef8" dependencies = [ "aws-credential-types", "aws-runtime", @@ -809,9 +809,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.98.0" +version = "1.99.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89c4f19655ab0856375e169865c91264de965bd74c407c7f1e403184b1049409" +checksum = "9acba7c62f3d4e2408fa998a3a8caacd8b9a5b5549cf36e2372fbdae329d5449" dependencies = [ "aws-credential-types", "aws-runtime", @@ -834,9 +834,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f6ae9b71597dc5fd115d52849d7a5556ad9265885ad3492ea8d73b93bbc46e" +checksum = "37411f8e0f4bea0c3ca0958ce7f18f6439db24d555dbd809787262cd00926aa9" dependencies = [ "aws-credential-types", "aws-smithy-eventstream", @@ -1062,9 +1062,9 @@ dependencies = [ [[package]] name = "aws-types" -version = "1.3.12" +version = "1.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6c50f3cdf47caa8d01f2be4a6663ea02418e892f9bbfd82c7b9a3a37eaccdd3a" +checksum = "0470cc047657c6e286346bdf10a8719d26efd6a91626992e0e64481e44323e96" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1453,7 +1453,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -1479,9 +1479,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.19.1" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "bytemuck" @@ -1628,9 +1628,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.59" +version = "4.5.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5caf74d17c3aec5495110c34cc3f78644bfa89af6c8993ed4de2790e49b6499" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" dependencies = [ "clap_builder", "clap_derive", @@ -1638,9 +1638,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.59" +version = "4.5.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "370daa45065b80218950227371916a1633217ae42b2715b2287b606dcd618e24" +checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" dependencies = [ "anstream", "anstyle", @@ -1657,7 +1657,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -1693,9 +1693,9 @@ dependencies = [ [[package]] name = "compression-codecs" -version = "0.4.36" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00828ba6fd27b45a448e57dbfe84f1029d4c9f26b368157e9a448a5f49a2ec2a" +checksum = "eb7b51a7d9c967fc26773061ba86150f19c50c0d65c887cb1fbe295fd16619b7" dependencies = [ "compression-core", "flate2", @@ -2071,7 +2071,7 @@ dependencies = [ 
"proc-macro2", "quote", "strsim 0.11.1", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -2084,7 +2084,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.11.1", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -2106,7 +2106,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -2117,7 +2117,7 @@ checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ "darling_core 0.23.0", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -2603,7 +2603,7 @@ checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -2911,7 +2911,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -2931,7 +2931,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core 0.20.2", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -2991,7 +2991,7 @@ dependencies = [ "libc", "option-ext", "redox_users 0.5.2", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3002,7 +3002,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -3214,7 +3214,7 @@ checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -3230,7 +3230,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3549,7 +3549,7 @@ checksum 
= "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -3717,9 +3717,9 @@ dependencies = [ [[package]] name = "geographiclib-rs" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc8f647bd562db28a15e0dce4a77d89e3a78f6f85943e782418ebdbb420ea3c4" +checksum = "c5a7f08910fd98737a6eda7568e7c5e645093e073328eeef49758cfe8b0489c7" dependencies = [ "libm", ] @@ -4213,7 +4213,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.6.2", "system-configuration", "tokio", "tower-service", @@ -4434,7 +4434,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", "zstd", ] @@ -4546,7 +4546,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4641,7 +4641,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4652,7 +4652,7 @@ checksum = "f7946b4325269738f270bb55b3c19ab5c5040525f83fd625259422a9d25d9be5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -5370,7 +5370,7 @@ version = "3.0.0-beta.3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -5949,7 +5949,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -5991,7 +5991,7 @@ checksum = "e4db6d5580af57bf992f59068d4ea26fd518574ff48d7639b255a36f9de6e7e9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -6008,9 +6008,9 @@ checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" [[package]] name = "native-tls" -version = "0.2.16" +version = "0.2.18" source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d5d26952a508f321b4d3d2e80e78fc2603eaefcdf0c30783867f19586518bdc" +checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" dependencies = [ "libc", "log", @@ -6074,7 +6074,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -6187,7 +6187,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -6386,7 +6386,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -6715,7 +6715,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -6942,7 +6942,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -7031,7 +7031,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.116", + "syn 2.0.117", "tempfile", ] @@ -7045,7 +7045,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -7124,7 +7124,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.36", - "socket2 0.5.10", + "socket2 0.6.2", "thiserror 2.0.18", "tokio", "tracing", @@ -7161,9 +7161,9 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.2", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -7678,7 +7678,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.116", + "syn 2.0.117", "unicode-ident", 
] @@ -7696,7 +7696,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.116", + "syn 2.0.117", "unicode-ident", ] @@ -7764,7 +7764,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.11.0", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -7920,7 +7920,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -7972,9 +7972,9 @@ dependencies = [ [[package]] name = "security-framework" -version = "3.6.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d17b898a6d6948c3a8ee4372c17cb384f90d2e6e912ef00895b14fd7ab54ec38" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ "bitflags 2.11.0", "core-foundation 0.10.1", @@ -7985,9 +7985,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.16.0" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "321c8673b092a9a42605034a9879d73cb79101ed5fd117bc9a597b89b4e9e61a" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" dependencies = [ "core-foundation-sys", "libc", @@ -8036,7 +8036,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -8047,7 +8047,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -8093,7 +8093,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -8105,7 +8105,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -8278,7 +8278,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.116", + "syn 
2.0.117", ] [[package]] @@ -8395,7 +8395,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -8462,7 +8462,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -8474,7 +8474,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -8497,7 +8497,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.116", + "syn 2.0.117", "typify", "walkdir", ] @@ -8544,9 +8544,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.116" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -8570,7 +8570,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -8773,7 +8773,7 @@ dependencies = [ "getrandom 0.4.1", "once_cell", "rustix 1.1.3", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -8801,7 +8801,7 @@ checksum = "be35209fd0781c5401458ab66e4f98accf63553e8fae7425503e92fdd319783b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -8830,7 +8830,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -8841,7 +8841,7 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -9009,7 +9009,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", 
"quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -9184,7 +9184,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -9293,7 +9293,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.116", + "syn 2.0.117", "thiserror 2.0.18", "unicode-ident", ] @@ -9311,7 +9311,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.116", + "syn 2.0.117", "typify-impl", ] @@ -9617,7 +9617,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", "wasm-bindgen-shared", ] @@ -9737,7 +9737,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] @@ -9767,7 +9767,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -9778,7 +9778,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -10109,7 +10109,7 @@ dependencies = [ "heck", "indexmap", "prettyplease", - "syn 2.0.116", + "syn 2.0.117", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -10125,7 +10125,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -10260,7 +10260,7 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", "synstructure", ] @@ -10281,7 +10281,7 @@ checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] 
[[package]] @@ -10301,7 +10301,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", "synstructure", ] @@ -10341,14 +10341,14 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] name = "zlib-rs" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a33bbf307b25a1774cee0687694ec72fa7814b3ab5c1c12a9d2fc6a36fc439c" +checksum = "c745c48e1007337ed136dc99df34128b9faa6ed542d80a1c673cf55a6d7236c8" [[package]] name = "zmij" diff --git a/java/lance-jni/Cargo.lock b/java/lance-jni/Cargo.lock index d05b58c2095..b949f1b0670 100644 --- a/java/lance-jni/Cargo.lock +++ b/java/lance-jni/Cargo.lock @@ -124,9 +124,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.101" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "approx" @@ -396,9 +396,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.39" +version = "0.4.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68650b7df54f0293fd061972a0fb05aaf4fc0879d3b3d21a638a182c5c543b9f" +checksum = "7d67d43201f4d20c78bcda740c142ca52482d81da80681533d33bf3f0596c8e2" dependencies = [ "compression-codecs", "compression-core", @@ -425,7 +425,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -436,7 +436,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] 
[[package]] @@ -501,9 +501,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.12" +version = "1.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e26bbf46abc608f2dc61fd6cb3b7b0665497cc259a21520151ed98f8b37d2c79" +checksum = "6d203b0bf2626dcba8665f5cd0871d7c2c0930223d6b6be9097592fea21242d0" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -513,9 +513,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.15.4" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b7b6141e96a8c160799cc2d5adecd5cbbe5054cb8c7c4af53da0f83bb7ad256" +checksum = "d9a7b350e3bb1767102698302bc37256cbd48422809984b98d292c40e2579aa9" dependencies = [ "aws-lc-sys", "zeroize", @@ -535,9 +535,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.7.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0f92058d22a46adf53ec57a6a96f34447daf02bff52e8fb956c66bcd5c6ac12" +checksum = "ede2ddc593e6c8acc6ce3358c28d6677a6dc49b65ba4b37a2befe14a11297e75" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -560,9 +560,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.94.0" +version = "1.95.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "699da1961a289b23842d88fe2984c6ff68735fdf9bdcbc69ceaeb2491c9bf434" +checksum = "00c5ff27c6ba2cbd95e6e26e2e736676fdf6bcf96495b187733f521cfe4ce448" dependencies = [ "aws-credential-types", "aws-runtime", @@ -584,9 +584,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.96.0" +version = "1.97.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3e3a4cb3b124833eafea9afd1a6cc5f8ddf3efefffc6651ef76a03cbc6b4981" +checksum = "4d186f1e5a3694a188e5a0640b3115ccc6e084d104e16fd6ba968dca072ffef8" dependencies = [ "aws-credential-types", "aws-runtime", @@ -608,9 +608,9 @@ dependencies = [ 
[[package]] name = "aws-sdk-sts" -version = "1.98.0" +version = "1.99.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89c4f19655ab0856375e169865c91264de965bd74c407c7f1e403184b1049409" +checksum = "9acba7c62f3d4e2408fa998a3a8caacd8b9a5b5549cf36e2372fbdae329d5449" dependencies = [ "aws-credential-types", "aws-runtime", @@ -633,9 +633,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f6ae9b71597dc5fd115d52849d7a5556ad9265885ad3492ea8d73b93bbc46e" +checksum = "37411f8e0f4bea0c3ca0958ce7f18f6439db24d555dbd809787262cd00926aa9" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -813,9 +813,9 @@ dependencies = [ [[package]] name = "aws-types" -version = "1.3.12" +version = "1.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c50f3cdf47caa8d01f2be4a6663ea02418e892f9bbfd82c7b9a3a37eaccdd3a" +checksum = "0470cc047657c6e286346bdf10a8719d26efd6a91626992e0e64481e44323e96" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1016,7 +1016,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -1042,9 +1042,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.19.1" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "bytemuck" @@ -1190,9 +1190,9 @@ dependencies = [ [[package]] name = "compression-codecs" -version = "0.4.36" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00828ba6fd27b45a448e57dbfe84f1029d4c9f26b368157e9a448a5f49a2ec2a" +checksum = "eb7b51a7d9c967fc26773061ba86150f19c50c0d65c887cb1fbe295fd16619b7" dependencies = [ "compression-core", 
"flate2", @@ -1399,7 +1399,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -1410,7 +1410,7 @@ checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ "darling_core", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -1890,7 +1890,7 @@ checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -2163,7 +2163,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -2463,7 +2463,7 @@ checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -2631,9 +2631,9 @@ dependencies = [ [[package]] name = "geographiclib-rs" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc8f647bd562db28a15e0dce4a77d89e3a78f6f85943e782418ebdbb420ea3c4" +checksum = "c5a7f08910fd98737a6eda7568e7c5e645093e073328eeef49758cfe8b0489c7" dependencies = [ "libm", ] @@ -3263,7 +3263,7 @@ checksum = "f7946b4325269738f270bb55b3c19ab5c5040525f83fd625259422a9d25d9be5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -3826,6 +3826,8 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ad4c947349acd6e37e984eba0254588bd894e6128434338b9e6904e56fb4633" dependencies = [ "reqwest", "serde", @@ -4352,7 +4354,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -4721,7 +4723,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", 
"quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -4826,7 +4828,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -4872,7 +4874,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.116", + "syn 2.0.117", "tempfile", ] @@ -4886,7 +4888,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -5363,7 +5365,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.116", + "syn 2.0.117", "unicode-ident", ] @@ -5546,7 +5548,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -5574,9 +5576,9 @@ dependencies = [ [[package]] name = "security-framework" -version = "3.6.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d17b898a6d6948c3a8ee4372c17cb384f90d2e6e912ef00895b14fd7ab54ec38" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ "bitflags", "core-foundation", @@ -5587,9 +5589,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.16.0" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "321c8673b092a9a42605034a9879d73cb79101ed5fd117bc9a597b89b4e9e61a" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" dependencies = [ "core-foundation-sys", "libc", @@ -5638,7 +5640,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -5649,7 +5651,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -5684,7 
+5686,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -5696,7 +5698,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -5859,7 +5861,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -5924,7 +5926,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -5970,7 +5972,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -5993,7 +5995,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.116", + "syn 2.0.117", "typify", "walkdir", ] @@ -6017,9 +6019,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.116" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -6043,7 +6045,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -6243,7 +6245,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -6254,7 +6256,7 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -6376,7 +6378,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ 
-6531,7 +6533,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -6619,7 +6621,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.116", + "syn 2.0.117", "thiserror 2.0.18", "unicode-ident", ] @@ -6637,7 +6639,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.116", + "syn 2.0.117", "typify-impl", ] @@ -6838,7 +6840,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", "wasm-bindgen-shared", ] @@ -6979,7 +6981,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -6990,7 +6992,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -7287,7 +7289,7 @@ dependencies = [ "heck", "indexmap", "prettyplease", - "syn 2.0.116", + "syn 2.0.117", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -7303,7 +7305,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -7416,7 +7418,7 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", "synstructure", ] @@ -7437,7 +7439,7 @@ checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -7457,7 +7459,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", "synstructure", ] @@ -7497,14 +7499,14 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] name = "zlib-rs" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a33bbf307b25a1774cee0687694ec72fa7814b3ab5c1c12a9d2fc6a36fc439c" +checksum = "c745c48e1007337ed136dc99df34128b9faa6ed542d80a1c673cf55a6d7236c8" [[package]] name = "zmij" diff --git a/python/Cargo.lock b/python/Cargo.lock index 4bc40ad5b34..7ca0d45387d 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -178,9 +178,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.101" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "approx" @@ -527,7 +527,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -538,7 +538,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -603,9 +603,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.12" +version = "1.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e26bbf46abc608f2dc61fd6cb3b7b0665497cc259a21520151ed98f8b37d2c79" +checksum = "6d203b0bf2626dcba8665f5cd0871d7c2c0930223d6b6be9097592fea21242d0" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -615,9 +615,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.15.4" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b7b6141e96a8c160799cc2d5adecd5cbbe5054cb8c7c4af53da0f83bb7ad256" +checksum = "d9a7b350e3bb1767102698302bc37256cbd48422809984b98d292c40e2579aa9" dependencies = [ 
"aws-lc-sys", "zeroize", @@ -637,9 +637,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.7.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0f92058d22a46adf53ec57a6a96f34447daf02bff52e8fb956c66bcd5c6ac12" +checksum = "ede2ddc593e6c8acc6ce3358c28d6677a6dc49b65ba4b37a2befe14a11297e75" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -662,9 +662,9 @@ dependencies = [ [[package]] name = "aws-sdk-dynamodb" -version = "1.105.0" +version = "1.106.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82d2214c2ad3a175d3ece5a5af26916c29caa3e12e9e05b3cb8ed5e837b54b67" +checksum = "014bc689d3d71d09c3c1e29cc967bab873e1812926ce38ffb7754305a075072d" dependencies = [ "aws-credential-types", "aws-runtime", @@ -686,9 +686,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.94.0" +version = "1.95.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "699da1961a289b23842d88fe2984c6ff68735fdf9bdcbc69ceaeb2491c9bf434" +checksum = "00c5ff27c6ba2cbd95e6e26e2e736676fdf6bcf96495b187733f521cfe4ce448" dependencies = [ "aws-credential-types", "aws-runtime", @@ -710,9 +710,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.96.0" +version = "1.97.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3e3a4cb3b124833eafea9afd1a6cc5f8ddf3efefffc6651ef76a03cbc6b4981" +checksum = "4d186f1e5a3694a188e5a0640b3115ccc6e084d104e16fd6ba968dca072ffef8" dependencies = [ "aws-credential-types", "aws-runtime", @@ -734,9 +734,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.98.0" +version = "1.99.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89c4f19655ab0856375e169865c91264de965bd74c407c7f1e403184b1049409" +checksum = "9acba7c62f3d4e2408fa998a3a8caacd8b9a5b5549cf36e2372fbdae329d5449" dependencies = [ "aws-credential-types", "aws-runtime", @@ -759,9 +759,9 @@ 
dependencies = [ [[package]] name = "aws-sigv4" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f6ae9b71597dc5fd115d52849d7a5556ad9265885ad3492ea8d73b93bbc46e" +checksum = "37411f8e0f4bea0c3ca0958ce7f18f6439db24d555dbd809787262cd00926aa9" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -948,9 +948,9 @@ dependencies = [ [[package]] name = "aws-types" -version = "1.3.12" +version = "1.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c50f3cdf47caa8d01f2be4a6663ea02418e892f9bbfd82c7b9a3a37eaccdd3a" +checksum = "0470cc047657c6e286346bdf10a8719d26efd6a91626992e0e64481e44323e96" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1177,7 +1177,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -1203,9 +1203,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.19.1" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "bytemuck" @@ -1608,7 +1608,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -1621,7 +1621,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -1632,7 +1632,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -1643,7 +1643,7 @@ checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ "darling_core 0.23.0", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -2164,7 +2164,7 @@ checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" dependencies = 
[ "datafusion-doc", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -2455,7 +2455,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -2465,7 +2465,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -2509,7 +2509,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -2893,7 +2893,7 @@ checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -3070,9 +3070,9 @@ dependencies = [ [[package]] name = "geographiclib-rs" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc8f647bd562db28a15e0dce4a77d89e3a78f6f85943e782418ebdbb420ea3c4" +checksum = "c5a7f08910fd98737a6eda7568e7c5e645093e073328eeef49758cfe8b0489c7" dependencies = [ "libm", ] @@ -3677,7 +3677,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", "zstd", ] @@ -3829,7 +3829,7 @@ checksum = "f7946b4325269738f270bb55b3c19ab5c5040525f83fd625259422a9d25d9be5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -4349,6 +4349,8 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ad4c947349acd6e37e984eba0254588bd894e6128434338b9e6904e56fb4633" dependencies = [ "reqwest", "serde", @@ -5079,7 +5081,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -5496,7 +5498,7 @@ checksum = 
"6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -5601,7 +5603,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -5671,7 +5673,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.116", + "syn 2.0.117", "tempfile", ] @@ -5685,7 +5687,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -5804,7 +5806,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -5817,7 +5819,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -6076,7 +6078,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -6339,7 +6341,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.116", + "syn 2.0.117", "unicode-ident", ] @@ -6544,7 +6546,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -6582,9 +6584,9 @@ dependencies = [ [[package]] name = "security-framework" -version = "3.6.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d17b898a6d6948c3a8ee4372c17cb384f90d2e6e912ef00895b14fd7ab54ec38" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ "bitflags 2.11.0", "core-foundation", @@ -6595,9 +6597,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.16.0" +version = "2.17.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "321c8673b092a9a42605034a9879d73cb79101ed5fd117bc9a597b89b4e9e61a" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" dependencies = [ "core-foundation-sys", "libc", @@ -6646,7 +6648,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -6657,7 +6659,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -6692,7 +6694,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -6704,7 +6706,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -6867,7 +6869,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -6943,7 +6945,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -7011,7 +7013,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -7023,7 +7025,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -7046,7 +7048,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.116", + "syn 2.0.117", "typify", "walkdir", ] @@ -7070,9 +7072,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.116" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", 
"quote", @@ -7096,7 +7098,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -7313,7 +7315,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -7324,7 +7326,7 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -7446,7 +7448,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -7611,7 +7613,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -7737,7 +7739,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.116", + "syn 2.0.117", "thiserror 2.0.18", "unicode-ident", ] @@ -7755,7 +7757,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.116", + "syn 2.0.117", "typify-impl", ] @@ -7989,7 +7991,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", "wasm-bindgen-shared", ] @@ -8130,7 +8132,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -8141,7 +8143,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -8372,7 +8374,7 @@ dependencies = [ "heck", "indexmap", "prettyplease", - "syn 2.0.116", + "syn 2.0.117", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -8388,7 +8390,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn 
2.0.116", + "syn 2.0.117", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -8526,7 +8528,7 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", "synstructure", ] @@ -8547,7 +8549,7 @@ checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] @@ -8567,7 +8569,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", "synstructure", ] @@ -8607,14 +8609,14 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.116", + "syn 2.0.117", ] [[package]] name = "zlib-rs" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a33bbf307b25a1774cee0687694ec72fa7814b3ab5c1c12a9d2fc6a36fc439c" +checksum = "c745c48e1007337ed136dc99df34128b9faa6ed542d80a1c673cf55a6d7236c8" [[package]] name = "zmij" From 03559ef3df9ddf56ad1d35c3868cb86aeefd8824 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 19 Feb 2026 23:02:24 -0800 Subject: [PATCH 14/23] fix java --- .../main/java/org/lance/WriteDatasetBuilder.java | 15 ++++++++++++--- rust/lance-namespace-impls/src/dir.rs | 5 +++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/java/src/main/java/org/lance/WriteDatasetBuilder.java b/java/src/main/java/org/lance/WriteDatasetBuilder.java index d904cec7277..5985f903119 100644 --- a/java/src/main/java/org/lance/WriteDatasetBuilder.java +++ b/java/src/main/java/org/lance/WriteDatasetBuilder.java @@ -364,6 +364,7 @@ public Dataset execute() { private Dataset executeWithNamespace() { String tableUri; Map namespaceStorageOptions = null; + boolean managedVersioning = false; // Mode-specific namespace operations if (mode == 
WriteParams.WriteMode.CREATE) { @@ -379,13 +380,16 @@ private Dataset executeWithNamespace() { DeclareTableResponse declareResponse = namespace.declareTable(declareRequest); location = declareResponse.getLocation(); responseStorageOptions = declareResponse.getStorageOptions(); + managedVersioning = Boolean.TRUE.equals(declareResponse.getManagedVersioning()); } catch (UnsupportedOperationException e) { // Fall back to deprecated createEmptyTable + // Note: createEmptyTable doesn't support managedVersioning CreateEmptyTableRequest fallbackRequest = new CreateEmptyTableRequest(); fallbackRequest.setId(tableId); CreateEmptyTableResponse fallbackResponse = namespace.createEmptyTable(fallbackRequest); location = fallbackResponse.getLocation(); responseStorageOptions = fallbackResponse.getStorageOptions(); + managedVersioning = false; } tableUri = location; @@ -407,6 +411,7 @@ private Dataset executeWithNamespace() { } namespaceStorageOptions = ignoreNamespaceStorageOptions ? null : response.getStorageOptions(); + managedVersioning = Boolean.TRUE.equals(response.getManagedVersioning()); } // Merge storage options (namespace options + user options, with namespace taking precedence) @@ -436,9 +441,13 @@ private Dataset executeWithNamespace() { ? 
null : new LanceNamespaceStorageOptionsProvider(namespace, tableId); - // Use Dataset.create() with namespace for managed versioning support - return createDatasetWithStreamAndNamespace( - tableUri, params, storageOptionsProvider, namespace, tableId); + // Only use namespace for commit handling if managedVersioning is enabled + if (managedVersioning) { + return createDatasetWithStreamAndNamespace( + tableUri, params, storageOptionsProvider, namespace, tableId); + } else { + return createDatasetWithStream(tableUri, params, storageOptionsProvider); + } } private Dataset executeWithUri() { diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index bbb5421e64e..ee0440c5570 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -1505,6 +1505,11 @@ impl LanceNamespace for DirectoryNamespace { Ok(DeclareTableResponse { location: Some(table_uri), storage_options, + managed_versioning: if self.table_version_tracking_enabled { + Some(true) + } else { + None + }, ..Default::default() }) } From 07fc8e86797f03e328a9edff9123e30b12c06599 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 19 Feb 2026 23:37:50 -0800 Subject: [PATCH 15/23] fix test --- .../test/java/org/lance/namespace/DirectoryNamespaceTest.java | 3 +-- rust/lance-namespace-impls/src/dir.rs | 4 ++++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java b/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java index 91827f8cac6..5850f57453f 100644 --- a/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java +++ b/java/src/test/java/org/lance/namespace/DirectoryNamespaceTest.java @@ -581,8 +581,7 @@ public VectorSchemaRoot getVectorSchemaRoot() { namespace.getListTableVersionsCount(), "list_table_versions should have been called once when opening latest version"); - // Append data - this should call create_table_version again (once more for 
version 2) - // Note: create_table_version was already called once during CREATE for version 1 + // Verify create_table_version was called once during CREATE assertEquals( 1, namespace.getCreateTableVersionCount(), diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index ee0440c5570..6dd6926ac31 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -1447,6 +1447,10 @@ impl LanceNamespace for DirectoryNamespace { } else if request.vend_credentials == Some(false) { response.storage_options = None; } + // Set managed_versioning when table_version_tracking_enabled + if self.table_version_tracking_enabled { + response.managed_versioning = Some(true); + } return Ok(response); } From 4b3bc8badb4baeb06ea5a51cdf4c86a02901dc37 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 19 Feb 2026 23:57:18 -0800 Subject: [PATCH 16/23] fix python test --- python/python/tests/test_namespace_dir.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/python/python/tests/test_namespace_dir.py b/python/python/tests/test_namespace_dir.py index 92c372a0ce4..c7ca4379342 100644 --- a/python/python/tests/test_namespace_dir.py +++ b/python/python/tests/test_namespace_dir.py @@ -852,11 +852,12 @@ def test_external_manifest_store_invokes_namespace_apis(): "list_table_versions should be called once when opening latest version" ) - # Append data - should call create_table_version exactly once - assert namespace.create_table_version_count == 0, ( - "create_table_version should not have been called yet" + # Verify create_table_version was called once during CREATE + assert namespace.create_table_version_count == 1, ( + "create_table_version should have been called once during CREATE" ) + # Append data - should call create_table_version again table2 = pa.Table.from_pylist([{"a": 100, "b": 200}, {"a": 1000, "b": 2000}]) ds = lance.write_dataset( table2, namespace=namespace, table_id=table_id, 
mode="append" @@ -864,8 +865,8 @@ def test_external_manifest_store_invokes_namespace_apis(): assert ds.count_rows() == 4 assert len(ds.versions()) == 2 - assert namespace.create_table_version_count == 1, ( - "create_table_version should be called exactly once during append" + assert namespace.create_table_version_count == 2, ( + "create_table_version should be called twice (once for CREATE, once for APPEND)" ) # Open latest version - should call list_table_versions From 2c9e33eefb31e83e7db1f77c559b69b9044c96f6 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Fri, 20 Feb 2026 00:12:04 -0800 Subject: [PATCH 17/23] just make declare table work --- rust/lance-namespace-impls/src/dir.rs | 59 ++++++++++++++++----------- rust/lance/src/dataset.rs | 33 ++++++++++++++- 2 files changed, 66 insertions(+), 26 deletions(-) diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index 6dd6926ac31..53cc76a0760 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -4747,6 +4747,8 @@ mod tests { use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; use arrow::record_batch::RecordBatch; use lance::dataset::builder::DatasetBuilder; + use lance::dataset::{WriteMode, WriteParams}; + use lance::Dataset; use lance_namespace::models::CreateNamespaceRequest; let temp_dir = TempStdDir::default(); @@ -4770,24 +4772,8 @@ mod tests { // Create a table with multi-level ID (namespace + table) let table_id = vec!["workspace".to_string(), "test_table".to_string()]; - let schema = create_test_schema(); - let ipc_data = create_test_ipc_data(&schema); - let mut create_req = CreateTableRequest::new(); - create_req.id = Some(table_id.clone()); - ns.create_table(create_req, bytes::Bytes::from(ipc_data)) - .await - .unwrap(); - - // Open the dataset using from_namespace - let mut dataset = DatasetBuilder::from_namespace(ns.clone(), table_id.clone()) - .await - .unwrap() - .load() - .await - .unwrap(); - 
assert_eq!(dataset.version().version, 1); - // Create some data to append + // Create some initial data let arrow_schema = Arc::new(ArrowSchema::new(vec![ Field::new("id", DataType::Int32, false), Field::new("name", DataType::Utf8, true), @@ -4801,20 +4787,45 @@ mod tests { ) .unwrap(); - // Append data - this should call create_table_version exactly once + // Create a table using write_into_namespace + let batches = RecordBatchIterator::new(vec![Ok(batch.clone())], arrow_schema.clone()); + let write_params = WriteParams { + mode: WriteMode::Create, + ..Default::default() + }; + let mut dataset = Dataset::write_into_namespace( + batches, + ns.clone(), + table_id.clone(), + Some(write_params), + ) + .await + .unwrap(); + assert_eq!(dataset.version().version, 1); + + // Verify create_table_version was called once during initial write_into_namespace assert_eq!( tracking_ns.create_table_version_calls(), - 0, - "create_table_version should not have been called yet" + 1, + "create_table_version should have been called once during initial write_into_namespace" ); - let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema); - dataset.append(batches, None).await.unwrap(); + // Append data - this should call create_table_version again + let append_batch = RecordBatch::try_new( + arrow_schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![4, 5, 6])), + Arc::new(StringArray::from(vec!["d", "e", "f"])), + ], + ) + .unwrap(); + let append_batches = RecordBatchIterator::new(vec![Ok(append_batch)], arrow_schema); + dataset.append(append_batches, None).await.unwrap(); assert_eq!( tracking_ns.create_table_version_calls(), - 1, - "create_table_version should have been called exactly once during commit" + 2, + "create_table_version should have been called twice (once for create, once for append)" ); // checkout_latest should call list_table_versions exactly once diff --git a/rust/lance/src/dataset.rs b/rust/lance/src/dataset.rs index 9feb02615e1..f62aa9df32d 100644 --- 
a/rust/lance/src/dataset.rs +++ b/rust/lance/src/dataset.rs @@ -41,9 +41,12 @@ use lance_table::format::{ pb, DataFile, DataStorageFormat, DeletionFile, Fragment, IndexMetadata, Manifest, RowIdMeta, }; use lance_table::io::commit::{ - migrate_scheme_to_v2, write_manifest_file_to_path, CommitConfig, CommitError, CommitHandler, - CommitLock, ManifestLocation, ManifestNamingScheme, VERSIONS_DIR, + external_manifest::ExternalManifestCommitHandler, migrate_scheme_to_v2, + write_manifest_file_to_path, CommitConfig, CommitError, CommitHandler, CommitLock, + ManifestLocation, ManifestNamingScheme, VERSIONS_DIR, }; + +use crate::io::commit::namespace_manifest::LanceNamespaceExternalManifestStore; use lance_table::io::manifest::{read_manifest, read_manifest_indexes}; use object_store::path::Path; use prost::Message; @@ -825,6 +828,19 @@ impl Dataset { location: location!(), })?; + // Set up commit handler when managed_versioning is enabled + if response.managed_versioning == Some(true) { + let external_store = LanceNamespaceExternalManifestStore::new( + namespace.clone(), + table_id.clone(), + ); + let commit_handler: Arc = + Arc::new(ExternalManifestCommitHandler { + external_manifest_store: Arc::new(external_store), + }); + write_params.commit_handler = Some(commit_handler); + } + // Set initial credentials and provider from namespace if let Some(namespace_storage_options) = response.storage_options { let provider: Arc = Arc::new( @@ -874,6 +890,19 @@ impl Dataset { location: location!(), })?; + // Set up commit handler when managed_versioning is enabled + if response.managed_versioning == Some(true) { + let external_store = LanceNamespaceExternalManifestStore::new( + namespace.clone(), + table_id.clone(), + ); + let commit_handler: Arc = + Arc::new(ExternalManifestCommitHandler { + external_manifest_store: Arc::new(external_store), + }); + write_params.commit_handler = Some(commit_handler); + } + // Set initial credentials and provider from namespace if let 
Some(namespace_storage_options) = response.storage_options { let provider: Arc = From b07e80b74c9d009137282e8a3c5c43496a6e73be Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Fri, 20 Feb 2026 00:27:05 -0800 Subject: [PATCH 18/23] fix lint --- python/python/tests/test_namespace_dir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/python/tests/test_namespace_dir.py b/python/python/tests/test_namespace_dir.py index c7ca4379342..c414f327967 100644 --- a/python/python/tests/test_namespace_dir.py +++ b/python/python/tests/test_namespace_dir.py @@ -866,7 +866,7 @@ def test_external_manifest_store_invokes_namespace_apis(): assert len(ds.versions()) == 2 assert namespace.create_table_version_count == 2, ( - "create_table_version should be called twice (once for CREATE, once for APPEND)" + "create_table_version should be called twice (CREATE + APPEND)" ) # Open latest version - should call list_table_versions From 0309c149a6fa1fabddeecc0fdb5f5ba912953c19 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Fri, 20 Feb 2026 01:06:11 -0800 Subject: [PATCH 19/23] fix python test race condition --- python/python/tests/test_namespace_dir.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/python/tests/test_namespace_dir.py b/python/python/tests/test_namespace_dir.py index c414f327967..dd979a73a41 100644 --- a/python/python/tests/test_namespace_dir.py +++ b/python/python/tests/test_namespace_dir.py @@ -826,8 +826,11 @@ def test_external_manifest_store_invokes_namespace_apis(): """ with tempfile.TemporaryDirectory() as tmpdir: namespace = TableVersionTrackingNamespace(root=f"file://{tmpdir}") - table_name = "test_table" - table_id = [table_name] + + # Create parent namespace first (like Rust/Java tests) + namespace.create_namespace(CreateNamespaceRequest(id=["workspace"])) + + table_id = ["workspace", "test_table"] # Create initial table table1 = pa.Table.from_pylist([{"a": 1, "b": 2}, {"a": 10, "b": 20}]) From 
036ec5af6a3713b38d121326e228f56ff199cd82 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Fri, 20 Feb 2026 09:07:03 -0800 Subject: [PATCH 20/23] improve dir related test to use multi-level --- rust/lance-namespace-impls/src/dir.rs | 252 ++++++++++++++------------ 1 file changed, 132 insertions(+), 120 deletions(-) diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index 53cc76a0760..be711fd1e94 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -3998,8 +3998,8 @@ mod tests { use arrow::array::{Int32Array, RecordBatchIterator}; use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; use arrow::record_batch::RecordBatch; - use lance::dataset::builder::DatasetBuilder; - use lance_namespace::models::ListTableVersionsRequest; + use lance::dataset::{Dataset, WriteMode, WriteParams}; + use lance_namespace::models::{CreateNamespaceRequest, ListTableVersionsRequest}; let temp_dir = TempStrDir::default(); let temp_path: &str = &temp_dir; @@ -4012,25 +4012,13 @@ mod tests { .unwrap(), ); - // Create a table (version 1) - let table_id = vec!["test_table".to_string()]; - let schema = create_test_schema(); - let ipc_data = create_test_ipc_data(&schema); - let mut create_req = CreateTableRequest::new(); - create_req.id = Some(table_id.clone()); - namespace - .create_table(create_req, bytes::Bytes::from(ipc_data)) - .await - .unwrap(); - - // Open dataset and append data to create versions 2 and 3 - let mut dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone()) - .await - .unwrap() - .load() - .await - .unwrap(); + // Create parent namespace first + let mut create_ns_req = CreateNamespaceRequest::new(); + create_ns_req.id = Some(vec!["workspace".to_string()]); + namespace.create_namespace(create_ns_req).await.unwrap(); + // Create a table using write_into_namespace (version 1) + let table_id = vec!["workspace".to_string(), "test_table".to_string()]; let arrow_schema = 
Arc::new(ArrowSchema::new(vec![Field::new( "id", DataType::Int32, @@ -4038,16 +4026,39 @@ mod tests { )])); let batch = RecordBatch::try_new( arrow_schema.clone(), - vec![Arc::new(Int32Array::from(vec![100, 200]))], + vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], ) .unwrap(); + let batches = RecordBatchIterator::new(vec![Ok(batch.clone())], arrow_schema.clone()); + let write_params = WriteParams { + mode: WriteMode::Create, + ..Default::default() + }; + let mut dataset = Dataset::write_into_namespace( + batches, + namespace.clone(), + table_id.clone(), + Some(write_params), + ) + .await + .unwrap(); // Append to create version 2 - let batches = RecordBatchIterator::new(vec![Ok(batch.clone())], arrow_schema.clone()); + let batch2 = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(Int32Array::from(vec![100, 200]))], + ) + .unwrap(); + let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema.clone()); dataset.append(batches, None).await.unwrap(); // Append to create version 3 - let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema); + let batch3 = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(Int32Array::from(vec![300, 400]))], + ) + .unwrap(); + let batches = RecordBatchIterator::new(vec![Ok(batch3)], arrow_schema); dataset.append(batches, None).await.unwrap(); // List versions - should have versions 1, 2, and 3 @@ -4103,8 +4114,8 @@ mod tests { use arrow::array::{Int32Array, RecordBatchIterator}; use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; use arrow::record_batch::RecordBatch; - use lance::dataset::builder::DatasetBuilder; - use lance_namespace::models::DescribeTableVersionRequest; + use lance::dataset::{Dataset, WriteMode, WriteParams}; + use lance_namespace::models::{CreateNamespaceRequest, DescribeTableVersionRequest}; let temp_dir = TempStrDir::default(); let temp_path: &str = &temp_dir; @@ -4117,25 +4128,13 @@ mod tests { .unwrap(), ); - // Create a table (version 1) - let table_id 
= vec!["test_table".to_string()]; - let schema = create_test_schema(); - let ipc_data = create_test_ipc_data(&schema); - let mut create_req = CreateTableRequest::new(); - create_req.id = Some(table_id.clone()); - namespace - .create_table(create_req, bytes::Bytes::from(ipc_data)) - .await - .unwrap(); - - // Open dataset and append data to create version 2 - let mut dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone()) - .await - .unwrap() - .load() - .await - .unwrap(); + // Create parent namespace first + let mut create_ns_req = CreateNamespaceRequest::new(); + create_ns_req.id = Some(vec!["workspace".to_string()]); + namespace.create_namespace(create_ns_req).await.unwrap(); + // Create a table using write_into_namespace (version 1) + let table_id = vec!["workspace".to_string(), "test_table".to_string()]; let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new( "id", DataType::Int32, @@ -4143,11 +4142,30 @@ mod tests { )])); let batch = RecordBatch::try_new( arrow_schema.clone(), - vec![Arc::new(Int32Array::from(vec![100, 200]))], + vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], ) .unwrap(); + let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema.clone()); + let write_params = WriteParams { + mode: WriteMode::Create, + ..Default::default() + }; + let mut dataset = Dataset::write_into_namespace( + batches, + namespace.clone(), + table_id.clone(), + Some(write_params), + ) + .await + .unwrap(); - let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema); + // Append data to create version 2 + let batch2 = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(Int32Array::from(vec![100, 200]))], + ) + .unwrap(); + let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema); dataset.append(batches, None).await.unwrap(); // Describe version 1 @@ -4210,8 +4228,8 @@ mod tests { use arrow::array::{Int32Array, RecordBatchIterator}; use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; 
use arrow::record_batch::RecordBatch; - use lance::dataset::builder::DatasetBuilder; - use lance_namespace::models::DescribeTableVersionRequest; + use lance::dataset::{Dataset, WriteMode, WriteParams}; + use lance_namespace::models::{CreateNamespaceRequest, DescribeTableVersionRequest}; let temp_dir = TempStrDir::default(); let temp_path: &str = &temp_dir; @@ -4224,25 +4242,13 @@ mod tests { .unwrap(), ); - // Create a table (version 1) - let table_id = vec!["test_table".to_string()]; - let schema = create_test_schema(); - let ipc_data = create_test_ipc_data(&schema); - let mut create_req = CreateTableRequest::new(); - create_req.id = Some(table_id.clone()); - namespace - .create_table(create_req, bytes::Bytes::from(ipc_data)) - .await - .unwrap(); - - // Open dataset and append data to create versions 2 and 3 - let mut dataset = DatasetBuilder::from_namespace(namespace.clone(), table_id.clone()) - .await - .unwrap() - .load() - .await - .unwrap(); + // Create parent namespace first + let mut create_ns_req = CreateNamespaceRequest::new(); + create_ns_req.id = Some(vec!["workspace".to_string()]); + namespace.create_namespace(create_ns_req).await.unwrap(); + // Create a table using write_into_namespace (version 1) + let table_id = vec!["workspace".to_string(), "test_table".to_string()]; let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new( "id", DataType::Int32, @@ -4250,16 +4256,39 @@ mod tests { )])); let batch = RecordBatch::try_new( arrow_schema.clone(), - vec![Arc::new(Int32Array::from(vec![100, 200]))], + vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], + ) + .unwrap(); + let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema.clone()); + let write_params = WriteParams { + mode: WriteMode::Create, + ..Default::default() + }; + let mut dataset = Dataset::write_into_namespace( + batches, + namespace.clone(), + table_id.clone(), + Some(write_params), ) + .await .unwrap(); // Append to create version 2 - let batches = 
RecordBatchIterator::new(vec![Ok(batch.clone())], arrow_schema.clone()); + let batch2 = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(Int32Array::from(vec![100, 200]))], + ) + .unwrap(); + let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema.clone()); dataset.append(batches, None).await.unwrap(); // Append to create version 3 - let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema); + let batch3 = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(Int32Array::from(vec![300, 400]))], + ) + .unwrap(); + let batches = RecordBatchIterator::new(vec![Ok(batch3)], arrow_schema); dataset.append(batches, None).await.unwrap(); // Describe latest version (no version specified) @@ -4537,9 +4566,6 @@ mod tests { /// End-to-end integration test module for table version tracking. mod e2e_table_version_tracking { use super::*; - use lance::io::commit::namespace_manifest::LanceNamespaceExternalManifestStore; - use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler; - use lance_table::io::commit::CommitHandler; use std::sync::atomic::{AtomicUsize, Ordering}; /// Tracking wrapper around a namespace that counts method invocations. 
@@ -4861,12 +4887,11 @@ mod tests { } #[tokio::test] - async fn test_e2e_dataset_commit_with_external_manifest_store() { + async fn test_dataset_commit_with_external_manifest_store() { use arrow::array::{Int32Array, StringArray}; use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; use arrow::record_batch::RecordBatch; use futures::TryStreamExt; - use lance::dataset::builder::DatasetBuilder; use lance::dataset::{Dataset, WriteMode, WriteParams}; use lance_namespace::models::CreateNamespaceRequest; use lance_table::io::commit::ManifestNamingScheme; @@ -4882,43 +4907,15 @@ mod tests { .await .unwrap(); - let tracking_ns = Arc::new(TrackingNamespace::new(inner_ns)); + let tracking_ns: Arc = Arc::new(TrackingNamespace::new(inner_ns)); // Create parent namespace let mut create_ns_req = CreateNamespaceRequest::new(); create_ns_req.id = Some(vec!["workspace".to_string()]); tracking_ns.create_namespace(create_ns_req).await.unwrap(); - // Create a table with multi-level ID (namespace + table) + // Create a table using write_into_namespace let table_id = vec!["workspace".to_string(), "test_table".to_string()]; - let schema = create_test_schema(); - let ipc_data = create_test_ipc_data(&schema); - let mut create_req = CreateTableRequest::new(); - create_req.id = Some(table_id.clone()); - tracking_ns - .create_table(create_req, bytes::Bytes::from(ipc_data)) - .await - .unwrap(); - - // Open the dataset using from_namespace to get proper paths - let dataset = DatasetBuilder::from_namespace(tracking_ns.clone(), table_id.clone()) - .await - .unwrap() - .load() - .await - .unwrap(); - assert_eq!(dataset.version().version, 1); - - // Create the external manifest store commit handler - let external_store = Arc::new(LanceNamespaceExternalManifestStore::new( - tracking_ns.clone(), - table_id.clone(), - )); - let commit_handler: Arc = Arc::new(ExternalManifestCommitHandler { - external_manifest_store: external_store, - }); - - // Create some data to append let arrow_schema = 
Arc::new(ArrowSchema::new(vec![ Field::new("id", DataType::Int32, false), Field::new("name", DataType::Utf8, true), @@ -4931,28 +4928,43 @@ mod tests { ], ) .unwrap(); + let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema.clone()); + let write_params = WriteParams { + mode: WriteMode::Create, + ..Default::default() + }; + let dataset = Dataset::write_into_namespace( + batches, + tracking_ns.clone(), + table_id.clone(), + Some(write_params), + ) + .await + .unwrap(); + assert_eq!(dataset.version().version, 1); - // Write data using the external manifest commit handler - let initial_create_calls = tracking_ns.create_table_version_calls(); + // Append data using write_into_namespace (APPEND mode) + let batch2 = RecordBatch::try_new( + arrow_schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![4, 5, 6])), + Arc::new(StringArray::from(vec!["d", "e", "f"])), + ], + ) + .unwrap(); + let batches = RecordBatchIterator::new(vec![Ok(batch2)], arrow_schema); let write_params = WriteParams { mode: WriteMode::Append, - commit_handler: Some(commit_handler), ..Default::default() }; - - let batches = RecordBatchIterator::new(vec![Ok(batch)], arrow_schema); - Dataset::write(batches, dataset.uri(), Some(write_params)) - .await - .unwrap(); - - // Verify create_table_version was called during commit - assert!( - tracking_ns.create_table_version_calls() > initial_create_calls, - "create_table_version should have been called during dataset write. 
\ - Initial: {}, Current: {}", - initial_create_calls, - tracking_ns.create_table_version_calls() - ); + Dataset::write_into_namespace( + batches, + tracking_ns.clone(), + table_id.clone(), + Some(write_params), + ) + .await + .unwrap(); // Verify version 2 was created using the dataset's object_store // List manifests in the versions directory to find the V2 named manifest From 146510ae1d2a2afa52140c0eca73ce11b4543b79 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Fri, 20 Feb 2026 09:38:26 -0800 Subject: [PATCH 21/23] use in memory --- python/python/tests/test_namespace_dir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/python/tests/test_namespace_dir.py b/python/python/tests/test_namespace_dir.py index dd979a73a41..9bb535ab9f0 100644 --- a/python/python/tests/test_namespace_dir.py +++ b/python/python/tests/test_namespace_dir.py @@ -825,7 +825,7 @@ def test_external_manifest_store_invokes_namespace_apis(): 3. describe_table_version is called when opening specific version """ with tempfile.TemporaryDirectory() as tmpdir: - namespace = TableVersionTrackingNamespace(root=f"file://{tmpdir}") + namespace = TableVersionTrackingNamespace(root=tmpdir) # Create parent namespace first (like Rust/Java tests) namespace.create_namespace(CreateNamespaceRequest(id=["workspace"])) From 7c732863a3d628f8890d6ba0aaf64e2359df0b64 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Fri, 20 Feb 2026 11:44:41 -0800 Subject: [PATCH 22/23] skip windows --- python/python/tests/test_namespace_dir.py | 5 +++++ rust/lance-namespace-impls/src/dir.rs | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/python/python/tests/test_namespace_dir.py b/python/python/tests/test_namespace_dir.py index 9bb535ab9f0..abbb37b9865 100644 --- a/python/python/tests/test_namespace_dir.py +++ b/python/python/tests/test_namespace_dir.py @@ -10,6 +10,7 @@ These tests mirror the Rust tests in rust/lance-namespace-impls/src/dir.rs """ +import sys import tempfile import uuid from threading 
import Lock @@ -812,6 +813,10 @@ def list_table_versions_json(self, request_json: str) -> str: return json.dumps(response_dict) +@pytest.mark.skipif( + sys.platform == "win32", + reason="External manifest store has known issues on Windows", +) def test_external_manifest_store_invokes_namespace_apis(): """Test that namespace APIs are invoked correctly for managed versioning. diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index be711fd1e94..bd3c2c538d3 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ -3994,6 +3994,7 @@ mod tests { } #[tokio::test] + #[cfg(not(windows))] async fn test_list_table_versions() { use arrow::array::{Int32Array, RecordBatchIterator}; use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; @@ -4110,6 +4111,7 @@ mod tests { } #[tokio::test] + #[cfg(not(windows))] async fn test_describe_table_version() { use arrow::array::{Int32Array, RecordBatchIterator}; use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; @@ -4224,6 +4226,7 @@ mod tests { } #[tokio::test] + #[cfg(not(windows))] async fn test_describe_table_version_latest() { use arrow::array::{Int32Array, RecordBatchIterator}; use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; @@ -4887,6 +4890,7 @@ mod tests { } #[tokio::test] + #[cfg(not(windows))] async fn test_dataset_commit_with_external_manifest_store() { use arrow::array::{Int32Array, StringArray}; use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; From 2e9c71e10aaf4ec49902feb1c2f2fd0fc8c6a7aa Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Fri, 20 Feb 2026 12:24:24 -0800 Subject: [PATCH 23/23] skip windows --- rust/lance-namespace-impls/src/dir.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rust/lance-namespace-impls/src/dir.rs b/rust/lance-namespace-impls/src/dir.rs index bd3c2c538d3..88f92d1395d 100644 --- a/rust/lance-namespace-impls/src/dir.rs +++ b/rust/lance-namespace-impls/src/dir.rs @@ 
-4308,6 +4308,7 @@ mod tests { } #[tokio::test] + #[cfg(not(windows))] async fn test_create_table_version() { use futures::TryStreamExt; use lance::dataset::builder::DatasetBuilder; @@ -4418,6 +4419,7 @@ mod tests { } #[tokio::test] + #[cfg(not(windows))] async fn test_create_table_version_conflict() { // create_table_version should fail if the version already exists. // Each version always writes to a new file location. @@ -4771,6 +4773,7 @@ mod tests { } #[tokio::test] + #[cfg(not(windows))] async fn test_external_manifest_store_invokes_namespace_apis() { use arrow::array::{Int32Array, StringArray}; use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};