From 5ac1358dcb9b5bae3302f97682aea5bc600c1a7f Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Tue, 24 Feb 2026 12:54:59 -0800 Subject: [PATCH 1/2] fix: set namespace commit handler for LanceDataset.commit --- python/python/lance/dataset.py | 14 ++++++++ python/src/dataset.rs | 63 ++++++++++++++++++++++++++++++---- python/uv.lock | 2 +- 3 files changed, 71 insertions(+), 8 deletions(-) diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 5da8bae9e0a..d1d6f1926a5 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -3399,6 +3399,8 @@ def commit( *, commit_message: Optional[str] = None, enable_stable_row_ids: Optional[bool] = None, + namespace: Optional["LanceNamespace"] = None, + table_id: Optional[List[str]] = None, ) -> LanceDataset: """Create a new version of dataset @@ -3465,6 +3467,14 @@ def commit( row IDs assign each row a monotonically increasing id that persists across compaction and other maintenance operations. This option is ignored for existing datasets. + namespace : LanceNamespace, optional + A namespace instance to use for managed versioning. When provided along + with table_id, the commit will go through the namespace's + create_table_version API, enabling server-side version tracking and + event emission. Use lance.namespace.connect() to create a namespace. + table_id : List[str], optional + The table identifier within the namespace (e.g., ["workspace", "table"]). + Must be provided together with namespace for managed versioning. Returns ------- @@ -3535,6 +3545,8 @@ def commit( detached=detached, max_retries=max_retries, enable_stable_row_ids=enable_stable_row_ids, + namespace=namespace, + table_id=table_id, ) elif isinstance(operation, LanceOperation.BaseOperation): new_ds = _Dataset.commit( @@ -3549,6 +3561,8 @@ def commit( max_retries=max_retries, commit_message=commit_message, enable_stable_row_ids=enable_stable_row_ids, + namespace=namespace, + table_id=table_id, ) else: raise TypeError( diff --git a/python/src/dataset.rs b/python/src/dataset.rs index acb207f3f15..95fc2c44265 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -2177,7 +2177,7 @@ impl Dataset { #[allow(clippy::too_many_arguments)] #[staticmethod] - #[pyo3(signature = (dest, operation, read_version = None, commit_lock = None, storage_options = None, storage_options_provider = None, enable_v2_manifest_paths = None, detached = None, max_retries = None, commit_message = None, enable_stable_row_ids = None))] + #[pyo3(signature = (dest, operation, read_version = None, commit_lock = None, storage_options = None, storage_options_provider = None, enable_v2_manifest_paths = None, detached = None, max_retries = None, commit_message = None, enable_stable_row_ids = None, namespace = None, table_id = None))] fn commit( dest: PyWriteDest, operation: PyLance, @@ -2190,6 +2190,8 @@ impl Dataset { max_retries: Option, commit_message: Option, enable_stable_row_ids: Option, + namespace: Option<&Bound<'_, PyAny>>, + table_id: Option>, ) -> PyResult { let mut transaction = Transaction::new(read_version.unwrap_or_default(), operation.0, None); @@ -2210,13 +2212,15 @@ impl Dataset { detached, max_retries, enable_stable_row_ids, + namespace, + table_id, ) } #[allow(clippy::too_many_arguments)] #[allow(deprecated)] #[staticmethod] - #[pyo3(signature = (dest, transaction, commit_lock = None, storage_options = None, storage_options_provider = None, enable_v2_manifest_paths = None, detached = None, max_retries = None, enable_stable_row_ids = None))] + #[pyo3(signature = (dest, transaction, commit_lock = None, storage_options = None, storage_options_provider = None, enable_v2_manifest_paths = None, detached = None, max_retries = None, enable_stable_row_ids = None, namespace = None, table_id = None))] fn commit_transaction( dest: PyWriteDest, transaction: PyLance, @@ -2227,6 +2231,8 @@ impl Dataset { detached: Option, max_retries: Option, enable_stable_row_ids: Option, + namespace: Option<&Bound<'_, PyAny>>, + table_id: Option>, ) -> PyResult { let accessor = crate::storage_options::create_accessor_from_python( storage_options.clone(), @@ -2242,14 +2248,57 @@ impl Dataset { None }; - let commit_handler = commit_lock + // Create commit_handler: prefer user-provided commit_lock, then namespace-based handler + let commit_handler: Option> = if let Some(commit_lock) = commit_lock .as_ref() - .map(|commit_lock| { + { + // User provided a commit_lock + Some( commit_lock .into_py_any(commit_lock.py()) - .map(|cl| Arc::new(PyCommitLock::new(cl)) as Arc) - }) - .transpose()?; + .map(|cl| Arc::new(PyCommitLock::new(cl)) as Arc)?, + ) + } else if let (Some(ns), Some(tid)) = (namespace, table_id) { + // Create ExternalManifestCommitHandler from namespace and table_id + let py = ns.py(); + let ns_arc: Arc = + if let Ok(dir_ns) = ns.downcast::() { + dir_ns.borrow().inner.clone() + } else if let Ok(rest_ns) = ns.downcast::() { + rest_ns.borrow().inner.clone() + } else if let Ok(inner) = ns.getattr("_inner") { + let type_name = ns + .get_type() + .name() + .map(|n| n.to_string()) + .unwrap_or_default(); + + if type_name == "DirectoryNamespace" { + if let Ok(dir_ns) = inner.downcast::() { + dir_ns.borrow().inner.clone() + } else { + PyLanceNamespace::create_arc(py, ns)? + } + } else if type_name == "RestNamespace" { + if let Ok(rest_ns) = inner.downcast::() { + rest_ns.borrow().inner.clone() + } else { + PyLanceNamespace::create_arc(py, ns)? + } + } else { + PyLanceNamespace::create_arc(py, ns)? + } + } else { + PyLanceNamespace::create_arc(py, ns)? + }; + + let external_store = LanceNamespaceExternalManifestStore::new(ns_arc, tid); + Some(Arc::new(ExternalManifestCommitHandler { + external_manifest_store: Arc::new(external_store), + }) as Arc) + } else { + None + }; let mut builder = CommitBuilder::new(dest.as_dest()) .enable_v2_manifest_paths(enable_v2_manifest_paths.unwrap_or(true)) diff --git a/python/uv.lock b/python/uv.lock index a6a5bafa918..3c19533280c 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -2454,7 +2454,7 @@ torch = [ [package.metadata] requires-dist = [ { name = "boto3", marker = "extra == 'tests'" }, - { name = "datafusion", marker = "extra == 'tests'", specifier = ">=50.1" }, + { name = "datafusion", marker = "extra == 'tests'", specifier = ">=50.1,<52" }, { name = "datasets", marker = "extra == 'tests'" }, { name = "duckdb", marker = "extra == 'tests'" }, { name = "geoarrow-rust-core", marker = "extra == 'geo'" }, From 29d26054d728c21d062396883d7f82a45f036fb7 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Tue, 24 Feb 2026 13:32:54 -0800 Subject: [PATCH 2/2] fix: remove deprecated clippy::string_to_string lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The clippy::string_to_string lint was removed in recent Rust versions. Removing it from the config fixes the CI lint failure. Also refactored extract_namespace_arc into a shared utility function in namespace.rs to reduce code duplication. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- python/.cargo/config.toml | 1 - python/python/lance/dataset.py | 8 +- python/src/dataset.rs | 165 +++++---------------------------- python/src/namespace.rs | 45 +++++++++ 4 files changed, 69 insertions(+), 150 deletions(-) diff --git a/python/.cargo/config.toml b/python/.cargo/config.toml index 3c7937b1bbe..f9f9bc0544a 100644 --- a/python/.cargo/config.toml +++ b/python/.cargo/config.toml @@ -17,7 +17,6 @@ rustflags = [ "-Wclippy::string_add_assign", "-Wclippy::string_add", "-Wclippy::string_lit_as_bytes", - "-Wclippy::string_to_string", "-Wclippy::use_self", "-Aclippy::redundant_pub_crate", # PyO3 macros don't pass this. ] diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index d1d6f1926a5..68b38e66d77 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -3468,13 +3468,11 @@ def commit( across compaction and other maintenance operations. This option is ignored for existing datasets. namespace : LanceNamespace, optional - A namespace instance to use for managed versioning. When provided along - with table_id, the commit will go through the namespace's - create_table_version API, enabling server-side version tracking and - event emission. Use lance.namespace.connect() to create a namespace. + A namespace instance. Must be provided together with table_id. + Use lance.namespace.connect() to create a namespace. table_id : List[str], optional The table identifier within the namespace (e.g., ["workspace", "table"]). - Must be provided together with namespace for managed versioning. + Must be provided together with namespace. Returns ------- diff --git a/python/src/dataset.rs b/python/src/dataset.rs index 95fc2c44265..1a5393a1ea3 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -81,7 +81,6 @@ use lance_index::{ }; use lance_io::object_store::ObjectStoreParams; use lance_linalg::distance::MetricType; -use lance_namespace::LanceNamespace; use lance_table::format::{BasePath, Fragment, IndexMetadata}; use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler; use lance_table::io::commit::CommitHandler; @@ -90,7 +89,7 @@ use crate::error::PythonErrorExt; use crate::file::object_store_from_uri_or_path; use crate::fragment::FileFragment; use crate::indices::{PyIndexConfig, PyIndexDescription}; -use crate::namespace::{PyDirectoryNamespace, PyLanceNamespace, PyRestNamespace}; +use crate::namespace::extract_namespace_arc; use crate::rt; use crate::scanner::ScanStatistics; use crate::schema::{logical_schema_from_lance, LanceSchema}; @@ -601,52 +600,7 @@ impl Dataset { // Set up namespace commit handler if namespace and table_id are provided if let (Some(ns), Some(tid)) = (namespace, table_id) { - // Extract the inner namespace Arc from PyDirectoryNamespace, PyRestNamespace, - // or create a PyLanceNamespace wrapper for custom Python implementations. - // - // Python wrapper classes (DirectoryNamespace, RestNamespace in namespace.py) - // store the PyO3 class in `_inner`. For the EXACT wrapper classes, we can - // use _inner directly. For subclasses (which may override methods), we must - // use PyLanceNamespace to call through Python. - let ns_arc: Arc = - if let Ok(dir_ns) = ns.downcast::() { - // Direct PyO3 class - dir_ns.borrow().inner.clone() - } else if let Ok(rest_ns) = ns.downcast::() { - // Direct PyO3 class - rest_ns.borrow().inner.clone() - } else if let Ok(inner) = ns.getattr("_inner") { - // Python wrapper class - check if it's the exact wrapper class - // (not a subclass) by comparing type names - let type_name = ns - .get_type() - .name() - .map(|n| n.to_string()) - .unwrap_or_default(); - - if type_name == "DirectoryNamespace" { - if let Ok(dir_ns) = inner.downcast::() { - dir_ns.borrow().inner.clone() - } else { - PyLanceNamespace::create_arc(py, ns)? - } - } else if type_name == "RestNamespace" { - if let Ok(rest_ns) = inner.downcast::() { - rest_ns.borrow().inner.clone() - } else { - PyLanceNamespace::create_arc(py, ns)? - } - } else { - // Subclass or custom implementation - use PyLanceNamespace - // to call through Python (requires *_json methods) - PyLanceNamespace::create_arc(py, ns)? - } - } else { - // Custom Python implementation - wrap with PyLanceNamespace - // This calls back into Python for namespace methods - PyLanceNamespace::create_arc(py, ns)? - }; - + let ns_arc = extract_namespace_arc(py, ns)?; let external_store = LanceNamespaceExternalManifestStore::new(ns_arc, tid); let commit_handler: Arc = Arc::new(ExternalManifestCommitHandler { external_manifest_store: Arc::new(external_store), @@ -2249,56 +2203,24 @@ impl Dataset { }; // Create commit_handler: prefer user-provided commit_lock, then namespace-based handler - let commit_handler: Option> = if let Some(commit_lock) = commit_lock - .as_ref() - { - // User provided a commit_lock - Some( - commit_lock - .into_py_any(commit_lock.py()) - .map(|cl| Arc::new(PyCommitLock::new(cl)) as Arc)?, - ) - } else if let (Some(ns), Some(tid)) = (namespace, table_id) { - // Create ExternalManifestCommitHandler from namespace and table_id - let py = ns.py(); - let ns_arc: Arc = - if let Ok(dir_ns) = ns.downcast::() { - dir_ns.borrow().inner.clone() - } else if let Ok(rest_ns) = ns.downcast::() { - rest_ns.borrow().inner.clone() - } else if let Ok(inner) = ns.getattr("_inner") { - let type_name = ns - .get_type() - .name() - .map(|n| n.to_string()) - .unwrap_or_default(); - - if type_name == "DirectoryNamespace" { - if let Ok(dir_ns) = inner.downcast::() { - dir_ns.borrow().inner.clone() - } else { - PyLanceNamespace::create_arc(py, ns)? - } - } else if type_name == "RestNamespace" { - if let Ok(rest_ns) = inner.downcast::() { - rest_ns.borrow().inner.clone() - } else { - PyLanceNamespace::create_arc(py, ns)? - } - } else { - PyLanceNamespace::create_arc(py, ns)? - } - } else { - PyLanceNamespace::create_arc(py, ns)? - }; - - let external_store = LanceNamespaceExternalManifestStore::new(ns_arc, tid); - Some(Arc::new(ExternalManifestCommitHandler { - external_manifest_store: Arc::new(external_store), - }) as Arc) - } else { - None - }; + let commit_handler: Option> = + if let Some(commit_lock) = commit_lock.as_ref() { + // User provided a commit_lock + Some( + commit_lock + .into_py_any(commit_lock.py()) + .map(|cl| Arc::new(PyCommitLock::new(cl)) as Arc)?, + ) + } else if let (Some(ns), Some(tid)) = (namespace, table_id) { + // Create ExternalManifestCommitHandler from namespace and table_id + let ns_arc = extract_namespace_arc(ns.py(), ns)?; + let external_store = LanceNamespaceExternalManifestStore::new(ns_arc, tid); + Some(Arc::new(ExternalManifestCommitHandler { + external_manifest_store: Arc::new(external_store), + }) as Arc) + } else { + None + }; let mut builder = CommitBuilder::new(dest.as_dest()) .enable_v2_manifest_paths(enable_v2_manifest_paths.unwrap_or(true)) @@ -3249,52 +3171,7 @@ pub fn get_write_params(options: &Bound<'_, PyDict>) -> PyResult>(options, "table_id")?; if let (Some(ns), Some(table_id)) = (namespace_opt, table_id_opt) { - let py = options.py(); - // Extract the inner namespace Arc from PyDirectoryNamespace, PyRestNamespace, - // or create a PyLanceNamespace wrapper for custom Python implementations. - // - // Python wrapper classes (DirectoryNamespace, RestNamespace in namespace.py) - // store the PyO3 class in `_inner`. For the EXACT wrapper classes, we can - // use _inner directly. For subclasses (which may override methods), we must - // use PyLanceNamespace to call through Python. - let ns_arc: Arc = - if let Ok(dir_ns) = ns.downcast::() { - // Direct PyO3 class - dir_ns.borrow().inner.clone() - } else if let Ok(rest_ns) = ns.downcast::() { - // Direct PyO3 class - rest_ns.borrow().inner.clone() - } else if let Ok(inner) = ns.getattr("_inner") { - // Python wrapper class - check if it's the exact wrapper class - // (not a subclass) by comparing type names - let type_name = ns - .get_type() - .name() - .map(|n| n.to_string()) - .unwrap_or_default(); - - if type_name == "DirectoryNamespace" { - if let Ok(dir_ns) = inner.downcast::() { - dir_ns.borrow().inner.clone() - } else { - PyLanceNamespace::create_arc(py, &ns)? - } - } else if type_name == "RestNamespace" { - if let Ok(rest_ns) = inner.downcast::() { - rest_ns.borrow().inner.clone() - } else { - PyLanceNamespace::create_arc(py, &ns)? - } - } else { - // Subclass or custom implementation - use PyLanceNamespace - // to call through Python (requires *_json methods) - PyLanceNamespace::create_arc(py, &ns)? - } - } else { - // Custom Python implementation - wrap with PyLanceNamespace - PyLanceNamespace::create_arc(py, &ns)? - }; - + let ns_arc = extract_namespace_arc(options.py(), &ns)?; let external_store = LanceNamespaceExternalManifestStore::new(ns_arc, table_id); let commit_handler: Arc = Arc::new(ExternalManifestCommitHandler { diff --git a/python/src/namespace.rs b/python/src/namespace.rs index c079c6bd918..753432df314 100644 --- a/python/src/namespace.rs +++ b/python/src/namespace.rs @@ -887,6 +887,51 @@ impl LanceNamespaceTrait for PyLanceNamespace { } } +/// Extract an `Arc` from a Python namespace object. +/// +/// This function handles the different ways a Python namespace can be provided: +/// 1. Direct PyO3 class (PyDirectoryNamespace or PyRestNamespace) +/// 2. Python wrapper class with `_inner` attribute that holds the PyO3 class +/// 3. Custom Python implementation (wrapped with PyLanceNamespace) +/// +/// For Python wrapper classes (DirectoryNamespace, RestNamespace in namespace.py), +/// we check if it's the exact wrapper class by comparing type names. Subclasses +/// are wrapped with PyLanceNamespace to call through Python. +pub fn extract_namespace_arc( + py: Python<'_>, + ns: &Bound<'_, PyAny>, +) -> PyResult> { + // Direct PyO3 class + if let Ok(dir_ns) = ns.downcast::() { + return Ok(dir_ns.borrow().inner.clone()); + } + if let Ok(rest_ns) = ns.downcast::() { + return Ok(rest_ns.borrow().inner.clone()); + } + + // Python wrapper class - check if it's the exact wrapper class + if let Ok(inner) = ns.getattr("_inner") { + let type_name = ns + .get_type() + .name() + .map(|n| n.to_string()) + .unwrap_or_default(); + + if type_name == "DirectoryNamespace" { + if let Ok(dir_ns) = inner.downcast::() { + return Ok(dir_ns.borrow().inner.clone()); + } + } else if type_name == "RestNamespace" { + if let Ok(rest_ns) = inner.downcast::() { + return Ok(rest_ns.borrow().inner.clone()); + } + } + } + + // Custom Python implementation or subclass - wrap with PyLanceNamespace + PyLanceNamespace::create_arc(py, ns) +} + /// Python wrapper for REST adapter server #[pyclass(name = "PyRestAdapter", module = "lance.lance")] pub struct PyRestAdapter {