diff --git a/python/.cargo/config.toml b/python/.cargo/config.toml index 3c7937b1bbe..f9f9bc0544a 100644 --- a/python/.cargo/config.toml +++ b/python/.cargo/config.toml @@ -17,7 +17,6 @@ rustflags = [ "-Wclippy::string_add_assign", "-Wclippy::string_add", "-Wclippy::string_lit_as_bytes", - "-Wclippy::string_to_string", "-Wclippy::use_self", "-Aclippy::redundant_pub_crate", # PyO3 macros don't pass this. ] diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 5da8bae9e0a..68b38e66d77 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -3399,6 +3399,8 @@ def commit( *, commit_message: Optional[str] = None, enable_stable_row_ids: Optional[bool] = None, + namespace: Optional["LanceNamespace"] = None, + table_id: Optional[List[str]] = None, ) -> LanceDataset: """Create a new version of dataset @@ -3465,6 +3467,12 @@ def commit( row IDs assign each row a monotonically increasing id that persists across compaction and other maintenance operations. This option is ignored for existing datasets. + namespace : LanceNamespace, optional + A namespace instance. Must be provided together with table_id. + Use lance.namespace.connect() to create a namespace. + table_id : List[str], optional + The table identifier within the namespace (e.g., ["workspace", "table"]). + Must be provided together with namespace. Returns ------- @@ -3535,6 +3543,8 @@ def commit( detached=detached, max_retries=max_retries, enable_stable_row_ids=enable_stable_row_ids, + namespace=namespace, + table_id=table_id, ) elif isinstance(operation, LanceOperation.BaseOperation): new_ds = _Dataset.commit( @@ -3549,6 +3559,8 @@ def commit( max_retries=max_retries, commit_message=commit_message, enable_stable_row_ids=enable_stable_row_ids, + namespace=namespace, + table_id=table_id, ) else: raise TypeError( diff --git a/python/src/dataset.rs b/python/src/dataset.rs index acb207f3f15..1a5393a1ea3 100644 --- a/python/src/dataset.rs +++ b/python/src/dataset.rs @@ -81,7 +81,6 @@ use lance_index::{ }; use lance_io::object_store::ObjectStoreParams; use lance_linalg::distance::MetricType; -use lance_namespace::LanceNamespace; use lance_table::format::{BasePath, Fragment, IndexMetadata}; use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler; use lance_table::io::commit::CommitHandler; @@ -90,7 +89,7 @@ use crate::error::PythonErrorExt; use crate::file::object_store_from_uri_or_path; use crate::fragment::FileFragment; use crate::indices::{PyIndexConfig, PyIndexDescription}; -use crate::namespace::{PyDirectoryNamespace, PyLanceNamespace, PyRestNamespace}; +use crate::namespace::extract_namespace_arc; use crate::rt; use crate::scanner::ScanStatistics; use crate::schema::{logical_schema_from_lance, LanceSchema}; @@ -601,52 +600,7 @@ impl Dataset { // Set up namespace commit handler if namespace and table_id are provided if let (Some(ns), Some(tid)) = (namespace, table_id) { - // Extract the inner namespace Arc from PyDirectoryNamespace, PyRestNamespace, - // or create a PyLanceNamespace wrapper for custom Python implementations. - // - // Python wrapper classes (DirectoryNamespace, RestNamespace in namespace.py) - // store the PyO3 class in `_inner`. For the EXACT wrapper classes, we can - // use _inner directly. For subclasses (which may override methods), we must - // use PyLanceNamespace to call through Python. - let ns_arc: Arc = - if let Ok(dir_ns) = ns.downcast::() { - // Direct PyO3 class - dir_ns.borrow().inner.clone() - } else if let Ok(rest_ns) = ns.downcast::() { - // Direct PyO3 class - rest_ns.borrow().inner.clone() - } else if let Ok(inner) = ns.getattr("_inner") { - // Python wrapper class - check if it's the exact wrapper class - // (not a subclass) by comparing type names - let type_name = ns - .get_type() - .name() - .map(|n| n.to_string()) - .unwrap_or_default(); - - if type_name == "DirectoryNamespace" { - if let Ok(dir_ns) = inner.downcast::() { - dir_ns.borrow().inner.clone() - } else { - PyLanceNamespace::create_arc(py, ns)? - } - } else if type_name == "RestNamespace" { - if let Ok(rest_ns) = inner.downcast::() { - rest_ns.borrow().inner.clone() - } else { - PyLanceNamespace::create_arc(py, ns)? - } - } else { - // Subclass or custom implementation - use PyLanceNamespace - // to call through Python (requires *_json methods) - PyLanceNamespace::create_arc(py, ns)? - } - } else { - // Custom Python implementation - wrap with PyLanceNamespace - // This calls back into Python for namespace methods - PyLanceNamespace::create_arc(py, ns)? - }; - + let ns_arc = extract_namespace_arc(py, ns)?; let external_store = LanceNamespaceExternalManifestStore::new(ns_arc, tid); let commit_handler: Arc = Arc::new(ExternalManifestCommitHandler { external_manifest_store: Arc::new(external_store), @@ -2177,7 +2131,7 @@ impl Dataset { #[allow(clippy::too_many_arguments)] #[staticmethod] - #[pyo3(signature = (dest, operation, read_version = None, commit_lock = None, storage_options = None, storage_options_provider = None, enable_v2_manifest_paths = None, detached = None, max_retries = None, commit_message = None, enable_stable_row_ids = None))] + #[pyo3(signature = (dest, operation, read_version = None, commit_lock = None, storage_options = None, storage_options_provider = None, enable_v2_manifest_paths = None, detached = None, max_retries = None, commit_message = None, enable_stable_row_ids = None, namespace = None, table_id = None))] fn commit( dest: PyWriteDest, operation: PyLance, @@ -2190,6 +2144,8 @@ impl Dataset { max_retries: Option, commit_message: Option, enable_stable_row_ids: Option, + namespace: Option<&Bound<'_, PyAny>>, + table_id: Option>, ) -> PyResult { let mut transaction = Transaction::new(read_version.unwrap_or_default(), operation.0, None); @@ -2210,13 +2166,15 @@ impl Dataset { detached, max_retries, enable_stable_row_ids, + namespace, + table_id, ) } #[allow(clippy::too_many_arguments)] #[allow(deprecated)] #[staticmethod] - #[pyo3(signature = (dest, transaction, commit_lock = None, storage_options = None, storage_options_provider = None, enable_v2_manifest_paths = None, detached = None, max_retries = None, enable_stable_row_ids = None))] + #[pyo3(signature = (dest, transaction, commit_lock = None, storage_options = None, storage_options_provider = None, enable_v2_manifest_paths = None, detached = None, max_retries = None, enable_stable_row_ids = None, namespace = None, table_id = None))] fn commit_transaction( dest: PyWriteDest, transaction: PyLance, @@ -2227,6 +2185,8 @@ impl Dataset { detached: Option, max_retries: Option, enable_stable_row_ids: Option, + namespace: Option<&Bound<'_, PyAny>>, + table_id: Option>, ) -> PyResult { let accessor = crate::storage_options::create_accessor_from_python( storage_options.clone(), @@ -2242,14 +2202,25 @@ impl Dataset { None }; - let commit_handler = commit_lock - .as_ref() - .map(|commit_lock| { - commit_lock - .into_py_any(commit_lock.py()) - .map(|cl| Arc::new(PyCommitLock::new(cl)) as Arc) - }) - .transpose()?; + // Create commit_handler: prefer user-provided commit_lock, then namespace-based handler + let commit_handler: Option> = + if let Some(commit_lock) = commit_lock.as_ref() { + // User provided a commit_lock + Some( + commit_lock + .into_py_any(commit_lock.py()) + .map(|cl| Arc::new(PyCommitLock::new(cl)) as Arc)?, + ) + } else if let (Some(ns), Some(tid)) = (namespace, table_id) { + // Create ExternalManifestCommitHandler from namespace and table_id + let ns_arc = extract_namespace_arc(ns.py(), ns)?; + let external_store = LanceNamespaceExternalManifestStore::new(ns_arc, tid); + Some(Arc::new(ExternalManifestCommitHandler { + external_manifest_store: Arc::new(external_store), + }) as Arc) + } else { + None + }; let mut builder = CommitBuilder::new(dest.as_dest()) .enable_v2_manifest_paths(enable_v2_manifest_paths.unwrap_or(true)) @@ -3200,52 +3171,7 @@ pub fn get_write_params(options: &Bound<'_, PyDict>) -> PyResult>(options, "table_id")?; if let (Some(ns), Some(table_id)) = (namespace_opt, table_id_opt) { - let py = options.py(); - // Extract the inner namespace Arc from PyDirectoryNamespace, PyRestNamespace, - // or create a PyLanceNamespace wrapper for custom Python implementations. - // - // Python wrapper classes (DirectoryNamespace, RestNamespace in namespace.py) - // store the PyO3 class in `_inner`. For the EXACT wrapper classes, we can - // use _inner directly. For subclasses (which may override methods), we must - // use PyLanceNamespace to call through Python. - let ns_arc: Arc = - if let Ok(dir_ns) = ns.downcast::() { - // Direct PyO3 class - dir_ns.borrow().inner.clone() - } else if let Ok(rest_ns) = ns.downcast::() { - // Direct PyO3 class - rest_ns.borrow().inner.clone() - } else if let Ok(inner) = ns.getattr("_inner") { - // Python wrapper class - check if it's the exact wrapper class - // (not a subclass) by comparing type names - let type_name = ns - .get_type() - .name() - .map(|n| n.to_string()) - .unwrap_or_default(); - - if type_name == "DirectoryNamespace" { - if let Ok(dir_ns) = inner.downcast::() { - dir_ns.borrow().inner.clone() - } else { - PyLanceNamespace::create_arc(py, &ns)? - } - } else if type_name == "RestNamespace" { - if let Ok(rest_ns) = inner.downcast::() { - rest_ns.borrow().inner.clone() - } else { - PyLanceNamespace::create_arc(py, &ns)? - } - } else { - // Subclass or custom implementation - use PyLanceNamespace - // to call through Python (requires *_json methods) - PyLanceNamespace::create_arc(py, &ns)? - } - } else { - // Custom Python implementation - wrap with PyLanceNamespace - PyLanceNamespace::create_arc(py, &ns)? - }; - + let ns_arc = extract_namespace_arc(options.py(), &ns)?; let external_store = LanceNamespaceExternalManifestStore::new(ns_arc, table_id); let commit_handler: Arc = Arc::new(ExternalManifestCommitHandler { diff --git a/python/src/namespace.rs b/python/src/namespace.rs index c079c6bd918..753432df314 100644 --- a/python/src/namespace.rs +++ b/python/src/namespace.rs @@ -887,6 +887,51 @@ impl LanceNamespaceTrait for PyLanceNamespace { } } +/// Extract an `Arc` from a Python namespace object. +/// +/// This function handles the different ways a Python namespace can be provided: +/// 1. Direct PyO3 class (PyDirectoryNamespace or PyRestNamespace) +/// 2. Python wrapper class with `_inner` attribute that holds the PyO3 class +/// 3. Custom Python implementation (wrapped with PyLanceNamespace) +/// +/// For Python wrapper classes (DirectoryNamespace, RestNamespace in namespace.py), +/// we check if it's the exact wrapper class by comparing type names. Subclasses +/// are wrapped with PyLanceNamespace to call through Python. +pub fn extract_namespace_arc( + py: Python<'_>, + ns: &Bound<'_, PyAny>, +) -> PyResult> { + // Direct PyO3 class + if let Ok(dir_ns) = ns.downcast::() { + return Ok(dir_ns.borrow().inner.clone()); + } + if let Ok(rest_ns) = ns.downcast::() { + return Ok(rest_ns.borrow().inner.clone()); + } + + // Python wrapper class - check if it's the exact wrapper class + if let Ok(inner) = ns.getattr("_inner") { + let type_name = ns + .get_type() + .name() + .map(|n| n.to_string()) + .unwrap_or_default(); + + if type_name == "DirectoryNamespace" { + if let Ok(dir_ns) = inner.downcast::() { + return Ok(dir_ns.borrow().inner.clone()); + } + } else if type_name == "RestNamespace" { + if let Ok(rest_ns) = inner.downcast::() { + return Ok(rest_ns.borrow().inner.clone()); + } + } + } + + // Custom Python implementation or subclass - wrap with PyLanceNamespace + PyLanceNamespace::create_arc(py, ns) +} + /// Python wrapper for REST adapter server #[pyclass(name = "PyRestAdapter", module = "lance.lance")] pub struct PyRestAdapter { diff --git a/python/uv.lock b/python/uv.lock index a6a5bafa918..3c19533280c 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -2454,7 +2454,7 @@ torch = [ [package.metadata] requires-dist = [ { name = "boto3", marker = "extra == 'tests'" }, - { name = "datafusion", marker = "extra == 'tests'", specifier = ">=50.1" }, + { name = "datafusion", marker = "extra == 'tests'", specifier = ">=50.1,<52" }, { name = "datasets", marker = "extra == 'tests'" }, { name = "duckdb", marker = "extra == 'tests'" }, { name = "geoarrow-rust-core", marker = "extra == 'geo'" },