Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion python/.cargo/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ rustflags = [
"-Wclippy::string_add_assign",
"-Wclippy::string_add",
"-Wclippy::string_lit_as_bytes",
"-Wclippy::string_to_string",
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

depreacted clippy lint

"-Wclippy::use_self",
"-Aclippy::redundant_pub_crate", # PyO3 macros don't pass this.
]
Expand Down
12 changes: 12 additions & 0 deletions python/python/lance/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3399,6 +3399,8 @@ def commit(
*,
commit_message: Optional[str] = None,
enable_stable_row_ids: Optional[bool] = None,
namespace: Optional["LanceNamespace"] = None,
table_id: Optional[List[str]] = None,
) -> LanceDataset:
"""Create a new version of dataset

Expand Down Expand Up @@ -3465,6 +3467,12 @@ def commit(
row IDs assign each row a monotonically increasing id that persists
across compaction and other maintenance operations. This option is
ignored for existing datasets.
namespace : LanceNamespace, optional
A namespace instance. Must be provided together with table_id.
Use lance.namespace.connect() to create a namespace.
table_id : List[str], optional
The table identifier within the namespace (e.g., ["workspace", "table"]).
Must be provided together with namespace.

Returns
-------
Expand Down Expand Up @@ -3535,6 +3543,8 @@ def commit(
detached=detached,
max_retries=max_retries,
enable_stable_row_ids=enable_stable_row_ids,
namespace=namespace,
table_id=table_id,
)
elif isinstance(operation, LanceOperation.BaseOperation):
new_ds = _Dataset.commit(
Expand All @@ -3549,6 +3559,8 @@ def commit(
max_retries=max_retries,
commit_message=commit_message,
enable_stable_row_ids=enable_stable_row_ids,
namespace=namespace,
table_id=table_id,
)
else:
raise TypeError(
Expand Down
134 changes: 30 additions & 104 deletions python/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ use lance_index::{
};
use lance_io::object_store::ObjectStoreParams;
use lance_linalg::distance::MetricType;
use lance_namespace::LanceNamespace;
use lance_table::format::{BasePath, Fragment, IndexMetadata};
use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler;
use lance_table::io::commit::CommitHandler;
Expand All @@ -90,7 +89,7 @@ use crate::error::PythonErrorExt;
use crate::file::object_store_from_uri_or_path;
use crate::fragment::FileFragment;
use crate::indices::{PyIndexConfig, PyIndexDescription};
use crate::namespace::{PyDirectoryNamespace, PyLanceNamespace, PyRestNamespace};
use crate::namespace::extract_namespace_arc;
use crate::rt;
use crate::scanner::ScanStatistics;
use crate::schema::{logical_schema_from_lance, LanceSchema};
Expand Down Expand Up @@ -601,52 +600,7 @@ impl Dataset {

// Set up namespace commit handler if namespace and table_id are provided
if let (Some(ns), Some(tid)) = (namespace, table_id) {
// Extract the inner namespace Arc from PyDirectoryNamespace, PyRestNamespace,
// or create a PyLanceNamespace wrapper for custom Python implementations.
//
// Python wrapper classes (DirectoryNamespace, RestNamespace in namespace.py)
// store the PyO3 class in `_inner`. For the EXACT wrapper classes, we can
// use _inner directly. For subclasses (which may override methods), we must
// use PyLanceNamespace to call through Python.
let ns_arc: Arc<dyn LanceNamespace> =
if let Ok(dir_ns) = ns.downcast::<PyDirectoryNamespace>() {
// Direct PyO3 class
dir_ns.borrow().inner.clone()
} else if let Ok(rest_ns) = ns.downcast::<PyRestNamespace>() {
// Direct PyO3 class
rest_ns.borrow().inner.clone()
} else if let Ok(inner) = ns.getattr("_inner") {
// Python wrapper class - check if it's the exact wrapper class
// (not a subclass) by comparing type names
let type_name = ns
.get_type()
.name()
.map(|n| n.to_string())
.unwrap_or_default();

if type_name == "DirectoryNamespace" {
if let Ok(dir_ns) = inner.downcast::<PyDirectoryNamespace>() {
dir_ns.borrow().inner.clone()
} else {
PyLanceNamespace::create_arc(py, ns)?
}
} else if type_name == "RestNamespace" {
if let Ok(rest_ns) = inner.downcast::<PyRestNamespace>() {
rest_ns.borrow().inner.clone()
} else {
PyLanceNamespace::create_arc(py, ns)?
}
} else {
// Subclass or custom implementation - use PyLanceNamespace
// to call through Python (requires *_json methods)
PyLanceNamespace::create_arc(py, ns)?
}
} else {
// Custom Python implementation - wrap with PyLanceNamespace
// This calls back into Python for namespace methods
PyLanceNamespace::create_arc(py, ns)?
};

let ns_arc = extract_namespace_arc(py, ns)?;
let external_store = LanceNamespaceExternalManifestStore::new(ns_arc, tid);
let commit_handler: Arc<dyn CommitHandler> = Arc::new(ExternalManifestCommitHandler {
external_manifest_store: Arc::new(external_store),
Expand Down Expand Up @@ -2177,7 +2131,7 @@ impl Dataset {

#[allow(clippy::too_many_arguments)]
#[staticmethod]
#[pyo3(signature = (dest, operation, read_version = None, commit_lock = None, storage_options = None, storage_options_provider = None, enable_v2_manifest_paths = None, detached = None, max_retries = None, commit_message = None, enable_stable_row_ids = None))]
#[pyo3(signature = (dest, operation, read_version = None, commit_lock = None, storage_options = None, storage_options_provider = None, enable_v2_manifest_paths = None, detached = None, max_retries = None, commit_message = None, enable_stable_row_ids = None, namespace = None, table_id = None))]
fn commit(
dest: PyWriteDest,
operation: PyLance<Operation>,
Expand All @@ -2190,6 +2144,8 @@ impl Dataset {
max_retries: Option<u32>,
commit_message: Option<String>,
enable_stable_row_ids: Option<bool>,
namespace: Option<&Bound<'_, PyAny>>,
table_id: Option<Vec<String>>,
) -> PyResult<Self> {
let mut transaction = Transaction::new(read_version.unwrap_or_default(), operation.0, None);

Expand All @@ -2210,13 +2166,15 @@ impl Dataset {
detached,
max_retries,
enable_stable_row_ids,
namespace,
table_id,
)
}

#[allow(clippy::too_many_arguments)]
#[allow(deprecated)]
#[staticmethod]
#[pyo3(signature = (dest, transaction, commit_lock = None, storage_options = None, storage_options_provider = None, enable_v2_manifest_paths = None, detached = None, max_retries = None, enable_stable_row_ids = None))]
#[pyo3(signature = (dest, transaction, commit_lock = None, storage_options = None, storage_options_provider = None, enable_v2_manifest_paths = None, detached = None, max_retries = None, enable_stable_row_ids = None, namespace = None, table_id = None))]
fn commit_transaction(
dest: PyWriteDest,
transaction: PyLance<Transaction>,
Expand All @@ -2227,6 +2185,8 @@ impl Dataset {
detached: Option<bool>,
max_retries: Option<u32>,
enable_stable_row_ids: Option<bool>,
namespace: Option<&Bound<'_, PyAny>>,
table_id: Option<Vec<String>>,
) -> PyResult<Self> {
let accessor = crate::storage_options::create_accessor_from_python(
storage_options.clone(),
Expand All @@ -2242,14 +2202,25 @@ impl Dataset {
None
};

let commit_handler = commit_lock
.as_ref()
.map(|commit_lock| {
commit_lock
.into_py_any(commit_lock.py())
.map(|cl| Arc::new(PyCommitLock::new(cl)) as Arc<dyn CommitHandler>)
})
.transpose()?;
// Create commit_handler: prefer user-provided commit_lock, then namespace-based handler
let commit_handler: Option<Arc<dyn CommitHandler>> =
if let Some(commit_lock) = commit_lock.as_ref() {
// User provided a commit_lock
Some(
commit_lock
.into_py_any(commit_lock.py())
.map(|cl| Arc::new(PyCommitLock::new(cl)) as Arc<dyn CommitHandler>)?,
)
} else if let (Some(ns), Some(tid)) = (namespace, table_id) {
// Create ExternalManifestCommitHandler from namespace and table_id
let ns_arc = extract_namespace_arc(ns.py(), ns)?;
let external_store = LanceNamespaceExternalManifestStore::new(ns_arc, tid);
Some(Arc::new(ExternalManifestCommitHandler {
external_manifest_store: Arc::new(external_store),
}) as Arc<dyn CommitHandler>)
} else {
None
};

let mut builder = CommitBuilder::new(dest.as_dest())
.enable_v2_manifest_paths(enable_v2_manifest_paths.unwrap_or(true))
Expand Down Expand Up @@ -3200,52 +3171,7 @@ pub fn get_write_params(options: &Bound<'_, PyDict>) -> PyResult<Option<WritePar
let table_id_opt = get_dict_opt::<Vec<String>>(options, "table_id")?;

if let (Some(ns), Some(table_id)) = (namespace_opt, table_id_opt) {
let py = options.py();
// Extract the inner namespace Arc from PyDirectoryNamespace, PyRestNamespace,
// or create a PyLanceNamespace wrapper for custom Python implementations.
//
// Python wrapper classes (DirectoryNamespace, RestNamespace in namespace.py)
// store the PyO3 class in `_inner`. For the EXACT wrapper classes, we can
// use _inner directly. For subclasses (which may override methods), we must
// use PyLanceNamespace to call through Python.
let ns_arc: Arc<dyn LanceNamespace> =
if let Ok(dir_ns) = ns.downcast::<PyDirectoryNamespace>() {
// Direct PyO3 class
dir_ns.borrow().inner.clone()
} else if let Ok(rest_ns) = ns.downcast::<PyRestNamespace>() {
// Direct PyO3 class
rest_ns.borrow().inner.clone()
} else if let Ok(inner) = ns.getattr("_inner") {
// Python wrapper class - check if it's the exact wrapper class
// (not a subclass) by comparing type names
let type_name = ns
.get_type()
.name()
.map(|n| n.to_string())
.unwrap_or_default();

if type_name == "DirectoryNamespace" {
if let Ok(dir_ns) = inner.downcast::<PyDirectoryNamespace>() {
dir_ns.borrow().inner.clone()
} else {
PyLanceNamespace::create_arc(py, &ns)?
}
} else if type_name == "RestNamespace" {
if let Ok(rest_ns) = inner.downcast::<PyRestNamespace>() {
rest_ns.borrow().inner.clone()
} else {
PyLanceNamespace::create_arc(py, &ns)?
}
} else {
// Subclass or custom implementation - use PyLanceNamespace
// to call through Python (requires *_json methods)
PyLanceNamespace::create_arc(py, &ns)?
}
} else {
// Custom Python implementation - wrap with PyLanceNamespace
PyLanceNamespace::create_arc(py, &ns)?
};

let ns_arc = extract_namespace_arc(options.py(), &ns)?;
let external_store = LanceNamespaceExternalManifestStore::new(ns_arc, table_id);
let commit_handler: Arc<dyn CommitHandler> =
Arc::new(ExternalManifestCommitHandler {
Expand Down
45 changes: 45 additions & 0 deletions python/src/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -887,6 +887,51 @@ impl LanceNamespaceTrait for PyLanceNamespace {
}
}

/// Extract an `Arc<dyn LanceNamespace>` from a Python namespace object.
///
/// This function handles the different ways a Python namespace can be provided:
/// 1. Direct PyO3 class (PyDirectoryNamespace or PyRestNamespace)
/// 2. Python wrapper class with `_inner` attribute that holds the PyO3 class
/// 3. Custom Python implementation (wrapped with PyLanceNamespace)
///
/// For Python wrapper classes (DirectoryNamespace, RestNamespace in namespace.py),
/// we check if it's the exact wrapper class by comparing type names. Subclasses
/// are wrapped with PyLanceNamespace to call through Python.
pub fn extract_namespace_arc(
py: Python<'_>,
ns: &Bound<'_, PyAny>,
) -> PyResult<Arc<dyn LanceNamespaceTrait>> {
// Direct PyO3 class
if let Ok(dir_ns) = ns.downcast::<PyDirectoryNamespace>() {
return Ok(dir_ns.borrow().inner.clone());
}
if let Ok(rest_ns) = ns.downcast::<PyRestNamespace>() {
return Ok(rest_ns.borrow().inner.clone());
}

// Python wrapper class - check if it's the exact wrapper class
if let Ok(inner) = ns.getattr("_inner") {
let type_name = ns
.get_type()
.name()
.map(|n| n.to_string())
.unwrap_or_default();

if type_name == "DirectoryNamespace" {
if let Ok(dir_ns) = inner.downcast::<PyDirectoryNamespace>() {
return Ok(dir_ns.borrow().inner.clone());
}
} else if type_name == "RestNamespace" {
if let Ok(rest_ns) = inner.downcast::<PyRestNamespace>() {
return Ok(rest_ns.borrow().inner.clone());
}
}
}

// Custom Python implementation or subclass - wrap with PyLanceNamespace
PyLanceNamespace::create_arc(py, ns)
}

/// Python wrapper for REST adapter server
#[pyclass(name = "PyRestAdapter", module = "lance.lance")]
pub struct PyRestAdapter {
Expand Down
2 changes: 1 addition & 1 deletion python/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.