Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions python/python/lance/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3352,6 +3352,7 @@ def commit(
max_retries: int = 20,
*,
commit_message: Optional[str] = None,
enable_stable_row_ids: Optional[bool] = None,
) -> LanceDataset:
"""Create a new version of dataset

Expand Down Expand Up @@ -3413,6 +3414,11 @@ def commit(
commit_message: str, optional
A message to associate with this commit. This message will be stored in the
dataset's metadata and can be retrieved using read_transaction().
enable_stable_row_ids: bool, optional
If True, enables stable row IDs when creating a new dataset. Stable
row IDs assign each row a monotonically increasing ID that persists
across compaction and other maintenance operations. This option is
ignored when committing to an existing dataset.

Returns
-------
Expand Down Expand Up @@ -3482,6 +3488,7 @@ def commit(
enable_v2_manifest_paths=enable_v2_manifest_paths,
detached=detached,
max_retries=max_retries,
enable_stable_row_ids=enable_stable_row_ids,
)
elif isinstance(operation, LanceOperation.BaseOperation):
new_ds = _Dataset.commit(
Expand All @@ -3495,6 +3502,7 @@ def commit(
detached=detached,
max_retries=max_retries,
commit_message=commit_message,
enable_stable_row_ids=enable_stable_row_ids,
)
else:
raise TypeError(
Expand Down
1 change: 1 addition & 0 deletions python/python/lance/lance/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,7 @@ class _Dataset:
enable_v2_manifest_paths: Optional[bool] = None,
detached: Optional[bool] = None,
max_retries: Optional[int] = None,
enable_stable_row_ids: Optional[bool] = None,
**kwargs,
) -> _Dataset: ...
@staticmethod
Expand Down
30 changes: 30 additions & 0 deletions python/python/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4697,6 +4697,36 @@ def test_commit_message_and_get_properties(tmp_path):
)


def test_commit_with_stable_row_ids(tmp_path: Path):
    """commit() with enable_stable_row_ids=True yields sequential row IDs.

    The dataset is created through an Overwrite commit with the flag set,
    then extended through an Append commit that does not pass the flag.
    A scan with row IDs is expected to return 0..19 in order.
    """
    uri = str(tmp_path)

    # Initial version: write fragments, then commit them as an Overwrite
    # with stable row IDs requested.
    first_batch = pa.table({"a": range(10)})
    first_fragments = lance.fragment.write_fragments(first_batch, uri)
    overwrite_op = lance.LanceOperation.Overwrite(first_batch.schema, first_fragments)
    dataset = lance.LanceDataset.commit(uri, overwrite_op, enable_stable_row_ids=True)

    # Second version: append ten more rows on top of the version just created.
    second_batch = pa.table({"a": range(10, 20)})
    second_fragments = lance.fragment.write_fragments(second_batch, uri)
    append_op = lance.LanceOperation.Append(second_fragments)
    dataset = lance.LanceDataset.commit(uri, append_op, read_version=dataset.version)

    # Row IDs should cover both commits as one sequential run.
    scanned = dataset.scanner(with_row_id=True).to_table()
    assert scanned.num_rows == 20
    assert scanned.column("_rowid").to_pylist() == list(range(20))


def test_table_metadata_updates(tmp_path: Path):
"""Test table metadata incremental updates and full replacement."""
arr = pa.array([1, 2, 3])
Expand Down
11 changes: 9 additions & 2 deletions python/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2196,7 +2196,7 @@ impl Dataset {

#[allow(clippy::too_many_arguments)]
#[staticmethod]
#[pyo3(signature = (dest, operation, read_version = None, commit_lock = None, storage_options = None, storage_options_provider = None, enable_v2_manifest_paths = None, detached = None, max_retries = None, commit_message = None))]
#[pyo3(signature = (dest, operation, read_version = None, commit_lock = None, storage_options = None, storage_options_provider = None, enable_v2_manifest_paths = None, detached = None, max_retries = None, commit_message = None, enable_stable_row_ids = None))]
fn commit(
dest: PyWriteDest,
operation: PyLance<Operation>,
Expand All @@ -2208,6 +2208,7 @@ impl Dataset {
detached: Option<bool>,
max_retries: Option<u32>,
commit_message: Option<String>,
enable_stable_row_ids: Option<bool>,
) -> PyResult<Self> {
let mut transaction = Transaction::new(read_version.unwrap_or_default(), operation.0, None);

Expand All @@ -2227,13 +2228,14 @@ impl Dataset {
enable_v2_manifest_paths,
detached,
max_retries,
enable_stable_row_ids,
)
}

#[allow(clippy::too_many_arguments)]
#[allow(deprecated)]
#[staticmethod]
#[pyo3(signature = (dest, transaction, commit_lock = None, storage_options = None, storage_options_provider = None, enable_v2_manifest_paths = None, detached = None, max_retries = None))]
#[pyo3(signature = (dest, transaction, commit_lock = None, storage_options = None, storage_options_provider = None, enable_v2_manifest_paths = None, detached = None, max_retries = None, enable_stable_row_ids = None))]
fn commit_transaction(
dest: PyWriteDest,
transaction: PyLance<Transaction>,
Expand All @@ -2243,6 +2245,7 @@ impl Dataset {
enable_v2_manifest_paths: Option<bool>,
detached: Option<bool>,
max_retries: Option<u32>,
enable_stable_row_ids: Option<bool>,
) -> PyResult<Self> {
let accessor = crate::storage_options::create_accessor_from_python(
storage_options.clone(),
Expand Down Expand Up @@ -2272,6 +2275,10 @@ impl Dataset {
.with_detached(detached.unwrap_or(false))
.with_max_retries(max_retries.unwrap_or(20));

if let Some(enable) = enable_stable_row_ids {
builder = builder.use_stable_row_ids(enable);
}

if let Some(store_params) = object_store_params {
builder = builder.with_store_params(store_params);
}
Expand Down