Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ jobs:
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo test --locked --features ${ALL_FEATURES}
build-no-lock:
runs-on: ubuntu-24.04
runs-on: warp-ubuntu-2404-x64-8x
timeout-minutes: 30
env:
# Need up-to-date compilers for kernels
Expand Down
1 change: 1 addition & 0 deletions java/lance-jni/src/blocking_dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ pub fn inner_commit_overwrite<'local>(
fragments,
schema,
config_upsert_values: None,
initial_bases: None,
};
let path_str = path.extract(env)?;
let read_version = env.get_u64_opt(&read_version_obj)?;
Expand Down
2 changes: 2 additions & 0 deletions java/lance-jni/src/transaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ fn convert_to_java_operation_inner<'local>(
fragments: rust_fragments,
schema,
config_upsert_values,
initial_bases: _,
} => {
let java_fragments = export_vec(env, &rust_fragments)?;
let java_schema = convert_to_java_schema(env, schema)?;
Expand Down Expand Up @@ -890,6 +891,7 @@ fn convert_to_rust_operation(
fragments,
schema,
config_upsert_values,
initial_bases: None,
}
}
"Rewrite" => {
Expand Down
2 changes: 2 additions & 0 deletions protos/transaction.proto
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ message Transaction {
map<string, bytes> schema_metadata = 3;
// Key-value pairs to merge with existing config.
map<string, string> config_upsert_values = 4;
// The base paths to be added for the initial dataset creation
repeated BasePath initial_bases = 5;
}

// Add or replace a new secondary index.
Expand Down
2 changes: 2 additions & 0 deletions python/python/lance/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
)
from .fragment import FragmentMetadata, LanceFragment
from .lance import (
DatasetBasePath,
FFILanceTableProvider,
ScanStatistics,
bytes_read_counter,
Expand All @@ -47,6 +48,7 @@
__all__ = [
"BlobColumn",
"BlobFile",
"DatasetBasePath",
"DataStatistics",
"FieldStatistics",
"FragmentMetadata",
Expand Down
18 changes: 18 additions & 0 deletions python/python/lance/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
CleanupStats,
Compaction,
CompactionMetrics,
DatasetBasePath,
LanceSchema,
ScanStatistics,
_Dataset,
Expand Down Expand Up @@ -4897,6 +4898,8 @@ def write_dataset(
auto_cleanup_options: Optional[AutoCleanupConfig] = None,
commit_message: Optional[str] = None,
transaction_properties: Optional[Dict[str, str]] = None,
initial_bases: Optional[List[DatasetBasePath]] = None,
target_bases: Optional[List[str]] = None,
) -> LanceDataset:
"""Write a given data_obj to the given uri

Expand Down Expand Up @@ -4975,6 +4978,19 @@ def write_dataset(
and can be retrieved using read_transaction().
If both `commit_message` and `transaction_properties` are provided,
`commit_message` will override any "lance.commit.message" key in
`transaction_properties`.
initial_bases: list of DatasetBasePath, optional
New base paths to register in the manifest. Only used in **CREATE mode**.
Cannot be specified in APPEND or OVERWRITE modes.
target_bases: list of str, optional
References to base paths where data should be written. Can be
specified in all modes.

Each string is resolved by trying to match:
1. Base name (e.g., "primary", "archive") from registered bases
2. Base path URI (e.g., "s3://bucket1/data")

**CREATE mode**: References must match bases in `initial_bases`.
**APPEND/OVERWRITE modes**: References must match bases in the existing manifest.
"""
if use_legacy_format is not None:
warnings.warn(
Expand Down Expand Up @@ -5016,6 +5032,8 @@ def write_dataset(
"enable_stable_row_ids": enable_stable_row_ids,
"auto_cleanup_options": auto_cleanup_options,
"transaction_properties": merged_properties,
"initial_bases": initial_bases,
"target_bases": target_bases,
}

if commit_lock:
Expand Down
Loading