diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 7863cb5d318..9020658fab3 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -465,6 +465,7 @@ def __init__( ) self._default_scan_options = default_scan_options self._read_params = read_params + self._storage_options_provider = storage_options_provider @classmethod def __deserialize__( @@ -526,11 +527,13 @@ def __setstate__(self, state): ) self._default_scan_options = default_scan_options self._read_params = read_params + self._storage_options_provider = None def __copy__(self): ds = LanceDataset.__new__(LanceDataset) ds._uri = self._uri ds._storage_options = self._storage_options + ds._storage_options_provider = self._storage_options_provider ds._ds = copy.copy(self._ds) ds._default_scan_options = self._default_scan_options ds._read_params = self._read_params.copy() if self._read_params else None @@ -625,6 +628,7 @@ def create_branch( ds._ds = new_ds ds._uri = new_ds.uri ds._storage_options = self._storage_options + ds._storage_options_provider = self._storage_options_provider ds._default_scan_options = self._default_scan_options ds._read_params = self._read_params return ds @@ -2283,6 +2287,27 @@ def storage_options_accessor(self): """ return self._ds.storage_options_accessor() + def new_file_session(self): + """ + Create a new file session for reading and writing files in this dataset. + + The session is rooted at the dataset URI and is given the dataset's latest + storage options and its provider, enabling automatic credential refresh + for long-running operations. The provider may be None (e.g. after unpickling). + + Returns + ------- + LanceFileSession + A file session configured for this dataset's storage location. 
+ """ + from lance.file import LanceFileSession + + return LanceFileSession( + base_path=self._uri, + storage_options=self.latest_storage_options(), + storage_options_provider=self._storage_options_provider, + ) + def checkout_version( self, version: int | str | Tuple[Optional[str], Optional[int]] ) -> "LanceDataset": @@ -3479,6 +3504,7 @@ def commit( ds = LanceDataset.__new__(LanceDataset) ds._storage_options = storage_options + ds._storage_options_provider = storage_options_provider ds._ds = new_ds ds._uri = new_ds.uri ds._default_scan_options = None @@ -3577,6 +3603,7 @@ def commit_batch( ds._ds = new_ds ds._uri = new_ds.uri ds._storage_options = storage_options + ds._storage_options_provider = storage_options_provider ds._default_scan_options = None ds._read_params = None return BulkCommitResult( @@ -5855,6 +5882,7 @@ def write_dataset( ds = LanceDataset.__new__(LanceDataset) ds._storage_options = storage_options + ds._storage_options_provider = None ds._ds = inner_ds ds._uri = inner_ds.uri ds._default_scan_options = None