diff --git a/python/python/lance/file.py b/python/python/lance/file.py
index cab73f2b633..dec4aea00b6 100644
--- a/python/python/lance/file.py
+++ b/python/python/lance/file.py
@@ -342,6 +342,38 @@ def list(self, path: Optional[str] = None) -> List[str]:
         """
         return self._session.list(path)
 
+    def upload_file(self, local_path: Union[str, Path], remote_path: str) -> None:
+        """
+        Upload a file from the local filesystem to the object store.
+
+        Parameters
+        ----------
+        local_path : str or Path
+            Local file path to upload. A ``Path`` is converted to ``str``
+            before the call is forwarded to the underlying session.
+        remote_path : str
+            Remote path relative to session's base_path.
+        """
+        if isinstance(local_path, Path):
+            local_path = str(local_path)
+        self._session.upload_file(local_path, remote_path)
+
+    def download_file(self, remote_path: str, local_path: Union[str, Path]) -> None:
+        """
+        Download a file from the object store to the local filesystem.
+
+        Parameters
+        ----------
+        remote_path : str
+            Remote path relative to session's base_path.
+        local_path : str or Path
+            Local file path where the file will be saved. A ``Path`` is
+            converted to ``str`` before the call is forwarded to the session.
+        """
+        if isinstance(local_path, Path):
+            local_path = str(local_path)
+        self._session.download_file(remote_path, local_path)
+
 
 class LanceFileWriter:
     """
diff --git a/python/python/lance/lance/__init__.pyi b/python/python/lance/lance/__init__.pyi
index 3e3aef279c2..f0cf1243d61 100644
--- a/python/python/lance/lance/__init__.pyi
+++ b/python/python/lance/lance/__init__.pyi
@@ -126,6 +126,8 @@ class LanceFileSession:
     ) -> LanceFileWriter: ...
     def contains(self, path: str) -> bool: ...
     def list(self, path: Optional[str] = None) -> List[str]: ...
+    def upload_file(self, local_path: str, remote_path: str) -> None: ...
+    def download_file(self, remote_path: str, local_path: str) -> None: ...
 
 class LanceFileReader:
     def __init__(
diff --git a/python/python/tests/test_s3_ddb.py b/python/python/tests/test_s3_ddb.py
index 1e659569651..b9c9e4be6c0 100644
--- a/python/python/tests/test_s3_ddb.py
+++ b/python/python/tests/test_s3_ddb.py
@@ -17,7 +17,7 @@
 import lance
 import pyarrow as pa
 import pytest
-from lance.file import LanceFileReader, LanceFileWriter
+from lance.file import LanceFileReader, LanceFileSession, LanceFileWriter
 from lance.fragment import write_fragments
 
 # These are all keys that are accepted by storage_options
@@ -264,6 +264,54 @@ def test_file_writer_reader(s3_bucket: str):
     )
 
 
+@pytest.mark.integration
+def test_file_session_upload_download(s3_bucket: str, tmp_path):
+    storage_options = copy.deepcopy(CONFIG)
+    del storage_options["dynamodb_endpoint"]
+
+    session_uri = f"s3://{s3_bucket}/test_session"
+    session = LanceFileSession(session_uri, storage_options=storage_options)
+
+    # Create a local file to upload
+    local_file = tmp_path / "test_upload.txt"
+    test_content = "Hello from LanceFileSession!"
+    local_file.write_text(test_content)
+
+    # Test upload_file with a str local path
+    session.upload_file(str(local_file), "uploaded.txt")
+
+    # Test contains - file should exist after upload
+    assert session.contains("uploaded.txt"), "File should exist after upload"
+    assert not session.contains("nonexistent.txt"), "Nonexistent file should not exist"
+
+    # Upload another file to test list; pass a Path to cover that input type
+    local_file2 = tmp_path / "test_upload2.txt"
+    local_file2.write_text("Second file")
+    session.upload_file(local_file2, "subdir/nested.txt")
+
+    # Test list - should see both files
+    files = session.list()
+    assert "uploaded.txt" in files, f"uploaded.txt should be in list: {files}"
+    assert "subdir/nested.txt" in files, f"subdir/nested.txt should be in list: {files}"
+
+    # Test list with prefix
+    subdir_files = session.list("subdir")
+    assert len(subdir_files) == 1, f"Should have 1 file in subdir: {subdir_files}"
+    assert "subdir/nested.txt" in subdir_files
+
+    # Test download_file with a str local path
+    download_path = tmp_path / "downloaded.txt"
+    session.download_file("uploaded.txt", str(download_path))
+
+    # Verify downloaded content matches
+    assert download_path.read_text() == test_content, "Downloaded content should match"
+
+    # Test downloading nested file; pass a Path to cover that input type
+    download_nested = tmp_path / "downloaded_nested.txt"
+    session.download_file("subdir/nested.txt", download_nested)
+    assert download_nested.read_text() == "Second file"
+
+
 @pytest.mark.integration
 def test_append_fragment(s3_bucket: str):
     storage_options = copy.deepcopy(CONFIG)