From 1123e5e3dff9603ec03e8b13cbea5574514fdf83 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Sat, 23 Nov 2024 17:12:14 +0800 Subject: [PATCH 1/2] fix: fix storage options for dataset builder --- python/python/tests/test_fragment.py | 19 +++++++++++++++++++ rust/lance/src/dataset/fragment/write.rs | 10 +++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/python/python/tests/test_fragment.py b/python/python/tests/test_fragment.py index c14cba5a731..97b42c8c572 100644 --- a/python/python/tests/test_fragment.py +++ b/python/python/tests/test_fragment.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright The Lance Authors +import copy import json import multiprocessing import uuid @@ -23,6 +24,15 @@ from lance.fragment import write_fragments from lance.progress import FileSystemFragmentWriteProgress +CONFIG = { + "allow_http": "true", + "aws_access_key_id": "ACCESSKEY", + "aws_secret_access_key": "SECRETKEY", + "aws_endpoint": "http://localhost:9000", + "dynamodb_endpoint": "http://localhost:8000", + "aws_region": "us-west-2", +} + def test_write_fragment(tmp_path: Path): with pytest.raises(OSError): @@ -354,3 +364,12 @@ def test_create_from_file(tmp_path): assert dataset.count_rows() == 1600 assert len(dataset.get_fragments()) == 1 assert dataset.get_fragments()[0].fragment_id == 2 + + +@pytest.mark.integration +def test_append(s3_bucket: str): + storage_options = copy.deepcopy(CONFIG) + table = pa.table({"a": [1, 2], "b": ["a", "b"]}) + lance.fragment.LanceFragment.create( + f"s3://{s3_bucket}/test_append.lance", table, storage_options=storage_options + ) diff --git a/rust/lance/src/dataset/fragment/write.rs b/rust/lance/src/dataset/fragment/write.rs index 5d330636613..1f6bd53cdd7 100644 --- a/rust/lance/src/dataset/fragment/write.rs +++ b/rust/lance/src/dataset/fragment/write.rs @@ -214,7 +214,15 @@ impl<'a> FragmentCreateBuilder<'a> { } async fn existing_dataset_schema(&self) -> Result> { - match DatasetBuilder::from_uri(self.dataset_uri).load().await { + let mut builder = DatasetBuilder::from_uri(self.dataset_uri); + let storage_options = self + .write_params + .and_then(|p| p.store_params.as_ref()) + .and_then(|p| p.storage_options.clone()); + if let Some(storage_options) = storage_options { + builder = builder.with_storage_options(storage_options); + } + match builder.load().await { Ok(dataset) => { // Use the schema from the dataset, because it has the correct // field ids. From b5b28ab1e4ebca7141aa5fbb8323ba01d2aa64ff Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Wed, 4 Dec 2024 08:16:33 +0800 Subject: [PATCH 2/2] move integration test --- python/python/tests/test_fragment.py | 19 ------------------- python/python/tests/test_s3_ddb.py | 9 +++++++++ 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/python/python/tests/test_fragment.py b/python/python/tests/test_fragment.py index 97b42c8c572..c14cba5a731 100644 --- a/python/python/tests/test_fragment.py +++ b/python/python/tests/test_fragment.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright The Lance Authors -import copy import json import multiprocessing import uuid @@ -24,15 +23,6 @@ from lance.fragment import write_fragments from lance.progress import FileSystemFragmentWriteProgress -CONFIG = { - "allow_http": "true", - "aws_access_key_id": "ACCESSKEY", - "aws_secret_access_key": "SECRETKEY", - "aws_endpoint": "http://localhost:9000", - "dynamodb_endpoint": "http://localhost:8000", - "aws_region": "us-west-2", -} - def test_write_fragment(tmp_path: Path): with pytest.raises(OSError): @@ -364,12 +354,3 @@ def test_create_from_file(tmp_path): assert dataset.count_rows() == 1600 assert len(dataset.get_fragments()) == 1 assert dataset.get_fragments()[0].fragment_id == 2 - - -@pytest.mark.integration -def test_append(s3_bucket: str): - storage_options = copy.deepcopy(CONFIG) - table = pa.table({"a": [1, 2], "b": ["a", "b"]}) - lance.fragment.LanceFragment.create( - f"s3://{s3_bucket}/test_append.lance", table, storage_options=storage_options - ) diff --git a/python/python/tests/test_s3_ddb.py b/python/python/tests/test_s3_ddb.py index 9e006fec60e..c2073aee74e 100644 --- a/python/python/tests/test_s3_ddb.py +++ b/python/python/tests/test_s3_ddb.py @@ -287,3 +287,12 @@ def test_file_writer_reader(s3_bucket: str): bytes(reader.read_global_buffer(global_buffer_pos)).decode() == global_buffer_text ) + + +@pytest.mark.integration +def test_append_fragment(s3_bucket: str): + storage_options = copy.deepcopy(CONFIG) + table = pa.table({"a": [1, 2], "b": ["a", "b"]}) + lance.fragment.LanceFragment.create( + f"s3://{s3_bucket}/test_append.lance", table, storage_options=storage_options + )