From 91ba04d5f0fd6404fcd3d685649692f1fb4a87a8 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Fri, 14 Feb 2025 10:23:26 +0100 Subject: [PATCH 1/5] add proxy option --- dvuploader/directupload.py | 4 +++- dvuploader/dvuploader.py | 2 ++ dvuploader/nativeupload.py | 6 ++++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/dvuploader/directupload.py b/dvuploader/directupload.py index c2b84b3..cc1ade4 100644 --- a/dvuploader/directupload.py +++ b/dvuploader/directupload.py @@ -36,6 +36,7 @@ async def direct_upload( progress, pbars, n_parallel_uploads: int, + proxy: Optional[str] = None, ) -> None: """ Perform parallel direct upload of files to the specified Dataverse repository. @@ -48,7 +49,7 @@ async def direct_upload( progress: Progress object to track upload progress. pbars: List of progress bars for each file. n_parallel_uploads (int): Number of concurrent uploads to perform. - + proxy (str): The proxy to use for the upload. Returns: None """ @@ -58,6 +59,7 @@ async def direct_upload( session_params = { "timeout": None, "limits": httpx.Limits(max_connections=n_parallel_uploads), + "proxy": proxy, } async with httpx.AsyncClient(**session_params) as session: diff --git a/dvuploader/dvuploader.py b/dvuploader/dvuploader.py index aede715..511a918 100644 --- a/dvuploader/dvuploader.py +++ b/dvuploader/dvuploader.py @@ -58,6 +58,7 @@ def upload( n_parallel_uploads: int = 1, force_native: bool = False, replace_existing: bool = True, + proxy: Optional[str] = None, ) -> None: """ Uploads the files to the specified Dataverse repository. @@ -70,6 +71,7 @@ def upload( this restricts parallel chunks per upload. Use n_jobs to control parallel files. force_native (bool): Forces the use of the native upload method instead of direct upload. replace_existing (bool): Whether to replace files that already exist in the dataset. + proxy (str): The proxy to use for the upload. 
Returns: None diff --git a/dvuploader/nativeupload.py b/dvuploader/nativeupload.py index dc0292b..55a4f07 100644 --- a/dvuploader/nativeupload.py +++ b/dvuploader/nativeupload.py @@ -5,7 +5,7 @@ import os import tempfile import tenacity -from typing import List, Tuple, Dict +from typing import List, Optional, Tuple, Dict from rich.progress import Progress, TaskID @@ -29,6 +29,7 @@ async def native_upload( n_parallel_uploads: int, pbars, progress, + proxy: Optional[str] = None, ): """ Executes native uploads for the given files in parallel. @@ -41,7 +42,7 @@ async def native_upload( n_parallel_uploads (int): The number of parallel uploads to execute. pbars: List of progress bar IDs to track upload progress. progress: Progress object to manage progress bars. - + proxy (str): The proxy to use for the upload. Returns: None """ @@ -53,6 +54,7 @@ async def native_upload( "headers": {"X-Dataverse-key": api_token}, "timeout": None, "limits": httpx.Limits(max_connections=n_parallel_uploads), + "proxy": proxy, } async with httpx.AsyncClient(**session_params) as session: From 31982bba898e1d1a75e1895ed5fe94eea4fe72f7 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Fri, 14 Feb 2025 10:23:36 +0100 Subject: [PATCH 2/5] test native upload with proxy --- tests/integration/test_native_upload.py | 49 +++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tests/integration/test_native_upload.py b/tests/integration/test_native_upload.py index 821b4fa..8c17a36 100644 --- a/tests/integration/test_native_upload.py +++ b/tests/integration/test_native_upload.py @@ -105,6 +105,55 @@ def test_forced_native_upload( assert len(files) == 3 assert sorted([file["label"] for file in files]) == sorted(expected_files) + def test_native_upload_with_proxy( + self, + credentials, + ): + BASE_URL, API_TOKEN = credentials + proxy = "http://127.0.0.1:3128" + + with tempfile.TemporaryDirectory() as directory: + # Arrange + 
create_mock_file(directory, "small_file.txt", size=1) + create_mock_file(directory, "mid_file.txt", size=50) + create_mock_file(directory, "large_file.txt", size=200) + + # Add all files in the directory + files = add_directory(directory=directory) + + # Create Dataset + pid = create_dataset( + parent="Root", + server_url=BASE_URL, + api_token=API_TOKEN, + ) + + # Act + uploader = DVUploader(files=files) + uploader.upload( + persistent_id=pid, + api_token=API_TOKEN, + dataverse_url=BASE_URL, + n_parallel_uploads=1, + proxy=proxy, + ) + + # Assert + files = retrieve_dataset_files( + dataverse_url=BASE_URL, + persistent_id=pid, + api_token=API_TOKEN, + ) + + expected_files = [ + "small_file.txt", + "mid_file.txt", + "large_file.txt", + ] + + assert len(files) == 3 + assert sorted([file["label"] for file in files]) == sorted(expected_files) + def test_native_upload_by_handler( self, credentials, From 47c66ef9b74bea745d1174ebae6efb9da54644d8 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Fri, 14 Feb 2025 10:24:14 +0100 Subject: [PATCH 3/5] add proxy service for testing --- .github/workflows/test.yml | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d4b0229..9e6e346 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -5,10 +5,22 @@ on: [push] jobs: build: runs-on: ubuntu-latest + + services: + squid: + image: sameersbn/squid:latest + ports: + - 3128:3128 + options: >- + --health-cmd="curl --fail http://localhost:3128 || exit 1" + --health-interval=10s + --health-timeout=5s + --health-retries=3 + strategy: max-parallel: 4 matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ["3.8", "3.9", "3.10", "3.11"] env: PORT: 8080 @@ -29,8 +41,8 @@ jobs: poetry install --with test - name: Test with pytest env: - API_TOKEN: ${{ steps.dataverse.outputs.api_token }} - BASE_URL: ${{ 
steps.dataverse.outputs.base_url }} - DVUPLOADER_TESTING: "true" + API_TOKEN: ${{ steps.dataverse.outputs.api_token }} + BASE_URL: ${{ steps.dataverse.outputs.base_url }} + DVUPLOADER_TESTING: "true" run: | python3 -m poetry run pytest From 35a9d10092ce2b972acd1e7b266eedf47cf34fbd Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Fri, 14 Feb 2025 10:26:54 +0100 Subject: [PATCH 4/5] use netcat to check if port is open --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9e6e346..3fb5d23 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,7 +12,7 @@ jobs: ports: - 3128:3128 options: >- - --health-cmd="curl --fail http://localhost:3128 || exit 1" + --health-cmd="nc -z localhost 3128 || exit 1" --health-interval=10s --health-timeout=5s --health-retries=3 From 79a05510d39fa8beae99cae113772b3a420c7bf7 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Fri, 14 Feb 2025 10:45:37 +0100 Subject: [PATCH 5/5] remove healthcheck and use other image --- .github/workflows/test.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3fb5d23..5aecc42 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,14 +8,9 @@ jobs: services: squid: - image: sameersbn/squid:latest + image: ubuntu/squid:latest ports: - 3128:3128 - options: >- - --health-cmd="nc -z localhost 3128 || exit 1" - --health-interval=10s - --health-timeout=5s - --health-retries=3 strategy: max-parallel: 4