Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,17 @@ on: [push, pull_request]
jobs:
build:
runs-on: ubuntu-latest

services:
squid:
image: ubuntu/squid:latest
ports:
- 3128:3128

strategy:
max-parallel: 4
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11']
python-version: ["3.8", "3.9", "3.10", "3.11"]

env:
PORT: 8080
Expand All @@ -29,8 +36,8 @@ jobs:
poetry install --with test
- name: Test with pytest
env:
API_TOKEN: ${{ steps.dataverse.outputs.api_token }}
BASE_URL: ${{ steps.dataverse.outputs.base_url }}
DVUPLOADER_TESTING: "true"
API_TOKEN: ${{ steps.dataverse.outputs.api_token }}
BASE_URL: ${{ steps.dataverse.outputs.base_url }}
DVUPLOADER_TESTING: "true"
run: |
python3 -m poetry run pytest
4 changes: 3 additions & 1 deletion dvuploader/directupload.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ async def direct_upload(
progress,
pbars,
n_parallel_uploads: int,
proxy: Optional[str] = None,
) -> None:
"""
Perform parallel direct upload of files to the specified Dataverse repository.
Expand All @@ -48,7 +49,7 @@ async def direct_upload(
progress: Progress object to track upload progress.
pbars: List of progress bars for each file.
n_parallel_uploads (int): Number of concurrent uploads to perform.

proxy (str): The proxy to use for the upload.
Returns:
None
"""
Expand All @@ -58,6 +59,7 @@ async def direct_upload(
session_params = {
"timeout": None,
"limits": httpx.Limits(max_connections=n_parallel_uploads),
"proxy": proxy,
}

async with httpx.AsyncClient(**session_params) as session:
Expand Down
2 changes: 2 additions & 0 deletions dvuploader/dvuploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def upload(
n_parallel_uploads: int = 1,
force_native: bool = False,
replace_existing: bool = True,
proxy: Optional[str] = None,
) -> None:
"""
Uploads the files to the specified Dataverse repository.
Expand All @@ -70,6 +71,7 @@ def upload(
this restricts parallel chunks per upload. Use n_jobs to control parallel files.
force_native (bool): Forces the use of the native upload method instead of direct upload.
replace_existing (bool): Whether to replace files that already exist in the dataset.
proxy (str): The proxy to use for the upload.

Returns:
None
Expand Down
6 changes: 4 additions & 2 deletions dvuploader/nativeupload.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import os
import tempfile
import tenacity
from typing import List, Tuple, Dict
from typing import List, Optional, Tuple, Dict

from rich.progress import Progress, TaskID

Expand All @@ -29,6 +29,7 @@ async def native_upload(
n_parallel_uploads: int,
pbars,
progress,
proxy: Optional[str] = None,
):
"""
Executes native uploads for the given files in parallel.
Expand All @@ -41,7 +42,7 @@ async def native_upload(
n_parallel_uploads (int): The number of parallel uploads to execute.
pbars: List of progress bar IDs to track upload progress.
progress: Progress object to manage progress bars.

proxy (str): The proxy to use for the upload.
Returns:
None
"""
Expand All @@ -53,6 +54,7 @@ async def native_upload(
"headers": {"X-Dataverse-key": api_token},
"timeout": None,
"limits": httpx.Limits(max_connections=n_parallel_uploads),
"proxy": proxy,
}

async with httpx.AsyncClient(**session_params) as session:
Expand Down
49 changes: 49 additions & 0 deletions tests/integration/test_native_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,55 @@ def test_forced_native_upload(
assert len(files) == 3
assert sorted([file["label"] for file in files]) == sorted(expected_files)

def test_native_upload_with_proxy(
    self,
    credentials,
):
    """Native upload should succeed when routed through an HTTP proxy.

    Relies on the squid service (port 3128) started by the CI workflow.
    """
    BASE_URL, API_TOKEN = credentials
    proxy = "http://127.0.0.1:3128"

    with tempfile.TemporaryDirectory() as directory:
        # Arrange: three mock files of increasing size in a temp directory.
        file_sizes = {
            "small_file.txt": 1,
            "mid_file.txt": 50,
            "large_file.txt": 200,
        }
        for name, size in file_sizes.items():
            create_mock_file(directory, name, size=size)

        # Pick up every file in the directory for upload.
        files = add_directory(directory=directory)

        # Fresh dataset to upload into.
        pid = create_dataset(
            parent="Root",
            server_url=BASE_URL,
            api_token=API_TOKEN,
        )

        # Act: upload through the proxy with a single parallel worker.
        DVUploader(files=files).upload(
            persistent_id=pid,
            api_token=API_TOKEN,
            dataverse_url=BASE_URL,
            n_parallel_uploads=1,
            proxy=proxy,
        )

        # Assert: all three files arrived with their original labels.
        uploaded = retrieve_dataset_files(
            dataverse_url=BASE_URL,
            persistent_id=pid,
            api_token=API_TOKEN,
        )

        assert len(uploaded) == 3
        assert sorted(entry["label"] for entry in uploaded) == sorted(file_sizes)

def test_native_upload_by_handler(
self,
credentials,
Expand Down