Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified bin/dvuploader-macos-latest
Binary file not shown.
Binary file modified bin/dvuploader-ubuntu-latest
Binary file not shown.
Binary file modified bin/dvuploader-windows-latest.exe
Binary file not shown.
1 change: 1 addition & 0 deletions dvuploader/checksum.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import hashlib
from enum import Enum
import os
from typing import Callable

from pydantic import BaseModel, Field
Expand Down
49 changes: 26 additions & 23 deletions dvuploader/directupload.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,20 @@
import requests
from dotted_dict import DottedDict
from requests.exceptions import HTTPError
from requests.models import PreparedRequest
from tqdm import tqdm
from tqdm.utils import CallbackIOWrapper

from dvuploader.file import File
from dvuploader.chunkstream import ChunkStream
from dvuploader.utils import build_url

# Number of iterations of the request retry loop (``for _ in range(MAX_RETRIES)``).
# NOTE: no ``global`` declaration is needed here; at module scope the
# statement has no effect, a plain assignment already creates the global.
MAX_RETRIES = 10

# Endpoint used when requesting an upload ticket for a dataset.
TICKET_ENDPOINT = "/api/datasets/:persistentId/uploadurls"
# NOTE(review): not referenced in the visible part of this module;
# presumably the bulk "add files" endpoint -- confirm against callers.
ADD_FILE_ENDPOINT = "/api/datasets/:persistentId/addFiles"
# Endpoint for registering a new file; the dataset's persistent identifier
# is appended directly to this string.
UPLOAD_ENDPOINT = "/api/datasets/:persistentId/add?persistentId="
# Endpoint for replacing an existing file; ``{FILE_ID}`` is filled in via
# ``str.format(FILE_ID=...)``.
REPLACE_ENDPOINT = "/api/files/{FILE_ID}/replace"


def direct_upload(
Expand All @@ -28,6 +29,7 @@ def direct_upload(
dataverse_url: str,
api_token: str,
position: int,
n_parallel_uploads: int,
) -> bool:
"""
Uploads a file to a Dataverse collection using direct upload.
Expand All @@ -38,6 +40,7 @@ def direct_upload(
dataverse_url (str): The URL of the Dataverse instance to upload to.
api_token (str): The API token to use for authentication.
position (int): The position of the file in the list of files to upload.
n_parallel_uploads (int): The number of parallel uploads to perform.

Returns:
bool: True if the upload was successful, False otherwise.
Expand Down Expand Up @@ -68,13 +71,15 @@ def direct_upload(
dataverse_url=dataverse_url,
api_token=api_token,
pbar=pbar,
n_parallel_uploads=n_parallel_uploads,
)

result = _add_file_to_ds(
dataverse_url,
persistent_id,
api_token,
file,
n_parallel_uploads,
)

if result is True:
Expand Down Expand Up @@ -111,7 +116,7 @@ def _request_ticket(
"""

# Build request URL
query = _build_url(
query = build_url(
endpoint=TICKET_ENDPOINT,
dataverse_url=dataverse_url,
key=api_token,
Expand All @@ -126,26 +131,11 @@ def _request_ticket(
raise HTTPError(
f"Could not request a ticket for dataset '{persistent_id}' at '{dataverse_url}' \
\n\n{json.dumps(response.json(), indent=2)}"
)
) # type: ignore

return DottedDict(response.json()["data"])


def _build_url(
    dataverse_url: str,
    endpoint: str,
    **kwargs,
) -> str:
    """Assemble a request URL from a base URL, an endpoint and query parameters.

    Args:
        dataverse_url (str): Base URL that ``endpoint`` is joined onto.
        endpoint (str): Endpoint path, combined with the base via ``urljoin``.
        **kwargs: Passed to ``PreparedRequest.prepare_url`` as the query
            parameters of the resulting URL.

    Returns:
        str: The URL produced by ``PreparedRequest.prepare_url``.

    Raises:
        AssertionError: If no URL was set on the prepared request.
    """

    target = urljoin(dataverse_url, endpoint)

    # Let ``requests`` handle the encoding of the query parameters.
    prepared = PreparedRequest()
    prepared.prepare_url(target, kwargs)

    built = prepared.url
    assert built is not None, f"Could not build URL for '{dataverse_url}'"

    return built


def _upload_singlepart(
response: Dict,
filepath: str,
Expand Down Expand Up @@ -179,7 +169,7 @@ def _upload_singlepart(
raise HTTPError(
f"Could not upload file \
\n\n{resp.headers}"
)
) # type: ignore

return storage_identifier

Expand All @@ -190,6 +180,7 @@ def _upload_multipart(
dataverse_url: str,
api_token: str,
pbar: tqdm,
n_parallel_uploads: int,
):
"""
Uploads a file to Dataverse using multipart upload.
Expand All @@ -200,6 +191,7 @@ def _upload_multipart(
dataverse_url (str): The URL of the Dataverse instance.
api_token (str): The API token for the Dataverse instance.
pbar (tqdm): A progress bar to track the upload progress.
n_parallel_uploads (int): The number of parallel uploads to perform.

Returns:
str: The storage identifier for the uploaded file.
Expand Down Expand Up @@ -228,7 +220,10 @@ def _upload_multipart(
)

# Execute upload
responses = grequests.map(rs)
responses = grequests.map(
requests=rs,
size=n_parallel_uploads,
)
e_tags = [response.headers["ETag"] for response in responses]

except Exception as e:
Expand Down Expand Up @@ -302,7 +297,7 @@ def _complete_upload(
raise HTTPError(
f"Could not complete upload \
\n\n{json.dumps(response.json(), indent=2)}"
)
) # type: ignore


def _abort_upload(
Expand All @@ -321,8 +316,16 @@ def _add_file_to_ds(
file: File,
):
headers = {"X-Dataverse-key": api_token}
url = urljoin(dataverse_url, UPLOAD_ENDPOINT + pid)
payload = {"jsonData": file.json(by_alias=True)}

if not file.to_replace:
url = urljoin(dataverse_url, UPLOAD_ENDPOINT + pid)
else:
url = build_url(
dataverse_url=dataverse_url,
endpoint=REPLACE_ENDPOINT.format(FILE_ID=file.file_id),
)

payload = {"jsonData": file.json(by_alias=True, exclude={"to_replace", "file_id"})}

for _ in range(MAX_RETRIES):
response = requests.post(url, headers=headers, files=payload)
Expand Down
Loading