4 changes: 4 additions & 0 deletions dvuploader/file.py
@@ -30,6 +30,8 @@ class File(BaseModel):
Private Attributes:
_size (int): Size of the file in bytes.
_unchanged_data (bool): Indicates if the file data has not changed since last upload.
_enforce_metadata_update (bool): Indicates if metadata update is enforced.
_is_inside_zip (bool): Indicates if the file is packaged inside a zip archive.

Methods:
extract_file_name(): Extracts filename from filepath and initializes file handler.
@@ -59,6 +61,8 @@ class File(BaseModel):

_size: int = PrivateAttr(default=0)
_unchanged_data: bool = PrivateAttr(default=False)
_enforce_metadata_update: bool = PrivateAttr(default=False)
_is_inside_zip: bool = PrivateAttr(default=False)

def extract_file_name(self):
"""
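For context, the two new flags are Pydantic private attributes, so they stay out of validation and serialization and only carry state between upload steps. A minimal sketch of how such flags behave, using a hypothetical stand-in model rather than dvuploader's actual File class:

from pydantic import BaseModel, PrivateAttr

class FileSketch(BaseModel):
    # Hypothetical, reduced stand-in for dvuploader.File showing the new flags.
    filepath: str

    _enforce_metadata_update: bool = PrivateAttr(default=False)
    _is_inside_zip: bool = PrivateAttr(default=False)

f = FileSketch(filepath="tests/fixtures/archive.zip")
f._enforce_metadata_update = True  # set in native_upload for unchanged-data replacements
f._is_inside_zip = True            # set in zip_files when the file is packaged
print(f._enforce_metadata_update, f._is_inside_zip)  # True True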
11 changes: 9 additions & 2 deletions dvuploader/nativeupload.py
@@ -95,6 +95,10 @@ async def native_upload(
files_new_metadata = [file for file in files if file.to_replace and file._unchanged_data]
files_replace = [file for file in files if file.to_replace and not file._unchanged_data]

# These files are not in a package but still need a metadata update; enforce it even for zips
for file in files_new_metadata:
file._enforce_metadata_update = True

async with httpx.AsyncClient(**session_params) as session:
with tempfile.TemporaryDirectory() as tmp_dir:
packages = distribute_files(files_new)
@@ -369,8 +373,11 @@ async def _update_metadata(
try:
if _tab_extension(dv_path) in file_mapping:
file_id = file_mapping[_tab_extension(dv_path)]
elif file.file_name and _is_zip(file.file_name):
# When the file is a zip it will be unpacked and thus
elif (
file.file_name and _is_zip(file.file_name)
and not file._is_inside_zip and not file._enforce_metadata_update
):
# When the file is a zip package it will be unpacked and thus
# the expected file name of the zip will not be in the
# dataset, since it has been unpacked.
continue
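To make the new condition easier to follow: _update_metadata normally skips a top-level zip upload, because Dataverse unpacks it and the zip's own name never appears in the dataset file listing. The two new flags opt back in when the zip was shipped inside a package (_is_inside_zip) or when native_upload explicitly requests a metadata-only update (_enforce_metadata_update). A self-contained sketch of that check, using simplified stand-ins rather than the real dvuploader objects:

from dataclasses import dataclass

def _is_zip(file_name: str) -> bool:
    # Simplified stand-in for dvuploader's zip detection.
    return file_name.lower().endswith(".zip")

@dataclass
class FileStub:
    file_name: str
    _is_inside_zip: bool = False
    _enforce_metadata_update: bool = False

def should_skip_metadata_update(file: FileStub, in_mapping: bool) -> bool:
    """True when the zip is expected to be unpacked server-side, so its
    original name will not appear in the dataset."""
    if in_mapping:
        return False
    return (
        bool(file.file_name)
        and _is_zip(file.file_name)
        and not file._is_inside_zip
        and not file._enforce_metadata_update
    )

# A bare zip upload is skipped; the same zip shipped inside a package is not.
print(should_skip_metadata_update(FileStub("archive.zip"), in_mapping=False))                       # True
print(should_skip_metadata_update(FileStub("archive.zip", _is_inside_zip=True), in_mapping=False))  # False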
1 change: 1 addition & 0 deletions dvuploader/packaging.py
@@ -98,6 +98,7 @@ def zip_files(
data=file.handler.read(), # type: ignore
zinfo_or_arcname=_create_arcname(file),
)
file._is_inside_zip = True

return path

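The packaging change simply tags each file as it is written into the zip package, so the metadata step above can tell "zip inside a package" apart from "bare zip that Dataverse will unpack". A rough, self-contained sketch of that step, with a simplified signature and stand-in objects rather than the real zip_files:

import zipfile
from dataclasses import dataclass

@dataclass
class FileStub:
    file_name: str
    content: bytes
    _is_inside_zip: bool = False

def zip_files_sketch(files, path="package.zip"):
    with zipfile.ZipFile(path, "w") as zf:
        for f in files:
            zf.writestr(f.file_name, f.content)
            # Mark the file so later metadata updates know it was shipped
            # inside a zip package rather than uploaded as a standalone zip.
            f._is_inside_zip = True
    return path

files = [FileStub("somefile.txt", b"hello"), FileStub("archive.zip", b"...")]
zip_files_sketch(files)
print([f._is_inside_zip for f in files])  # [True, True]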
70 changes: 70 additions & 0 deletions tests/integration/test_native_upload.py
@@ -460,6 +460,76 @@ def test_zipzip_file_upload(

assert sorted([file["label"] for file in files]) == sorted(expected_files)

def test_metadata_with_zip_files_in_package(self, credentials):
BASE_URL, API_TOKEN = credentials

# Create Dataset
pid = create_dataset(
parent="Root",
server_url=BASE_URL,
api_token=API_TOKEN,
)

# Arrange
files = [
File(filepath="tests/fixtures/archive.zip",
dv_dir="subdir2",
description="This file should not be unzipped",
categories=["Test file"]
),
File(filepath="tests/fixtures/add_dir_files/somefile.txt",
dv_dir="subdir",
description="A simple text file",
categories=["Test file"]
),
]

# Act
uploader = DVUploader(files=files)
uploader.upload(
persistent_id=pid,
api_token=API_TOKEN,
dataverse_url=BASE_URL,
n_parallel_uploads=10,
)

# Assert
files = retrieve_dataset_files(
dataverse_url=BASE_URL,
persistent_id=pid,
api_token=API_TOKEN,
)

assert len(files) == 2, f"Expected 2 files, got {len(files)}"

expected_files = [
{
"label": "archive.zip",
"description": "This file should not be unzipped",
"categories": ["Test file"]
},
{
"label": "somefile.txt",
"description": "A simple text file",
"categories": ["Test file"]
},
]

files_as_expected = sorted(
[
{
k: (f[k] if k in f else None)
for k in expected_files[0].keys()
}
for f in files
],
key=lambda x: x["label"]
)
assert files_as_expected == expected_files, (
f"File metadata not as expected: {json.dumps(files, indent=2)}"
)


def test_too_many_zip_files(
self,
credentials,