diff --git a/src/onc/modules/_OncArchive.py b/src/onc/modules/_OncArchive.py index 567fb4a..3b96675 100644 --- a/src/onc/modules/_OncArchive.py +++ b/src/onc/modules/_OncArchive.py @@ -112,7 +112,7 @@ def downloadDirectArchivefile( filePath = outPath / filename fileExists = os.path.exists(filePath) - if not fileExists or overwrite: + if not fileExists or os.path.getsize(filePath) == 0 or overwrite: print(f' ({tries} of {n}) Downloading file: "{filename}"') downInfo = self.downloadArchivefile(filename, overwrite) size += downInfo["size"] diff --git a/src/onc/modules/_util.py b/src/onc/modules/_util.py index f8c3720..37c1a8c 100644 --- a/src/onc/modules/_util.py +++ b/src/onc/modules/_util.py @@ -1,3 +1,4 @@ +import os import time from datetime import timedelta from pathlib import Path @@ -17,16 +18,15 @@ def saveAsFile( filePath = outPath / fileName outPath.mkdir(parents=True, exist_ok=True) - # Save file in outPath if it doesn't exist yet - if Path.exists(filePath) and not overwrite: + # Save/Overwrite file in outPath if the file doesn't exist yet + # or it is there but with 0 file size + if not overwrite and Path.exists(filePath) and os.path.getsize(filePath) != 0: raise FileExistsError(filePath) start = time.time() size = 0 - with open(filePath, "wb+") as file: - for chunk in response.iter_content(chunk_size=128): - file.write(chunk) - size += len(chunk) + with open(filePath, "wb") as file: + file.write(response.content) downloadTime = time.time() - start return (size, round(downloadTime, 3)) diff --git a/src/onc/onc.py b/src/onc/onc.py index 2aa8300..830879a 100644 --- a/src/onc/onc.py +++ b/src/onc/onc.py @@ -1319,7 +1319,8 @@ def downloadArchivefile(self, filename: str = "", overwrite: bool = False): filename : str, default "" A valid name of a file in DMAS Archiving System. overwrite : bool, default False - Whether to overwrite the file if it exists. + Whether to overwrite the file if it exists. 0 size file is treated as non-existent, + meaning it gets overwritten even when overwrite=False. Returns ------- diff --git a/tests/archive_file/conftest.py b/tests/archive_file/conftest.py index dba05d7..273ae56 100644 --- a/tests/archive_file/conftest.py +++ b/tests/archive_file/conftest.py @@ -18,3 +18,9 @@ def params_location(): def params_location_multiple_pages(params_location): # rowLimit should be less than the total number of rows. return params_location | {"rowLimit": 2} + + +@pytest.fixture +def params_location_single_file(params_location): + # Returned archivefile name should be BPR-Folger-59_20191126T000000.000Z.txt + return params_location | {"dateFrom": "2019-11-26", "dateTo": "2019-11-27"} diff --git a/tests/archive_file/test_archivefile_direct_download.py b/tests/archive_file/test_archivefile_direct_download.py index 36501e6..59b062e 100644 --- a/tests/archive_file/test_archivefile_direct_download.py +++ b/tests/archive_file/test_archivefile_direct_download.py @@ -1,3 +1,6 @@ +import os + + def test_valid_params_one_page(requester, params_location, util): data = requester.getArchivefile(params_location) result = requester.downloadDirectArchivefile(params_location) @@ -16,3 +19,24 @@ def test_valid_params_multiple_pages(requester, params_location_multiple_pages, ) assert result["stats"]["fileCount"] == params_location_multiple_pages["rowLimit"] + + +def test_valid_params_overwrite_zero_file_size( + requester, params_location_single_file, util +): + filename = "BPR-Folger-59_20191126T000000.000Z.txt" + + file_path = requester.outPath / filename + + # Touch an empty file + with open(file_path, "w"): + pass + + # Case when downloading failed, leaving an empty file behind + assert os.path.getsize(file_path) == 0 + + requester.downloadDirectArchivefile(params_location_single_file, overwrite=False) + + assert ( + os.path.getsize(file_path) != 0 + ), "0-size file should be overwritten even if overwrite is False" diff --git a/tests/archive_file/test_archivefile_download.py b/tests/archive_file/test_archivefile_download.py index f61b449..8ae6c9b 100644 --- a/tests/archive_file/test_archivefile_download.py +++ b/tests/archive_file/test_archivefile_download.py @@ -1,3 +1,5 @@ +import os + import pytest import requests @@ -25,3 +27,22 @@ def test_valid_params(requester, util): requester.downloadArchivefile(filename, overwrite=True) assert util.get_download_files_num(requester) == 1 + + +def test_valid_params_overwrite_zero_file_size(requester): + filename = "BPR-Folger-59_20191123T000000.000Z.txt" + + file_path = requester.outPath / filename + + # Touch an empty file + with open(file_path, "w"): + pass + + # Case when downloading failed, leaving an empty file behind + assert os.path.getsize(file_path) == 0 + + requester.downloadArchivefile(filename, overwrite=False) + + assert ( + os.path.getsize(file_path) != 0 + ), "0-size file should be overwritten even if overwrite is False"