From 9d9fd6fc459fddde03e2b57cd385b7bd9bfe6b9a Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Fri, 13 Oct 2023 11:24:37 +0200 Subject: [PATCH 01/20] add reset_local_changes() --- mergin/client.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/mergin/client.py b/mergin/client.py index a00f8f07..f8f71be4 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -14,6 +14,7 @@ import ssl from enum import Enum, auto import re +import typing from .common import ClientError, LoginError, InvalidProject from .merginproject import MerginProject @@ -1061,3 +1062,34 @@ def has_writing_permissions(self, project_path): """ info = self.project_info(project_path) return info["permissions"]["upload"] + + def reset_local_changes(self, directory: str, files_to_reset: typing.List[str] = None) -> None: + """ + Reset local changes to either all files or only listed files. + Added files are removed, removed files are brought back and updates are discarded. + + :param directory: Project's directory + :type directory: String + :param files_to_reset List of files to reset, relative paths of file + :type files_to_reset: List of strings, default None + """ + all_files = files_to_reset is None + + mp = MerginProject(directory) + + push_changes = mp.get_push_changes() + + # remove all added files + for file in push_changes["added"]: + if all_files or file["path"] in files_to_reset: + os.remove(mp.fpath(file["path"])) + + # update files get override with previous version + for file in push_changes["updated"]: + if all_files or file["path"] in files_to_reset: + mp.geodiff.make_copy_sqlite(mp.fpath_meta(file["path"]), mp.fpath(file["path"])) + + # removed files are redownloaded + for file in push_changes["removed"]: + if all_files or file["path"] in files_to_reset: + self.download_file(directory, file["path"], mp.fpath(file["path"])) From e6b3c8aa4e90831e468bdf50418f966e2287ff62 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Fri, 13 Oct 2023 11:26:16 +0200 Subject: 
[PATCH 02/20] tests for reset_local_changes --- mergin/test/test_client.py | 75 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 891ab178..8e6f2abb 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -1952,3 +1952,78 @@ def test_clean_diff_files(mc): diff_files = glob.glob("*-diff-*", root_dir=os.path.split(mp.fpath_meta("inserted_1_A.gpkg"))[0]) assert diff_files == [] + + +def test_reset_local_changes(mc: MerginClient): + test_project = f"test_reset_local_changes" + project = API_USER + "/" + test_project + project_dir = os.path.join(TMP_DIR, test_project) # primary project dir for updates + + cleanup(mc, project, [project_dir]) + # create remote project + shutil.copytree(TEST_DATA_DIR, project_dir) + mc.create_project_and_push(test_project, project_dir) + + # test push changes with diffs: + mp = MerginProject(project_dir) + + f_updated = "base.gpkg" + shutil.move(mp.fpath(f_updated), mp.fpath_meta(f_updated)) # make local copy for changeset calculation + shutil.copy(mp.fpath("inserted_1_A.gpkg"), mp.fpath(f_updated)) + shutil.copy(mp.fpath("test.txt"), mp.fpath("new_test.txt")) + shutil.copy(mp.fpath("test.txt"), mp.fpath("new_dir/new_test.txt")) + os.remove(mp.fpath("test.txt")) + os.remove(mp.fpath("test_dir/test2.txt")) + + # push changes prior to reset + mp = MerginProject(project_dir) + push_changes = mp.get_push_changes() + + assert len(push_changes["added"]) == 2 + assert len(push_changes["removed"]) == 2 + assert len(push_changes["updated"]) == 1 + + # reset all files back + mc.reset_local_changes(project_dir) + + # push changes after the reset + mp = MerginProject(project_dir) + push_changes = mp.get_push_changes() + + assert len(push_changes["added"]) == 0 + assert len(push_changes["removed"]) == 0 + assert len(push_changes["updated"]) == 0 + + cleanup(mc, project, [project_dir]) + # create remote project + 
shutil.copytree(TEST_DATA_DIR, project_dir) + mc.create_project_and_push(test_project, project_dir) + + # test push changes with diffs: + mp = MerginProject(project_dir) + + shutil.move(mp.fpath(f_updated), mp.fpath_meta(f_updated)) # make local copy for changeset calculation + shutil.copy(mp.fpath("inserted_1_A.gpkg"), mp.fpath(f_updated)) + shutil.copy(mp.fpath("test.txt"), mp.fpath("new_test.txt")) + shutil.copy(mp.fpath("test.txt"), mp.fpath("new_dir/new_test.txt")) + os.remove(mp.fpath("test.txt")) + os.remove(mp.fpath("test_dir/test2.txt")) + + # push changes prior to reset + mp = MerginProject(project_dir) + push_changes = mp.get_push_changes() + + assert len(push_changes["added"]) == 2 + assert len(push_changes["removed"]) == 2 + assert len(push_changes["updated"]) == 1 + + # reset local changes only to certain files, one added and one removed + mc.reset_local_changes(project_dir, files_to_reset=["new_test.txt", "test_dir/test2.txt"]) + + # push changes after the reset + mp = MerginProject(project_dir) + push_changes = mp.get_push_changes() + + assert len(push_changes["added"]) == 1 + assert len(push_changes["removed"]) == 1 + assert len(push_changes["updated"]) == 1 From 41b67268cd8da3b2dfa73b58f8ec8c0cef05f123 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Fri, 13 Oct 2023 14:12:52 +0200 Subject: [PATCH 03/20] handle versioned and unversioned files --- mergin/client.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mergin/client.py b/mergin/client.py index f8f71be4..9b8dbf8e 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -1087,7 +1087,10 @@ def reset_local_changes(self, directory: str, files_to_reset: typing.List[str] = # update files get override with previous version for file in push_changes["updated"]: if all_files or file["path"] in files_to_reset: - mp.geodiff.make_copy_sqlite(mp.fpath_meta(file["path"]), mp.fpath(file["path"])) + if mp.is_versioned_file(file["path"]): + 
mp.geodiff.make_copy_sqlite(mp.fpath_meta(file["path"]), mp.fpath(file["path"])) + else: + self.download_file(directory, file["path"], mp.fpath(file["path"])) # removed files are redownloaded for file in push_changes["removed"]: From fa2909ff54bef0d47b8b2a95bf2181133a016f0c Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Fri, 13 Oct 2023 14:13:13 +0200 Subject: [PATCH 04/20] add modified unversioned file --- mergin/test/test_client.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 8e6f2abb..6ab84c2e 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -1974,6 +1974,8 @@ def test_reset_local_changes(mc: MerginClient): shutil.copy(mp.fpath("test.txt"), mp.fpath("new_dir/new_test.txt")) os.remove(mp.fpath("test.txt")) os.remove(mp.fpath("test_dir/test2.txt")) + with open(mp.fpath("test3.txt"), mode="a", encoding="utf-8") as file: + file.write(" Add some text.") # push changes prior to reset mp = MerginProject(project_dir) @@ -1981,7 +1983,7 @@ def test_reset_local_changes(mc: MerginClient): assert len(push_changes["added"]) == 2 assert len(push_changes["removed"]) == 2 - assert len(push_changes["updated"]) == 1 + assert len(push_changes["updated"]) == 2 # reset all files back mc.reset_local_changes(project_dir) From 0b39591bfd3b4161d4ddb222a98f2b0ce73af413 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Fri, 13 Oct 2023 14:40:26 +0200 Subject: [PATCH 05/20] when downloading files consider version --- mergin/client.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index 9b8dbf8e..c0af495a 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -1077,6 +1077,8 @@ def reset_local_changes(self, directory: str, files_to_reset: typing.List[str] = mp = MerginProject(directory) + current_version = mp.version() + push_changes = mp.get_push_changes() # remove all added files @@ -1090,9 +1092,9 @@ def
reset_local_changes(self, directory: str, files_to_reset: typing.List[str] = if mp.is_versioned_file(file["path"]): mp.geodiff.make_copy_sqlite(mp.fpath_meta(file["path"]), mp.fpath(file["path"])) else: - self.download_file(directory, file["path"], mp.fpath(file["path"])) + self.download_file(directory, file["path"], mp.fpath(file["path"]), version=current_version) # removed files are redownloaded for file in push_changes["removed"]: if all_files or file["path"] in files_to_reset: - self.download_file(directory, file["path"], mp.fpath(file["path"])) + self.download_file(directory, file["path"], mp.fpath(file["path"]), version=current_version) From 767224ca3f884cc00e473be7a332d5233958f6b4 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Fri, 13 Oct 2023 15:06:06 +0200 Subject: [PATCH 06/20] add test for situation when user does not have latest version of project --- mergin/test/test_client.py | 44 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 6ab84c2e..097def9a 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -1958,6 +1958,7 @@ def test_reset_local_changes(mc: MerginClient): test_project = f"test_reset_local_changes" project = API_USER + "/" + test_project project_dir = os.path.join(TMP_DIR, test_project) # primary project dir for updates + project_dir_2 = os.path.join(TMP_DIR, test_project + "_v2") # primary project dir for updates cleanup(mc, project, [project_dir]) # create remote project @@ -2029,3 +2030,46 @@ def test_reset_local_changes(mc: MerginClient): assert len(push_changes["added"]) == 1 assert len(push_changes["removed"]) == 1 assert len(push_changes["updated"]) == 1 + + cleanup(mc, project, [project_dir, project_dir_2]) + # create remote project + shutil.copytree(TEST_DATA_DIR, project_dir) + mc.create_project_and_push(test_project, project_dir) + + # test push changes with diffs: + mp = MerginProject(project_dir) + + # make 
changes creating two another versions + shutil.move(mp.fpath(f_updated), mp.fpath_meta(f_updated)) # make local copy for changeset calculation + shutil.copy(mp.fpath("inserted_1_A.gpkg"), mp.fpath(f_updated)) + mc.push_project(project_dir) + shutil.copy(mp.fpath("test.txt"), mp.fpath("new_test.txt")) + shutil.copy(mp.fpath("test.txt"), mp.fpath("new_dir/new_test.txt")) + mc.push_project(project_dir) + os.remove(mp.fpath("test.txt")) + os.remove(mp.fpath("test_dir/test2.txt")) + + # download version 2 and create MerginProject for it + mc.download_project(project, project_dir_2, version="v2") + mp = MerginProject(project_dir_2) + + # make some changes + shutil.copy(mp.fpath("test.txt"), mp.fpath("new_test.txt")) + shutil.copy(mp.fpath("test.txt"), mp.fpath("new_dir/new_test.txt")) + os.remove(mp.fpath("test.txt")) + os.remove(mp.fpath("test_dir/test2.txt")) + + # check changes + push_changes = mp.get_push_changes() + assert len(push_changes["added"]) == 2 + assert len(push_changes["removed"]) == 2 + assert len(push_changes["updated"]) == 0 + + # reset back to original version we had - v2 + mc.reset_local_changes(project_dir_2) + + # push changes after the reset - should be none + push_changes = mp.get_push_changes() + assert len(push_changes["added"]) == 0 + assert len(push_changes["removed"]) == 0 + assert len(push_changes["updated"]) == 0 From 0a84de17c8c36fe19ecdd018bfa6640a89b8cc4d Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Fri, 13 Oct 2023 15:09:52 +0200 Subject: [PATCH 07/20] remove changes to mp.fpath_meta() copied from other test --- mergin/test/test_client.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 097def9a..4c7ccc0f 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -1969,7 +1969,6 @@ def test_reset_local_changes(mc: MerginClient): mp = MerginProject(project_dir) f_updated = "base.gpkg" - shutil.move(mp.fpath(f_updated), mp.fpath_meta(f_updated)) # make 
local copy for changeset calculation shutil.copy(mp.fpath("inserted_1_A.gpkg"), mp.fpath(f_updated)) shutil.copy(mp.fpath("test.txt"), mp.fpath("new_test.txt")) shutil.copy(mp.fpath("test.txt"), mp.fpath("new_dir/new_test.txt")) @@ -2005,7 +2004,6 @@ def test_reset_local_changes(mc: MerginClient): # test push changes with diffs: mp = MerginProject(project_dir) - shutil.move(mp.fpath(f_updated), mp.fpath_meta(f_updated)) # make local copy for changeset calculation shutil.copy(mp.fpath("inserted_1_A.gpkg"), mp.fpath(f_updated)) shutil.copy(mp.fpath("test.txt"), mp.fpath("new_test.txt")) shutil.copy(mp.fpath("test.txt"), mp.fpath("new_dir/new_test.txt")) @@ -2040,7 +2038,6 @@ def test_reset_local_changes(mc: MerginClient): mp = MerginProject(project_dir) # make changes creating two another versions - shutil.move(mp.fpath(f_updated), mp.fpath_meta(f_updated)) # make local copy for changeset calculation shutil.copy(mp.fpath("inserted_1_A.gpkg"), mp.fpath(f_updated)) mc.push_project(project_dir) shutil.copy(mp.fpath("test.txt"), mp.fpath("new_test.txt")) From f5cf5088434864f84351dd13f547f862fe3bb72b Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 18 Oct 2023 09:55:45 +0200 Subject: [PATCH 08/20] add download_files_async() and download_files_finalize() --- mergin/client_pull.py | 87 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/mergin/client_pull.py b/mergin/client_pull.py index 0b1b5ef3..c0ad2f5c 100644 --- a/mergin/client_pull.py +++ b/mergin/client_pull.py @@ -15,6 +15,7 @@ import pprint import shutil import tempfile +import typing import concurrent.futures @@ -819,3 +820,89 @@ def download_diffs_finalize(job): job.mp.log.info("--- diffs pull finished") return diffs + + +def download_files_async(mc, project_dir: str, file_paths: typing.List[str], version: str): + """ + Starts background download project files at specified version. + Returns handle to the pending download. 
+ """ + mp = MerginProject(project_dir) + project_path = mp.project_full_name() + ver_info = f"at version {version}" if version is not None else "at latest version" + mp.log.info(f"Getting [{', '.join(file_paths)}] {ver_info}") + latest_proj_info = mc.project_info(project_path) + if version: + project_info = mc.project_info(project_path, version=version) + else: + project_info = latest_proj_info + mp.log.info(f"Got project info. version {project_info['version']}") + + # set temporary directory for download + temp_dir = tempfile.mkdtemp(prefix="mergin-py-client-") + + download_list = [] + update_tasks = [] + total_size = 0 + # None can not be used to indicate latest version of the file, so + # it is necessary to pass actual version. + if version is None: + version = latest_proj_info["version"] + for file in project_info["files"]: + if file["path"] in file_paths: + file["version"] = version + items = _download_items(file, temp_dir) + is_latest_version = version == latest_proj_info["version"] + task = UpdateTask(file["path"], items, mp.fpath(file["path"]), latest_version=is_latest_version) + download_list.extend(task.download_queue_items) + for item in task.download_queue_items: + total_size += item.size + update_tasks.append(task) + + missing_files = [] + files_to_download = [] + project_file_paths = [file["path"] for file in project_info["files"]] + for file in file_paths: + if file not in project_file_paths: + missing_files.append(file) + else: + files_to_download.append(file) + + if not download_list: + warn = f"No [{', '.join(missing_files)}] exists at version {version}" + mp.log.warning(warn) + shutil.rmtree(temp_dir) + raise ClientError(warn) + + mp.log.info( + f"will download files [{', '.join(files_to_download)}] in {len(download_list)} chunks, total size {total_size}" + ) + job = DownloadJob(project_path, total_size, version, update_tasks, download_list, temp_dir, mp, project_info) + job.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4) + 
job.futures = [] + for item in download_list: + future = job.executor.submit(_do_download, item, mc, mp, project_path, job) + job.futures.append(future) + + return job + + +def download_files_finalize(job): + """ + To be called when download_file_async is finished + """ + job.executor.shutdown(wait=True) + + # make sure any exceptions from threads are not lost + for future in job.futures: + if future.exception() is not None: + raise future.exception() + + job.mp.log.info("--- download finished") + + for task in job.update_tasks: + task.apply(job.directory, job.mp) + + # Remove temporary download directory + if job.directory is not None and os.path.exists(job.directory): + shutil.rmtree(job.directory) From 989f3e55b9f4ef3749de0be908f3f8aca9dbc02e Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 18 Oct 2023 09:58:16 +0200 Subject: [PATCH 09/20] add download_files() --- mergin/client.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/mergin/client.py b/mergin/client.py index c0af495a..b9effd24 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -1097,4 +1097,18 @@ def reset_local_changes(self, directory: str, files_to_reset: typing.List[str] = # removed files are redownloaded for file in push_changes["removed"]: if all_files or file["path"] in files_to_reset: - self.download_file(directory, file["path"], mp.fpath(file["path"]), version=current_version) + + def download_files(self, project_dir, file_paths, version=None): + """ + Download project files at specified version. Get the latest if no version specified. 
+ + :param project_dir: project local directory + :type project_dir: String + :param file_path: List of relative paths of files to download in the project directory + :type file_path: List[String] + :param version: optional version tag for downloaded file + :type version: String + """ + job = download_files_async(self, project_dir, file_paths, version=version) + pull_project_wait(job) + download_files_finalize(job) From ecdc251bb62f3bba7fc562ec6c622a901d56f5be Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 18 Oct 2023 09:58:55 +0200 Subject: [PATCH 10/20] files to download are downloaded in single job --- mergin/client.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mergin/client.py b/mergin/client.py index b9effd24..c3af1523 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -22,6 +22,8 @@ download_file_finalize, download_project_async, download_file_async, + download_files_async, + download_files_finalize, download_diffs_async, download_project_finalize, download_project_wait, @@ -1081,6 +1083,8 @@ def reset_local_changes(self, directory: str, files_to_reset: typing.List[str] = push_changes = mp.get_push_changes() + files_download = [] + # remove all added files for file in push_changes["added"]: if all_files or file["path"] in files_to_reset: @@ -1092,11 +1096,14 @@ def reset_local_changes(self, directory: str, files_to_reset: typing.List[str] = if mp.is_versioned_file(file["path"]): mp.geodiff.make_copy_sqlite(mp.fpath_meta(file["path"]), mp.fpath(file["path"])) else: - self.download_file(directory, file["path"], mp.fpath(file["path"]), version=current_version) + files_download.append(file["path"]) # removed files are redownloaded for file in push_changes["removed"]: if all_files or file["path"] in files_to_reset: + files_download.append(file["path"]) + + self.download_files(directory, files_download, version=current_version) def download_files(self, project_dir, file_paths, version=None): """ From 
bb519cedbc222aad4492927321d38849ca872429 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Fri, 20 Oct 2023 13:42:43 +0200 Subject: [PATCH 11/20] add typing and output_paths parameter --- mergin/client_pull.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mergin/client_pull.py b/mergin/client_pull.py index 3fc676cd..e4ece5b9 100644 --- a/mergin/client_pull.py +++ b/mergin/client_pull.py @@ -807,7 +807,9 @@ def download_diffs_finalize(job): return diffs -def download_files_async(mc, project_dir: str, file_paths: typing.List[str], version: str): +def download_files_async( + mc, project_dir: str, file_paths: typing.List[str], output_paths: typing.List[str], version: str +): """ Starts background download project files at specified version. Returns handle to the pending download. From 419d18302879b97eb18f3947056e8ac3558f97e1 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Fri, 20 Oct 2023 13:43:42 +0200 Subject: [PATCH 12/20] use output_paths --- mergin/client_pull.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/mergin/client_pull.py b/mergin/client_pull.py index e4ece5b9..ded3adf6 100644 --- a/mergin/client_pull.py +++ b/mergin/client_pull.py @@ -828,6 +828,17 @@ def download_files_async( # set temporary directory for download temp_dir = tempfile.mkdtemp(prefix="mergin-py-client-") + if output_paths is None: + output_paths = [] + for file in file_paths: + output_paths.append(mp.fpath(file)) + + if len(output_paths) != len(file_paths): + warn = "Output file paths are not of the same length as file paths. Cannot store required files." 
+ mp.log.warning(warn) + shutil.rmtree(temp_dir) + raise ClientError(warn) + download_list = [] update_tasks = [] total_size = 0 @@ -837,10 +848,11 @@ def download_files_async( version = latest_proj_info["version"] for file in project_info["files"]: if file["path"] in file_paths: + index = file_paths.index(file["path"]) file["version"] = version items = _download_items(file, temp_dir) is_latest_version = version == latest_proj_info["version"] - task = UpdateTask(file["path"], items, mp.fpath(file["path"]), latest_version=is_latest_version) + task = UpdateTask(file["path"], items, output_paths[index], latest_version=is_latest_version) download_list.extend(task.download_queue_items) for item in task.download_queue_items: total_size += item.size @@ -855,7 +867,7 @@ def download_files_async( else: files_to_download.append(file) - if not download_list: + if not download_list or missing_files: warn = f"No [{', '.join(missing_files)}] exists at version {version}" mp.log.warning(warn) shutil.rmtree(temp_dir) From 2381a0ea4e27e128ceb963acb77fb5302695c1de Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Fri, 20 Oct 2023 13:44:08 +0200 Subject: [PATCH 13/20] use download_files_async to download single file --- mergin/client_pull.py | 48 +------------------------------------------ 1 file changed, 1 insertion(+), 47 deletions(-) diff --git a/mergin/client_pull.py b/mergin/client_pull.py index ded3adf6..c4f6eaef 100644 --- a/mergin/client_pull.py +++ b/mergin/client_pull.py @@ -622,53 +622,7 @@ def download_file_async(mc, project_dir, file_path, output_file, version): Starts background download project file at specified version. Returns handle to the pending download. 
""" - mp = MerginProject(project_dir) - project_path = mp.project_full_name() - ver_info = f"at version {version}" if version is not None else "at latest version" - mp.log.info(f"Getting {file_path} {ver_info}") - latest_proj_info = mc.project_info(project_path) - if version: - project_info = mc.project_info(project_path, version=version) - else: - project_info = latest_proj_info - mp.log.info(f"Got project info. version {project_info['version']}") - - # set temporary directory for download - temp_dir = tempfile.mkdtemp(prefix="mergin-py-client-") - - download_list = [] - update_tasks = [] - total_size = 0 - # None can not be used to indicate latest version of the file, so - # it is necessary to pass actual version. - if version is None: - version = latest_proj_info["version"] - for file in project_info["files"]: - if file["path"] == file_path: - file["version"] = version - items = _download_items(file, temp_dir) - is_latest_version = version == latest_proj_info["version"] - task = UpdateTask(file["path"], items, output_file, latest_version=is_latest_version) - download_list.extend(task.download_queue_items) - for item in task.download_queue_items: - total_size += item.size - update_tasks.append(task) - break - if not download_list: - warn = f"No {file_path} exists at version {version}" - mp.log.warning(warn) - shutil.rmtree(temp_dir) - raise ClientError(warn) - - mp.log.info(f"will download file {file_path} in {len(download_list)} chunks, total size {total_size}") - job = DownloadJob(project_path, total_size, version, update_tasks, download_list, temp_dir, mp, project_info) - job.executor = concurrent.futures.ThreadPoolExecutor(max_workers=4) - job.futures = [] - for item in download_list: - future = job.executor.submit(_do_download, item, mc, mp, project_path, job) - job.futures.append(future) - - return job + return download_files_async(mc, project_dir, [file_path], [output_file], version) def download_file_finalize(job): From 
3a60793a1c260c2982d89f24271a35d6388d5b78 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Fri, 20 Oct 2023 13:44:24 +0200 Subject: [PATCH 14/20] add output_paths --- mergin/client.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index 27096854..9d3dfd75 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -1171,7 +1171,9 @@ def reset_local_changes(self, directory: str, files_to_reset: typing.List[str] = self.download_files(directory, files_download, version=current_version) - def download_files(self, project_dir, file_paths, version=None): + def download_files( + self, project_dir: str, file_paths: typing.List[str], output_paths: typing.List[str] = None, version: str = None + ): """ Download project files at specified version. Get the latest if no version specified. @@ -1182,6 +1184,6 @@ def download_files(self, project_dir, file_paths, version=None): :param version: optional version tag for downloaded file :type version: String """ - job = download_files_async(self, project_dir, file_paths, version=version) + job = download_files_async(self, project_dir, file_paths, output_paths, version=version) pull_project_wait(job) download_files_finalize(job) From 090459bb89d3462ff2168854bb26939cb55f64a9 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Fri, 20 Oct 2023 13:44:39 +0200 Subject: [PATCH 15/20] update error message --- mergin/test/test_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 626b46ce..6cf9d7cb 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -1021,8 +1021,8 @@ def test_download_file(mc): assert check_gpkg_same_content(mp, f_downloaded, expected) # make sure there will be exception raised if a file doesn't exist in the version - with pytest.raises(ClientError, match=f"No {f_updated} exists at version v5"): - mc.download_file(project_dir, f_updated, f_downloaded, version=f"v5") + 
with pytest.raises(ClientError, match=f"No \\[{f_updated}\\] exists at version v5"): + mc.download_file(project_dir, f_updated, f_downloaded, version="v5") def test_download_diffs(mc): From b40c9771cf577096bec3682d681932a3e7cfdd51 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Fri, 20 Oct 2023 13:45:14 +0200 Subject: [PATCH 16/20] test mc.download_files() --- mergin/test/test_client.py | 41 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 6cf9d7cb..3067a32a 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -2156,3 +2156,44 @@ def test_project_metadata(mc): assert mp.project_name() == test_project assert mp.workspace_name() == API_USER assert mp.version() == "v0" + + +def test_download_files(mc: MerginClient): + """Test downloading files at specified versions.""" + test_project = "test_download_files" + project = API_USER + "/" + test_project + project_dir = os.path.join(TMP_DIR, test_project) + f_updated = "base.gpkg" + download_dir = os.path.join(TMP_DIR, "test-download-files-tmp") + + cleanup(mc, project, [project_dir, download_dir]) + + mp = create_versioned_project(mc, test_project, project_dir, f_updated) + + project_info = mc.project_info(project) + assert project_info["version"] == "v5" + assert project_info["id"] == mp.project_id() + + # Versioned file should have the following content at versions 2-4 + expected_content = ("inserted_1_A.gpkg", "inserted_1_A_mod.gpkg", "inserted_1_B.gpkg") + + downloaded_file = os.path.join(download_dir, f_updated) + + # if output_paths is specified look at that location + for ver in range(2, 5): + mc.download_files(project_dir, [f_updated], [downloaded_file], version=f"v{ver}") + expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 2]) # GeoPackage with expected content + assert check_gpkg_same_content(mp, downloaded_file, expected) + + # if output_paths is not specified look in the mergin 
project folder + for ver in range(2, 5): + mc.download_files(project_dir, [f_updated], version=f"v{ver}") + expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 2]) # GeoPackage with expected content + assert check_gpkg_same_content(mp, mp.fpath(f_updated), expected) + + # make sure there will be exception raised if a file doesn't exist in the version + with pytest.raises(ClientError, match=f"No \\[{f_updated}\\] exists at version v5"): + mc.download_files(project_dir, [f_updated], version="v5") + + with pytest.raises(ClientError, match=f"No \\[non_existing\\.file\\] exists at version v3"): + mc.download_files(project_dir, [f_updated, "non_existing.file"], version="v3") From f28e2221b332cd716fadee400ab4ad615bd6c225 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Fri, 20 Oct 2023 14:19:16 +0200 Subject: [PATCH 17/20] test download of multiple files into specific dir --- mergin/test/test_client.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 3067a32a..36bdaaef 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -2191,6 +2191,20 @@ def test_download_files(mc: MerginClient): expected = os.path.join(TEST_DATA_DIR, expected_content[ver - 2]) # GeoPackage with expected content assert check_gpkg_same_content(mp, mp.fpath(f_updated), expected) + # download two files from v1 and check their content + file_2 = "test.txt" + downloaded_file_2 = os.path.join(download_dir, file_2) + + mc.download_files(project_dir, [f_updated, file_2], [downloaded_file, downloaded_file_2], version="v1") + assert check_gpkg_same_content(mp, downloaded_file, os.path.join(TEST_DATA_DIR, f_updated)) + + with open(os.path.join(TEST_DATA_DIR, file_2), mode="r", encoding="utf-8") as file: + content_exp = file.read() + + with open(os.path.join(download_dir, file_2), mode="r", encoding="utf-8") as file: + content = file.read() + assert content_exp == content + # make sure there will be 
exception raised if a file doesn't exist in the version with pytest.raises(ClientError, match=f"No \\[{f_updated}\\] exists at version v5"): mc.download_files(project_dir, [f_updated], version="v5") From 210997efbafea7b6f048b16b18c00fd58f507f74 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 25 Oct 2023 08:40:22 +0200 Subject: [PATCH 18/20] add missing docstring --- mergin/client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mergin/client.py b/mergin/client.py index 9d3dfd75..baa0f363 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -1181,6 +1181,8 @@ def download_files( :type project_dir: String :param file_path: List of relative paths of files to download in the project directory :type file_path: List[String] + :param output_paths: List of paths for files to download to. Should be same length of as file_path. Default is `None` which means that files are downloaded into MerginProject at project_dir. + :type output_paths: List[String] :param version: optional version tag for downloaded file :type version: String """ From d992acedb725f8f41f2356ec9834189192125486 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 25 Oct 2023 09:44:42 +0200 Subject: [PATCH 19/20] use download_files_finalize call --- mergin/client_pull.py | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/mergin/client_pull.py b/mergin/client_pull.py index c4f6eaef..645a61ac 100644 --- a/mergin/client_pull.py +++ b/mergin/client_pull.py @@ -629,24 +629,7 @@ def download_file_finalize(job): """ To be called when download_file_async is finished """ - job.executor.shutdown(wait=True) - - # make sure any exceptions from threads are not lost - for future in job.futures: - if future.exception() is not None: - raise future.exception() - - job.mp.log.info("--- download finished") - - temp_dir = None - for task in job.update_tasks: - task.apply(job.directory, job.mp) - if task.download_queue_items: - temp_dir = 
os.path.dirname(task.download_queue_items[0].download_file_path) - - # Remove temporary download directory - if temp_dir is not None: - shutil.rmtree(temp_dir) + download_files_finalize(job) def download_diffs_async(mc, project_directory, file_path, versions): From 50c8a7959beb5f15113a048e0b911d19ab442178 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 25 Oct 2023 14:53:08 +0200 Subject: [PATCH 20/20] add check that download_files() is only called if there are files to download --- mergin/client.py | 3 ++- mergin/test/test_client.py | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/mergin/client.py b/mergin/client.py index baa0f363..169b2198 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -1169,7 +1169,8 @@ def reset_local_changes(self, directory: str, files_to_reset: typing.List[str] = if all_files or file["path"] in files_to_reset: files_download.append(file["path"]) - self.download_files(directory, files_download, version=current_version) + if files_download: + self.download_files(directory, files_download, version=current_version) def download_files( self, project_dir: str, file_paths: typing.List[str], output_paths: typing.List[str] = None, version: str = None diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index 36bdaaef..e4ca10d8 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -2019,6 +2019,9 @@ def test_reset_local_changes(mc: MerginClient): # test push changes with diffs: mp = MerginProject(project_dir) + # test with no changes, should pass by doing nothing + mc.reset_local_changes(project_dir) + f_updated = "base.gpkg" shutil.copy(mp.fpath("inserted_1_A.gpkg"), mp.fpath(f_updated)) shutil.copy(mp.fpath("test.txt"), mp.fpath("new_test.txt"))