From 1d583120a927df1e019fbccd07d8ead0d261921f Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 28 Jun 2023 10:20:07 +0200 Subject: [PATCH 1/9] add function to clean after push_project --- mergin/client.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/mergin/client.py b/mergin/client.py index 8e098742..c5955a1d 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -14,6 +14,7 @@ import ssl from enum import Enum, auto import re +from pathlib import Path from .common import ClientError, LoginError, InvalidProject from .merginproject import MerginProject @@ -758,6 +759,7 @@ def push_project(self, directory): return # there is nothing to push (or we only deleted some files) push_project_wait(job) push_project_finalize(job) + self.clean_temp_files(directory) def pull_project(self, directory): """ @@ -990,3 +992,15 @@ def has_writing_permissions(self, project_path): """ info = self.project_info(project_path) return info["permissions"]["upload"] + + def clean_temp_files(self, directory: str) -> None: + """ + Removes all files matchning patter "*-diff-*" from .mergin folder inside specified folder. + + :param directory: project's local path + :type directory: str + """ + mergin_dir = Path(directory) / ".mergin" + if mergin_dir.exists(): + for file in mergin_dir.glob("*-diff-*"): + file.unlink() From 777bc7f5826285f7d57021daaa0f4553c4dc6bc7 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 28 Jun 2023 10:21:05 +0200 Subject: [PATCH 2/9] if file size is zero remove and consider file not_updated --- mergin/merginproject.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/mergin/merginproject.py b/mergin/merginproject.py index b5a236f9..8a2a7d37 100644 --- a/mergin/merginproject.py +++ b/mergin/merginproject.py @@ -347,15 +347,19 @@ def get_push_changes(self): self.geodiff.create_changeset(origin_file, current_file, diff_file) if self.geodiff.has_changes(diff_file): diff_size = os.path.getsize(diff_file) - file["checksum"] = file["origin_checksum"] # need to match basefile on server - file["chunks"] = [str(uuid.uuid4()) for i in range(math.ceil(diff_size / UPLOAD_CHUNK_SIZE))] - file["mtime"] = datetime.fromtimestamp(os.path.getmtime(current_file), tzlocal()) - file["diff"] = { - "path": diff_name, - "checksum": generate_checksum(diff_file), - "size": diff_size, - "mtime": datetime.fromtimestamp(os.path.getmtime(diff_file), tzlocal()), - } + if diff_size > 0: + file["checksum"] = file["origin_checksum"] # need to match basefile on server + file["chunks"] = [str(uuid.uuid4()) for i in range(math.ceil(diff_size / UPLOAD_CHUNK_SIZE))] + file["mtime"] = datetime.fromtimestamp(os.path.getmtime(current_file), tzlocal()) + file["diff"] = { + "path": diff_name, + "checksum": generate_checksum(diff_file), + "size": diff_size, + "mtime": datetime.fromtimestamp(os.path.getmtime(diff_file), tzlocal()), + } + else: + os.remove(diff_file) + not_updated.append(file) else: not_updated.append(file) except (pygeodiff.GeoDiffLibError, pygeodiff.GeoDiffLibConflictError) as e: From e6a6cf950b5edda98bd800ee7900676f117ea4ff Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 28 Jun 2023 12:05:34 +0200 Subject: [PATCH 3/9] do not check for size, only remove existing file --- mergin/merginproject.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/mergin/merginproject.py b/mergin/merginproject.py index 8a2a7d37..3ec5a8f6 100644 --- a/mergin/merginproject.py +++ b/mergin/merginproject.py @@ -347,20 +347,18 @@ def get_push_changes(self): self.geodiff.create_changeset(origin_file, current_file, diff_file) if self.geodiff.has_changes(diff_file): diff_size = os.path.getsize(diff_file) - if diff_size > 0: - file["checksum"] = file["origin_checksum"] # need to match basefile on server - file["chunks"] = [str(uuid.uuid4()) for i in range(math.ceil(diff_size / UPLOAD_CHUNK_SIZE))] - file["mtime"] = datetime.fromtimestamp(os.path.getmtime(current_file), tzlocal()) - file["diff"] = { - "path": diff_name, - "checksum": generate_checksum(diff_file), - "size": diff_size, - "mtime": datetime.fromtimestamp(os.path.getmtime(diff_file), tzlocal()), - } - else: - os.remove(diff_file) - not_updated.append(file) + file["checksum"] = file["origin_checksum"] # need to match basefile on server + file["chunks"] = [str(uuid.uuid4()) for i in range(math.ceil(diff_size / UPLOAD_CHUNK_SIZE))] + file["mtime"] = datetime.fromtimestamp(os.path.getmtime(current_file), tzlocal()) + file["diff"] = { + "path": diff_name, + "checksum": generate_checksum(diff_file), + "size": diff_size, + "mtime": datetime.fromtimestamp(os.path.getmtime(diff_file), tzlocal()), + } else: + if os.path.exists(diff_file): + os.remove(diff_file) not_updated.append(file) except (pygeodiff.GeoDiffLibError, pygeodiff.GeoDiffLibConflictError) as e: self.log.warning("failed to create changeset for " + path) From 03791359078bfc0572e9a376594abee2173d5ce3 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 28 Jun 2023 12:11:28 +0200 Subject: [PATCH 4/9] add remove_diff_files() --- mergin/client_push.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mergin/client_push.py b/mergin/client_push.py index 5112cfd5..6c9c224e 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -316,3 +316,13 @@ def _do_upload(item, job): item.upload_blocking(job.mc, job.mp) job.transferred_size += item.size + + +def remove_diff_files(job) -> None: + """Looks for diff files in the job and removes them.""" + + for change in job.changes["updated"]: + if "diff" in change.keys(): + diff_file = job.mp.fpath_meta(change["diff"]["path"]) + if os.path.exists(diff_file): + os.remove(diff_file) From cfc18ea93f23ffab1c37cffd4ba493259dd41e9b Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 28 Jun 2023 12:12:03 +0200 Subject: [PATCH 5/9] call cleaning function --- mergin/client_push.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mergin/client_push.py b/mergin/client_push.py index 6c9c224e..2e00fc16 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -286,6 +286,8 @@ def push_project_finalize(job): job.tmp_dir.cleanup() # delete our temporary dir and all its content + remove_diff_files(job) + job.mp.log.info("--- push finished - new project version " + job.server_resp["version"]) From ace2b5b4d7071d1b58727e5979ac7a908a3fd9dd Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 28 Jun 2023 12:52:35 +0200 Subject: [PATCH 6/9] import --- mergin/client_push.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mergin/client_push.py b/mergin/client_push.py index 2e00fc16..30f258d4 100644 --- a/mergin/client_push.py +++ b/mergin/client_push.py @@ -14,6 +14,7 @@ import pprint import tempfile import concurrent.futures +import os from .common import UPLOAD_CHUNK_SIZE, ClientError from .merginproject import MerginProject From 80d3745aeaea1198bd686fdc67f6ee32510744e3 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Wed, 28 Jun 2023 12:52:55 +0200 Subject: [PATCH 7/9] remove cleaning --- mergin/client.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/mergin/client.py b/mergin/client.py index c5955a1d..ff97f579 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -759,7 +759,6 @@ def push_project(self, directory): return # there is nothing to push (or we only deleted some files) push_project_wait(job) push_project_finalize(job) - self.clean_temp_files(directory) def pull_project(self, directory): """ @@ -992,15 +991,3 @@ def has_writing_permissions(self, project_path): """ info = self.project_info(project_path) return info["permissions"]["upload"] - - def clean_temp_files(self, directory: str) -> None: - """ - Removes all files matchning patter "*-diff-*" from .mergin folder inside specified folder. - - :param directory: project's local path - :type directory: str - """ - mergin_dir = Path(directory) / ".mergin" - if mergin_dir.exists(): - for file in mergin_dir.glob("*-diff-*"): - file.unlink() From 1e8b183c04deb06b40229cdd6689e59668c4986d Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Thu, 13 Jul 2023 13:06:34 +0200 Subject: [PATCH 8/9] revert changes --- mergin/client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mergin/client.py b/mergin/client.py index ff97f579..8e098742 100644 --- a/mergin/client.py +++ b/mergin/client.py @@ -14,7 +14,6 @@ import ssl from enum import Enum, auto import re -from pathlib import Path from .common import ClientError, LoginError, InvalidProject from .merginproject import MerginProject From 901daaca930c92f89222de33cc674aafe6d79db1 Mon Sep 17 00:00:00 2001 From: Jan Caha Date: Thu, 13 Jul 2023 13:51:32 +0200 Subject: [PATCH 9/9] add test checking if diff files are cleaned --- mergin/test/test_client.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/mergin/test/test_client.py b/mergin/test/test_client.py index f6f1c329..42ab3b36 100644 --- a/mergin/test/test_client.py +++ b/mergin/test/test_client.py @@ -8,6 +8,7 @@ import pytest import pytz import sqlite3 +import glob from .. import InvalidProject from ..client import MerginClient, ClientError, MerginProject, LoginError, decode_token_data, TokenError, ServerType @@ -1890,3 +1891,27 @@ def test_version_info(mc): created = datetime.strptime(info["created"], "%Y-%m-%dT%H:%M:%SZ") assert created.date() == date.today() assert info["changes"]["updated"][0]["size"] == 98304 + + +def test_clean_diff_files(mc): + test_project = "test_clean" + project = API_USER + "/" + test_project + project_dir = os.path.join(TMP_DIR, test_project) # primary project dir for updates + project_dir_2 = os.path.join(TMP_DIR, test_project + "_2") # concurrent project dir + + cleanup(mc, project, [project_dir, project_dir_2]) + # create remote project + shutil.copytree(TEST_DATA_DIR, project_dir) + mc.create_project_and_push(test_project, project_dir) + + # test push changes with diffs: + mp = MerginProject(project_dir) + f_updated = "base.gpkg" + # step 1) base.gpkg updated to inserted_1_A (inserted A feature) + shutil.move(mp.fpath(f_updated), mp.fpath_meta(f_updated)) # make local copy for changeset calculation + shutil.copy(mp.fpath("inserted_1_A.gpkg"), mp.fpath(f_updated)) + mc.push_project(project_dir) + + diff_files = glob.glob("*-diff-*", root_dir=os.path.split(mp.fpath_meta("inserted_1_A.gpkg"))[0]) + + assert diff_files == []