From 8694f1b6eda8292d603f9e2c7dd322a6d60ef365 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 15 Aug 2025 14:57:38 +0200 Subject: [PATCH 1/4] Improve download endpoint - head request cannot create new celery job - flask does not remove partial zip - celery care of cleaning up abandoned zip partials --- server/mergin/sync/private_api_controller.py | 25 ++-- server/mergin/sync/tasks.py | 12 +- server/mergin/tests/test_celery.py | 42 ++++++- .../mergin/tests/test_private_project_api.py | 116 ++++++++++++++---- 4 files changed, 149 insertions(+), 46 deletions(-) diff --git a/server/mergin/sync/private_api_controller.py b/server/mergin/sync/private_api_controller.py index aaa77216..13f059b9 100644 --- a/server/mergin/sync/private_api_controller.py +++ b/server/mergin/sync/private_api_controller.py @@ -4,7 +4,6 @@ import os from datetime import datetime, timedelta, timezone from urllib.parse import quote -from blinker import signal from connexion import NoContent from flask import ( render_template, @@ -353,17 +352,17 @@ def download_project(id: str, version=None): # noqa: E501 # pylint: disable=W06 f"attachment; filename*=UTF-8''{file_name}" ) return resp - - temp_zip_path = project_version.zip_path + ".partial" - # to be safe we are not in vicious circle remove inactive partial zip - if os.path.exists(temp_zip_path) and datetime.fromtimestamp( - os.path.getmtime(temp_zip_path), tz=timezone.utc - ) < datetime.now(timezone.utc) - timedelta( - seconds=current_app.config["PARTIAL_ZIP_EXPIRATION"] - ): - move_to_tmp(temp_zip_path) - - if not os.path.exists(temp_zip_path): - create_project_version_zip.delay(project_version.id) + # GET request triggers background job if no partial zip or expired one + if request.method == "GET": + temp_zip_path = project_version.zip_path + ".partial" + # create zip if it does not exist yet or has expired + partial_exists = os.path.exists(temp_zip_path) + is_expired = partial_exists and datetime.fromtimestamp( + os.path.getmtime(temp_zip_path), tz=timezone.utc + ) < datetime.now(timezone.utc) - timedelta( + seconds=current_app.config["PARTIAL_ZIP_EXPIRATION"] + ) + if not partial_exists or is_expired: + create_project_version_zip.delay(project_version.id) return "Project zip being prepared, please try again later", 202 diff --git a/server/mergin/sync/tasks.py b/server/mergin/sync/tasks.py index f56fb273..ce92171e 100644 --- a/server/mergin/sync/tasks.py +++ b/server/mergin/sync/tasks.py @@ -114,9 +114,17 @@ def create_project_version_zip(version_id: int): return zip_path = project_version.zip_path + ".partial" - # already running job if os.path.exists(zip_path): - return + mtime = datetime.fromtimestamp(os.path.getmtime(zip_path), tz=timezone.utc) + expiration_time = datetime.now(timezone.utc) - timedelta( + seconds=current_app.config["PARTIAL_ZIP_EXPIRATION"] + ) + if mtime > expiration_time: + # partial zip is recent -> another job is likely running + return + else: + # partial zip is too old -> remove and creating new one + os.remove(zip_path) os.makedirs(os.path.dirname(zip_path), exist_ok=True) try: diff --git a/server/mergin/tests/test_celery.py b/server/mergin/tests/test_celery.py index a5d07f47..348208d9 100644 --- a/server/mergin/tests/test_celery.py +++ b/server/mergin/tests/test_celery.py @@ -4,6 +4,8 @@ import os from datetime import datetime, timedelta +from pathlib import Path + from flask import current_app from flask_mail import Mail from unittest.mock import patch @@ -144,16 +146,44 @@ def test_remove_deleted_project_backups(client): def test_create_project_version_zip(diff_project): + """Test celery tasks for creating project zip and cleaning them up""" latest_version = diff_project.get_latest_version() - assert not os.path.exists(latest_version.zip_path) + zip_path = Path(latest_version.zip_path) + partial_zip_path = Path(latest_version.zip_path + ".partial") + assert not zip_path.exists() + create_project_version_zip(diff_project.latest_version) + assert zip_path.exists() + assert not partial_zip_path.exists() + before_mtime = zip_path.stat().st_mtime + # mock expired partial zip -> celery removes it and creates new zip + partial_zip_path.parent.mkdir(parents=True, exist_ok=True) + partial_zip_path.touch() + new_time = datetime.now() - timedelta( + seconds=current_app.config["PARTIAL_ZIP_EXPIRATION"] + 1 + ) + modify_file_times(partial_zip_path, new_time) + create_project_version_zip(diff_project.latest_version) + assert ( + not partial_zip_path.exists() + ) # after creating zip archive, celery remove partial zip + after_mtime = zip_path.stat().st_mtime + assert before_mtime < after_mtime + before_mtime = after_mtime + # mock valid partial zip -> celery skips creating new zip and returns + partial_zip_path.parent.mkdir(parents=True, exist_ok=True) + partial_zip_path.touch() create_project_version_zip(diff_project.latest_version) - assert os.path.exists(latest_version.zip_path) - assert not os.path.exists(latest_version.zip_path + ".partial") - remove_projects_archives() - assert os.path.exists(latest_version.zip_path) + assert ( + partial_zip_path.exists() + ) # celery does not create zip archive and does not clean partial zip + after_mtime = zip_path.stat().st_mtime + assert before_mtime == after_mtime + os.remove(partial_zip_path) + remove_projects_archives() # zip is valid -> keep + assert zip_path.exists() new_time = datetime.now() - timedelta( days=current_app.config["PROJECTS_ARCHIVES_EXPIRATION"] + 1 ) modify_file_times(latest_version.zip_path, new_time) - remove_projects_archives() + remove_projects_archives() # zip has expired -> remove assert not os.path.exists(latest_version.zip_path) diff --git a/server/mergin/tests/test_private_project_api.py b/server/mergin/tests/test_private_project_api.py index 27021ecb..ef3fd140 100644 --- a/server/mergin/tests/test_private_project_api.py +++ b/server/mergin/tests/test_private_project_api.py @@ -2,15 +2,16 @@ # # SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-MerginMaps-Commercial -import datetime import json import os -from unittest.mock import patch - +import shutil import pytest +from unittest.mock import patch +from pathlib import Path +from datetime import datetime, timedelta from flask import url_for -from ..app import db +from ..app import db, current_app from ..sync.models import AccessRequest, Project, ProjectRole, RequestStatus from ..auth.models import User from ..config import Configuration @@ -20,6 +21,7 @@ login, create_project, create_workspace, + modify_file_times, ) @@ -423,37 +425,76 @@ def test_admin_project_list(client): test_download_proj_data = [ - (None, 202), - ("v1", 202), - ("v100", 404), - (None, 200), + # zips do not exist, version not specified -> call celery task to create zip with latest version + (0, 0, 0, None, 202, 1), + # expired partial zip exists -> call celery task + (0, 1, 1, None, 202, 1), + # valid partial zip exists -> return, do not call celery + (0, 1, 0, None, 202, 0), + # zips do not exist, version specified -> call celery task with specified version + (0, 0, 0, "v1", 202, 1), + # specified version does not exist -> 404 + (0, 0, 0, "v100", 404, 0), + # zip is ready to download + (1, 0, 0, None, 200, 0), ] -@pytest.mark.parametrize("version,expected", test_download_proj_data) +@pytest.mark.parametrize( + "zipfile,partial,expired,version,exp_resp,exp_call", test_download_proj_data +) @patch("mergin.sync.tasks.create_project_version_zip.delay") -def test_download_project(mock_create_zip, client, version, expected, diff_project): - if expected == 200: - project_version = diff_project.get_latest_version() - os.mknod(project_version.zip_path) - resp = client.get( - url_for( - "/app.mergin_sync_private_api_controller_download_project", - id=diff_project.id, - version=version if version else "", +def test_download_project( + mock_create_zip, + client, + zipfile, + partial, + expired, + version, + exp_resp, + exp_call, + diff_project, +): + """Test download endpoint responses and celery task calling""" + # prepare initial state according to testcase + project_version = diff_project.get_latest_version() + if zipfile: + zip_path = Path(project_version.zip_path) + zip_path.parent.mkdir(parents=True, exist_ok=True) + zip_path.touch() + if partial: + temp_zip_path = Path(project_version.zip_path + ".partial") + temp_zip_path.parent.mkdir(parents=True, exist_ok=True) + temp_zip_path.touch() + if expired: + new_time = datetime.now() - timedelta( + seconds=current_app.config["PARTIAL_ZIP_EXPIRATION"] + 1 + ) + modify_file_times(temp_zip_path, new_time) + try: + resp = client.get( + url_for( + "/app.mergin_sync_private_api_controller_download_project", + id=diff_project.id, + version=version if version else "", + ) ) - ) - if expected == 200: - os.remove(project_version.zip_path) - assert resp.status_code == expected - assert mock_create_zip.called if expected == 202 else not mock_create_zip.called - if not version and expected != 200: + finally: + # cleanup + if zipfile: + shutil.rmtree(zip_path.parent, ignore_errors=True) + if partial: + shutil.rmtree(temp_zip_path.parent, ignore_errors=True) + assert resp.status_code == exp_resp + assert mock_create_zip.called == exp_call + if not version and exp_call: call_args, _ = mock_create_zip.call_args args = call_args[0] assert args == diff_project.latest_version def test_large_project_download_fail(client, diff_project): + """Test downloading too large project is refused""" resp = client.get( url_for( "/app.mergin_sync_private_api_controller_download_project", @@ -474,6 +515,11 @@ def test_large_project_download_fail(client, diff_project): assert resp.status_code == 400 +# exists, valid -> return +# exists, expired -> remove, create +# not exists -> create + + @patch("mergin.sync.tasks.create_project_version_zip.delay") def test_remove_abandoned_zip(mock_prepare_zip, client, diff_project): """Test project download removes partial zip which is inactive for some time""" @@ -494,4 +540,24 @@ def test_remove_abandoned_zip(mock_prepare_zip, client, diff_project): ) assert mock_prepare_zip.called assert resp.status_code == 202 - assert not os.path.exists(partial_zip_path) + + +@patch("mergin.sync.tasks.create_project_version_zip.delay") +def test_download_project_request_method(mock_prepare_zip, client, diff_project): + """Test head request does not create a celery job""" + resp = client.head( + url_for( + "/app.mergin_sync_private_api_controller_download_project", + id=diff_project.id, + ) + ) + assert not mock_prepare_zip.called + assert resp.status_code == 202 + resp = client.get( + url_for( + "/app.mergin_sync_private_api_controller_download_project", + id=diff_project.id, + ) + ) + assert mock_prepare_zip.called + assert resp.status_code == 202 From fe217cb95f2f59407abd9cb14f0726e263397a47 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 15 Aug 2025 15:00:34 +0200 Subject: [PATCH 2/4] cleanup comments --- server/mergin/tests/test_private_project_api.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/server/mergin/tests/test_private_project_api.py b/server/mergin/tests/test_private_project_api.py index ef3fd140..549f3990 100644 --- a/server/mergin/tests/test_private_project_api.py +++ b/server/mergin/tests/test_private_project_api.py @@ -515,11 +515,6 @@ def test_large_project_download_fail(client, diff_project): assert resp.status_code == 400 -# exists, valid -> return -# exists, expired -> remove, create -# not exists -> create - - @patch("mergin.sync.tasks.create_project_version_zip.delay") def test_remove_abandoned_zip(mock_prepare_zip, client, diff_project): """Test project download removes partial zip which is inactive for some time""" From 4c2dd7708391d32a074134cd8c0467d3490f85dc Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 15 Aug 2025 15:12:27 +0200 Subject: [PATCH 3/4] fix datetime usage after import change --- server/mergin/tests/test_private_project_api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/mergin/tests/test_private_project_api.py b/server/mergin/tests/test_private_project_api.py index 549f3990..2cd815c9 100644 --- a/server/mergin/tests/test_private_project_api.py +++ b/server/mergin/tests/test_private_project_api.py @@ -176,7 +176,7 @@ def test_project_access_request(client): # try with inactive project rp = create_project("removed_project", test_workspace, user) - rp.removed_at = datetime.datetime.utcnow() + rp.removed_at = datetime.utcnow() db.session.commit() resp = client.post( @@ -195,7 +195,7 @@ def test_project_access_request(client): assert resp.status_code == 200 resp = client.get("/app/project/access-requests?page=1&per_page=10") assert resp.json.get("count") == 1 - rp.removed_at = datetime.datetime.utcnow() + rp.removed_at = datetime.utcnow() db.session.commit() access_request = AccessRequest.query.filter( AccessRequest.project_id == rp.id @@ -377,7 +377,7 @@ def test_admin_project_list(client): assert resp.json.get("count") == 1 # mark as inactive p = Project.query.get(resp.json["items"][0]["id"]) - p.removed_at = datetime.datetime.utcnow() + p.removed_at = datetime.utcnow() p.removed_by = user.id db.session.commit() From 03f62a3b88f1f3dae95d7c4547dda9b12fd28925 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 15 Aug 2025 15:23:23 +0200 Subject: [PATCH 4/4] improve tests readability --- server/mergin/tests/test_celery.py | 2 +- .../mergin/tests/test_private_project_api.py | 22 ++++++++----------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/server/mergin/tests/test_celery.py b/server/mergin/tests/test_celery.py index 348208d9..50420cf0 100644 --- a/server/mergin/tests/test_celery.py +++ b/server/mergin/tests/test_celery.py @@ -168,8 +168,8 @@ def test_create_project_version_zip(diff_project): ) # after creating zip archive, celery remove partial zip after_mtime = zip_path.stat().st_mtime assert before_mtime < after_mtime - before_mtime = after_mtime # mock valid partial zip -> celery skips creating new zip and returns + before_mtime = zip_path.stat().st_mtime partial_zip_path.parent.mkdir(parents=True, exist_ok=True) partial_zip_path.touch() create_project_version_zip(diff_project.latest_version) diff --git a/server/mergin/tests/test_private_project_api.py b/server/mergin/tests/test_private_project_api.py index 2cd815c9..54575114 100644 --- a/server/mergin/tests/test_private_project_api.py +++ b/server/mergin/tests/test_private_project_api.py @@ -460,10 +460,13 @@ def test_download_project( project_version = diff_project.get_latest_version() if zipfile: zip_path = Path(project_version.zip_path) + if zip_path.parent.exists(): + shutil.rmtree(zip_path.parent, ignore_errors=True) zip_path.parent.mkdir(parents=True, exist_ok=True) zip_path.touch() if partial: temp_zip_path = Path(project_version.zip_path + ".partial") + shutil.rmtree(temp_zip_path.parent, ignore_errors=True) temp_zip_path.parent.mkdir(parents=True, exist_ok=True) temp_zip_path.touch() if expired: @@ -471,20 +474,13 @@ def test_download_project( seconds=current_app.config["PARTIAL_ZIP_EXPIRATION"] + 1 ) modify_file_times(temp_zip_path, new_time) - try: - resp = client.get( - url_for( - "/app.mergin_sync_private_api_controller_download_project", - id=diff_project.id, - version=version if version else "", - ) + resp = client.get( + url_for( + "/app.mergin_sync_private_api_controller_download_project", + id=diff_project.id, + version=version if version else "", ) - finally: - # cleanup - if zipfile: - shutil.rmtree(zip_path.parent, ignore_errors=True) - if partial: - shutil.rmtree(temp_zip_path.parent, ignore_errors=True) + ) assert resp.status_code == exp_resp assert mock_create_zip.called == exp_call if not version and exp_call: