diff --git a/server/mergin/sync/private_api_controller.py b/server/mergin/sync/private_api_controller.py index aaa77216..13f059b9 100644 --- a/server/mergin/sync/private_api_controller.py +++ b/server/mergin/sync/private_api_controller.py @@ -4,7 +4,6 @@ import os from datetime import datetime, timedelta, timezone from urllib.parse import quote -from blinker import signal from connexion import NoContent from flask import ( render_template, @@ -353,17 +352,17 @@ def download_project(id: str, version=None): # noqa: E501 # pylint: disable=W06 f"attachment; filename*=UTF-8''{file_name}" ) return resp - - temp_zip_path = project_version.zip_path + ".partial" - # to be safe we are not in vicious circle remove inactive partial zip - if os.path.exists(temp_zip_path) and datetime.fromtimestamp( - os.path.getmtime(temp_zip_path), tz=timezone.utc - ) < datetime.now(timezone.utc) - timedelta( - seconds=current_app.config["PARTIAL_ZIP_EXPIRATION"] - ): - move_to_tmp(temp_zip_path) - - if not os.path.exists(temp_zip_path): - create_project_version_zip.delay(project_version.id) + # GET request triggers background job if no partial zip or expired one + if request.method == "GET": + temp_zip_path = project_version.zip_path + ".partial" + # create zip if it does not exist yet or has expired + partial_exists = os.path.exists(temp_zip_path) + is_expired = partial_exists and datetime.fromtimestamp( + os.path.getmtime(temp_zip_path), tz=timezone.utc + ) < datetime.now(timezone.utc) - timedelta( + seconds=current_app.config["PARTIAL_ZIP_EXPIRATION"] + ) + if not partial_exists or is_expired: + create_project_version_zip.delay(project_version.id) return "Project zip being prepared, please try again later", 202 diff --git a/server/mergin/sync/tasks.py b/server/mergin/sync/tasks.py index f56fb273..ce92171e 100644 --- a/server/mergin/sync/tasks.py +++ b/server/mergin/sync/tasks.py @@ -114,9 +114,17 @@ def create_project_version_zip(version_id: int): return zip_path = project_version.zip_path + ".partial" - # already running job if os.path.exists(zip_path): - return + mtime = datetime.fromtimestamp(os.path.getmtime(zip_path), tz=timezone.utc) + expiration_time = datetime.now(timezone.utc) - timedelta( + seconds=current_app.config["PARTIAL_ZIP_EXPIRATION"] + ) + if mtime > expiration_time: + # partial zip is recent -> another job is likely running + return + else: + # partial zip is too old -> remove and creating new one + os.remove(zip_path) os.makedirs(os.path.dirname(zip_path), exist_ok=True) try: diff --git a/server/mergin/tests/test_celery.py b/server/mergin/tests/test_celery.py index a5d07f47..50420cf0 100644 --- a/server/mergin/tests/test_celery.py +++ b/server/mergin/tests/test_celery.py @@ -4,6 +4,8 @@ import os from datetime import datetime, timedelta +from pathlib import Path + from flask import current_app from flask_mail import Mail from unittest.mock import patch @@ -144,16 +146,44 @@ def test_remove_deleted_project_backups(client): def test_create_project_version_zip(diff_project): + """Test celery tasks for creating project zip and cleaning them up""" latest_version = diff_project.get_latest_version() - assert not os.path.exists(latest_version.zip_path) + zip_path = Path(latest_version.zip_path) + partial_zip_path = Path(latest_version.zip_path + ".partial") + assert not zip_path.exists() + create_project_version_zip(diff_project.latest_version) + assert zip_path.exists() + assert not partial_zip_path.exists() + before_mtime = zip_path.stat().st_mtime + # mock expired partial zip -> celery removes it and creates new zip + partial_zip_path.parent.mkdir(parents=True, exist_ok=True) + partial_zip_path.touch() + new_time = datetime.now() - timedelta( + seconds=current_app.config["PARTIAL_ZIP_EXPIRATION"] + 1 + ) + modify_file_times(partial_zip_path, new_time) + create_project_version_zip(diff_project.latest_version) + assert ( + not partial_zip_path.exists() + ) # after creating zip archive, celery remove partial zip + after_mtime = zip_path.stat().st_mtime + assert before_mtime < after_mtime + # mock valid partial zip -> celery skips creating new zip and returns + before_mtime = zip_path.stat().st_mtime + partial_zip_path.parent.mkdir(parents=True, exist_ok=True) + partial_zip_path.touch() create_project_version_zip(diff_project.latest_version) - assert os.path.exists(latest_version.zip_path) - assert not os.path.exists(latest_version.zip_path + ".partial") - remove_projects_archives() - assert os.path.exists(latest_version.zip_path) + assert ( + partial_zip_path.exists() + ) # celery does not create zip archive and does not clean partial zip + after_mtime = zip_path.stat().st_mtime + assert before_mtime == after_mtime + os.remove(partial_zip_path) + remove_projects_archives() # zip is valid -> keep + assert zip_path.exists() new_time = datetime.now() - timedelta( days=current_app.config["PROJECTS_ARCHIVES_EXPIRATION"] + 1 ) modify_file_times(latest_version.zip_path, new_time) - remove_projects_archives() + remove_projects_archives() # zip has expired -> remove assert not os.path.exists(latest_version.zip_path) diff --git a/server/mergin/tests/test_private_project_api.py b/server/mergin/tests/test_private_project_api.py index 27021ecb..54575114 100644 --- a/server/mergin/tests/test_private_project_api.py +++ b/server/mergin/tests/test_private_project_api.py @@ -2,15 +2,16 @@ # # SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-MerginMaps-Commercial -import datetime import json import os -from unittest.mock import patch - +import shutil import pytest +from unittest.mock import patch +from pathlib import Path +from datetime import datetime, timedelta from flask import url_for -from ..app import db +from ..app import db, current_app from ..sync.models import AccessRequest, Project, ProjectRole, RequestStatus from ..auth.models import User from ..config import Configuration @@ -20,6 +21,7 @@ login, create_project, create_workspace, + modify_file_times, ) @@ -174,7 +176,7 @@ def test_project_access_request(client): # try with inactive project rp = create_project("removed_project", test_workspace, user) - rp.removed_at = datetime.datetime.utcnow() + rp.removed_at = datetime.utcnow() db.session.commit() resp = client.post( @@ -193,7 +195,7 @@ def test_project_access_request(client): assert resp.status_code == 200 resp = client.get("/app/project/access-requests?page=1&per_page=10") assert resp.json.get("count") == 1 - rp.removed_at = datetime.datetime.utcnow() + rp.removed_at = datetime.utcnow() db.session.commit() access_request = AccessRequest.query.filter( AccessRequest.project_id == rp.id @@ -375,7 +377,7 @@ def test_admin_project_list(client): assert resp.json.get("count") == 1 # mark as inactive p = Project.query.get(resp.json["items"][0]["id"]) - p.removed_at = datetime.datetime.utcnow() + p.removed_at = datetime.utcnow() p.removed_by = user.id db.session.commit() @@ -423,19 +425,55 @@ def test_admin_project_list(client): test_download_proj_data = [ - (None, 202), - ("v1", 202), - ("v100", 404), - (None, 200), + # zips do not exist, version not specified -> call celery task to create zip with latest version + (0, 0, 0, None, 202, 1), + # expired partial zip exists -> call celery task + (0, 1, 1, None, 202, 1), + # valid partial zip exists -> return, do not call celery + (0, 1, 0, None, 202, 0), + # zips do not exist, version specified -> call celery task with specified version + (0, 0, 0, "v1", 202, 1), + # specified version does not exist -> 404 + (0, 0, 0, "v100", 404, 0), + # zip is ready to download + (1, 0, 0, None, 200, 0), ] -@pytest.mark.parametrize("version,expected", test_download_proj_data) +@pytest.mark.parametrize( + "zipfile,partial,expired,version,exp_resp,exp_call", test_download_proj_data +) @patch("mergin.sync.tasks.create_project_version_zip.delay") -def test_download_project(mock_create_zip, client, version, expected, diff_project): - if expected == 200: - project_version = diff_project.get_latest_version() - os.mknod(project_version.zip_path) +def test_download_project( + mock_create_zip, + client, + zipfile, + partial, + expired, + version, + exp_resp, + exp_call, + diff_project, +): + """Test download endpoint responses and celery task calling""" + # prepare initial state according to testcase + project_version = diff_project.get_latest_version() + if zipfile: + zip_path = Path(project_version.zip_path) + if zip_path.parent.exists(): + shutil.rmtree(zip_path.parent, ignore_errors=True) + zip_path.parent.mkdir(parents=True, exist_ok=True) + zip_path.touch() + if partial: + temp_zip_path = Path(project_version.zip_path + ".partial") + shutil.rmtree(temp_zip_path.parent, ignore_errors=True) + temp_zip_path.parent.mkdir(parents=True, exist_ok=True) + temp_zip_path.touch() + if expired: + new_time = datetime.now() - timedelta( + seconds=current_app.config["PARTIAL_ZIP_EXPIRATION"] + 1 + ) + modify_file_times(temp_zip_path, new_time) resp = client.get( url_for( "/app.mergin_sync_private_api_controller_download_project", @@ -443,17 +481,16 @@ def test_download_project(mock_create_zip, client, version, expected, diff_proje version=version if version else "", ) ) - if expected == 200: - os.remove(project_version.zip_path) - assert resp.status_code == expected - assert mock_create_zip.called if expected == 202 else not mock_create_zip.called - if not version and expected != 200: + assert resp.status_code == exp_resp + assert mock_create_zip.called == exp_call + if not version and exp_call: call_args, _ = mock_create_zip.call_args args = call_args[0] assert args == diff_project.latest_version def test_large_project_download_fail(client, diff_project): + """Test downloading too large project is refused""" resp = client.get( url_for( "/app.mergin_sync_private_api_controller_download_project", @@ -494,4 +531,24 @@ def test_remove_abandoned_zip(mock_prepare_zip, client, diff_project): ) assert mock_prepare_zip.called assert resp.status_code == 202 - assert not os.path.exists(partial_zip_path) + + +@patch("mergin.sync.tasks.create_project_version_zip.delay") +def test_download_project_request_method(mock_prepare_zip, client, diff_project): + """Test head request does not create a celery job""" + resp = client.head( + url_for( + "/app.mergin_sync_private_api_controller_download_project", + id=diff_project.id, + ) + ) + assert not mock_prepare_zip.called + assert resp.status_code == 202 + resp = client.get( + url_for( + "/app.mergin_sync_private_api_controller_download_project", + id=diff_project.id, + ) + ) + assert mock_prepare_zip.called + assert resp.status_code == 202