Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
28ca996
Initial version for zip download rework
varmar05 Apr 11, 2025
70c0c7c
Merge remote-tracking branch 'origin/develop' into rewrite_zip_download
harminius Apr 14, 2025
7ce05d6
Move download() function to private api
harminius Apr 16, 2025
e09fe49
Add safecheck for inactive partials
harminius Apr 16, 2025
0b9fd20
Tests
harminius Apr 17, 2025
c8bc4b6
Rm legacy tests
harminius Apr 17, 2025
75e5255
Rm unused deps
harminius Apr 17, 2025
4b6a743
Fix marshmallow version
harminius Apr 17, 2025
d32fb67
Fix marshmallow version II
harminius Apr 17, 2025
a30aac2
initial version of fe
MarcelGeo Apr 22, 2025
a8dcfc1
cleanup docker-compose
MarcelGeo Apr 24, 2025
f811c96
Merge remote-tracking branch 'origin/develop' into rewrite_zip_download
MarcelGeo Apr 24, 2025
9661ed2
Cleanup of version and more detailed description about /download endp…
MarcelGeo Apr 24, 2025
fbb56af
Introduce get_x_accel method to create valid accel url for nginx
MarcelGeo Apr 24, 2025
272ceef
add max 100 retries for download
MarcelGeo Apr 24, 2025
900902e
cancel using method + make retries maximum working properly
MarcelGeo Apr 24, 2025
ede4e3c
Cleanup of download progress
MarcelGeo Apr 24, 2025
2629588
Address review
harminius Apr 25, 2025
6bbd678
address comments @varmar05
MarcelGeo Apr 25, 2025
3f31b3f
Fix:
MarcelGeo Apr 25, 2025
ca448c6
200 response definition
harminius Apr 28, 2025
79739bb
Add configurable x accel buffering for project download
MarcelGeo Apr 28, 2025
fffe7ca
disable auth_required from project download as it was not possible to…
MarcelGeo Apr 28, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 1 addition & 9 deletions LICENSES/EE-used-libs.json
Original file line number Diff line number Diff line change
Expand Up @@ -255,10 +255,6 @@
"library": "requests",
"version": "2.31.0"
},
{
"library": "requests-toolbelt",
"version": "0.9.1"
},
{
"library": "result",
"version": "0.5.0"
Expand Down Expand Up @@ -335,10 +331,6 @@
"library": "WTForms-JSON",
"version": "0.3.5"
},
{
"library": "zipfly",
"version": "6.0.3"
},
{
"library": "zipp",
"version": "3.17.0"
Expand Down Expand Up @@ -623,4 +615,4 @@
"library": "xml-utils",
"version": "1.7.0"
}
]
]
9 changes: 5 additions & 4 deletions deployment/common/nginx.conf
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,15 @@ server {

# proxy to backend
# we need to disable buffering for these endpoints which use stream (up or down)
# /v1/project/download/
location ~ /v1/project/download/ {
location ~ /app/projects/([^/]+)/download {
# unfortunately, proxy settings do not support inheritance within nested locations, hence copied set up from root location
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Host $http_host;
# we don't want nginx trying to do something clever with
# redirects, we set the Host: header above already.
proxy_redirect off;
proxy_pass http://merginmaps-server:5000;
proxy_pass http://merginmaps-server:5000;

# disable buffering
client_max_body_size 0; # No maximum client body size
Expand All @@ -54,6 +53,8 @@ server {

location /download/ {
internal;
alias /data/; # we need to mount data from mergin server here
# We need to mount data from mergin server here.
# The alias path must match the LOCAL_PROJECTS env variable and end with a trailing slash.
alias /data/;
}
}
5 changes: 0 additions & 5 deletions deployment/community/.env.template
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# This file should contain a set of Mergin Maps configuration definitions along with their default values

# Mind that any major change to this file MUST BE reflected in docs


FLASK_APP=application

Expand All @@ -22,9 +20,6 @@ MAINTENANCE_FILE=/data/MAINTENANCE
#TEMP_DIR=gettempdir() # trash dir for temp files being cleaned regularly
TEMP_DIR=/data/tmp

#VERSION=get_version()


# Mergin DB related

#DB_APPLICATION_NAME=mergin
Expand Down
2 changes: 0 additions & 2 deletions deployment/enterprise/.env.template
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ TEMP_DIR=/data/tmp

#USER_SELF_REGISTRATION=True

#VERSION=get_version()


# Mergin DB related

Expand Down
4 changes: 3 additions & 1 deletion development.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,14 @@ If you want to run the whole stack locally, you can use the docker. Docker will
# Enter community edition deployment folder
cd deployment/community/

# Create .prod.env file from .env.template
cp .env.template .prod.env

# Run the docker composition with the current Dockerfiles
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d

# Give ownership of the ./projects and ./logs folders to the users that run the containers
sudo chown 901:999 projects
sudo chown 101:999 logs

# init db and create user
docker exec -it merginmaps-server flask init-db
Expand Down
3 changes: 1 addition & 2 deletions server/Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,13 @@ name = "pypi"
connexion = {extras = ["swagger-ui"],version = "==2.14.1"}
flask = "==2.2.5"
python-dateutil = "==2.8.2"
marshmallow = "==3.20.1"
flask-marshmallow = "==0.14.0"
marshmallow-sqlalchemy = "==1.1.0"
psycopg2-binary = "==2.9.9"
itsdangerous = "==2.2.0"
requests-toolbelt = "==1.0.0"
Flask-SQLAlchemy = "==2.5.1"
sqlalchemy = "==1.4.53"
zipfly = "==6.0.3"
gunicorn = {extras = ["gevent"],version = "==19.9"}
python-dotenv = "==0.20.0"
flask-login = "==0.6.2"
Expand Down
651 changes: 315 additions & 336 deletions server/Pipfile.lock

Large diffs are not rendered by default.

11 changes: 10 additions & 1 deletion server/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@
from celery.schedules import crontab
from mergin.app import create_app
from mergin.auth.tasks import anonymize_removed_users
from mergin.sync.tasks import remove_temp_files, remove_projects_backups
from mergin.sync.tasks import (
remove_projects_archives,
remove_temp_files,
remove_projects_backups,
)
from mergin.celery import celery, configure_celery
from mergin.stats.config import Configuration
from mergin.stats.tasks import save_statistics, send_statistics
Expand Down Expand Up @@ -76,3 +80,8 @@ def setup_periodic_tasks(sender, **kwargs):
send_statistics,
name="send usage statistics",
)
sender.add_periodic_task(
crontab(hour=3, minute=0),
remove_projects_archives,
name="remove old project archives",
)
14 changes: 12 additions & 2 deletions server/mergin/sync/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ class Configuration(object):
"LOCAL_PROJECTS",
default=os.path.join(config_dir, os.pardir, os.pardir, os.pardir, "projects"),
)
PROJECTS_ARCHIVES_DIR = config(
"PROJECTS_ARCHIVES_DIR",
default=os.path.join(LOCAL_PROJECTS, "projects_archives"),
)
PROJECTS_ARCHIVES_EXPIRATION = config(
"PROJECTS_ARCHIVES_EXPIRATION", cast=int, default=7
)
# locking file when backups are created
MAINTENANCE_FILE = config(
"MAINTENANCE_FILE", default=os.path.join(LOCAL_PROJECTS, "MAINTENANCE")
Expand All @@ -26,15 +33,18 @@ class Configuration(object):
) # in bytes
# use nginx (in front of gunicorn) to serve files (https://www.nginx.com/resources/wiki/start/topics/examples/x-accel/)
USE_X_ACCEL = config("USE_X_ACCEL", default=False, cast=bool)
PROJECTS_ARCHIVES_X_ACCEL_BUFFERING = config(
"PROJECTS_ARCHIVES_X_ACCEL_BUFFERING", default="no"
) # no buffering for large files
# for clean up of old files where diffs were applied, in seconds
FILE_EXPIRATION = config("FILE_EXPIRATION", default=48 * 3600, cast=int)
BLACKLIST = config(
"BLACKLIST", default=".mergin/, .DS_Store, .directory", cast=Csv()
)
# max total files size for archive download
MAX_DOWNLOAD_ARCHIVE_SIZE = config(
"MAX_DOWNLOAD_ARCHIVE_SIZE", default=1024 * 1024 * 1024, cast=int
)
"MAX_DOWNLOAD_ARCHIVE_SIZE", default=1024 * 1024 * 1024 * 20, cast=int
) # 20 GB
PROJECT_ACCESS_REQUEST = config(
"PROJECT_ACCESS_REQUEST", default=7 * 24 * 3600, cast=int
)
Expand Down
7 changes: 7 additions & 0 deletions server/mergin/sync/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -988,6 +988,13 @@ def changes_count(self) -> Dict:
result = db.session.execute(query, params).fetchall()
return {row[0]: row[1] for row in result}

@property
def zip_path(self):
    """Path of the zip archive for this project version.

    The file lives in the directory configured as PROJECTS_ARCHIVES_DIR and is
    named after the project id and the version name.
    """
    archives_dir = current_app.config["PROJECTS_ARCHIVES_DIR"]
    archive_name = f"{self.project_id}-{self.to_v_name(self.name)}.zip"
    return os.path.join(archives_dir, archive_name)


class Upload(db.Model):
id = db.Column(db.String, primary_key=True)
Expand Down
39 changes: 39 additions & 0 deletions server/mergin/sync/private_api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,38 @@ paths:
"404":
$ref: "#/components/responses/NotFoundResp"
x-openapi-router-controller: mergin.sync.private_api_controller
/projects/{id}/download:
get:
tags:
- project
summary: Download full project
description: Download whole project folder as zip file
operationId: download_project
parameters:
- $ref: "#/components/parameters/ProjectId"
- name: version
in: query
description: Particular version to download
required: false
schema:
$ref: "#/components/schemas/VersionName"
responses:
"200":
description: Zip file
content:
application/octet-stream:
schema:
type: string
format: binary
"202":
description: Accepted
"400":
$ref: "#/components/responses/BadStatusResp"
"403":
$ref: "#/components/responses/Forbidden"
"404":
$ref: "#/components/responses/NotFoundResp"
x-openapi-router-controller: mergin.sync.private_api_controller
components:
responses:
UnauthorizedError:
Expand Down Expand Up @@ -436,6 +468,13 @@ components:
schema:
type: string
example: project_1
ProjectId:
name: id
in: path
description: Project uuid
required: true
schema:
type: string
schemas:
CustomError:
type: object
Expand Down
68 changes: 67 additions & 1 deletion server/mergin/sync/private_api_controller.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
# Copyright (C) Lutra Consulting Limited
#
# SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-MerginMaps-Commercial
import os
from datetime import datetime, timedelta
from blinker import signal
from connexion import NoContent
from flask import render_template, request, current_app, jsonify, abort
from flask import (
render_template,
request,
current_app,
jsonify,
abort,
make_response,
send_file,
)
from flask_login import current_user
from sqlalchemy.orm import defer
from sqlalchemy import text
Expand All @@ -16,6 +26,7 @@
AccessRequest,
ProjectRole,
RequestStatus,
ProjectVersion,
)
from .schemas import (
ProjectListSchema,
Expand All @@ -29,8 +40,12 @@
check_workspace_permissions,
)
from ..utils import parse_order_params, split_order_param, get_order_param
from .tasks import create_project_version_zip
from .storages.disk import move_to_tmp
from .utils import get_x_accel_uri

project_access_granted = signal("project_access_granted")
PARTIAL_ZIP_EXPIRATION = 300 # seconds


@auth_required
Expand Down Expand Up @@ -309,3 +324,54 @@ def get_project_access(id: str):
result = current_app.ws_handler.project_access(project)
data = ProjectAccessDetailSchema(many=True).dump(result)
return data, 200


def download_project(id: str, version=None):  # noqa: E501 # pylint: disable=W0622
    """Download whole project folder as a zip file in any version.

    Return the zip file if it already exists, otherwise trigger a background job
    to create it and ask the client to try again later.

    :param id: project uuid
    :param version: version name to download; the latest project version is used
        when omitted
    :return: response serving the zip archive, or a ``(message, 202)`` tuple while
        the archive is not available yet
    :raises: aborts with 404 when the requested version does not exist and with 400
        when the version is larger than MAX_DOWNLOAD_ARCHIVE_SIZE
    """
    # Local import: the module only imports `datetime` and `timedelta`, so
    # `datetime.timezone` would be an attribute lookup on the datetime *class*.
    from datetime import timezone

    project = require_project_by_uuid(id, ProjectPermissions.Read)
    lookup_version = (
        ProjectVersion.from_v_name(version) if version else project.latest_version
    )
    project_version = ProjectVersion.query.filter_by(
        project_id=project.id, name=lookup_version
    ).first_or_404("Project version does not exist")

    if project_version.project_size > current_app.config["MAX_DOWNLOAD_ARCHIVE_SIZE"]:
        abort(
            400,
            "The total size of requested files is too large to download as a single zip, "
            "please use different method/client for download",
        )

    # check zip is already created
    if os.path.exists(project_version.zip_path):
        if current_app.config["USE_X_ACCEL"]:
            # let nginx serve the file, the response body stays empty
            resp = make_response()
            resp.headers["X-Accel-Redirect"] = get_x_accel_uri(project_version.zip_path)
            resp.headers["X-Accel-Buffering"] = current_app.config.get(
                "PROJECTS_ARCHIVES_X_ACCEL_BUFFERING"
            )
            resp.headers["X-Accel-Expires"] = "off"
            resp.headers["Content-Type"] = "application/zip"
        else:
            resp = send_file(project_version.zip_path, mimetype="application/zip")

        resp.headers["Content-Disposition"] = (
            f"attachment; filename={project.name}-v{lookup_version}.zip"
        )
        return resp

    temp_zip_path = project_version.zip_path + ".partial"
    # To avoid waiting forever on an abandoned job, discard a partial zip that has
    # not been modified for longer than the expiration period.
    if os.path.exists(temp_zip_path):
        # Prefer a configured value, fall back to the module-level default.
        expiration = current_app.config.get(
            "PARTIAL_ZIP_EXPIRATION", PARTIAL_ZIP_EXPIRATION
        )
        # Both sides are timezone-aware (UTC); mixing naive and aware datetimes
        # in a comparison raises TypeError.
        last_modified = datetime.fromtimestamp(
            os.path.getmtime(temp_zip_path), tz=timezone.utc
        )
        if last_modified < datetime.now(timezone.utc) - timedelta(seconds=expiration):
            move_to_tmp(temp_zip_path)

    # no partial zip means no job is in progress, so start one
    if not os.path.exists(temp_zip_path):
        create_project_version_zip.delay(project_version.id)

    return "Project zip being prepared, please try again later", 202
39 changes: 0 additions & 39 deletions server/mergin/sync/public_api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -494,45 +494,6 @@ paths:
'404':
$ref: '#/components/responses/NotFoundResp'
x-openapi-router-controller: mergin.sync.public_api_controller
/project/download/{namespace}/{project_name}:
get:
tags:
- project
summary: Download full project
description: Download whole project folder as zip file or multipart stream
operationId: download_project
parameters:
- $ref: "#/components/parameters/projectName"
- $ref: "#/components/parameters/namespace"
- name: format
in: query
description: Output format (only zip available).
required: false
schema:
type: string
enum:
- zip
- name: version
in: query
description: Particular version to download
required: false
schema:
$ref: "#/components/schemas/VersionName"
responses:
"200":
description: Zip file or stream
content:
application/octet-stream:
schema:
type: string
format: binary
"400":
$ref: "#/components/responses/BadStatusResp"
"403":
$ref: "#/components/responses/Forbidden"
"404":
$ref: "#/components/responses/NotFoundResp"
x-openapi-router-controller: mergin.sync.public_api_controller
/project/raw/{namespace}/{project_name}:
get:
tags:
Expand Down
Loading
Loading