Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion server/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from mergin.sync.tasks import remove_temp_files, remove_projects_backups
from mergin.celery import celery, configure_celery
from mergin.stats.config import Configuration
from mergin.stats.tasks import send_statistics
from mergin.stats.tasks import save_statistics, send_statistics
from mergin.stats.app import register as register_stats

Configuration.SERVER_TYPE = "ce"
Expand Down Expand Up @@ -65,6 +65,11 @@ def setup_periodic_tasks(sender, **kwargs):
remove_projects_backups,
name="remove old project backups",
)
sender.add_periodic_task(
crontab(hour="*/12"),
save_statistics,
name="Save usage statistics to database",
)
if Configuration.COLLECT_STATISTICS:
sender.add_periodic_task(
crontab(hour=randint(0, 5), minute=randint(0, 60)),
Expand Down
31 changes: 31 additions & 0 deletions server/mergin/stats/api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,37 @@ paths:
"404":
$ref: "#/components/responses/NotFoundResp"
x-openapi-router-controller: mergin.stats.controller
/app/admin/report:
get:
summary: Download statistics for server
operationId: download_report
x-openapi-router-controller: mergin.stats.controller
parameters:
- name: date_from
in: query
description: Start date for statistics (YYYY-MM-DD)
required: true
schema:
type: string
format: date
- name: date_to
in: query
description: End date for statistics (YYYY-MM-DD)
required: true
schema:
type: string
format: date
responses:
"200":
description: CSV file with statistics
content:
text/csv:
schema:
type: string
"400":
$ref: "#/components/responses/BadStatusResp"
"404":
$ref: "#/components/responses/NotFoundResp"
components:
responses:
UnauthorizedError:
Expand Down
63 changes: 61 additions & 2 deletions server/mergin/stats/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,29 @@
#
# SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-MerginMaps-Commercial

from dataclasses import asdict
import requests
from flask import abort, current_app
from flask import abort, current_app, make_response
from datetime import datetime, time
from csv import DictWriter

from mergin.auth.app import auth_required
from mergin.stats.models import MerginStatistics, ServerCallhomeData

from .config import Configuration
from ..app import parse_version_string
from ..app import parse_version_string, db


class CsvTextBuilder(object):
    """
    Mock csv writer that writes to text buffer

    Minimal file-like object: exposes only ``write``, which is all
    ``csv.DictWriter`` needs. The written fragments are collected in
    ``self.data`` and are expected to be joined into one string by the caller.
    """

    def __init__(self):
        # fragments emitted by csv writer, in write order
        self.data: list = []

    def write(self, row):
        # csv writer calls this with already-formatted text (incl. newlines)
        self.data.append(row)


def get_latest_version():
Expand All @@ -29,3 +47,44 @@ def get_latest_version():

data = {**data, **parsed_version}
return data, 200


@auth_required(permissions=["admin"])
def download_report(date_from: str, date_to: str):
    """Download statistics from server instance as a CSV attachment.

    :param date_from: inclusive start date, expected format YYYY-MM-DD
    :param date_to: inclusive end date, expected format YYYY-MM-DD
    :returns: text/csv response with one row per stored statistics snapshot
    :raises: 400 via abort when either date does not parse
    """
    try:
        # try to validate dates to prevent unhandled date formats
        # add start of the day time and end of the day time to prevent bad filtering in db
        parsed_from = datetime.combine(
            datetime.strptime(date_from, "%Y-%m-%d"), time.min
        )
        parsed_to = datetime.combine(datetime.strptime(date_to, "%Y-%m-%d"), time.max)
    except ValueError:
        abort(400, "Invalid date format")

    stats = (
        db.session.query(MerginStatistics.created_at, MerginStatistics.data)
        .filter(MerginStatistics.created_at.between(parsed_from, parsed_to))
        .order_by(MerginStatistics.created_at.desc())
        .all()
    )
    created_column = "created_at"
    data = [
        {
            **stat.data,
            "created_at": datetime.isoformat(stat.created_at),
        }
        for stat in stats
    ]
    # derive columns from the dataclass rather than from stored rows - this is
    # useful when the stored JSON format changes over time (columns added/removed)
    columns = list(ServerCallhomeData.__dataclass_fields__.keys()) + [created_column]

    builder = CsvTextBuilder()
    # extrasaction="ignore" silently drops stored keys no longer in the schema
    writer = DictWriter(builder, fieldnames=columns, extrasaction="ignore")
    writer.writeheader()
    writer.writerows(data)
    csv_data = "".join(builder.data)
    response = make_response(csv_data)
    response.headers["Content-Disposition"] = "attachment; filename=usage-report.csv"
    response.mimetype = "text/csv"
    return response
31 changes: 30 additions & 1 deletion server/mergin/stats/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,30 @@
#
# SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-MerginMaps-Commercial

from dataclasses import dataclass
from typing import Optional
import uuid
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.dialects.postgresql import UUID, JSONB
from datetime import datetime, timezone

from ..app import db


@dataclass
class ServerCallhomeData:
    """Snapshot of server usage statistics reported to the callhome service.

    All fields are optional so a partially configured server can still report.
    """

    # unique id of this deployment (stringified MerginInfo.service_id) - TODO confirm
    service_uuid: Optional[str]
    # public base URL of the server (MERGIN_BASE_URL config)
    url: Optional[str]
    # admin contact address (CONTACT_EMAIL config)
    contact_email: Optional[str]
    # server edition, taken from SERVER_TYPE config (e.g. "ce")
    licence: Optional[str]
    # number of projects not marked as removed
    projects_count: Optional[int]
    # number of users excluding "deleted_%" placeholder accounts
    users_count: Optional[int]
    workspaces_count: Optional[int]
    # timestamp string of the most recent project update, "" when no projects
    last_change: Optional[str]
    server_version: Optional[str]
    monthly_contributors: Optional[int]
    editors: Optional[int]


class MerginInfo(db.Model):
"""Information about deployment"""

Expand All @@ -19,3 +37,14 @@ def __init__(self, service_id: str = None):
self.service_id = uuid.UUID(service_id)
else:
self.service_id = uuid.uuid4()


class MerginStatistics(db.Model):
    """Periodically collected snapshot of server usage statistics."""

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    # when the snapshot was taken; server_default lets the DB fill it in
    created_at = db.Column(
        db.DateTime, index=True, nullable=False, server_default="now()"
    )
    # data with statistics
    data = db.Column(JSONB, nullable=False)
69 changes: 43 additions & 26 deletions server/mergin/stats/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,57 @@
#
# SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-MerginMaps-Commercial

from dataclasses import asdict
import requests
import datetime
from datetime import datetime, timedelta, timezone
import json
import logging
from flask import current_app
from sqlalchemy.sql.operators import is_

from .models import MerginInfo
from .models import MerginInfo, MerginStatistics, ServerCallhomeData
from ..celery import celery
from ..app import db
from ..auth.models import User
from ..sync.models import Project


def get_callhome_data(info: MerginInfo | None = None) -> ServerCallhomeData:
    """Collect current server usage statistics for the callhome service.

    :param info: deployment info row; its service id is reported when present
    :returns: populated ServerCallhomeData snapshot
    """
    cfg = current_app.config
    ws_handler = current_app.ws_handler
    # most recently updated project, if any, marks the last server activity
    latest_project = (
        db.session.query(Project.updated).order_by(Project.updated.desc()).first()
    )
    last_change = f"{latest_project.updated}Z" if latest_project else ""
    active_projects = Project.query.filter(Project.removed_at.is_(None)).count()
    # exclude soft-deleted placeholder accounts from the user count
    real_users = User.query.filter(
        is_(User.username.ilike("deleted_%"), False)
    ).count()
    return ServerCallhomeData(
        service_uuid=str(info.service_id) if info else None,
        url=cfg["MERGIN_BASE_URL"],
        contact_email=cfg["CONTACT_EMAIL"],
        licence=cfg["SERVER_TYPE"],
        projects_count=active_projects,
        users_count=real_users,
        workspaces_count=ws_handler.workspace_count(),
        last_change=last_change,
        server_version=cfg["VERSION"],
        monthly_contributors=ws_handler.monthly_contributors_count(),
        editors=ws_handler.server_editors_count(),
    )


@celery.task(ignore_result=True)
def save_statistics():
    """Save statistics about usage.

    Periodic celery task: each run appends one MerginStatistics row holding
    the current callhome snapshot as JSON.
    """
    info = MerginInfo.query.first()
    data = get_callhome_data(info)
    # JSONB column needs a plain dict - the dataclass instance itself is not
    # JSON serializable (and the tests compare stored data against asdict())
    stat = MerginStatistics(data=asdict(data))
    db.session.add(stat)
    db.session.commit()


@celery.task(ignore_result=True)
def send_statistics():
"""Send statistics about usage."""
Expand Down Expand Up @@ -45,40 +82,20 @@ def send_statistics():
db.session.add(info)
db.session.commit()

if (
info.last_reported
and datetime.datetime.utcnow()
< info.last_reported + datetime.timedelta(hours=12)
if info.last_reported and datetime.utcnow() < info.last_reported + timedelta(
hours=12
):
return

last_change_item = (
db.session.query(Project.updated).order_by(Project.updated.desc()).first()
)

data = {
"service_uuid": str(info.service_id),
"url": current_app.config["MERGIN_BASE_URL"],
"contact_email": current_app.config["CONTACT_EMAIL"],
"licence": current_app.config["SERVER_TYPE"],
"projects_count": Project.query.filter(Project.removed_at.is_(None)).count(),
"users_count": User.query.filter(
is_(User.username.ilike("deleted_%"), False)
).count(),
"workspaces_count": current_app.ws_handler.workspace_count(),
"last_change": str(last_change_item.updated) + "Z" if last_change_item else "",
"server_version": current_app.config["VERSION"],
"monthly_contributors": current_app.ws_handler.monthly_contributors_count(),
"editors": current_app.ws_handler.server_editors_count(),
}
data = asdict(get_callhome_data(info))

try:
resp = requests.post(
current_app.config["STATISTICS_URL"] + "/usage-statistic",
data=json.dumps(data),
)
if resp.ok:
info.last_reported = datetime.datetime.utcnow()
info.last_reported = datetime.utcnow()
db.session.commit()
else:
logging.warning("Statistics error: " + str(resp.text))
Expand Down
2 changes: 1 addition & 1 deletion server/mergin/sync/permissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def is_active_workspace(workspace):
return workspace.is_active or is_admin


def require_project(ws, project_name, permission):
def require_project(ws, project_name, permission) -> Project:
workspace = current_app.ws_handler.get_by_name(ws)
if not workspace:
abort(404)
Expand Down
70 changes: 68 additions & 2 deletions server/mergin/tests/test_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
#
# SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-MerginMaps-Commercial

import csv
from dataclasses import asdict
from datetime import timedelta, timezone, datetime
import json
from unittest.mock import patch
import requests
Expand All @@ -11,8 +14,8 @@
from mergin.sync.models import Project, ProjectRole

from ..app import db
from ..stats.tasks import send_statistics
from ..stats.models import MerginInfo
from ..stats.tasks import get_callhome_data, save_statistics, send_statistics
from ..stats.models import MerginInfo, MerginStatistics, ServerCallhomeData
from .utils import Response, add_user, create_project, create_workspace


Expand Down Expand Up @@ -152,3 +155,66 @@ def test_server_updates(client):
mock.side_effect = requests.exceptions.RequestException("Some failure")
resp = client.get(url)
assert resp.status_code == 400


def test_save_statistics(app, client):
    """Celery job stores exactly one row matching current callhome data"""
    app.config["CONTACT_EMAIL"] = "test@example.com"
    server_info = MerginInfo.query.first()
    assert MerginStatistics.query.count() == 0

    save_statistics.s().apply()

    assert MerginStatistics.query.count() == 1
    latest = MerginStatistics.query.order_by(
        MerginStatistics.created_at.desc()
    ).first()
    expected = get_callhome_data(server_info)
    assert latest.created_at
    assert latest.data == asdict(expected)


def test_download_report(app, client):
    """Test download report endpoint"""
    url = "/app/admin/report"
    # missing required query parameters
    resp = client.get(url)
    # BUG FIX: original line was a bare comparison with no `assert`,
    # so the check silently did nothing
    assert resp.status_code == 400

    # bad date format
    resp = client.get(f"{url}?date_from=2021-01-01T00:00:00&date_to=2021-01-01")
    assert resp.status_code == 400

    app.config["CONTACT_EMAIL"] = "test@example.com"
    save_statistics.s().apply()
    resp = client.get(
        f"{url}?date_from=2021-01-01&date_to={datetime.now(timezone.utc).strftime('%Y-%m-%d')}"
    )
    assert resp.status_code == 200
    assert resp.mimetype == "text/csv"
    lines = resp.data.splitlines()
    # header + one data row
    assert len(lines) == 2

    stat = MerginStatistics.query.first()
    keys = list(asdict(ServerCallhomeData(**stat.data)).keys()) + ["created_at"]
    assert lines[0].decode("UTF-8") == ",".join(keys)

    # test same day: date_from equal to the row's creation date is inclusive
    stat.created_at = datetime(2021, 1, 1, tzinfo=timezone.utc)
    db.session.commit()

    resp = client.get(
        f"{url}?date_from=2021-01-01&date_to={datetime.now(timezone.utc).strftime('%Y-%m-%d')}"
    )
    assert resp.status_code == 200
    assert resp.mimetype == "text/csv"
    lines = resp.data.splitlines()
    assert len(lines) == 2

    # empty response: row created before the requested range yields header only
    stat.created_at = datetime(2020, 1, 1, tzinfo=timezone.utc)
    db.session.commit()
    resp = client.get(
        f"{url}?date_from=2021-01-01&date_to={datetime.now(timezone.utc).strftime('%Y-%m-%d')}"
    )
    assert resp.status_code == 200
    assert resp.mimetype == "text/csv"
    lines = resp.data.splitlines()
    empty_file = f"{','.join(keys)}\r\n"
    assert resp.data.decode("UTF-8") == empty_file
    assert len(lines) == 1
Loading
Loading