From 8fb35c82e51843cb4ce9c0baff572e6f163d9233 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 25 Oct 2024 14:57:00 +0200 Subject: [PATCH 01/20] Add email validation function to forms.py --- server/mergin/auth/models.py | 1 + ...1a6175c78a10_email_address_format_check.py | 28 +++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 server/migrations/community/1a6175c78a10_email_address_format_check.py diff --git a/server/mergin/auth/models.py b/server/mergin/auth/models.py index 85741470..d842a780 100644 --- a/server/mergin/auth/models.py +++ b/server/mergin/auth/models.py @@ -37,6 +37,7 @@ class User(db.Model): __table_args__ = ( db.Index("ix_user_username", func.lower(username), unique=True), db.Index("ix_user_email", func.lower(email), unique=True), + CheckConstraint("email ~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+.[A-Za-z]{2,}$'", name="email_format"), ) def __init__(self, username, email, passwd, is_admin=False): diff --git a/server/migrations/community/1a6175c78a10_email_address_format_check.py b/server/migrations/community/1a6175c78a10_email_address_format_check.py new file mode 100644 index 00000000..cb7604c2 --- /dev/null +++ b/server/migrations/community/1a6175c78a10_email_address_format_check.py @@ -0,0 +1,28 @@ +"""Email address format check + +Revision ID: 1a6175c78a10 +Revises: 1ab5b02ce532 +Create Date: 2024-10-17 15:13:11.360991 + +""" +from alembic import op + + +# revision identifiers, used by Alembic. +revision = '1a6175c78a10' +down_revision = '1ab5b02ce532' +branch_labels = None +depends_on = None + + +def upgrade(): + op.create_check_constraint( + constraint_name="email_format", + table_name="user", + condition="email ~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+.[A-Za-z]{2,}$'" + ) + + +def downgrade(): + op.drop_constraint("email_format", "user", type_="check") + From dd4dfed966c00cb5b5bbad471ef377f152ab549b Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 25 Oct 2024 14:59:29 +0200 Subject: [PATCH 02/20] Add email address validation in registration --- server/mergin/auth/forms.py | 4 ++++ server/mergin/auth/utils.py | 11 +++++++++++ server/mergin/tests/test_auth.py | 22 ++++++++++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 server/mergin/auth/utils.py diff --git a/server/mergin/auth/forms.py b/server/mergin/auth/forms.py index a5def163..3ce1449e 100644 --- a/server/mergin/auth/forms.py +++ b/server/mergin/auth/forms.py @@ -17,6 +17,7 @@ from .models import User from ..app import UpdateForm, CustomStringField +from .utils import is_email_address_valid def username_validation(form, field): @@ -77,6 +78,9 @@ class RegisterUserForm(FlaskForm): def validate(self): if not super().validate(): return False + if not is_email_address_valid(self.email.data): + self.email.errors.append("Email address contains non-ASCII characters. Only ASCII emails are allowed.") + return False user = User.query.filter( (func.lower(User.username) == func.lower(self.username.data.strip())) diff --git a/server/mergin/auth/utils.py b/server/mergin/auth/utils.py new file mode 100644 index 00000000..97295401 --- /dev/null +++ b/server/mergin/auth/utils.py @@ -0,0 +1,11 @@ +# Copyright (C) Lutra Consulting Limited +# +# SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-MerginMaps-Commercial + +import re + + +def is_email_address_valid(email: str) -> bool: + """Check if email address contains only ASCII characters and basic email address requirements""" + email_ascii = r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+.[A-Za-z]{2,}$" + return re.match(email_ascii, email) is not None diff --git a/server/mergin/tests/test_auth.py b/server/mergin/tests/test_auth.py index f70dbd53..7102bb6c 100644 --- a/server/mergin/tests/test_auth.py +++ b/server/mergin/tests/test_auth.py @@ -10,6 +10,7 @@ from flask import url_for from itsdangerous import URLSafeTimedSerializer from sqlalchemy import desc +from sqlalchemy.exc import IntegrityError from unittest.mock import patch from mergin.tests import test_workspace @@ -116,8 +117,13 @@ def test_logout(client): "#pwd1234", 400, ), # tests with upper case, but email already exists +<<<<<<< HEAD (" mergin@mergin.com ", "#pwd123", 400), # invalid password ("verylonglonglonglonglonglonglongemail@example.com", "#pwd1234", 201), +======= + ("XmerginX", " mergin@mergin.com ", "#pwd123", 400), # invalid password + ("mergin4", "invalid\360@email.com", "#pwd1234", 400), # non-ascii character in the email +>>>>>>> 2935d80 (Add email address validation in registration) ] @@ -889,3 +895,19 @@ def test_server_usage(client): assert resp.json["editors"] == 1 assert resp.json["users"] == 1 assert resp.json["projects"] == 1 + + +user_data = [ + ("user1", True), # no problem + ("user\360", False), # non-ascii character + ("user\\", False), # disallowed character +] + +@pytest.mark.parametrize("username,success", user_data) +def test_user_email_db_constraint(client, username, success): + if success: + add_user(username=username) + else: + with pytest.raises(IntegrityError): + add_user(username=username) + db.session.rollback() From ce879f7d500ab104c425c81a673b0e1f517bc58e Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 25 Oct 2024 15:09:01 +0200 Subject: [PATCH 03/20] black . --- server/mergin/auth/forms.py | 4 +++- server/mergin/auth/models.py | 5 ++++- server/mergin/tests/test_auth.py | 10 +++++++--- .../1a6175c78a10_email_address_format_check.py | 8 ++++---- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/server/mergin/auth/forms.py b/server/mergin/auth/forms.py index 3ce1449e..9e528d3c 100644 --- a/server/mergin/auth/forms.py +++ b/server/mergin/auth/forms.py @@ -79,7 +79,9 @@ def validate(self): if not super().validate(): return False if not is_email_address_valid(self.email.data): - self.email.errors.append("Email address contains non-ASCII characters. Only ASCII emails are allowed.") + self.email.errors.append( + "Email address contains non-ASCII characters. Only ASCII emails are allowed." + ) return False user = User.query.filter( diff --git a/server/mergin/auth/models.py b/server/mergin/auth/models.py index d842a780..d8cf7b9b 100644 --- a/server/mergin/auth/models.py +++ b/server/mergin/auth/models.py @@ -37,7 +37,10 @@ class User(db.Model): __table_args__ = ( db.Index("ix_user_username", func.lower(username), unique=True), db.Index("ix_user_email", func.lower(email), unique=True), - CheckConstraint("email ~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+.[A-Za-z]{2,}$'", name="email_format"), + CheckConstraint( + "email ~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+.[A-Za-z]{2,}$'", + name="email_format", + ), ) def __init__(self, username, email, passwd, is_admin=False): diff --git a/server/mergin/tests/test_auth.py b/server/mergin/tests/test_auth.py index 7102bb6c..fbdfb051 100644 --- a/server/mergin/tests/test_auth.py +++ b/server/mergin/tests/test_auth.py @@ -117,13 +117,16 @@ def test_logout(client): "#pwd1234", 400, ), # tests with upper case, but email already exists -<<<<<<< HEAD (" mergin@mergin.com ", "#pwd123", 400), # invalid password ("verylonglonglonglonglonglonglongemail@example.com", "#pwd1234", 201), -======= ("XmerginX", " mergin@mergin.com ", "#pwd123", 400), # invalid password ("mergin4", "invalid\360@email.com", "#pwd1234", 400), # non-ascii character in the email ->>>>>>> 2935d80 (Add email address validation in registration) + ( + "mergin4", + "invalid\360@email.com", + "#pwd1234", + 400, + ), # non-ascii character in the email ] @@ -903,6 +906,7 @@ def test_server_usage(client): ("user\\", False), # disallowed character ] + @pytest.mark.parametrize("username,success", user_data) def test_user_email_db_constraint(client, username, success): if success: diff --git a/server/migrations/community/1a6175c78a10_email_address_format_check.py b/server/migrations/community/1a6175c78a10_email_address_format_check.py index cb7604c2..5911c4e2 100644 --- a/server/migrations/community/1a6175c78a10_email_address_format_check.py +++ b/server/migrations/community/1a6175c78a10_email_address_format_check.py @@ -5,12 +5,13 @@ Create Date: 2024-10-17 15:13:11.360991 """ + from alembic import op # revision identifiers, used by Alembic. -revision = '1a6175c78a10' -down_revision = '1ab5b02ce532' +revision = "1a6175c78a10" +down_revision = "1ab5b02ce532" branch_labels = None depends_on = None @@ -19,10 +20,9 @@ def upgrade(): op.create_check_constraint( constraint_name="email_format", table_name="user", - condition="email ~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+.[A-Za-z]{2,}$'" + condition="email ~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+.[A-Za-z]{2,}$'", ) def downgrade(): op.drop_constraint("email_format", "user", type_="check") - From 7062f60dc126fa3b60c23a788a8f1607b938fd51 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 29 Nov 2024 16:49:49 +0100 Subject: [PATCH 04/20] Do not allow non utf8 encoding and invalid characters in email address --- server/mergin/auth/forms.py | 18 +++++----- server/mergin/auth/models.py | 4 --- server/mergin/auth/utils.py | 17 ++++++--- server/mergin/tests/test_auth.py | 36 +++++++++++++------ ...1a6175c78a10_email_address_format_check.py | 28 --------------- 5 files changed, 47 insertions(+), 56 deletions(-) delete mode 100644 server/migrations/community/1a6175c78a10_email_address_format_check.py diff --git a/server/mergin/auth/forms.py b/server/mergin/auth/forms.py index 9e528d3c..03439b4e 100644 --- a/server/mergin/auth/forms.py +++ b/server/mergin/auth/forms.py @@ -17,7 +17,7 @@ from .models import User from ..app import UpdateForm, CustomStringField -from .utils import is_email_address_valid +from .utils import is_utf8, contains_email_invalid_characters def username_validation(form, field): @@ -43,6 +43,11 @@ def username_validation(form, field): raise ValidationError(error) +def email_format(form, field): + if contains_email_invalid_characters(field.data) or not is_utf8(field.data): + raise ValidationError("Email address contains invalid characters.") + + class PasswordValidator: def __init__(self, min_length=8): self.min_length = min_length @@ -72,17 +77,12 @@ class RegisterUserForm(FlaskForm): ) email = CustomStringField( "Email Address", - validators=[DataRequired(), Email()], + validators=[DataRequired(), Email(), email_format], ) def validate(self): if not super().validate(): return False - if not is_email_address_valid(self.email.data): - self.email.errors.append( - "Email address contains non-ASCII characters. Only ASCII emails are allowed." - ) - return False user = User.query.filter( (func.lower(User.username) == func.lower(self.username.data.strip())) @@ -100,7 +100,7 @@ def validate(self): class ResetPasswordForm(FlaskForm): email = CustomStringField( "Email Address", - [DataRequired(), Email()], + [DataRequired(), Email(), email_format], ) @@ -135,7 +135,7 @@ class UserProfileDataForm(UpdateForm): receive_notifications = BooleanField("Receive notifications", [Optional()]) first_name = CustomStringField("First Name", [Optional()]) last_name = CustomStringField("Last Name", [Optional()]) - email = CustomStringField("Email", [Optional(), Email()]) + email = CustomStringField("Email", [Optional(), Email(), email_format]) class ApiLoginForm(LoginForm): diff --git a/server/mergin/auth/models.py b/server/mergin/auth/models.py index d8cf7b9b..85741470 100644 --- a/server/mergin/auth/models.py +++ b/server/mergin/auth/models.py @@ -37,10 +37,6 @@ class User(db.Model): __table_args__ = ( db.Index("ix_user_username", func.lower(username), unique=True), db.Index("ix_user_email", func.lower(email), unique=True), - CheckConstraint( - "email ~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+.[A-Za-z]{2,}$'", - name="email_format", - ), ) def __init__(self, username, email, passwd, is_admin=False): diff --git a/server/mergin/auth/utils.py b/server/mergin/auth/utils.py index 97295401..53c763b0 100644 --- a/server/mergin/auth/utils.py +++ b/server/mergin/auth/utils.py @@ -5,7 +5,16 @@ import re -def is_email_address_valid(email: str) -> bool: - """Check if email address contains only ASCII characters and basic email address requirements""" - email_ascii = r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+.[A-Za-z]{2,}$" - return re.match(email_ascii, email) is not None +def contains_email_invalid_characters(email: str) -> bool: + """Check for invalid characters for the email address""" + invalid_characters = r"[ ;:’\–\—|,]" + return bool(re.search(invalid_characters, email)) + + +def is_utf8(text: str) -> bool: + """Check if given text is in UTF-8 encoding""" + try: + text.encode("utf-8").decode("utf-8") + return "�" not in text + except UnicodeDecodeError: + return False diff --git a/server/mergin/tests/test_auth.py b/server/mergin/tests/test_auth.py index fbdfb051..83b25c68 100644 --- a/server/mergin/tests/test_auth.py +++ b/server/mergin/tests/test_auth.py @@ -900,18 +900,32 @@ def test_server_usage(client): assert resp.json["projects"] == 1 +# Due to Python's internal handling of strings which is inherently Unicode we need to decode the string in the test user_data = [ - ("user1", True), # no problem - ("user\360", False), # non-ascii character - ("user\\", False), # disallowed character + ("user1", True, 201), # no problem + ("user\260", True, 201), # non-ascii character + ("usér", True, 201), # non-ascii character + ("user\\", True, 400), # disallowed character + ("日人日本人", True, 201), # non-ascii character + ("usér", False, 400), # not utf-8 encoding + ("us er", True, 400), # whitespace + ("us—er", True, 400), # dash ] -@pytest.mark.parametrize("username,success", user_data) -def test_user_email_db_constraint(client, username, success): - if success: - add_user(username=username) - else: - with pytest.raises(IntegrityError): - add_user(username=username) - db.session.rollback() +@pytest.mark.parametrize("username,utf8,expected", user_data) +def test_user_email_format(client, username, expected, utf8): + login_as_admin(client) + email = username + "@example.com" + if not utf8: + # simulate server misinterpreting the input encoding + email = email.encode("latin1").decode("utf-8", errors="replace") + url = url_for("/.mergin_auth_controller_register_user") + data = { + "username": username, + "email": email, + "password": "#pwd1234", + "confirm": "#pwd1234", + } + resp = client.post(url, data=json.dumps(data), headers=json_headers) + assert resp.status_code == expected diff --git a/server/migrations/community/1a6175c78a10_email_address_format_check.py b/server/migrations/community/1a6175c78a10_email_address_format_check.py deleted file mode 100644 index 5911c4e2..00000000 --- a/server/migrations/community/1a6175c78a10_email_address_format_check.py +++ /dev/null @@ -1,28 +0,0 @@ -"""Email address format check - -Revision ID: 1a6175c78a10 -Revises: 1ab5b02ce532 -Create Date: 2024-10-17 15:13:11.360991 - -""" - -from alembic import op - - -# revision identifiers, used by Alembic. -revision = "1a6175c78a10" -down_revision = "1ab5b02ce532" -branch_labels = None -depends_on = None - - -def upgrade(): - op.create_check_constraint( - constraint_name="email_format", - table_name="user", - condition="email ~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+.[A-Za-z]{2,}$'", - ) - - -def downgrade(): - op.drop_constraint("email_format", "user", type_="check") From 97581071628f0f925917b13199d5707b1356f773 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 29 Nov 2024 22:58:10 +0100 Subject: [PATCH 05/20] Revert unwanted changes --- server/mergin/tests/test_auth.py | 6 ------ .../1ab5b02ce532_version_author_name_to_user_id.py | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/server/mergin/tests/test_auth.py b/server/mergin/tests/test_auth.py index 83b25c68..85727b99 100644 --- a/server/mergin/tests/test_auth.py +++ b/server/mergin/tests/test_auth.py @@ -121,12 +121,6 @@ def test_logout(client): ("verylonglonglonglonglonglonglongemail@example.com", "#pwd1234", 201), ("XmerginX", " mergin@mergin.com ", "#pwd123", 400), # invalid password ("mergin4", "invalid\360@email.com", "#pwd1234", 400), # non-ascii character in the email - ( - "mergin4", - "invalid\360@email.com", - "#pwd1234", - 400, - ), # non-ascii character in the email ] diff --git a/server/migrations/community/1ab5b02ce532_version_author_name_to_user_id.py b/server/migrations/community/1ab5b02ce532_version_author_name_to_user_id.py index 93747d70..3dfbadb4 100644 --- a/server/migrations/community/1ab5b02ce532_version_author_name_to_user_id.py +++ b/server/migrations/community/1ab5b02ce532_version_author_name_to_user_id.py @@ -1,7 +1,7 @@ """Migrage project version author name to user.id Revision ID: 1ab5b02ce532 -Revises: 1c23e3be03a3 +Revises: 57d0de13ce4a Create Date: 2024-09-06 14:01:40.668483 """ From cd664ae7a590188757a3ddb297eec11b3954a7f5 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Mon, 9 Dec 2024 17:29:39 +0100 Subject: [PATCH 06/20] Add valid email function --- server/mergin/auth/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/server/mergin/auth/utils.py b/server/mergin/auth/utils.py index 53c763b0..a4307754 100644 --- a/server/mergin/auth/utils.py +++ b/server/mergin/auth/utils.py @@ -18,3 +18,7 @@ def is_utf8(text: str) -> bool: return "�" not in text except UnicodeDecodeError: return False + + +def is_valid_email(email: str) -> bool: + return not contains_email_invalid_characters(email) and is_utf8(email) From 5f0006f3736cacde6d06a3fe65987a84fb264df6 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 28 Feb 2025 10:10:12 +0100 Subject: [PATCH 07/20] Unify error messages for unsuported file type in public_api_controller.py --- server/mergin/auth/utils.py | 24 --------------------- server/mergin/sync/public_api_controller.py | 8 +++++-- 2 files changed, 6 insertions(+), 26 deletions(-) delete mode 100644 server/mergin/auth/utils.py diff --git a/server/mergin/auth/utils.py b/server/mergin/auth/utils.py deleted file mode 100644 index a4307754..00000000 --- a/server/mergin/auth/utils.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (C) Lutra Consulting Limited -# -# SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-MerginMaps-Commercial - -import re - - -def contains_email_invalid_characters(email: str) -> bool: - """Check for invalid characters for the email address""" - invalid_characters = r"[ ;:’\–\—|,]" - return bool(re.search(invalid_characters, email)) - - -def is_utf8(text: str) -> bool: - """Check if given text is in UTF-8 encoding""" - try: - text.encode("utf-8").decode("utf-8") - return "�" not in text - except UnicodeDecodeError: - return False - - -def is_valid_email(email: str) -> bool: - return not contains_email_invalid_characters(email) and is_utf8(email) diff --git a/server/mergin/sync/public_api_controller.py b/server/mergin/sync/public_api_controller.py index 5aada5b6..a4c15eef 100644 --- a/server/mergin/sync/public_api_controller.py +++ b/server/mergin/sync/public_api_controller.py @@ -824,7 +824,7 @@ def project_push(namespace, project_name): abort( 400, f"Unsupported file type detected: {item.path}. " - f"Please remove the file or try compressing it into a ZIP file before uploading", + f"Please remove the file or try compressing it into a ZIP file before uploading.", ) # changes' files must be unique @@ -1056,7 +1056,11 @@ def push_finish(transaction_id): continue if not is_supported_type(dest_file): logging.info(f"Rejecting blacklisted file: {dest_file}") - abort(400, f"Unsupported file type detected: {f.path}") + abort( + 400, + f"Unsupported file type detected: {f.path}. " + f"Please remove the file or try compressing it into a ZIP file before uploading.", + ) if expected_size != os.path.getsize(dest_file): logging.error( From 6ec65bbbd4d8ecb89bd051dbc8b7ba5965c82e3d Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 28 Feb 2025 10:13:27 +0100 Subject: [PATCH 08/20] Extend email validator with additional forbiddedn characters --- server/mergin/auth/forms.py | 22 ++++++++++++++-------- server/mergin/sync/utils.py | 2 +- server/mergin/tests/test_auth.py | 30 ++++++++++++------------------ 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/server/mergin/auth/forms.py b/server/mergin/auth/forms.py index 03439b4e..b723854a 100644 --- a/server/mergin/auth/forms.py +++ b/server/mergin/auth/forms.py @@ -1,7 +1,7 @@ # Copyright (C) Lutra Consulting Limited # # SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-MerginMaps-Commercial - +import re import safe from flask_wtf import FlaskForm from sqlalchemy import func @@ -17,7 +17,6 @@ from .models import User from ..app import UpdateForm, CustomStringField -from .utils import is_utf8, contains_email_invalid_characters def username_validation(form, field): @@ -43,9 +42,16 @@ def username_validation(form, field): raise ValidationError(error) -def email_format(form, field): - if contains_email_invalid_characters(field.data) or not is_utf8(field.data): - raise ValidationError("Email address contains invalid characters.") +class ExtendedEmail(Email): + """Custom email validation extending WTForms email validation""" + + def __call__(self, form, field): + super().__call__(form, field) + + if re.search(r"[|'—]", field.data): + raise ValidationError( + f"Email address '{field.data}' contains invalid characters." + ) class PasswordValidator: @@ -77,7 +83,7 @@ class RegisterUserForm(FlaskForm): ) email = CustomStringField( "Email Address", - validators=[DataRequired(), Email(), email_format], + validators=[DataRequired(), ExtendedEmail()], ) def validate(self): @@ -100,7 +106,7 @@ def validate(self): class ResetPasswordForm(FlaskForm): email = CustomStringField( "Email Address", - [DataRequired(), Email(), email_format], + [DataRequired(), ExtendedEmail()], ) @@ -135,7 +141,7 @@ class UserProfileDataForm(UpdateForm): receive_notifications = BooleanField("Receive notifications", [Optional()]) first_name = CustomStringField("First Name", [Optional()]) last_name = CustomStringField("Last Name", [Optional()]) - email = CustomStringField("Email", [Optional(), Email(), email_format]) + email = CustomStringField("Email", [Optional(), ExtendedEmail()]) class ApiLoginForm(LoginForm): diff --git a/server/mergin/sync/utils.py b/server/mergin/sync/utils.py index 38e2fd68..8ee0f480 100644 --- a/server/mergin/sync/utils.py +++ b/server/mergin/sync/utils.py @@ -285,7 +285,7 @@ def has_valid_characters(name: str) -> str | None: def has_valid_first_character(name: str) -> str | None: """Check if name contains only valid characters in first position""" - if re.match(r"^[\s^\.].*$", name) is not None: + if re.match(r"^[\s.].*$", name) is not None: return f"Value can not start with space or dot." return None diff --git a/server/mergin/tests/test_auth.py b/server/mergin/tests/test_auth.py index 85727b99..7dfe9040 100644 --- a/server/mergin/tests/test_auth.py +++ b/server/mergin/tests/test_auth.py @@ -119,8 +119,6 @@ def test_logout(client): ), # tests with upper case, but email already exists (" mergin@mergin.com ", "#pwd123", 400), # invalid password ("verylonglonglonglonglonglonglongemail@example.com", "#pwd1234", 201), - ("XmerginX", " mergin@mergin.com ", "#pwd123", 400), # invalid password - ("mergin4", "invalid\360@email.com", "#pwd1234", 400), # non-ascii character in the email ] @@ -894,32 +892,28 @@ def test_server_usage(client): assert resp.json["projects"] == 1 -# Due to Python's internal handling of strings which is inherently Unicode we need to decode the string in the test user_data = [ - ("user1", True, 201), # no problem - ("user\260", True, 201), # non-ascii character - ("usér", True, 201), # non-ascii character - ("user\\", True, 400), # disallowed character - ("日人日本人", True, 201), # non-ascii character - ("usér", False, 400), # not utf-8 encoding - ("us er", True, 400), # whitespace - ("us—er", True, 400), # dash + ("user1", 201), # no problem + ("user\260", 400), # disallowed character + ("usér", 201), # non-ascii character + ("user\\", 400), # disallowed character + ("日人日本人", 201), # non-ascii character + ("user|", 400), # vertical bar + ("us er", 400), # whitespace + ("us,er", 400), # comma + ("us—er", 400), # dash + ("us'er", 400), # apostrophe ] -@pytest.mark.parametrize("username,utf8,expected", user_data) -def test_user_email_format(client, username, expected, utf8): +@pytest.mark.parametrize("username,expected", user_data) +def test_user_email_format(client, username, expected): login_as_admin(client) email = username + "@example.com" - if not utf8: - # simulate server misinterpreting the input encoding - email = email.encode("latin1").decode("utf-8", errors="replace") url = url_for("/.mergin_auth_controller_register_user") data = { - "username": username, "email": email, "password": "#pwd1234", - "confirm": "#pwd1234", } resp = client.post(url, data=json.dumps(data), headers=json_headers) assert resp.status_code == expected From 04ae6c3f564d85d65cbad9203be820334588805e Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 28 Feb 2025 10:15:52 +0100 Subject: [PATCH 09/20] rm dash from username in generate_username() --- server/mergin/auth/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/mergin/auth/models.py b/server/mergin/auth/models.py index 85741470..997ee3ea 100644 --- a/server/mergin/auth/models.py +++ b/server/mergin/auth/models.py @@ -199,7 +199,7 @@ def generate_username(cls, email: str) -> Optional[str]: username = email.split("@")[0].strip().lower() # remove forbidden chars username = re.sub( - r"[\@\#\$\%\^\&\*\(\)\{\}\[\]\?\'\"`,;\:\+\=\~\\\/\|\<\>]", "", username + r"[\@\#\$\%\^\&\*\(\)\{\}\[\]\?\'\"`,;\:\+\=\~\\\/\|\<\>—]", "", username ).ljust(4, "0") # additional check for reserved words username = f"{username}0" if is_reserved_word(username) else username From 5a358193c10fd340ae10cb56fdf83f3749907405 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 28 Feb 2025 10:21:29 +0100 Subject: [PATCH 10/20] Fix migration file typo --- .../community/1ab5b02ce532_version_author_name_to_user_id.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/migrations/community/1ab5b02ce532_version_author_name_to_user_id.py b/server/migrations/community/1ab5b02ce532_version_author_name_to_user_id.py index 3dfbadb4..99d2f337 100644 --- a/server/migrations/community/1ab5b02ce532_version_author_name_to_user_id.py +++ b/server/migrations/community/1ab5b02ce532_version_author_name_to_user_id.py @@ -1,7 +1,7 @@ -"""Migrage project version author name to user.id +"""Migrate project version author name to user.id Revision ID: 1ab5b02ce532 -Revises: 57d0de13ce4a +Revises: 1c23e3be03a3 Create Date: 2024-09-06 14:01:40.668483 """ From b2f25c458b160b2285a22b4f96da7e139efb9092 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 28 Feb 2025 10:27:19 +0100 Subject: [PATCH 11/20] Fix assert messages in tests. --- server/mergin/tests/test_project_controller.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/mergin/tests/test_project_controller.py b/server/mergin/tests/test_project_controller.py index ac839d95..b95b701f 100644 --- a/server/mergin/tests/test_project_controller.py +++ b/server/mergin/tests/test_project_controller.py @@ -2576,7 +2576,7 @@ def test_supported_file_upload(client): assert resp.status_code == 400 assert ( resp.json["detail"] - == f"Unsupported file type detected: {script_filename}. Please remove the file or try compressing it into a ZIP file before uploading" + == f"Unsupported file type detected: {script_filename}. Please remove the file or try compressing it into a ZIP file before uploading." ) # Extension spoofing to trick the validator spoof_name = "script.gpkg" @@ -2613,4 +2613,4 @@ def test_supported_file_upload(client): # Unsupported file type is revealed when reconstructed from chunks - based on the mime type - and upload is refused resp = client.post(f"/v1/project/push/finish/{upload.id}") assert resp.status_code == 400 - assert resp.json["detail"] == f"Unsupported file type detected: {spoof_name}" + assert resp.json["detail"] == f"Unsupported file type detected: {spoof_name}. Please remove the file or try compressing it into a ZIP file before uploading." From d4a86a5eb1b782d920e90e14f25033cc25b65000 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 28 Feb 2025 14:12:00 +0100 Subject: [PATCH 12/20] black --- server/mergin/sync/schemas.py | 1 - server/mergin/tests/test_project_controller.py | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/server/mergin/sync/schemas.py b/server/mergin/sync/schemas.py index 0d8a11d5..75b6f09e 100644 --- a/server/mergin/sync/schemas.py +++ b/server/mergin/sync/schemas.py @@ -16,7 +16,6 @@ FileHistory, PushChangeType, ProjectRole, - ProjectUser, ) from .workspace import WorkspaceRole from ..app import DateTimeWithZ, ma diff --git a/server/mergin/tests/test_project_controller.py b/server/mergin/tests/test_project_controller.py index b95b701f..9761ea05 100644 --- a/server/mergin/tests/test_project_controller.py +++ b/server/mergin/tests/test_project_controller.py @@ -2613,4 +2613,7 @@ def test_supported_file_upload(client): # Unsupported file type is revealed when reconstructed from chunks - based on the mime type - and upload is refused resp = client.post(f"/v1/project/push/finish/{upload.id}") assert resp.status_code == 400 - assert resp.json["detail"] == f"Unsupported file type detected: {spoof_name}. Please remove the file or try compressing it into a ZIP file before uploading." + assert ( + resp.json["detail"] + == f"Unsupported file type detected: {spoof_name}. Please remove the file or try compressing it into a ZIP file before uploading." + ) From d6824218097a9f166ae4533d19d63da024f83ae8 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 28 Feb 2025 14:31:59 +0100 Subject: [PATCH 13/20] add required parameter in test --- server/mergin/tests/test_auth.py | 1 + 1 file changed, 1 insertion(+) diff --git a/server/mergin/tests/test_auth.py b/server/mergin/tests/test_auth.py index 7dfe9040..4a46f1c1 100644 --- a/server/mergin/tests/test_auth.py +++ b/server/mergin/tests/test_auth.py @@ -914,6 +914,7 @@ def test_user_email_format(client, username, expected): data = { "email": email, "password": "#pwd1234", + "confirm": "#pwd1234", } resp = client.post(url, data=json.dumps(data), headers=json_headers) assert resp.status_code == expected From 1a8ad35ffc8782441115401baf356b2c5f1516a7 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Fri, 28 Feb 2025 15:05:12 +0100 Subject: [PATCH 14/20] rm unused imports --- server/mergin/tests/test_auth.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/server/mergin/tests/test_auth.py b/server/mergin/tests/test_auth.py index 4a46f1c1..b0d154a7 100644 --- a/server/mergin/tests/test_auth.py +++ b/server/mergin/tests/test_auth.py @@ -3,17 +3,13 @@ # SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-MerginMaps-Commercial from datetime import datetime, timedelta -import os import time import pytest import json from flask import url_for -from itsdangerous import URLSafeTimedSerializer from sqlalchemy import desc -from sqlalchemy.exc import IntegrityError from unittest.mock import patch -from mergin.tests import test_workspace from ..auth.app import generate_confirmation_token, confirm_token from ..auth.models import User, UserProfile, LoginHistory from ..auth.tasks import anonymize_removed_users @@ -33,7 +29,6 @@ login_as_admin, login, upload_file_to_project, - test_project_dir, ) From 5d1767e4f8bcd7e423f87d647acfc41e3124cf4d Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Mon, 3 Mar 2025 10:46:39 +0100 Subject: [PATCH 15/20] Clarify what email validation checks --- server/mergin/auth/forms.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/server/mergin/auth/forms.py b/server/mergin/auth/forms.py index b723854a..b4c2a14d 100644 --- a/server/mergin/auth/forms.py +++ b/server/mergin/auth/forms.py @@ -43,14 +43,22 @@ def username_validation(form, field): class ExtendedEmail(Email): - """Custom email validation extending WTForms email validation""" + """Custom email validation extending WTForms email validation. + WTForms checks for + 1. spaces, + 2. special characters ,:;()<>[]\" + 3, multiple @ symbols, + 4, leading, trailing, or consecutive dots in the local part + 5, invalid domain part - missing top level domain (user@example), consecutive dots + Custom check for additional invalid characters disallows |'— because they make our email sending service to fail + """ def __call__(self, form, field): super().__call__(form, field) if re.search(r"[|'—]", field.data): raise ValidationError( - f"Email address '{field.data}' contains invalid characters." + f"Email address '{field.data}' contains an invalid character." ) From 52cd3074848a0b715d626758933872dc26b4d2d3 Mon Sep 17 00:00:00 2001 From: Herman Snevajs Date: Mon, 3 Mar 2025 10:47:10 +0100 Subject: [PATCH 16/20] Test validation not an endpoint --- server/mergin/tests/test_auth.py | 47 +++++++++++++++++--------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/server/mergin/tests/test_auth.py b/server/mergin/tests/test_auth.py index b0d154a7..845bbee5 100644 --- a/server/mergin/tests/test_auth.py +++ b/server/mergin/tests/test_auth.py @@ -10,6 +10,7 @@ from sqlalchemy import desc from unittest.mock import patch +from ..auth.forms import ResetPasswordForm from ..auth.app import generate_confirmation_token, confirm_token from ..auth.models import User, UserProfile, LoginHistory from ..auth.tasks import anonymize_removed_users @@ -114,6 +115,8 @@ def test_logout(client): ), # tests with upper case, but email already exists (" mergin@mergin.com ", "#pwd123", 400), # invalid password ("verylonglonglonglonglonglonglongemail@example.com", "#pwd1234", 201), + ("us.er@mergin.com", "#pwd1234", 201), # dot is allowed + ("us er@mergin.com", "#pwd1234", 400), # space is disallowed ] @@ -888,28 +891,28 @@ def test_server_usage(client): user_data = [ - ("user1", 201), # no problem - ("user\260", 400), # disallowed character - ("usér", 201), # non-ascii character - ("user\\", 400), # disallowed character - ("日人日本人", 201), # non-ascii character - ("user|", 400), # vertical bar - ("us er", 400), # whitespace - ("us,er", 400), # comma - ("us—er", 400), # dash - ("us'er", 400), # apostrophe + ("user1", True), # no problem + ("日人日本人", True), # non-ascii character + ("usér", True), # non-ascii character + ("user\\", False), # disallowed character + ("user\260", True), # non-ascii character (°) + ("user|", False), # vertical bar + ("us er", False), # space in the middle + ("us,er", False), # comma + ("us—er", False), # dash + ("us'er", False), # apostrophe + (" user", True), # starting with space (will be stripped) + ("us.er", True), # dot in the middle + (".user", False), # starting with dot ] -@pytest.mark.parametrize("username,expected", user_data) -def test_user_email_format(client, username, expected): - login_as_admin(client) - email = username + "@example.com" - url = url_for("/.mergin_auth_controller_register_user") - data = { - "email": email, - "password": "#pwd1234", - "confirm": "#pwd1234", - } - resp = client.post(url, data=json.dumps(data), headers=json_headers) - assert resp.status_code == expected +@pytest.mark.parametrize("username,is_valid", user_data) +def test_email_format_validation(app, username, is_valid): + """Test email form format validation""" + email = username + "@email.com" + with app.test_request_context(method="POST"): + form = ResetPasswordForm( + data={"email": email} + ) # ResetPasswordForm has only email field + assert form.validate() == is_valid From e3f5117a86ac2481138c872d4e789dd538df1b3c Mon Sep 17 00:00:00 2001 From: Martin Varga Date: Mon, 3 Mar 2025 12:28:35 +0100 Subject: [PATCH 17/20] Add db rollback on gevent timeout error --- server/mergin/sync/public_api_controller.py | 8 +++++ server/mergin/tests/test_middleware.py | 40 ++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/server/mergin/sync/public_api_controller.py b/server/mergin/sync/public_api_controller.py index 5aada5b6..45957482 100644 --- a/server/mergin/sync/public_api_controller.py +++ b/server/mergin/sync/public_api_controller.py @@ -13,6 +13,7 @@ import uuid from datetime import datetime +import gevent import psycopg2 from blinker import signal from connexion import NoContent, request @@ -951,6 +952,9 @@ def project_push(namespace, project_name): f"Failed to upload a new project version using transaction id: {upload.id}: {str(err)}" ) abort(422, "Failed to upload a new project version. Please try later.") + except gevent.timeout.Timeout: + db.session.rollback() + raise finally: upload.clear() @@ -1165,6 +1169,10 @@ def push_finish(transaction_id): f"transaction id: {transaction_id}.: {str(err)}" ) abort(422, "Failed to create new version: {}".format(str(err))) + # catch exception during pg transaction so we can rollback and prevent PendingRollbackError during upload clean up + except gevent.timeout.Timeout: + db.session.rollback() + raise finally: # remove artifacts upload.clear() diff --git a/server/mergin/tests/test_middleware.py b/server/mergin/tests/test_middleware.py index 09f4bf05..aa7805fc 100644 --- a/server/mergin/tests/test_middleware.py +++ b/server/mergin/tests/test_middleware.py @@ -3,9 +3,11 @@ # SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-MerginMaps-Commercial import gevent +import psycogreen.gevent import pytest +import sqlalchemy -from ..app import create_simple_app, GeventTimeoutMiddleware +from ..app import create_simple_app, GeventTimeoutMiddleware, db from ..config import Configuration @@ -31,3 +33,39 @@ def ping(): if use_middleware else 200 ) + + +def test_catch_timeout(): + """Test proper handling of gevent timeout with db.session.rollback""" + psycogreen.gevent.patch_psycopg() + Configuration.GEVENT_WORKER = True + Configuration.GEVENT_REQUEST_TIMEOUT = 1 + application = create_simple_app() + + def unhandled(): + try: + db.session.execute("SELECT pg_sleep(1.1);") + finally: + db.session.execute("SELECT 1;") + return "" + + def timeout(): + try: + db.session.execute("SELECT pg_sleep(1.1);") + except gevent.timeout.Timeout: + db.session.rollback() + raise + finally: + db.session.execute("SELECT 1;") + return "" + + application.add_url_rule("/unhandled", "unhandled", unhandled) + application.add_url_rule("/timeout", "timeout", timeout) + app_context = application.app_context() + app_context.push() + + assert application.test_client().get("/timeout").status_code == 504 + + # in case of missing rollback sqlalchemy would raise error + with pytest.raises(sqlalchemy.exc.PendingRollbackError): + application.test_client().get("/unhandled") From 32005ef63939600294cc53522dd3df1778901d4e Mon Sep 17 00:00:00 2001 From: Martin Varga Date: Mon, 3 Mar 2025 13:29:17 +0100 Subject: [PATCH 18/20] Fix tests --- server/mergin/tests/test_middleware.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/mergin/tests/test_middleware.py b/server/mergin/tests/test_middleware.py index aa7805fc..3da39bc8 100644 --- a/server/mergin/tests/test_middleware.py +++ b/server/mergin/tests/test_middleware.py @@ -69,3 +69,5 @@ def timeout(): # in case of missing rollback sqlalchemy would raise error with pytest.raises(sqlalchemy.exc.PendingRollbackError): application.test_client().get("/unhandled") + + db.session.rollback() \ No newline at end of file From 08f395eb5d79f9206d54c5309a7e8e4c1f970087 Mon Sep 17 00:00:00 2001 From: Martin Varga Date: Mon, 3 Mar 2025 13:30:20 +0100 Subject: [PATCH 19/20] black --- server/mergin/tests/test_middleware.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/mergin/tests/test_middleware.py b/server/mergin/tests/test_middleware.py index 3da39bc8..508334d0 100644 --- a/server/mergin/tests/test_middleware.py +++ b/server/mergin/tests/test_middleware.py @@ -69,5 +69,5 @@ def timeout(): # in case of missing rollback sqlalchemy would raise error with pytest.raises(sqlalchemy.exc.PendingRollbackError): application.test_client().get("/unhandled") - - db.session.rollback() \ No newline at end of file + + db.session.rollback() From e878bb7569fe9561ebfc1967ceeff7a6ff0cd036 Mon Sep 17 00:00:00 2001 From: Martin Varga Date: Wed, 5 Mar 2025 11:28:49 +0100 Subject: [PATCH 20/20] Fix generate_username to be case insensitive --- server/mergin/auth/models.py | 8 ++++---- server/mergin/tests/test_auth.py | 7 +++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/server/mergin/auth/models.py b/server/mergin/auth/models.py index 997ee3ea..e580e814 100644 --- a/server/mergin/auth/models.py +++ b/server/mergin/auth/models.py @@ -210,12 +210,12 @@ def generate_username(cls, email: str) -> Optional[str]: text( """ SELECT - replace(username, :username, '0')::int AS suffix + replace(lower(username), :username, '0')::int AS suffix FROM "user" WHERE - username = :username OR - username SIMILAR TO :username_like - ORDER BY replace(username, :username, '0')::int DESC + lower(username) = :username OR + lower(username) SIMILAR TO :username_like + ORDER BY replace(lower(username), :username, '0')::int DESC LIMIT 1; """ ), diff --git a/server/mergin/tests/test_auth.py b/server/mergin/tests/test_auth.py index 845bbee5..c8b9a682 100644 --- a/server/mergin/tests/test_auth.py +++ b/server/mergin/tests/test_auth.py @@ -856,6 +856,13 @@ def test_username_generation(client): == "verylonglonglonglonglo" ) + # test username generation with existing user, case insensitive + user = add_user("Testuser") + assert User.generate_username("Testuser@example.com") == "testuser1" + assert User.generate_username("testuser@example.com") == "testuser1" + user = add_user("testuser1") + assert User.generate_username("Testuser@example.com") == "testuser2" + def test_server_usage(client): """Test server usage endpoint"""