From abf557bf285c5964e28c5c8f349011805bceab07 Mon Sep 17 00:00:00 2001 From: Raphael Yancey Date: Sun, 9 Feb 2020 00:28:40 +0100 Subject: [PATCH 1/3] Fix accented characters not being matched in author name Fixes #2004 --- poetry/packages/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poetry/packages/package.py b/poetry/packages/package.py index 5b7dfea09d1..3e73c52fe97 100644 --- a/poetry/packages/package.py +++ b/poetry/packages/package.py @@ -25,7 +25,7 @@ from .vcs_dependency import VCSDependency -AUTHOR_REGEX = re.compile(r"(?u)^(?P<name>[- .,\w\d'’\"()]+)(?: <(?P<email>.+?)>)?$") +AUTHOR_REGEX = re.compile(r"(?u)^(?P<name>[- .,a-zA-ZÀ-ÖØ-öø-ÿ\d'’\"()]+)(?: <(?P<email>.+?)>)?$") logger = logging.getLogger(__name__) From 14da1658fc87980211d595e6fd5995088a74302b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Raphae=CC=88l=20Yancey?= Date: Sun, 9 Feb 2020 21:01:07 +0100 Subject: [PATCH 2/3] Normalized the strings instead of modifying the pattern --- poetry/packages/package.py | 7 ++++--- tests/packages/test_package.py | 8 ++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/poetry/packages/package.py b/poetry/packages/package.py index 3e73c52fe97..b19d5a36bc3 100644 --- a/poetry/packages/package.py +++ b/poetry/packages/package.py @@ -6,6 +6,7 @@ from contextlib import contextmanager from typing import Union from warnings import warn +from unicodedata import normalize from poetry.semver import Version from poetry.semver import parse_constraint @@ -25,7 +26,7 @@ from .vcs_dependency import VCSDependency -AUTHOR_REGEX = re.compile(r"(?u)^(?P<name>[- .,a-zA-ZÀ-ÖØ-öø-ÿ\d'’\"()]+)(?: <(?P<email>.+?)>)?$") +AUTHOR_REGEX = re.compile(r"(?u)^(?P<name>[- .,\w\d'’\"()]+)(?: <(?P<email>.+?)>)?$") logger = logging.getLogger(__name__) @@ -160,7 +161,7 @@ def _get_author(self): # type: () -> dict if not self._authors: return {"name": None, "email": None} - m = AUTHOR_REGEX.match(self._authors[0]) + m = AUTHOR_REGEX.match(normalize("NFC", self._authors[0])) name = m.group("name") email 
= m.group("email") @@ -171,7 +172,7 @@ def _get_maintainer(self): # type: () -> dict if not self._maintainers: return {"name": None, "email": None} - m = AUTHOR_REGEX.match(self._maintainers[0]) + m = AUTHOR_REGEX.match(normalize("NFC", self._maintainers[0])) name = m.group("name") email = m.group("email") diff --git a/tests/packages/test_package.py b/tests/packages/test_package.py index b61de86e540..08e62a9861c 100644 --- a/tests/packages/test_package.py +++ b/tests/packages/test_package.py @@ -13,6 +13,14 @@ def test_package_authors(): assert package.author_name == "Sébastien Eustace" assert package.author_email == "sebastien@eustace.io" + package.authors.insert(0, "Raphaël Yancey ") # With combining diacritics (ë = e + ¨ = e\u0308) + assert package.author_name == "Raphaël Yancey" # Is normalized into \u00EB + assert package.author_email == "raphael@badfile.net" + + package.authors.insert(0, "Raphaël Yancey ") # Without (ë = \u00EB) + assert package.author_name == "Raphaël Yancey" + assert package.author_email == "raphael@badfile.net" + package.authors.insert(0, "John Doe") assert package.author_name == "John Doe" assert package.author_email is None From 236efbe767f700223bc2665b47595845a8e18d84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Raphae=CC=88l=20Yancey?= Date: Tue, 11 Feb 2020 23:43:45 +0100 Subject: [PATCH 3/3] Applied isort & black --- poetry/packages/package.py | 2 +- tests/packages/test_package.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/poetry/packages/package.py b/poetry/packages/package.py index b19d5a36bc3..cb28ef70c7f 100644 --- a/poetry/packages/package.py +++ b/poetry/packages/package.py @@ -5,8 +5,8 @@ from contextlib import contextmanager from typing import Union -from warnings import warn from unicodedata import normalize +from warnings import warn from poetry.semver import Version from poetry.semver import parse_constraint diff --git a/tests/packages/test_package.py b/tests/packages/test_package.py index 
08e62a9861c..510f542f222 100644 --- a/tests/packages/test_package.py +++ b/tests/packages/test_package.py @@ -13,11 +13,15 @@ def test_package_authors(): assert package.author_name == "Sébastien Eustace" assert package.author_email == "sebastien@eustace.io" - package.authors.insert(0, "Raphaël Yancey ") # With combining diacritics (ë = e + ¨ = e\u0308) + package.authors.insert( + 0, "Raphaël Yancey " + ) # With combining diacritics (ë = e + ¨ = e\u0308) assert package.author_name == "Raphaël Yancey" # Is normalized into \u00EB assert package.author_email == "raphael@badfile.net" - package.authors.insert(0, "Raphaël Yancey ") # Without (ë = \u00EB) + package.authors.insert( + 0, "Raphaël Yancey " + ) # Without (ë = \u00EB) assert package.author_name == "Raphaël Yancey" assert package.author_email == "raphael@badfile.net"