From cdb5c3caf094484c3c8063b87bd6e361d866cdcd Mon Sep 17 00:00:00 2001 From: Evan Rittenhouse Date: Sun, 6 Nov 2022 22:00:28 -0600 Subject: [PATCH 1/6] Correctly parse Git submodule URLs --- src/poetry/vcs/git/backend.py | 27 ++++++++++++++++++++++--- tests/integration/test_utils_vcs_git.py | 11 ++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/src/poetry/vcs/git/backend.py b/src/poetry/vcs/git/backend.py index 20d7a1223af..85608c7e0fd 100644 --- a/src/poetry/vcs/git/backend.py +++ b/src/poetry/vcs/git/backend.py @@ -324,23 +324,44 @@ def _clone(cls, url: str, refspec: GitRefSpec, target: Path) -> Repo: return local @classmethod - def _clone_submodules(cls, repo: Repo) -> None: + def _clone_submodules(cls, repo: Repo, main_url: str) -> None: """ Helper method to identify configured submodules and clone them recursively. """ repo_root = Path(repo.path) modules_config = repo_root.joinpath(".gitmodules") + # A relative URL by definition starts with ../ or ./ + relative_submodule_regex = re.compile("^(\\.\\./|\\./),?") + if modules_config.exists(): config = ConfigFile.from_path(str(modules_config)) url: bytes path: bytes submodules = parse_submodules(config) + for path, url, name in submodules: path_relative = Path(path.decode("utf-8")) path_absolute = repo_root.joinpath(path_relative) + url_string = url.decode() + final_url = url_string + submodule_is_relative = bool( + relative_submodule_regex.search(url_string) + ) + if submodule_is_relative: + # Find a root list of url sections to mutate according + # to the relative url + url_sections = [section + "/" for section in main_url.split("/")] + directories_upward = url_string.count("../") + # Walk up the main URL until we know where to insert + # the submodule URL + url_portion_remaining = "".join(url_sections[:-directories_upward]) + # Insert the submodule URL - the last segment of url_string.split() + # is the path relative to the main URL + final_url = url_portion_remaining + url_string.split("../")[-1] + source_root = path_absolute.parent source_root.mkdir(parents=True, exist_ok=True) @@ -357,7 +378,7 @@ def _clone_submodules(cls, repo: Repo) -> None: continue cls.clone( - url=url.decode("utf-8"), + url=final_url, source_root=source_root, name=path_relative.name, revision=revision, @@ -427,7 +448,7 @@ def clone( try: if not cls.is_using_legacy_client(): local = cls._clone(url=url, refspec=refspec, target=target) - cls._clone_submodules(repo=local) + cls._clone_submodules(repo=local, main_url=url) return local except HTTPUnauthorized: # we do this here to handle http authenticated repositories as dulwich diff --git a/tests/integration/test_utils_vcs_git.py b/tests/integration/test_utils_vcs_git.py index 5c399d5311b..81fbb61fd9e 100644 --- a/tests/integration/test_utils_vcs_git.py +++ b/tests/integration/test_utils_vcs_git.py @@ -240,6 +240,17 @@ def test_git_clone_clones_submodules(source_url: str) -> None: assert len(list(submodule_package_directory.glob("*"))) > 1 +def test_git_clone_clones_submodules1_with_relative_urls(source_url: str) -> None: + with Git.clone(url=source_url, branch="relative_submodule") as repo: + submodule_package_directory = ( + Path(repo.path) / "submodules" / "relative-url-submodule" + ) + + assert submodule_package_directory.exists() + assert submodule_package_directory.joinpath("README.md").exists() + assert len(list(submodule_package_directory.glob("*"))) > 1 + + def test_system_git_fallback_on_http_401( mocker: MockerFixture, source_url: str, From 96b30d1264401d732d9f7fca8602791d5cc74fc8 Mon Sep 17 00:00:00 2001 From: Evan Rittenhouse Date: Sat, 26 Nov 2022 14:46:04 -0600 Subject: [PATCH 2/6] Simplify busines logic for relative submodules --- src/poetry/vcs/git/backend.py | 14 +++----------- tests/integration/test_utils_vcs_git.py | 2 +- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/src/poetry/vcs/git/backend.py b/src/poetry/vcs/git/backend.py index 85608c7e0fd..d4e2c83130d 100644 --- a/src/poetry/vcs/git/backend.py +++ b/src/poetry/vcs/git/backend.py @@ -7,6 +7,7 @@ from pathlib import Path from subprocess import CalledProcessError from typing import TYPE_CHECKING +from urllib.parse import urljoin from dulwich import porcelain from dulwich.client import HTTPUnauthorized @@ -345,22 +346,13 @@ def _clone_submodules(cls, repo: Repo, main_url: str) -> None: path_relative = Path(path.decode("utf-8")) path_absolute = repo_root.joinpath(path_relative) - url_string = url.decode() + url_string = url.decode("utf-8") final_url = url_string submodule_is_relative = bool( relative_submodule_regex.search(url_string) ) if submodule_is_relative: - # Find a root list of url sections to mutate according - # to the relative url - url_sections = [section + "/" for section in main_url.split("/")] - directories_upward = url_string.count("../") - # Walk up the main URL until we know where to insert - # the submodule URL - url_portion_remaining = "".join(url_sections[:-directories_upward]) - # Insert the submodule URL - the last segment of url_string.split() - # is the path relative to the main URL - final_url = url_portion_remaining + url_string.split("../")[-1] + final_url = urljoin(f"{Git.get_remote_url(repo)}/", url_string) source_root = path_absolute.parent source_root.mkdir(parents=True, exist_ok=True) diff --git a/tests/integration/test_utils_vcs_git.py b/tests/integration/test_utils_vcs_git.py index 81fbb61fd9e..082741110e7 100644 --- a/tests/integration/test_utils_vcs_git.py +++ b/tests/integration/test_utils_vcs_git.py @@ -240,7 +240,7 @@ def test_git_clone_clones_submodules(source_url: str) -> None: assert len(list(submodule_package_directory.glob("*"))) > 1 -def test_git_clone_clones_submodules1_with_relative_urls(source_url: str) -> None: +def test_git_clone_clones_submodules_with_relative_urls(source_url: str) -> None: with Git.clone(url=source_url, branch="relative_submodule") as repo: submodule_package_directory = ( Path(repo.path) / "submodules" / "relative-url-submodule" From 805347066f27340f091325a208b9f2678fc1e849 Mon Sep 17 00:00:00 2001 From: Evan Rittenhouse Date: Sat, 24 Dec 2022 01:25:52 -0600 Subject: [PATCH 3/6] Add integration test for a relative submodule with an explicit base --- tests/integration/test_utils_vcs_git.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/integration/test_utils_vcs_git.py b/tests/integration/test_utils_vcs_git.py index 082741110e7..ec0f558f3fe 100644 --- a/tests/integration/test_utils_vcs_git.py +++ b/tests/integration/test_utils_vcs_git.py @@ -251,6 +251,19 @@ def test_git_clone_clones_submodules_with_relative_urls(source_url: str) -> None assert len(list(submodule_package_directory.glob("*"))) > 1 +def test_git_clone_clones_submodules_with_relative_urls_and_explicit_base( + source_url: str, +) -> None: + with Git.clone(url=source_url, branch="relative_submodule") as repo: + submodule_package_directory = ( + Path(repo.path) / "submodules" / "relative-url-submodule-with-base" + ) + + assert submodule_package_directory.exists() + assert submodule_package_directory.joinpath("README.md").exists() + assert len(list(submodule_package_directory.glob("*"))) > 1 + + def test_system_git_fallback_on_http_401( mocker: MockerFixture, source_url: str, From 9b59e88ecf4f972ddef8ab088d1dc4b4889167cc Mon Sep 17 00:00:00 2001 From: Evan Rittenhouse Date: Mon, 13 Feb 2023 22:21:51 -0600 Subject: [PATCH 4/6] remove unnecessary main_url parameter --- src/poetry/vcs/git/backend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/poetry/vcs/git/backend.py b/src/poetry/vcs/git/backend.py index d4e2c83130d..3ef94750883 100644 --- a/src/poetry/vcs/git/backend.py +++ b/src/poetry/vcs/git/backend.py @@ -325,7 +325,7 @@ def _clone(cls, url: str, refspec: GitRefSpec, target: Path) -> Repo: return local @classmethod - def _clone_submodules(cls, repo: Repo, main_url: str) -> None: + def _clone_submodules(cls, repo: Repo) -> None: """ Helper method to identify configured submodules and clone them recursively. """ @@ -440,7 +440,7 @@ def clone( try: if not cls.is_using_legacy_client(): local = cls._clone(url=url, refspec=refspec, target=target) - cls._clone_submodules(repo=local, main_url=url) + cls._clone_submodules(repo=local) return local except HTTPUnauthorized: # we do this here to handle http authenticated repositories as dulwich From 042dc72ffcbb6a7f2e43ef5c7a527f13b0edd568 Mon Sep 17 00:00:00 2001 From: Evan Rittenhouse Date: Wed, 15 Feb 2023 19:25:52 -0600 Subject: [PATCH 5/6] Implement reviewer comments --- src/poetry/vcs/git/backend.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/poetry/vcs/git/backend.py b/src/poetry/vcs/git/backend.py index 3ef94750883..44adea2fa2d 100644 --- a/src/poetry/vcs/git/backend.py +++ b/src/poetry/vcs/git/backend.py @@ -333,7 +333,7 @@ def _clone_submodules(cls, repo: Repo) -> None: modules_config = repo_root.joinpath(".gitmodules") # A relative URL by definition starts with ../ or ./ - relative_submodule_regex = re.compile("^(\\.\\./|\\./),?") + relative_submodule_regex = re.compile(r"^(\.{1,2})/?") if modules_config.exists(): config = ConfigFile.from_path(str(modules_config)) @@ -347,12 +347,8 @@ def _clone_submodules(cls, repo: Repo) -> None: path_absolute = repo_root.joinpath(path_relative) url_string = url.decode("utf-8") - final_url = url_string - submodule_is_relative = bool( - relative_submodule_regex.search(url_string) - ) - if submodule_is_relative: - final_url = urljoin(f"{Git.get_remote_url(repo)}/", url_string) + if relative_submodule_regex.search(url_string): + url_string = urljoin(f"{Git.get_remote_url(repo)}/", url_string) source_root = path_absolute.parent source_root.mkdir(parents=True, exist_ok=True) @@ -370,7 +366,7 @@ def _clone_submodules(cls, repo: Repo) -> None: continue cls.clone( - url=final_url, + url=url_string, source_root=source_root, name=path_relative.name, revision=revision, From 543dcac64c6f098c158022852499cdb95821fafc Mon Sep 17 00:00:00 2001 From: Evan Rittenhouse Date: Fri, 17 Feb 2023 10:20:12 -0600 Subject: [PATCH 6/6] Fix regex --- src/poetry/vcs/git/backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/poetry/vcs/git/backend.py b/src/poetry/vcs/git/backend.py index 44adea2fa2d..2e3032147f8 100644 --- a/src/poetry/vcs/git/backend.py +++ b/src/poetry/vcs/git/backend.py @@ -333,7 +333,7 @@ def _clone_submodules(cls, repo: Repo) -> None: modules_config = repo_root.joinpath(".gitmodules") # A relative URL by definition starts with ../ or ./ - relative_submodule_regex = re.compile(r"^(\.{1,2})/?") + relative_submodule_regex = re.compile(r"^\.{1,2}/") if modules_config.exists(): config = ConfigFile.from_path(str(modules_config))