From 4d5135f671434e05d8ba77e85616f3e8ab0369f3 Mon Sep 17 00:00:00 2001 From: GanesanRengasamy Date: Mon, 6 Apr 2026 07:05:05 +0530 Subject: [PATCH 1/7] support virtual packages on generic git hosts (Gitea) --- src/apm_cli/deps/github_downloader.py | 17 ++++++++++++++++- src/apm_cli/models/dependency/reference.py | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/apm_cli/deps/github_downloader.py b/src/apm_cli/deps/github_downloader.py index 9776f7efb..146482d83 100644 --- a/src/apm_cli/deps/github_downloader.py +++ b/src/apm_cli/deps/github_downloader.py @@ -1056,6 +1056,21 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re # All raw attempts failed — fall through to API path which # handles private repos, rate-limit messaging, and SAML errors. + # Try raw URL for generic hosts (Gitea, GitLab, etc.) + if host.lower() not in ("github.com",) and not host.lower().endswith(".ghe.com"): + raw_url = f"https://{host}/{owner}/{repo}/raw/{ref}/{file_path}" + raw_headers = {} + if token: + raw_headers['Authorization'] = f'token {token}' + try: + response = self._resilient_get(raw_url, headers=raw_headers, timeout=30) + if response.status_code == 200: + if verbose_callback: + verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}") + return response.content + except: + pass + # --- Contents API path (authenticated, enterprise, or raw fallback) --- # Build GitHub API URL - format differs by host type if host == "github.com": @@ -1063,7 +1078,7 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re elif host.lower().endswith(".ghe.com"): api_url = f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" else: - api_url = f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" + api_url = f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" # Set up authentication headers headers = { diff --git a/src/apm_cli/models/dependency/reference.py b/src/apm_cli/models/dependency/reference.py index 1c6df16b9..e41e1e174 100644 --- a/src/apm_cli/models/dependency/reference.py +++ b/src/apm_cli/models/dependency/reference.py @@ -580,7 +580,7 @@ def _detect_virtual_package(cls, dependency_str: str): for seg in path_segments ) has_collection = "collections" in path_segments - if has_virtual_ext or has_collection: + if has_virtual_ext or has_collection or len(path_segments) > 2: min_base_segments = 2 else: min_base_segments = len(path_segments) From 3dedd942da20d34c09bb6d372f0a2a95e9e16d1c Mon Sep 17 00:00:00 2001 From: GanesanRengasamy Date: Tue, 7 Apr 2026 13:04:21 +0530 Subject: [PATCH 2/7] Addressed reviewed corrections --- src/apm_cli/deps/github_downloader.py | 88 +++++++++---- src/apm_cli/models/dependency/reference.py | 15 ++- src/apm_cli/utils/github_host.py | 17 +++ tests/test_github_downloader.py | 142 +++++++++++++++++++++ tests/unit/test_generic_git_urls.py | 59 +++++++++ tests/unit/test_github_host.py | 23 ++++ 6 files changed, 315 insertions(+), 29 deletions(-) diff --git a/src/apm_cli/deps/github_downloader.py b/src/apm_cli/deps/github_downloader.py index 3792a542c..49cf373c0 100644 --- a/src/apm_cli/deps/github_downloader.py +++ b/src/apm_cli/deps/github_downloader.py @@ -1068,25 +1068,35 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re if verbose_callback: verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}") return response.content - except: + except (requests.RequestException, OSError): pass # --- Contents API path (authenticated, enterprise, or raw fallback) --- - # Build GitHub API URL - format differs by host type + # Build API URL candidates - format differs by host type if host == "github.com": - api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" + api_url_candidates = [ + f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" + ] elif host.lower().endswith(".ghe.com"): - api_url = f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" + api_url_candidates = [ + f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" + ] else: - api_url = f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" - + # Generic host: negotiate API version (Gitea=v1, older Gitea/Gogs=v3, GitLab=v4) + api_url_candidates = [ + f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", + f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", + f"https://{host}/api/v4/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", + ] + api_url = api_url_candidates[0] + # Set up authentication headers headers = { 'Accept': 'application/vnd.github.v3.raw' # Returns raw content directly } if token: headers['Authorization'] = f'token {token}' - + # Try to download with the specified ref try: response = self._resilient_get(api_url, headers=headers, timeout=30) @@ -1096,33 +1106,59 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re return response.content except requests.exceptions.HTTPError as e: if e.response.status_code == 404: - # Try fallback branches if the specified ref fails + # For generic hosts, try remaining API version candidates before ref fallback + for candidate_url in api_url_candidates[1:]: + try: + candidate_resp = self._resilient_get(candidate_url, headers=headers, timeout=30) + candidate_resp.raise_for_status() + if verbose_callback: + verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}") + return candidate_resp.content + except requests.exceptions.HTTPError as ce: + if ce.response.status_code != 404: + raise RuntimeError( + f"Failed to download {file_path}: HTTP {ce.response.status_code}" + ) + # 404 on this version too -- try next + + # All API versions returned 404 -- try fallback ref if ref not in ["main", "master"]: # If original ref failed, don't try fallbacks - it might be a specific version raise RuntimeError(f"File not found: {file_path} at ref '{ref}' in {dep_ref.repo_url}") - + # Try the other default branch fallback_ref = "master" if ref == "main" else "main" - - # Build fallback API URL + + # Build fallback URL candidates if host == "github.com": - fallback_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}" + fallback_url_candidates = [ + f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}" + ] elif host.lower().endswith(".ghe.com"): - fallback_url = f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}" + fallback_url_candidates = [ + f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}" + ] else: - fallback_url = f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}" - - try: - response = self._resilient_get(fallback_url, headers=headers, timeout=30) - response.raise_for_status() - if verbose_callback: - verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}") - return response.content - except requests.exceptions.HTTPError: - raise RuntimeError( - f"File not found: {file_path} in {dep_ref.repo_url} " - f"(tried refs: {ref}, {fallback_ref})" - ) + fallback_url_candidates = [ + f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", + f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", + f"https://{host}/api/v4/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", + ] + + for fallback_url in fallback_url_candidates: + try: + response = self._resilient_get(fallback_url, headers=headers, timeout=30) + response.raise_for_status() + if verbose_callback: + verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}") + return response.content + except requests.exceptions.HTTPError: + pass # Try next version or ref + + raise RuntimeError( + f"File not found: {file_path} in {dep_ref.repo_url} " + f"(tried refs: {ref}, {fallback_ref})" + ) elif e.response.status_code == 401 or e.response.status_code == 403: # Distinguish rate limiting from auth failure. # GitHub returns 403 with X-RateLimit-Remaining: 0 when the diff --git a/src/apm_cli/models/dependency/reference.py b/src/apm_cli/models/dependency/reference.py index e41e1e174..742c83ebf 100644 --- a/src/apm_cli/models/dependency/reference.py +++ b/src/apm_cli/models/dependency/reference.py @@ -11,6 +11,7 @@ is_artifactory_path, is_azure_devops_hostname, is_github_hostname, + is_gitlab_hostname, is_supported_git_host, parse_artifactory_path, unsupported_host_error, @@ -580,10 +581,18 @@ def _detect_virtual_package(cls, dependency_str: str): for seg in path_segments ) has_collection = "collections" in path_segments - if has_virtual_ext or has_collection or len(path_segments) > 2: + # GitLab supports nested groups (group/subgroup/repo), so the full + # path is the repo -- no shorthand subdirectory splitting. + # Use https://gitlab.com/group/subgroup/repo.git for GitLab nested + # groups; shorthand subdirectory syntax is not supported for GitLab. + # All other generic hosts (Gitea, Bitbucket, self-hosted, etc.) use + # the owner/repo convention, so extra segments are a virtual subdir. + if has_virtual_ext or has_collection: min_base_segments = 2 - else: + elif is_gitlab_hostname(validated_host): min_base_segments = len(path_segments) + else: + min_base_segments = 2 else: min_base_segments = 2 @@ -734,7 +743,7 @@ def _parse_standard_url( user_repo = "/".join(parts[1:]) else: user_repo = "/".join(parts[1:3]) - elif len(parts) >= 2 and "." not in parts[0]: + elif len(parts) >= 2 and ("." not in parts[0] or validated_host is not None): if not host: host = default_host() if is_azure_devops_hostname(host) and len(parts) >= 3: diff --git a/src/apm_cli/utils/github_host.py b/src/apm_cli/utils/github_host.py index d45fdabc2..728df3bd9 100644 --- a/src/apm_cli/utils/github_host.py +++ b/src/apm_cli/utils/github_host.py @@ -29,6 +29,23 @@ def is_azure_devops_hostname(hostname: Optional[str]) -> bool: return False +def is_gitlab_hostname(hostname: Optional[str]) -> bool: + """Return True if hostname is GitLab (cloud or self-hosted). + + GitLab supports nested groups (group/subgroup/repo), so paths with + more than two segments should be treated as repo paths, not virtual + subdirectory packages. + + Accepts: + - gitlab.com + - Any hostname starting with 'gitlab.' (common self-hosted convention) + """ + if not hostname: + return False + h = hostname.lower() + return h == "gitlab.com" or h.startswith("gitlab.") + + def is_github_hostname(hostname: Optional[str]) -> bool: """Return True if hostname should be treated as GitHub (cloud or enterprise). diff --git a/tests/test_github_downloader.py b/tests/test_github_downloader.py index b6e0ea316..de2fb1fb3 100644 --- a/tests/test_github_downloader.py +++ b/tests/test_github_downloader.py @@ -1629,5 +1629,147 @@ def test_try_raw_download_returns_content_on_200(self): assert result == b'hello world' +# --------------------------------------------------------------------------- +# Generic host (Gitea / GitLab) download tests +# --------------------------------------------------------------------------- + +def _make_resp(status_code: int, content: bytes = b"") -> Mock: + """Build a minimal mock requests.Response.""" + resp = Mock() + resp.status_code = status_code + resp.content = content + if status_code >= 400: + resp.raise_for_status = Mock( + side_effect=requests_lib.exceptions.HTTPError(response=resp) + ) + else: + resp.raise_for_status = Mock() + return resp + + +class TestGiteaRawUrlDownload: + """Gitea raw URL path: /{owner}/{repo}/raw/{ref}/{file}.""" + + def setup_method(self): + with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH: + self.downloader = GitHubPackageDownloader() + + def test_raw_url_succeeds_on_first_attempt(self): + """Raw URL returns 200 -- content returned without calling the API.""" + dep_ref = DependencyReference.parse("gitea.myorg.com/owner/repo") + expected = b"# README content" + raw_ok = _make_resp(200, expected) + + with patch.object(self.downloader, "_resilient_get", return_value=raw_ok) as mock_get: + result = self.downloader.download_raw_file(dep_ref, "README.md", "main") + + assert result == expected + first_url = mock_get.call_args_list[0][0][0] + assert first_url == "https://gitea.myorg.com/owner/repo/raw/main/README.md" + assert mock_get.call_count == 1 + + def test_raw_url_with_token_adds_auth_header(self): + """Token is forwarded as Authorization header in the raw URL request. + + Token resolution is lazy, so the env patch must stay active for the + duration of the download call. + """ + dep_ref = DependencyReference.parse("gitea.myorg.com/owner/repo") + raw_ok = _make_resp(200, b"data") + + with patch.dict(os.environ, {"GITHUB_APM_PAT": "gta-tok"}, clear=True): + with _CRED_FILL_PATCH: + downloader = GitHubPackageDownloader() + with patch.object(downloader, "_resilient_get", return_value=raw_ok) as mock_get: + downloader.download_raw_file(dep_ref, "README.md", "main") + + raw_headers = mock_get.call_args_list[0][1].get("headers", {}) + assert "Authorization" in raw_headers + + def test_falls_back_to_api_v1_when_raw_returns_non_200(self): + """When the raw URL returns 404, the API v1 path is tried next.""" + dep_ref = DependencyReference.parse("gitea.myorg.com/owner/repo") + expected = b"file via API" + + with patch.object( + self.downloader, "_resilient_get", + side_effect=[_make_resp(404), _make_resp(200, expected)] + ) as mock_get: + result = self.downloader.download_raw_file(dep_ref, "README.md", "main") + + assert result == expected + urls = [c[0][0] for c in mock_get.call_args_list] + assert urls[0] == "https://gitea.myorg.com/owner/repo/raw/main/README.md" + assert "/api/v1/" in urls[1] + + +class TestGitLabApiVersionNegotiation: + """API version negotiation: v1 -> v3 -> v4 for generic hosts.""" + + def setup_method(self): + with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH: + self.downloader = GitHubPackageDownloader() + + def test_gitlab_v4_reached_after_v1_and_v3_return_404(self): + """GitLab uses /api/v4/ -- negotiation must try v1, v3, then v4.""" + dep_ref = DependencyReference.parse("gitlab.myorg.com/owner/repo") + expected = b"gitlab file content" + + side_effects = [ + _make_resp(404), # raw URL + _make_resp(404), # v1 + _make_resp(404), # v3 + _make_resp(200, expected), # v4 + ] + with patch.object(self.downloader, "_resilient_get", side_effect=side_effects) as mock_get: + result = self.downloader.download_raw_file(dep_ref, "skill.md", "main") + + assert result == expected + urls = [c[0][0] for c in mock_get.call_args_list] + assert "/api/v1/" in urls[1] + assert "/api/v3/" in urls[2] + assert "/api/v4/" in urls[3] + + def test_gitea_v1_succeeds_without_trying_v3_or_v4(self): + """When v1 returns 200, v3 and v4 must never be called.""" + dep_ref = DependencyReference.parse("gitea.example.com/owner/repo") + expected = b"gitea content" + + with patch.object( + self.downloader, "_resilient_get", + side_effect=[_make_resp(404), _make_resp(200, expected)] + ) as mock_get: + result = self.downloader.download_raw_file(dep_ref, "file.md", "main") + + assert result == expected + urls = [c[0][0] for c in mock_get.call_args_list] + assert all("/api/v3/" not in u and "/api/v4/" not in u for u in urls) + + def test_all_api_versions_404_raises_runtime_error(self): + """When every API version returns 404 for both refs, a clear error is raised.""" + dep_ref = DependencyReference.parse("git.example.com/owner/repo") + # raw + v1 + v3 + v4 for 'main', then v1 + v3 + v4 for 'master' fallback + side_effects = [_make_resp(404)] * 8 + + with patch.object(self.downloader, "_resilient_get", side_effect=side_effects): + with pytest.raises(RuntimeError, match="File not found"): + self.downloader.download_raw_file(dep_ref, "missing.md", "main") + + def test_github_com_uses_api_github_com_not_api_v4(self): + """github.com must still use api.github.com, never /api/v4/.""" + dep_ref = DependencyReference.parse("owner/repo") + expected = b"github content" + api_ok = _make_resp(200, expected) + + with patch.object(self.downloader, "_try_raw_download", return_value=None): + with patch.object(self.downloader, "_resilient_get", return_value=api_ok) as mock_get: + result = self.downloader.download_raw_file(dep_ref, "README.md", "main") + + assert result == expected + url_called = mock_get.call_args_list[0][0][0] + assert url_called.startswith("https://api.github.com/") + assert "/api/v4/" not in url_called + + if __name__ == '__main__': pytest.main([__file__]) \ No newline at end of file diff --git a/tests/unit/test_generic_git_urls.py b/tests/unit/test_generic_git_urls.py index 31e35381a..1dbbfe888 100644 --- a/tests/unit/test_generic_git_urls.py +++ b/tests/unit/test_generic_git_urls.py @@ -682,3 +682,62 @@ def test_https_nested_group_with_virtual_ext_rejected(self): """HTTPS URLs can't embed virtual paths even with nested groups.""" with pytest.raises(ValueError, match="virtual file extension"): DependencyReference.parse("https://gitlab.com/group/subgroup/file.prompt.md") + + +class TestGiteaVirtualPackageDetection: + """Gitea-specific virtual package detection -- supplements TestFQDNVirtualPaths + and TestNestedGroupSupport with Gitea host fixtures and regression guards + for the len(path_segments) > 2 over-trigger.""" + + # --- Must NOT be virtual (nested-group repo, no virtual indicators) --- + + def test_three_segment_gitea_path_is_not_virtual(self): + """group/subgroup/repo on Gitea is a nested-group repo, not virtual.""" + dep = DependencyReference.parse("gitea.myorg.com/group/subgroup/repo") + assert dep.host == "gitea.myorg.com" + assert dep.repo_url == "group/subgroup/repo" + assert dep.is_virtual is False + + def test_two_segment_gitea_path_is_not_virtual(self): + """Simple owner/repo on a Gitea host is never virtual.""" + dep = DependencyReference.parse("gitea.myorg.com/owner/repo") + assert dep.host == "gitea.myorg.com" + assert dep.repo_url == "owner/repo" + assert dep.is_virtual is False + + def test_four_segment_generic_path_without_indicators_is_not_virtual(self): + """Deep nested groups without file extensions or /collections/ are not virtual.""" + dep = DependencyReference.parse("git.company.internal/team/skills/brand-guidelines") + assert dep.is_virtual is False + assert dep.repo_url == "team/skills/brand-guidelines" + + # --- Must be virtual (explicit virtual indicators) --- + + def test_gitea_virtual_file_extension(self): + """Path with virtual file extension on Gitea is detected as virtual.""" + dep = DependencyReference.parse("gitea.myorg.com/owner/repo/file.prompt.md") + assert dep.host == "gitea.myorg.com" + assert dep.repo_url == "owner/repo" + assert dep.virtual_path == "file.prompt.md" + assert dep.is_virtual is True + assert dep.is_virtual_file() is True + + def test_gitea_collections_path_is_virtual(self): + """Path with /collections/ on Gitea is detected as a virtual collection.""" + dep = DependencyReference.parse("gitea.myorg.com/owner/repo/collections/security") + assert dep.host == "gitea.myorg.com" + assert dep.repo_url == "owner/repo" + assert dep.virtual_path == "collections/security" + assert dep.is_virtual is True + assert dep.is_virtual_collection() is True + + def test_dict_format_virtual_on_gitea(self): + """Dict format with path= on Gitea host yields a virtual package.""" + dep = DependencyReference.parse_from_dict({ + "git": "gitea.myorg.com/owner/repo", + "path": "prompts/review.prompt.md", + }) + assert dep.host == "gitea.myorg.com" + assert dep.repo_url == "owner/repo" + assert dep.virtual_path == "prompts/review.prompt.md" + assert dep.is_virtual is True diff --git a/tests/unit/test_github_host.py b/tests/unit/test_github_host.py index 90bc8a71d..a675dbe1e 100644 --- a/tests/unit/test_github_host.py +++ b/tests/unit/test_github_host.py @@ -71,6 +71,29 @@ def test_is_github_hostname_defaults(): assert not github_host.is_github_hostname("example.com") +def test_is_gitlab_hostname(): + """is_gitlab_hostname() matches gitlab.com and self-hosted gitlab.* instances. + + This drives _detect_virtual_package: GitLab supports nested groups so the + full path is kept as the repo URL. All other generic hosts (Gitea, + Bitbucket, etc.) use the owner/repo convention (2 base segments). + """ + # Cloud and conventional self-hosted GitLab + assert github_host.is_gitlab_hostname("gitlab.com") + assert github_host.is_gitlab_hostname("gitlab.mycompany.com") + assert github_host.is_gitlab_hostname("gitlab.internal") + assert github_host.is_gitlab_hostname("GITLAB.COM") # case-insensitive + + # Non-GitLab hosts must NOT match + assert not github_host.is_gitlab_hostname("github.com") + assert not github_host.is_gitlab_hostname("bitbucket.org") + assert not github_host.is_gitlab_hostname("gitea.myorg.com") + assert not github_host.is_gitlab_hostname("git.company.internal") + assert not github_host.is_gitlab_hostname("dev.azure.com") + assert not github_host.is_gitlab_hostname(None) + assert not github_host.is_gitlab_hostname("") + + def test_is_azure_devops_hostname(): """Test Azure DevOps hostname detection.""" # Valid Azure DevOps hosts From 13dbf73b71455130410964a1984376e4fde97792 Mon Sep 17 00:00:00 2001 From: ganesanviji Date: Thu, 9 Apr 2026 16:09:00 +0530 Subject: [PATCH 3/7] Update src/apm_cli/deps/github_downloader.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/apm_cli/deps/github_downloader.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/apm_cli/deps/github_downloader.py b/src/apm_cli/deps/github_downloader.py index cd9097b9a..bd377c763 100644 --- a/src/apm_cli/deps/github_downloader.py +++ b/src/apm_cli/deps/github_downloader.py @@ -1237,11 +1237,10 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" ] else: - # Generic host: negotiate API version (Gitea=v1, older Gitea/Gogs=v3, GitLab=v4) + # Generic host: negotiate Gitea/Gogs-style contents API versions. api_url_candidates = [ f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", - f"https://{host}/api/v4/repos/{owner}/{repo}/contents/{file_path}?ref={ref}", ] api_url = api_url_candidates[0] From e4557767a5e1e26511331ca01de013c0c7a004a0 Mon Sep 17 00:00:00 2001 From: GanesanRengasamy Date: Wed, 15 Apr 2026 05:54:25 +0530 Subject: [PATCH 4/7] Review comments addressed --- src/apm_cli/deps/github_downloader.py | 3 +-- tests/test_github_downloader.py | 33 +++++++++++++++------------ 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/apm_cli/deps/github_downloader.py b/src/apm_cli/deps/github_downloader.py index bd377c763..098552484 100644 --- a/src/apm_cli/deps/github_downloader.py +++ b/src/apm_cli/deps/github_downloader.py @@ -1295,8 +1295,7 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re else: fallback_url_candidates = [ f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", - f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", - f"https://{host}/api/v4/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", + f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}", ] for fallback_url in fallback_url_candidates: diff --git a/tests/test_github_downloader.py b/tests/test_github_downloader.py index de2fb1fb3..62a92abc6 100644 --- a/tests/test_github_downloader.py +++ b/tests/test_github_downloader.py @@ -1703,23 +1703,28 @@ def test_falls_back_to_api_v1_when_raw_returns_non_200(self): assert "/api/v1/" in urls[1] -class TestGitLabApiVersionNegotiation: - """API version negotiation: v1 -> v3 -> v4 for generic hosts.""" +class TestGiteaGogsApiVersionNegotiation: + """API version negotiation: raw URL -> v1 -> v3 for Gitea/Gogs generic hosts. + + The implementation intentionally stops at v3. GitLab uses a completely + different API shape (/api/v4/projects/:id/repository/files/...) that is + not compatible with the GitHub Contents-style endpoint negotiated here; + GitLab support is limited to git-clone operations only. + """ def setup_method(self): with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH: self.downloader = GitHubPackageDownloader() - def test_gitlab_v4_reached_after_v1_and_v3_return_404(self): - """GitLab uses /api/v4/ -- negotiation must try v1, v3, then v4.""" - dep_ref = DependencyReference.parse("gitlab.myorg.com/owner/repo") - expected = b"gitlab file content" + def test_v1_falls_back_to_v3_for_generic_hosts(self): + """When Gitea raw URL and v1 both return 404, v3 is tried and succeeds.""" + dep_ref = DependencyReference.parse("gitea.myorg.com/owner/repo") + expected = b"gitea v3 file content" side_effects = [ _make_resp(404), # raw URL _make_resp(404), # v1 - _make_resp(404), # v3 - _make_resp(200, expected), # v4 + _make_resp(200, expected), # v3 ] with patch.object(self.downloader, "_resilient_get", side_effect=side_effects) as mock_get: result = self.downloader.download_raw_file(dep_ref, "skill.md", "main") @@ -1728,10 +1733,10 @@ def test_gitlab_v4_reached_after_v1_and_v3_return_404(self): urls = [c[0][0] for c in mock_get.call_args_list] assert "/api/v1/" in urls[1] assert "/api/v3/" in urls[2] - assert "/api/v4/" in urls[3] + assert len(mock_get.call_args_list) == 3 - def test_gitea_v1_succeeds_without_trying_v3_or_v4(self): - """When v1 returns 200, v3 and v4 must never be called.""" + def test_gitea_v1_succeeds_without_trying_v3(self): + """When v1 returns 200, v3 must never be called.""" dep_ref = DependencyReference.parse("gitea.example.com/owner/repo") expected = b"gitea content" @@ -1743,13 +1748,13 @@ def test_gitea_v1_succeeds_without_trying_v3_or_v4(self): assert result == expected urls = [c[0][0] for c in mock_get.call_args_list] - assert all("/api/v3/" not in u and "/api/v4/" not in u for u in urls) + assert all("/api/v3/" not in u for u in urls) def test_all_api_versions_404_raises_runtime_error(self): """When every API version returns 404 for both refs, a clear error is raised.""" dep_ref = DependencyReference.parse("git.example.com/owner/repo") - # raw + v1 + v3 + v4 for 'main', then v1 + v3 + v4 for 'master' fallback - side_effects = [_make_resp(404)] * 8 + # raw(main) + v1(main) + v3(main) = 3 calls, then v1(master) + v3(master) = 2 calls + side_effects = [_make_resp(404)] * 5 with patch.object(self.downloader, "_resilient_get", side_effect=side_effects): with pytest.raises(RuntimeError, match="File not found"): From 00f88e7ed3e3379147e153a1b27e7ea932da4274 Mon Sep 17 00:00:00 2001 From: GanesanRengasamy Date: Fri, 24 Apr 2026 05:43:12 +0530 Subject: [PATCH 5/7] Update reference.py --- src/apm_cli/models/dependency/reference.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/apm_cli/models/dependency/reference.py b/src/apm_cli/models/dependency/reference.py index 742c83ebf..bed1dbddb 100644 --- a/src/apm_cli/models/dependency/reference.py +++ b/src/apm_cli/models/dependency/reference.py @@ -11,7 +11,6 @@ is_artifactory_path, is_azure_devops_hostname, is_github_hostname, - is_gitlab_hostname, is_supported_git_host, parse_artifactory_path, unsupported_host_error, @@ -581,18 +580,10 @@ def _detect_virtual_package(cls, dependency_str: str): for seg in path_segments ) has_collection = "collections" in path_segments - # GitLab supports nested groups (group/subgroup/repo), so the full - # path is the repo -- no shorthand subdirectory splitting. - # Use https://gitlab.com/group/subgroup/repo.git for GitLab nested - # groups; shorthand subdirectory syntax is not supported for GitLab. - # All other generic hosts (Gitea, Bitbucket, self-hosted, etc.) use - # the owner/repo convention, so extra segments are a virtual subdir. if has_virtual_ext or has_collection: min_base_segments = 2 - elif is_gitlab_hostname(validated_host): - min_base_segments = len(path_segments) else: - min_base_segments = 2 + min_base_segments = len(path_segments) else: min_base_segments = 2 From 77c077d694920297cbb56dfd58e0dd7405412385 Mon Sep 17 00:00:00 2001 From: GanesanRengasamy Date: Fri, 24 Apr 2026 05:55:58 +0530 Subject: [PATCH 6/7] restore: re-add TestVirtualFilePackageYamlGeneration and TestSCPPortDetection These two test classes were accidentally removed from the branch. Restoring them from upstream main (8665f4b) to ensure full coverage is preserved alongside the Gitea virtual package detection changes. --- tests/test_github_downloader.py | 175 ++++++++++++++++++++++++++++ tests/unit/test_generic_git_urls.py | 115 ++++++++++++++++++ 2 files changed, 290 insertions(+) diff --git a/tests/test_github_downloader.py b/tests/test_github_downloader.py index b2d1b2efc..585c85215 100644 --- a/tests/test_github_downloader.py +++ b/tests/test_github_downloader.py @@ -1656,6 +1656,181 @@ def test_try_raw_download_returns_content_on_200(self): assert result == b'hello world' +class TestVirtualFilePackageYamlGeneration: + """Tests that apm.yml for virtual packages is always valid YAML.""" + + def _make_dep_ref(self, virtual_path): + """Helper: build a minimal DependencyReference for a virtual file.""" + from apm_cli.models.apm_package import DependencyReference + dep_ref = Mock(spec=DependencyReference) + dep_ref.is_virtual = True + dep_ref.virtual_path = virtual_path + dep_ref.reference = "main" + dep_ref.repo_url = "github/awesome-copilot" + dep_ref.get_virtual_package_name.return_value = "awesome-copilot-swe-subagent" + dep_ref.to_github_url.return_value = f"https://github.com/github/awesome-copilot/blob/main/{virtual_path}" + dep_ref.is_virtual_file.return_value = True + dep_ref.VIRTUAL_FILE_EXTENSIONS = [".prompt.md", ".instructions.md", ".chatmode.md", ".agent.md"] + return dep_ref + + def _make_collection_dep_ref(self, virtual_path): + """Helper: build a minimal DependencyReference for a virtual collection.""" + from apm_cli.models.apm_package import DependencyReference + dep_ref = Mock(spec=DependencyReference) + dep_ref.is_virtual = True + dep_ref.virtual_path = virtual_path + dep_ref.reference = "main" + dep_ref.repo_url = "github/my-org" + dep_ref.get_virtual_package_name.return_value = "my-org-my-collection" + dep_ref.to_github_url.return_value = f"https://github.com/github/my-org/blob/main/{virtual_path}" + dep_ref.is_virtual_collection.return_value = True + return dep_ref + + def test_yaml_with_colon_in_description(self, tmp_path): + """apm.yml must be valid when the agent description contains a colon.""" + import yaml + + agent_content = ( + b"---\n" + b"name: 'SWE'\n" + b"description: 'Senior software engineer subagent for implementation tasks:" + b" feature development, debugging, refactoring, and testing.'\n" + b"tools: ['vscode']\n" + b"---\n\n## Body\n" + ) + + dep_ref = self._make_dep_ref("agents/swe-subagent.agent.md") + target_path = tmp_path / "pkg" + + downloader = GitHubPackageDownloader() + with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH: + with patch.object(downloader, "download_raw_file", return_value=agent_content): + downloader.download_virtual_file_package(dep_ref, target_path) + + apm_yml_path = target_path / "apm.yml" + assert apm_yml_path.exists(), "apm.yml was not created" + + content = apm_yml_path.read_text(encoding="utf-8") + parsed = yaml.safe_load(content) # must not raise + + expected = ( + "Senior software engineer subagent for implementation tasks:" + " feature development, debugging, refactoring, and testing." + ) + assert parsed["description"] == expected + + def test_yaml_with_colon_in_name(self, tmp_path): + """apm.yml must be valid even when the package name contains a colon.""" + import yaml + + dep_ref = self._make_dep_ref("agents/my-agent.agent.md") + dep_ref.get_virtual_package_name.return_value = "org-name: special" + + agent_content = b"---\nname: 'plain'\ndescription: 'plain'\n---\n" + target_path = tmp_path / "pkg" + + downloader = GitHubPackageDownloader() + with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH: + with patch.object(downloader, "download_raw_file", return_value=agent_content): + downloader.download_virtual_file_package(dep_ref, target_path) + + content = (target_path / "apm.yml").read_text(encoding="utf-8") + parsed = yaml.safe_load(content) + assert parsed["name"] == "org-name: special" + + def test_yaml_without_special_characters_still_valid(self, tmp_path): + """apm.yml generation must still work for ordinary descriptions.""" + import yaml + + agent_content = ( + b"---\n" + b"name: 'Simple Agent'\n" + b"description: 'A simple agent without special chars'\n" + b"---\n" + ) + + dep_ref = self._make_dep_ref("agents/simple.agent.md") + target_path = tmp_path / "pkg" + + downloader = GitHubPackageDownloader() + with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH: + with patch.object(downloader, "download_raw_file", return_value=agent_content): + downloader.download_virtual_file_package(dep_ref, target_path) + + content = (target_path / "apm.yml").read_text(encoding="utf-8") + parsed = yaml.safe_load(content) + assert parsed["description"] == "A simple agent without special chars" + + def test_collection_yaml_with_colon_in_description(self, tmp_path): + """apm.yml for collection packages must be valid when description contains a colon.""" + import yaml + + collection_manifest = ( + b"id: my-collection\n" + b"name: My Collection\n" + b"description: 'A collection for tasks: feature development, debugging.'\n" + b"items:\n" + b" - path: agents/my-agent.agent.md\n" + b" kind: agent\n" + ) + agent_file = b"---\nname: My Agent\n---\n## Body\n" + + dep_ref = self._make_collection_dep_ref("collections/my-collection") + target_path = tmp_path / "pkg" + + downloader = GitHubPackageDownloader() + + def _fake_download(dep_ref_arg, path, ref): + if "collection" in path: + return collection_manifest + return agent_file + + with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH: + with patch.object(downloader, "download_raw_file", side_effect=_fake_download): + downloader.download_collection_package(dep_ref, target_path) + + content = (target_path / "apm.yml").read_text(encoding="utf-8") + parsed = yaml.safe_load(content) # must not raise + + assert parsed["description"] == "A collection for tasks: feature development, debugging." + + def test_collection_yaml_with_colon_in_tags(self, tmp_path): + """apm.yml for collection packages must be valid when tags contain a colon.""" + import yaml + + collection_manifest = ( + b"id: tagged-collection\n" + b"name: Tagged\n" + b"description: Normal description\n" + b"tags:\n" + b" - 'scope: engineering'\n" + b" - plain-tag\n" + b"items:\n" + b" - path: agents/my-agent.agent.md\n" + b" kind: agent\n" + ) + agent_file = b"---\nname: My Agent\n---\n## Body\n" + + dep_ref = self._make_collection_dep_ref("collections/tagged-collection") + target_path = tmp_path / "pkg" + + downloader = GitHubPackageDownloader() + + def _fake_download(dep_ref_arg, path, ref): + if "collection" in path: + return collection_manifest + return agent_file + + with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH: + with patch.object(downloader, "download_raw_file", side_effect=_fake_download): + downloader.download_collection_package(dep_ref, target_path) + + content = (target_path / "apm.yml").read_text(encoding="utf-8") + parsed = yaml.safe_load(content) + + assert parsed["tags"] == ["scope: engineering", "plain-tag"] + + # --------------------------------------------------------------------------- # Generic host (Gitea / GitLab) download tests # --------------------------------------------------------------------------- diff --git a/tests/unit/test_generic_git_urls.py b/tests/unit/test_generic_git_urls.py index 61246f871..8a9538661 100644 --- a/tests/unit/test_generic_git_urls.py +++ b/tests/unit/test_generic_git_urls.py @@ -783,6 +783,121 @@ def test_https_nested_group_with_virtual_ext_rejected(self): DependencyReference.parse("https://gitlab.com/group/subgroup/file.prompt.md") +class TestSCPPortDetection: + """Detect port-like first path segment in SCP shorthand (git@host:port/path). + + SCP shorthand uses ':' as the path separator and cannot carry a port. + When the first path segment is a valid TCP port (1-65535), APM should + raise a ValueError with an actionable suggestion to use ssh:// instead. + """ + + def test_scp_with_port_7999_raises(self): + """Bitbucket Datacenter: git@host:7999/project/repo.git.""" + with pytest.raises(ValueError, match="ssh://"): + DependencyReference.parse("git@bitbucket.example.com:7999/project/repo.git") + + def test_scp_with_port_22_raises(self): + """Default SSH port 22 should still be detected.""" + with pytest.raises(ValueError, match="ssh://"): + DependencyReference.parse("git@host.example.com:22/owner/repo.git") + + def test_scp_with_port_65535_raises(self): + """Max valid TCP port should trigger detection.""" + with pytest.raises(ValueError, match="ssh://"): + DependencyReference.parse("git@host.example.com:65535/owner/repo.git") + + def test_scp_with_port_1_raises(self): + """Min valid TCP port should trigger detection.""" + with pytest.raises(ValueError, match="ssh://"): + DependencyReference.parse("git@host.example.com:1/owner/repo.git") + + def test_scp_with_leading_zeros_raises(self): + """Leading zeros: 007999 -> int 7999, still a valid port.""" + with pytest.raises(ValueError, match="ssh://"): + DependencyReference.parse("git@host.example.com:007999/project/repo.git") + + def test_scp_port_only_no_path_raises(self): + """git@host:7999 with no repo path after the port.""" + with pytest.raises(ValueError, match="no repository path follows"): + DependencyReference.parse("git@host.example.com:7999") + + def test_scp_port_trailing_slash_no_path_raises(self): + """git@host:7999/ -- trailing slash but empty remaining path.""" + with pytest.raises(ValueError, match="no repository path follows"): + DependencyReference.parse("git@host.example.com:7999/") + + def test_scp_port_with_ref_raises_and_preserves_ref(self): + """Port-like segment with #ref should be caught; suggestion preserves the ref.""" + with pytest.raises( + ValueError, + match=r"ssh://git@host\.example\.com:7999/project/repo\.git#main", + ): + DependencyReference.parse("git@host.example.com:7999/project/repo.git#main") + + def test_scp_port_with_alias_raises_and_preserves_alias(self): + """Port-like segment with @alias should be caught; suggestion preserves the alias.""" + with pytest.raises( + ValueError, + match=r"ssh://git@host\.example\.com:7999/project/repo\.git@my-alias", + ): + DependencyReference.parse("git@host.example.com:7999/project/repo.git@my-alias") + + def test_scp_port_with_ref_and_alias_preserves_both(self): + """Suggestion should include both #ref and @alias when present.""" + with pytest.raises( + ValueError, + match=r"ssh://git@host\.example\.com:7999/project/repo\.git#v1\.0@my-alias", + ): + DependencyReference.parse("git@host.example.com:7999/project/repo.git#v1.0@my-alias") + + def test_suggestion_includes_git_suffix(self): + """When the user wrote .git, the suggestion should preserve it.""" + with pytest.raises( + ValueError, + match=r"ssh://git@host\.example\.com:7999/project/repo\.git", + ): + DependencyReference.parse("git@host.example.com:7999/project/repo.git") + + def test_suggestion_omits_git_suffix_when_absent(self): + """When the user omitted .git, the suggestion should not add it.""" + with pytest.raises(ValueError) as excinfo: + DependencyReference.parse("git@host.example.com:7999/project/repo") + msg = str(excinfo.value) + assert "ssh://git@host.example.com:7999/project/repo" in msg + assert not msg.endswith(".git") + + def test_port_zero_not_detected(self): + """Port 0 is invalid -- should NOT trigger port detection, parses as org name.""" + dep = DependencyReference.parse("git@host.example.com:0/repo") + assert dep.repo_url == "0/repo" + assert dep.port is None + + def test_port_out_of_range_not_detected(self): + """99999 > 65535 -- not a valid port, should NOT trigger port detection.""" + dep = DependencyReference.parse("git@host.example.com:99999/repo") + assert dep.repo_url == "99999/repo" + assert dep.port is None + + def test_normal_org_name_not_detected(self): + """Non-numeric org name should parse normally.""" + dep = DependencyReference.parse("git@gitlab.com:acme/repo.git") + assert dep.repo_url == "acme/repo" + assert dep.port is None + + def test_alphanumeric_first_segment_not_detected(self): + """'v2' is not purely numeric -- should parse normally.""" + dep = DependencyReference.parse("git@gitlab.com:v2/repo.git") + assert dep.repo_url == "v2/repo" + assert dep.port is None + + def test_ssh_protocol_with_port_still_works(self): + """ssh:// URL form with port must continue working (regression guard).""" + dep = DependencyReference.parse("ssh://git@bitbucket.example.com:7999/project/repo.git") + assert dep.host == "bitbucket.example.com" + assert dep.port == 7999 + assert dep.repo_url == "project/repo" + + class TestGiteaVirtualPackageDetection: """Gitea-specific virtual package detection -- supplements TestFQDNVirtualPaths and TestNestedGroupSupport with Gitea host fixtures and regression guards From 8b56ffaaac88265021b958998470e2958c00e04e Mon Sep 17 00:00:00 2001 From: GanesanRengasamy Date: Sat, 25 Apr 2026 13:38:43 +0530 Subject: [PATCH 7/7] Review concerns addressed --- CHANGELOG.md | 1 + src/apm_cli/utils/github_host.py | 17 ----------------- tests/unit/test_github_host.py | 23 ----------------------- 3 files changed, 1 insertion(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e3496861..55c1e9a7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - CI: add `APM Self-Check` to `ci.yml` for `apm audit --ci`, regeneration-drift validation, and `merge-gate.yml` `EXPECTED_CHECKS` coverage. (#885) +- Virtual package support for self-hosted Git services (Gitea, Gogs): `apm install` now resolves subdirectory packages and raw file downloads from generic Git hosts via raw URL and API version negotiation (v1/v3). GitLab nested-group paths (`group/subgroup/repo`) are treated as full repo URLs (dict form required for virtual packages). -- by @ganesanviji (#587) ### Changed diff --git a/src/apm_cli/utils/github_host.py b/src/apm_cli/utils/github_host.py index d821fc679..eda296bc4 100644 --- a/src/apm_cli/utils/github_host.py +++ b/src/apm_cli/utils/github_host.py @@ -29,23 +29,6 @@ def is_azure_devops_hostname(hostname: Optional[str]) -> bool: return False -def is_gitlab_hostname(hostname: Optional[str]) -> bool: - """Return True if hostname is GitLab (cloud or self-hosted). - - GitLab supports nested groups (group/subgroup/repo), so paths with - more than two segments should be treated as repo paths, not virtual - subdirectory packages. - - Accepts: - - gitlab.com - - Any hostname starting with 'gitlab.' (common self-hosted convention) - """ - if not hostname: - return False - h = hostname.lower() - return h == "gitlab.com" or h.startswith("gitlab.") - - def is_github_hostname(hostname: Optional[str]) -> bool: """Return True if hostname should be treated as GitHub (cloud or enterprise). diff --git a/tests/unit/test_github_host.py b/tests/unit/test_github_host.py index 59070b49b..f767ad477 100644 --- a/tests/unit/test_github_host.py +++ b/tests/unit/test_github_host.py @@ -71,29 +71,6 @@ def test_is_github_hostname_defaults(): assert not github_host.is_github_hostname("example.com") -def test_is_gitlab_hostname(): - """is_gitlab_hostname() matches gitlab.com and self-hosted gitlab.* instances. - - This drives _detect_virtual_package: GitLab supports nested groups so the - full path is kept as the repo URL. All other generic hosts (Gitea, - Bitbucket, etc.) use the owner/repo convention (2 base segments). - """ - # Cloud and conventional self-hosted GitLab - assert github_host.is_gitlab_hostname("gitlab.com") - assert github_host.is_gitlab_hostname("gitlab.mycompany.com") - assert github_host.is_gitlab_hostname("gitlab.internal") - assert github_host.is_gitlab_hostname("GITLAB.COM") # case-insensitive - - # Non-GitLab hosts must NOT match - assert not github_host.is_gitlab_hostname("github.com") - assert not github_host.is_gitlab_hostname("bitbucket.org") - assert not github_host.is_gitlab_hostname("gitea.myorg.com") - assert not github_host.is_gitlab_hostname("git.company.internal") - assert not github_host.is_gitlab_hostname("dev.azure.com") - assert not github_host.is_gitlab_hostname(None) - assert not github_host.is_gitlab_hostname("") - - def test_is_azure_devops_hostname(): """Test Azure DevOps hostname detection.""" # Valid Azure DevOps hosts