diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f2432674..3b21f9cc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -81,10 +81,30 @@ uv pip install -e ".[dev]" ## Testing -We use pytest for testing: +We use pytest for testing. The project uses `uv` to manage virtual environments and dependencies — the recommended way to run tests is: ```bash uv run pytest +# install dev dependencies (creates .venv managed by uv) +uv sync --extra dev + +# run the test suite +uv run pytest -q +``` + +If you don't have `uv` available, you can use a standard Python venv and pip: + +```bash +# create and activate a venv (POSIX / WSL) +python -m venv .venv +source .venv/bin/activate + +# install this package in editable mode and test deps +pip install -U pip +pip install -e .[dev] + +# run tests +pytest -q ``` ## Coding Style diff --git a/src/apm_cli/adapters/client/copilot.py b/src/apm_cli/adapters/client/copilot.py index bc5c709e..f22c0602 100644 --- a/src/apm_cli/adapters/client/copilot.py +++ b/src/apm_cli/adapters/client/copilot.py @@ -12,6 +12,7 @@ from ...registry.client import SimpleRegistryClient from ...registry.integration import RegistryIntegration from ...core.docker_args import DockerArgsProcessor +from ...utils.github_host import is_github_hostname class CopilotClientAdapter(MCPClientAdapter): @@ -671,20 +672,9 @@ def _is_github_server(self, server_name, url): hostname = parsed_url.hostname if hostname: - # Allowlist of valid GitHub hostnames - valid_github_hostnames = [ - "api.githubcopilot.com", - "api.github.com", - "github.com" - ] - - # Exact hostname match - if hostname.lower() in [host.lower() for host in valid_github_hostnames]: - return True - - # Allow subdomains of github.com (with proper validation) - if hostname.lower().endswith(".github.com"): - return True + # Use helper to determine whether hostname is a GitHub host (cloud or enterprise) + if is_github_hostname(hostname): + return True except Exception: # If URL parsing fails, assume it's not a GitHub server diff --git a/src/apm_cli/deps/github_downloader.py b/src/apm_cli/deps/github_downloader.py index 744a11a7..44e757f9 100644 --- a/src/apm_cli/deps/github_downloader.py +++ b/src/apm_cli/deps/github_downloader.py @@ -20,6 +20,7 @@ validate_apm_package, APMPackage ) +from ..utils.github_host import build_https_clone_url, build_ssh_url, sanitize_token_url_in_message, is_github_hostname, default_host class GitHubPackageDownloader: @@ -62,8 +63,9 @@ def _sanitize_git_error(self, error_message: str) -> str: """ import re - # Remove any tokens that might appear in URLs (format: https://token@github.com) - sanitized = re.sub(r'https://[^@\s]+@github\.com', 'https://***@github.com', error_message) + # Remove any tokens that might appear in URLs for github hosts (format: https://token@host) + # Sanitize for default host and common enterprise hosts via helper + sanitized = sanitize_token_url_in_message(error_message, host=default_host()) # Remove any tokens that might appear as standalone values sanitized = re.sub(r'(ghp_|gho_|ghu_|ghs_|ghr_)[a-zA-Z0-9_]+', '***', sanitized) @@ -88,16 +90,17 @@ def _build_repo_url(self, repo_ref: str, use_ssh: bool = False) -> str: Returns: str: Repository URL suitable for git clone operations """ + # Determine host to use. If repo_ref is namespaced with a host (like host/owner/repo), + # the DependencyReference.parse will have normalized repo_ref to owner/repo and stored host separately. + # For this method, callers should pass repo_ref as owner/repo and optionally set self.github_host. + host = getattr(self, 'github_host', None) or default_host() + if use_ssh: - # Use SSH URL for private repository access with SSH keys - return f"git@github.com:{repo_ref}.git" + return build_ssh_url(host, repo_ref) elif self.github_token: - # Use GitHub Enterprise x-access-token format for authenticated access - # This is the standard format for GitHub Actions and Enterprise environments - return f"https://x-access-token:{self.github_token}@github.com/{repo_ref}.git" + return build_https_clone_url(host, repo_ref, token=self.github_token) else: - # Use standard HTTPS URL for public repositories - return f"https://github.com/{repo_ref}" + return build_https_clone_url(host, repo_ref, token=None) def _clone_with_fallback(self, repo_url_base: str, target_path: Path, **clone_kwargs) -> Repo: """Attempt to clone a repository with fallback authentication methods. @@ -192,6 +195,9 @@ def resolve_git_reference(self, repo_ref: str) -> ResolvedReference: if is_likely_commit: # For commit SHAs, clone full repository first, then checkout the commit try: + # Ensure host is set for enterprise repos + if getattr(dep_ref, 'host', None): + self.github_host = dep_ref.host repo = self._clone_with_fallback(dep_ref.repo_url, temp_dir) commit = repo.commit(ref) ref_type = GitReferenceType.COMMIT @@ -204,6 +210,8 @@ def resolve_git_reference(self, repo_ref: str) -> ResolvedReference: # For branches and tags, try shallow clone first try: # Try to clone with specific branch/tag first + if getattr(dep_ref, 'host', None): + self.github_host = dep_ref.host repo = self._clone_with_fallback( dep_ref.repo_url, temp_dir, @@ -213,12 +221,14 @@ def resolve_git_reference(self, repo_ref: str) -> ResolvedReference: ref_type = GitReferenceType.BRANCH # Could be branch or tag resolved_commit = repo.head.commit.hexsha ref_name = ref - + except GitCommandError: # If branch/tag clone fails, try full clone and resolve reference try: + if getattr(dep_ref, 'host', None): + self.github_host = dep_ref.host repo = self._clone_with_fallback(dep_ref.repo_url, temp_dir) - + # Try to resolve the reference try: # Try as branch first @@ -236,11 +246,11 @@ def resolve_git_reference(self, repo_ref: str) -> ResolvedReference: ref_name = ref except IndexError: raise ValueError(f"Reference '{ref}' not found in repository {dep_ref.repo_url}") - + except Exception as e: sanitized_error = self._sanitize_git_error(str(e)) raise ValueError(f"Could not resolve reference '{ref}' in repository {dep_ref.repo_url}: {sanitized_error}") - + except GitCommandError as e: # Check if this might be a private repository access issue if "Authentication failed" in str(e) or "remote: Repository not found" in str(e): diff --git a/src/apm_cli/models/apm_package.py b/src/apm_cli/models/apm_package.py index a349085f..dd6d9acc 100644 --- a/src/apm_cli/models/apm_package.py +++ b/src/apm_cli/models/apm_package.py @@ -2,6 +2,7 @@ import re import urllib.parse +from ..utils.github_host import is_github_hostname, default_host import yaml from dataclasses import dataclass from enum import Enum @@ -47,6 +48,7 @@ def __str__(self) -> str: class DependencyReference: """Represents a reference to an APM dependency.""" repo_url: str # e.g., "user/repo" or "github.com/user/repo" + host: Optional[str] = None # Optional host (github.com or enterprise host) reference: Optional[str] = None # e.g., "main", "v1.0.0", "abc123" alias: Optional[str] = None # Optional alias for the dependency @@ -81,26 +83,30 @@ def parse(cls, dependency_str: str) -> "DependencyReference": # Handle SSH URLs first (before @ processing) to avoid conflict with alias separator original_str = dependency_str - if dependency_str.startswith("git@github.com:"): - # For SSH URLs, extract repo part before @ processing - ssh_repo_part = dependency_str[len("git@github.com:"):] - if ssh_repo_part.endswith(".git"): + ssh_repo_part = None + host = None + # Match patterns like git@host:owner/repo.git + m = re.match(r'^git@([^:]+):(.+)$', dependency_str) + if m: + host = m.group(1) + ssh_repo_part = m.group(2) + if ssh_repo_part.endswith('.git'): ssh_repo_part = ssh_repo_part[:-4] - + # Handle reference and alias in SSH URL reference = None alias = None - + if "@" in ssh_repo_part: ssh_repo_part, alias = ssh_repo_part.rsplit("@", 1) alias = alias.strip() - + if "#" in ssh_repo_part: repo_part, reference = ssh_repo_part.rsplit("#", 1) reference = reference.strip() else: repo_part = ssh_repo_part - + repo_url = repo_part.strip() else: # Handle alias (@alias) for non-SSH URLs @@ -125,40 +131,47 @@ def parse(cls, dependency_str: str) -> "DependencyReference": if repo_url.startswith(("https://", "http://")): # Already a full URL - parse directly parsed_url = urllib.parse.urlparse(repo_url) + host = parsed_url.hostname or "" else: - # Safely construct GitHub URL from various input formats + # Safely construct a URL from various input formats. Support both github.com + # and GitHub Enterprise hostnames like orgname.ghe.com (org-specific GHE instances). parts = repo_url.split("/") - if len(parts) >= 3 and parts[0] == "github.com": - # Format: github.com/user/repo (must be precisely so) + # host/user/repo OR user/repo (no host) + if len(parts) >= 3 and is_github_hostname(parts[0]): + # Format: github.com/user/repo OR orgname.ghe.com/user/repo OR custom host + host = parts[0] user_repo = "/".join(parts[1:3]) elif len(parts) >= 2 and "." not in parts[0]: - # Format: user/repo (no dot in user part, so not a domain) + # Format: user/repo (no dot in first segment, so treat as user) + host = "github.com" user_repo = "/".join(parts[:2]) else: - raise ValueError(f"Only GitHub repositories are supported. Use 'user/repo' or 'github.com/user/repo' format") - + raise ValueError(f"Only GitHub repositories are supported. Use 'user/repo' or 'github.com/user/repo' or '.ghe.com/user/repo' format") + # Validate format before URL construction (security critical) if not user_repo or "/" not in user_repo: - raise ValueError(f"Invalid repository format: {repo_url}. Expected 'user/repo' or 'github.com/user/repo'") - - parts = user_repo.split("/") - if len(parts) < 2 or not parts[0] or not parts[1]: - raise ValueError(f"Invalid repository format: {repo_url}. Expected 'user/repo' or 'github.com/user/repo'") - - user, repo = parts[0], parts[1] - + raise ValueError(f"Invalid repository format: {repo_url}. Expected 'user/repo' or 'github.com/user/repo' or '.ghe.com/user/repo'") + + uparts = user_repo.split("/") + if len(uparts) < 2 or not uparts[0] or not uparts[1]: + raise ValueError(f"Invalid repository format: {repo_url}. Expected 'user/repo' or 'github.com/user/repo' or '.ghe.com/user/repo'") + + user, repo = uparts[0], uparts[1] + # Security: validate characters to prevent injection if not re.match(r'^[a-zA-Z0-9._-]+$', user): raise ValueError(f"Invalid user name: {user}") if not re.match(r'^[a-zA-Z0-9._-]+$', repo.rstrip('.git')): raise ValueError(f"Invalid repository name: {repo}") - - # Safely construct URL - this is now secure - github_url = urllib.parse.urljoin("https://github.com/", f"{user}/{repo}") + + # Safely construct URL using detected host + github_url = urllib.parse.urljoin(f"https://{host}/", f"{user}/{repo}") parsed_url = urllib.parse.urlparse(github_url) - - # SECURITY: Validate that this is actually a GitHub URL with exact hostname match - if parsed_url.netloc != "github.com": + + # SECURITY: Validate that this is actually a supported GitHub URL. + # Accept github.com and GitHub Enterprise hostnames like '.ghe.com'. Use parsed_url.hostname + hostname = parsed_url.hostname or "" + if not is_github_hostname(hostname): raise ValueError(f"Only GitHub repositories are supported, got hostname: {parsed_url.netloc}") # Extract and validate the path @@ -191,6 +204,10 @@ def parse(cls, dependency_str: str) -> "DependencyReference": if repo_url.endswith(".git"): repo_url = repo_url[:-4] + # If host not set via SSH or parsed parts, default to default_host() + if not host: + host = default_host() + # Validate repo format (should be user/repo) if not re.match(r'^[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+$', repo_url): @@ -199,19 +216,21 @@ def parse(cls, dependency_str: str) -> "DependencyReference": # Validate alias characters if present if alias and not re.match(r'^[a-zA-Z0-9._-]+$', alias): raise ValueError(f"Invalid alias: {alias}. Aliases can only contain letters, numbers, dots, underscores, and hyphens") - - return cls(repo_url=repo_url, reference=reference, alias=alias) - + + return cls(repo_url=repo_url, host=host, reference=reference, alias=alias) + def to_github_url(self) -> str: """Convert to full GitHub URL.""" - return f"https://github.com/{self.repo_url}" - + # Use stored host if present, otherwise default host (supports enterprise via GITHUB_HOST env var) + host = self.host or default_host() + return f"https://{host}/{self.repo_url}" + def get_display_name(self) -> str: """Get display name for this dependency (alias or repo name).""" if self.alias: return self.alias return self.repo_url # Full repo URL for disambiguation - + def __str__(self) -> str: """String representation of the dependency reference.""" result = self.repo_url diff --git a/src/apm_cli/utils/github_host.py b/src/apm_cli/utils/github_host.py new file mode 100644 index 00000000..5a6e451e --- /dev/null +++ b/src/apm_cli/utils/github_host.py @@ -0,0 +1,61 @@ +"""Utilities for handling GitHub and GitHub Enterprise hostnames and URLs.""" +from typing import Optional +import os +import re +import urllib.parse + + +def default_host() -> str: + """Return the default GitHub host (can be overridden via GITHUB_HOST env var).""" + return os.environ.get('GITHUB_HOST', 'github.com') + + +def is_github_hostname(hostname: Optional[str]) -> bool: + """Return True if hostname should be treated as GitHub (cloud or enterprise). + + Accepts 'github.com' and hosts that end with '.ghe.com'. + """ + if not hostname: + return False + h = hostname.lower() + if h == 'github.com': + return True + if h.endswith('.ghe.com'): + return True + # Allow explicit override via comma-separated env var APM_GITHUB_HOSTS + extra = os.environ.get('APM_GITHUB_HOSTS', '') + if extra: + for e in [x.strip().lower() for x in extra.split(',') if x.strip()]: + if h == e: + return True + return False + + +def build_ssh_url(host: str, repo_ref: str) -> str: + """Build an SSH clone URL for the given host and repo_ref (owner/repo).""" + return f"git@{host}:{repo_ref}.git" + + +def build_https_clone_url(host: str, repo_ref: str, token: Optional[str] = None) -> str: + """Build an HTTPS clone URL. If token provided, use x-access-token format (no escaping done). + + Note: callers must avoid logging raw token-bearing URLs. + """ + if token: + # Use x-access-token format which is compatible with GitHub Enterprise and GH Actions + return f"https://x-access-token:{token}@{host}/{repo_ref}.git" + return f"https://{host}/{repo_ref}" + + +def sanitize_token_url_in_message(message: str, host: Optional[str] = None) -> str: + """Sanitize occurrences of token-bearing https URLs for the given host in message. + + If host is None, default_host() is used. Replaces https://@host with https://***@host + """ + if not host: + host = default_host() + + # Escape host for regex + host_re = re.escape(host) + pattern = rf'https://[^@\s]+@{host_re}' + return re.sub(pattern, f'https://***@{host}', message) diff --git a/tests/test_apm_package_models.py b/tests/test_apm_package_models.py index 321d04dd..c3df2aa8 100644 --- a/tests/test_apm_package_models.py +++ b/tests/test_apm_package_models.py @@ -17,6 +17,7 @@ validate_apm_package, parse_git_reference, ) +from apm_cli.utils import github_host class TestDependencyReference: @@ -66,17 +67,30 @@ def test_parse_with_reference_and_alias(self): def test_parse_github_urls(self): """Test parsing various GitHub URL formats.""" + host = github_host.default_host() formats = [ - "github.com/user/repo", - "https://github.com/user/repo", - "https://github.com/user/repo.git", - "git@github.com:user/repo", - "git@github.com:user/repo.git", + f"{host}/user/repo", + f"https://{host}/user/repo", + f"https://{host}/user/repo.git", + f"git@{host}:user/repo", + f"git@{host}:user/repo.git", ] for url_format in formats: dep = DependencyReference.parse(url_format) assert dep.repo_url == "user/repo" + + def test_parse_ghe_urls(self): + """Test parsing GitHub Enterprise (GHE) hostname formats like orgname.ghe.com.""" + formats = [ + "orgname.ghe.com/user/repo", + "https://orgname.ghe.com/user/repo", + "https://orgname.ghe.com/user/repo.git", + ] + + for url_format in formats: + dep = DependencyReference.parse(url_format) + assert dep.repo_url == "user/repo" def test_parse_invalid_formats(self): """Test parsing invalid dependency formats.""" @@ -97,7 +111,8 @@ def test_parse_invalid_formats(self): def test_to_github_url(self): """Test converting to GitHub URL.""" dep = DependencyReference.parse("user/repo") - assert dep.to_github_url() == "https://github.com/user/repo" + expected = f"https://{github_host.default_host()}/user/repo" + assert dep.to_github_url() == expected def test_get_display_name(self): """Test getting display name.""" diff --git a/tests/test_github_downloader_token_precedence.py b/tests/test_github_downloader_token_precedence.py index cae6367c..805df3ef 100644 --- a/tests/test_github_downloader_token_precedence.py +++ b/tests/test_github_downloader_token_precedence.py @@ -5,6 +5,7 @@ import pytest from src.apm_cli.deps.github_downloader import GitHubPackageDownloader +from apm_cli.utils import github_host class TestGitHubDownloaderTokenPrecedence: @@ -66,7 +67,8 @@ def test_public_repo_access_without_token(self): # Build URL should work for public repos public_url = downloader._build_repo_url('octocat/Hello-World', use_ssh=False) - assert public_url == 'https://github.com/octocat/Hello-World' + expected_public = f"https://{github_host.default_host()}/octocat/Hello-World" + assert public_url == expected_public def test_private_repo_url_building_with_token(self): """Test URL building for private repos with authentication.""" @@ -77,7 +79,7 @@ def test_private_repo_url_building_with_token(self): # Should build authenticated URL for private repos auth_url = downloader._build_repo_url('private-org/private-repo', use_ssh=False) - expected_url = 'https://x-access-token:private-repo-token@github.com/private-org/private-repo.git' + expected_url = github_host.build_https_clone_url(github_host.default_host(), 'private-org/private-repo', token='private-repo-token') assert auth_url == expected_url def test_ssh_url_building(self): @@ -89,7 +91,8 @@ def test_ssh_url_building(self): # Should build SSH URL when requested ssh_url = downloader._build_repo_url('user/repo', use_ssh=True) - assert ssh_url == 'git@github.com:user/repo.git' + expected_ssh = github_host.build_ssh_url(github_host.default_host(), 'user/repo') + assert ssh_url == expected_ssh def test_error_message_sanitization_with_new_token(self): """Test that error messages properly sanitize the new token names.""" @@ -102,10 +105,11 @@ def test_error_message_sanitization_with_new_token(self): assert 'GITHUB_APM_PAT=***' in sanitized # Test sanitization of URLs with tokens - error_with_url = "fatal: Authentication failed for 'https://ghp_secrettoken123@github.com/user/repo.git'" + host = github_host.default_host() + error_with_url = f"fatal: Authentication failed for 'https://ghp_secrettoken123@{host}/user/repo.git'" sanitized = downloader._sanitize_git_error(error_with_url) assert 'ghp_secrettoken123' not in sanitized - assert 'https://***@github.com' in sanitized + assert f'https://***@{host}' in sanitized class TestGitHubDownloaderErrorMessages: diff --git a/tests/unit/test_github_host.py b/tests/unit/test_github_host.py new file mode 100644 index 00000000..34f15030 --- /dev/null +++ b/tests/unit/test_github_host.py @@ -0,0 +1,21 @@ +import os +from apm_cli.utils import github_host + + +def test_default_host_env_override(monkeypatch): + monkeypatch.setenv('GITHUB_HOST', 'example.ghe.com') + assert github_host.default_host() == 'example.ghe.com' + monkeypatch.delenv('GITHUB_HOST', raising=False) + + +def test_is_github_hostname_defaults(): + assert github_host.is_github_hostname(github_host.default_host()) + assert github_host.is_github_hostname('org.ghe.com') + assert not github_host.is_github_hostname('example.com') + + +def test_sanitize_token_url_in_message(): + host = github_host.default_host() + msg = f"fatal: Authentication failed for 'https://ghp_secret@{host}/user/repo.git'" + sanitized = github_host.sanitize_token_url_in_message(msg, host=host) + assert f'***@{host}' in sanitized diff --git a/tests/unit/test_registry_client.py b/tests/unit/test_registry_client.py index 920c6914..138d9be4 100644 --- a/tests/unit/test_registry_client.py +++ b/tests/unit/test_registry_client.py @@ -4,6 +4,7 @@ import os from unittest import mock from apm_cli.registry.client import SimpleRegistryClient +from apm_cli.utils import github_host class TestSimpleRegistryClient(unittest.TestCase): @@ -103,7 +104,7 @@ def test_get_server_info(self, mock_get): "name": "test-server", "description": "Test server description", "repository": { - "url": "https://github.com/test/test-server", + "url": f"https://{github_host.default_host()}/test/test-server", "source": "github", "id": "12345" }, diff --git a/tests/unit/test_registry_integration.py b/tests/unit/test_registry_integration.py index 15d59163..fa18fd3b 100644 --- a/tests/unit/test_registry_integration.py +++ b/tests/unit/test_registry_integration.py @@ -4,6 +4,7 @@ from unittest import mock import requests from apm_cli.registry.integration import RegistryIntegration +from apm_cli.utils import github_host class TestRegistryIntegration(unittest.TestCase): @@ -23,13 +24,13 @@ def test_list_available_packages(self, mock_list_servers): "id": "123", "name": "server1", "description": "Description 1", - "repository": {"url": "https://github.com/test/server1"} + "repository": {"url": f"https://{github_host.default_host()}/test/server1"} }, { "id": "456", "name": "server2", "description": "Description 2", - "repository": {"url": "https://github.com/test/server2"} + "repository": {"url": f"https://{github_host.default_host()}/test/server2"} } ], None @@ -42,7 +43,7 @@ def test_list_available_packages(self, mock_list_servers): self.assertEqual(len(packages), 2) self.assertEqual(packages[0]["name"], "server1") self.assertEqual(packages[0]["id"], "123") - self.assertEqual(packages[0]["repository"]["url"], "https://github.com/test/server1") + self.assertEqual(packages[0]["repository"]["url"], f"https://{github_host.default_host()}/test/server1") self.assertEqual(packages[1]["name"], "server2") @mock.patch('apm_cli.registry.client.SimpleRegistryClient.search_servers') @@ -74,7 +75,7 @@ def test_get_package_info(self, mock_find_server_by_reference): "name": "test-server", "description": "Test server description", "repository": { - "url": "https://github.com/test/test-server", + "url": f"https://{github_host.default_host()}/test/test-server", "source": "github" }, "version_detail": { @@ -97,7 +98,7 @@ def test_get_package_info(self, mock_find_server_by_reference): # Assertions self.assertEqual(package_info["name"], "test-server") self.assertEqual(package_info["description"], "Test server description") - self.assertEqual(package_info["repository"]["url"], "https://github.com/test/test-server") + self.assertEqual(package_info["repository"]["url"], f"https://{github_host.default_host()}/test/test-server") self.assertEqual(package_info["version_detail"]["version"], "1.0.0") self.assertEqual(package_info["packages"][0]["name"], "test-package") self.assertEqual(len(package_info["versions"]), 1)