Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,30 @@ uv pip install -e ".[dev]"

## Testing

We use pytest for testing:
We use pytest for testing. The project uses `uv` to manage virtual environments and dependencies — the recommended way to run tests is:

```bash
uv run pytest
# install dev dependencies (creates .venv managed by uv)
uv sync --extra dev

# run the test suite
uv run pytest -q
```

If you don't have `uv` available, you can use a standard Python venv and pip:

```bash
# create and activate a venv (POSIX / WSL)
python -m venv .venv
source .venv/bin/activate

# install this package in editable mode and test deps
pip install -U pip
pip install -e .[dev]

# run tests
pytest -q
```

## Coding Style
Expand Down
18 changes: 4 additions & 14 deletions src/apm_cli/adapters/client/copilot.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ...registry.client import SimpleRegistryClient
from ...registry.integration import RegistryIntegration
from ...core.docker_args import DockerArgsProcessor
from ...utils.github_host import is_github_hostname


class CopilotClientAdapter(MCPClientAdapter):
Expand Down Expand Up @@ -671,20 +672,9 @@ def _is_github_server(self, server_name, url):
hostname = parsed_url.hostname

if hostname:
# Allowlist of valid GitHub hostnames
valid_github_hostnames = [
"api.githubcopilot.com",
"api.github.com",
"github.com"
]

# Exact hostname match
if hostname.lower() in [host.lower() for host in valid_github_hostnames]:
return True

# Allow subdomains of github.com (with proper validation)
if hostname.lower().endswith(".github.com"):
return True
# Use helper to determine whether hostname is a GitHub host (cloud or enterprise)
if is_github_hostname(hostname):
return True

except Exception:
# If URL parsing fails, assume it's not a GitHub server
Expand Down
36 changes: 23 additions & 13 deletions src/apm_cli/deps/github_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
validate_apm_package,
APMPackage
)
from ..utils.github_host import build_https_clone_url, build_ssh_url, sanitize_token_url_in_message, is_github_hostname, default_host


class GitHubPackageDownloader:
Expand Down Expand Up @@ -62,8 +63,9 @@ def _sanitize_git_error(self, error_message: str) -> str:
"""
import re

# Remove any tokens that might appear in URLs (format: https://token@github.com)
sanitized = re.sub(r'https://[^@\s]+@github\.com', 'https://***@github.com', error_message)
# Remove any tokens that might appear in URLs for github hosts (format: https://token@host)
# Sanitize for default host and common enterprise hosts via helper
sanitized = sanitize_token_url_in_message(error_message, host=default_host())

# Remove any tokens that might appear as standalone values
sanitized = re.sub(r'(ghp_|gho_|ghu_|ghs_|ghr_)[a-zA-Z0-9_]+', '***', sanitized)
Expand All @@ -88,16 +90,17 @@ def _build_repo_url(self, repo_ref: str, use_ssh: bool = False) -> str:
Returns:
str: Repository URL suitable for git clone operations
"""
# Determine host to use. If repo_ref is namespaced with a host (like host/owner/repo),
# the DependencyReference.parse will have normalized repo_ref to owner/repo and stored host separately.
# For this method, callers should pass repo_ref as owner/repo and optionally set self.github_host.
host = getattr(self, 'github_host', None) or default_host()

if use_ssh:
# Use SSH URL for private repository access with SSH keys
return f"git@github.com:{repo_ref}.git"
return build_ssh_url(host, repo_ref)
elif self.github_token:
# Use GitHub Enterprise x-access-token format for authenticated access
# This is the standard format for GitHub Actions and Enterprise environments
return f"https://x-access-token:{self.github_token}@github.com/{repo_ref}.git"
return build_https_clone_url(host, repo_ref, token=self.github_token)
else:
# Use standard HTTPS URL for public repositories
return f"https://github.com/{repo_ref}"
return build_https_clone_url(host, repo_ref, token=None)

def _clone_with_fallback(self, repo_url_base: str, target_path: Path, **clone_kwargs) -> Repo:
"""Attempt to clone a repository with fallback authentication methods.
Expand Down Expand Up @@ -192,6 +195,9 @@ def resolve_git_reference(self, repo_ref: str) -> ResolvedReference:
if is_likely_commit:
# For commit SHAs, clone full repository first, then checkout the commit
try:
# Ensure host is set for enterprise repos
if getattr(dep_ref, 'host', None):
self.github_host = dep_ref.host
Comment on lines +198 to +200
Copy link

Copilot AI Oct 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This pattern of checking and setting self.github_host is duplicated three times (lines 199-200, 213-214, 228-229). Consider extracting this into a helper method or setting it once before the conditional blocks to reduce duplication.

Copilot uses AI. Check for mistakes.
repo = self._clone_with_fallback(dep_ref.repo_url, temp_dir)
commit = repo.commit(ref)
ref_type = GitReferenceType.COMMIT
Expand All @@ -204,6 +210,8 @@ def resolve_git_reference(self, repo_ref: str) -> ResolvedReference:
# For branches and tags, try shallow clone first
try:
# Try to clone with specific branch/tag first
if getattr(dep_ref, 'host', None):
self.github_host = dep_ref.host
repo = self._clone_with_fallback(
dep_ref.repo_url,
temp_dir,
Expand All @@ -213,12 +221,14 @@ def resolve_git_reference(self, repo_ref: str) -> ResolvedReference:
ref_type = GitReferenceType.BRANCH # Could be branch or tag
resolved_commit = repo.head.commit.hexsha
ref_name = ref

except GitCommandError:
# If branch/tag clone fails, try full clone and resolve reference
try:
if getattr(dep_ref, 'host', None):
self.github_host = dep_ref.host
repo = self._clone_with_fallback(dep_ref.repo_url, temp_dir)

# Try to resolve the reference
try:
# Try as branch first
Expand All @@ -236,11 +246,11 @@ def resolve_git_reference(self, repo_ref: str) -> ResolvedReference:
ref_name = ref
except IndexError:
raise ValueError(f"Reference '{ref}' not found in repository {dep_ref.repo_url}")

except Exception as e:
sanitized_error = self._sanitize_git_error(str(e))
raise ValueError(f"Could not resolve reference '{ref}' in repository {dep_ref.repo_url}: {sanitized_error}")

except GitCommandError as e:
# Check if this might be a private repository access issue
if "Authentication failed" in str(e) or "remote: Repository not found" in str(e):
Expand Down
87 changes: 53 additions & 34 deletions src/apm_cli/models/apm_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import re
import urllib.parse
from ..utils.github_host import is_github_hostname, default_host
import yaml
from dataclasses import dataclass
from enum import Enum
Expand Down Expand Up @@ -47,6 +48,7 @@ def __str__(self) -> str:
class DependencyReference:
"""Represents a reference to an APM dependency."""
repo_url: str # e.g., "user/repo" or "github.com/user/repo"
host: Optional[str] = None # Optional host (github.com or enterprise host)
Copy link

Copilot AI Oct 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment describes the host field but doesn't explain when it's None vs when it's populated. Consider clarifying that host is extracted from the parsed dependency string and defaults to default_host() if not explicitly specified.

Suggested change
host: Optional[str] = None # Optional host (github.com or enterprise host)
host: Optional[str] = None # Host (e.g., github.com or enterprise host); extracted from the dependency string if specified, otherwise None. If None, defaults to default_host() when needed.

Copilot uses AI. Check for mistakes.
reference: Optional[str] = None # e.g., "main", "v1.0.0", "abc123"
alias: Optional[str] = None # Optional alias for the dependency

Expand Down Expand Up @@ -81,26 +83,30 @@ def parse(cls, dependency_str: str) -> "DependencyReference":

# Handle SSH URLs first (before @ processing) to avoid conflict with alias separator
original_str = dependency_str
if dependency_str.startswith("git@github.com:"):
# For SSH URLs, extract repo part before @ processing
ssh_repo_part = dependency_str[len("git@github.com:"):]
if ssh_repo_part.endswith(".git"):
ssh_repo_part = None
host = None
# Match patterns like git@host:owner/repo.git
m = re.match(r'^git@([^:]+):(.+)$', dependency_str)
if m:
host = m.group(1)
ssh_repo_part = m.group(2)
if ssh_repo_part.endswith('.git'):
ssh_repo_part = ssh_repo_part[:-4]

# Handle reference and alias in SSH URL
reference = None
alias = None

if "@" in ssh_repo_part:
ssh_repo_part, alias = ssh_repo_part.rsplit("@", 1)
alias = alias.strip()

if "#" in ssh_repo_part:
repo_part, reference = ssh_repo_part.rsplit("#", 1)
reference = reference.strip()
else:
repo_part = ssh_repo_part

repo_url = repo_part.strip()
else:
# Handle alias (@alias) for non-SSH URLs
Expand All @@ -125,40 +131,47 @@ def parse(cls, dependency_str: str) -> "DependencyReference":
if repo_url.startswith(("https://", "http://")):
# Already a full URL - parse directly
parsed_url = urllib.parse.urlparse(repo_url)
host = parsed_url.hostname or ""
else:
# Safely construct GitHub URL from various input formats
# Safely construct a URL from various input formats. Support both github.com
# and GitHub Enterprise hostnames like orgname.ghe.com (org-specific GHE instances).
parts = repo_url.split("/")
if len(parts) >= 3 and parts[0] == "github.com":
# Format: github.com/user/repo (must be precisely so)
# host/user/repo OR user/repo (no host)
if len(parts) >= 3 and is_github_hostname(parts[0]):
# Format: github.com/user/repo OR orgname.ghe.com/user/repo OR custom host
host = parts[0]
user_repo = "/".join(parts[1:3])
elif len(parts) >= 2 and "." not in parts[0]:
# Format: user/repo (no dot in user part, so not a domain)
# Format: user/repo (no dot in first segment, so treat as user)
host = "github.com"
user_repo = "/".join(parts[:2])
else:
raise ValueError(f"Only GitHub repositories are supported. Use 'user/repo' or 'github.com/user/repo' format")
raise ValueError(f"Only GitHub repositories are supported. Use 'user/repo' or 'github.com/user/repo' or '<org>.ghe.com/user/repo' format")

# Validate format before URL construction (security critical)
if not user_repo or "/" not in user_repo:
raise ValueError(f"Invalid repository format: {repo_url}. Expected 'user/repo' or 'github.com/user/repo'")
parts = user_repo.split("/")
if len(parts) < 2 or not parts[0] or not parts[1]:
raise ValueError(f"Invalid repository format: {repo_url}. Expected 'user/repo' or 'github.com/user/repo'")
user, repo = parts[0], parts[1]
raise ValueError(f"Invalid repository format: {repo_url}. Expected 'user/repo' or 'github.com/user/repo' or '<org>.ghe.com/user/repo'")

uparts = user_repo.split("/")
if len(uparts) < 2 or not uparts[0] or not uparts[1]:
raise ValueError(f"Invalid repository format: {repo_url}. Expected 'user/repo' or 'github.com/user/repo' or '<org>.ghe.com/user/repo'")

user, repo = uparts[0], uparts[1]

# Security: validate characters to prevent injection
if not re.match(r'^[a-zA-Z0-9._-]+$', user):
raise ValueError(f"Invalid user name: {user}")
if not re.match(r'^[a-zA-Z0-9._-]+$', repo.rstrip('.git')):
raise ValueError(f"Invalid repository name: {repo}")
# Safely construct URL - this is now secure
github_url = urllib.parse.urljoin("https://github.com/", f"{user}/{repo}")

# Safely construct URL using detected host
github_url = urllib.parse.urljoin(f"https://{host}/", f"{user}/{repo}")
parsed_url = urllib.parse.urlparse(github_url)

# SECURITY: Validate that this is actually a GitHub URL with exact hostname match
if parsed_url.netloc != "github.com":

# SECURITY: Validate that this is actually a supported GitHub URL.
# Accept github.com and GitHub Enterprise hostnames like '<org>.ghe.com'. Use parsed_url.hostname
hostname = parsed_url.hostname or ""
if not is_github_hostname(hostname):
raise ValueError(f"Only GitHub repositories are supported, got hostname: {parsed_url.netloc}")

# Extract and validate the path
Expand Down Expand Up @@ -191,6 +204,10 @@ def parse(cls, dependency_str: str) -> "DependencyReference":
if repo_url.endswith(".git"):
repo_url = repo_url[:-4]

# If host not set via SSH or parsed parts, default to default_host()
if not host:
host = default_host()


# Validate repo format (should be user/repo)
if not re.match(r'^[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+$', repo_url):
Expand All @@ -199,19 +216,21 @@ def parse(cls, dependency_str: str) -> "DependencyReference":
# Validate alias characters if present
if alias and not re.match(r'^[a-zA-Z0-9._-]+$', alias):
raise ValueError(f"Invalid alias: {alias}. Aliases can only contain letters, numbers, dots, underscores, and hyphens")
return cls(repo_url=repo_url, reference=reference, alias=alias)

return cls(repo_url=repo_url, host=host, reference=reference, alias=alias)

def to_github_url(self) -> str:
"""Convert to full GitHub URL."""
return f"https://github.com/{self.repo_url}"

# Use stored host if present, otherwise default host (supports enterprise via GITHUB_HOST env var)
host = self.host or default_host()
return f"https://{host}/{self.repo_url}"

def get_display_name(self) -> str:
"""Get display name for this dependency (alias or repo name)."""
if self.alias:
return self.alias
return self.repo_url # Full repo URL for disambiguation

def __str__(self) -> str:
"""String representation of the dependency reference."""
result = self.repo_url
Expand Down
61 changes: 61 additions & 0 deletions src/apm_cli/utils/github_host.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""Utilities for handling GitHub and GitHub Enterprise hostnames and URLs."""
from typing import Optional
import os
import re
import urllib.parse


def default_host() -> str:
"""Return the default GitHub host (can be overridden via GITHUB_HOST env var)."""
return os.environ.get('GITHUB_HOST', 'github.com')


def is_github_hostname(hostname: Optional[str]) -> bool:
"""Return True if hostname should be treated as GitHub (cloud or enterprise).

Accepts 'github.com' and hosts that end with '.ghe.com'.
"""
if not hostname:
return False
h = hostname.lower()
if h == 'github.com':
return True
if h.endswith('.ghe.com'):
return True
# Allow explicit override via comma-separated env var APM_GITHUB_HOSTS
extra = os.environ.get('APM_GITHUB_HOSTS', '')
if extra:
for e in [x.strip().lower() for x in extra.split(',') if x.strip()]:
if h == e:
return True
return False


def build_ssh_url(host: str, repo_ref: str) -> str:
"""Build an SSH clone URL for the given host and repo_ref (owner/repo)."""
return f"git@{host}:{repo_ref}.git"


def build_https_clone_url(host: str, repo_ref: str, token: Optional[str] = None) -> str:
"""Build an HTTPS clone URL. If token provided, use x-access-token format (no escaping done).

Note: callers must avoid logging raw token-bearing URLs.
"""
if token:
# Use x-access-token format which is compatible with GitHub Enterprise and GH Actions
return f"https://x-access-token:{token}@{host}/{repo_ref}.git"
return f"https://{host}/{repo_ref}"


def sanitize_token_url_in_message(message: str, host: Optional[str] = None) -> str:
"""Sanitize occurrences of token-bearing https URLs for the given host in message.

If host is None, default_host() is used. Replaces https://<anything>@host with https://***@host
"""
if not host:
host = default_host()

# Escape host for regex
host_re = re.escape(host)
pattern = rf'https://[^@\s]+@{host_re}'
return re.sub(pattern, f'https://***@{host}', message)
Loading