-
Notifications
You must be signed in to change notification settings - Fork 36
Add GitHub Enterprise (GHE) hostname support to dependency parsing #8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
| validate_apm_package, | ||
| APMPackage | ||
| ) | ||
| from ..utils.github_host import build_https_clone_url, build_ssh_url, sanitize_token_url_in_message, is_github_hostname, default_host | ||
|
|
||
|
|
||
| class GitHubPackageDownloader: | ||
|
|
@@ -62,8 +63,9 @@ def _sanitize_git_error(self, error_message: str) -> str: | |
| """ | ||
| import re | ||
|
|
||
| # Remove any tokens that might appear in URLs (format: https://token@github.com) | ||
| sanitized = re.sub(r'https://[^@\s]+@github\.com', 'https://***@github.com', error_message) | ||
| # Remove any tokens that might appear in URLs for github hosts (format: https://token@host) | ||
| # Sanitize for default host and common enterprise hosts via helper | ||
| sanitized = sanitize_token_url_in_message(error_message, host=default_host()) | ||
|
|
||
| # Remove any tokens that might appear as standalone values | ||
| sanitized = re.sub(r'(ghp_|gho_|ghu_|ghs_|ghr_)[a-zA-Z0-9_]+', '***', sanitized) | ||
|
|
@@ -88,16 +90,17 @@ def _build_repo_url(self, repo_ref: str, use_ssh: bool = False) -> str: | |
| Returns: | ||
| str: Repository URL suitable for git clone operations | ||
| """ | ||
| # Determine host to use. If repo_ref is namespaced with a host (like host/owner/repo), | ||
| # the DependencyReference.parse will have normalized repo_ref to owner/repo and stored host separately. | ||
| # For this method, callers should pass repo_ref as owner/repo and optionally set self.github_host. | ||
| host = getattr(self, 'github_host', None) or default_host() | ||
|
|
||
| if use_ssh: | ||
| # Use SSH URL for private repository access with SSH keys | ||
| return f"git@github.com:{repo_ref}.git" | ||
| return build_ssh_url(host, repo_ref) | ||
| elif self.github_token: | ||
| # Use GitHub Enterprise x-access-token format for authenticated access | ||
| # This is the standard format for GitHub Actions and Enterprise environments | ||
| return f"https://x-access-token:{self.github_token}@github.com/{repo_ref}.git" | ||
| return build_https_clone_url(host, repo_ref, token=self.github_token) | ||
| else: | ||
| # Use standard HTTPS URL for public repositories | ||
| return f"https://github.com/{repo_ref}" | ||
| return build_https_clone_url(host, repo_ref, token=None) | ||
|
|
||
| def _clone_with_fallback(self, repo_url_base: str, target_path: Path, **clone_kwargs) -> Repo: | ||
| """Attempt to clone a repository with fallback authentication methods. | ||
|
|
@@ -192,6 +195,9 @@ def resolve_git_reference(self, repo_ref: str) -> ResolvedReference: | |
| if is_likely_commit: | ||
| # For commit SHAs, clone full repository first, then checkout the commit | ||
| try: | ||
| # Ensure host is set for enterprise repos | ||
| if getattr(dep_ref, 'host', None): | ||
| self.github_host = dep_ref.host | ||
|
Comment on lines
+198
to
+200
|
||
| repo = self._clone_with_fallback(dep_ref.repo_url, temp_dir) | ||
| commit = repo.commit(ref) | ||
| ref_type = GitReferenceType.COMMIT | ||
|
|
@@ -204,6 +210,8 @@ def resolve_git_reference(self, repo_ref: str) -> ResolvedReference: | |
| # For branches and tags, try shallow clone first | ||
| try: | ||
| # Try to clone with specific branch/tag first | ||
| if getattr(dep_ref, 'host', None): | ||
| self.github_host = dep_ref.host | ||
| repo = self._clone_with_fallback( | ||
| dep_ref.repo_url, | ||
| temp_dir, | ||
|
|
@@ -213,12 +221,14 @@ def resolve_git_reference(self, repo_ref: str) -> ResolvedReference: | |
| ref_type = GitReferenceType.BRANCH # Could be branch or tag | ||
| resolved_commit = repo.head.commit.hexsha | ||
| ref_name = ref | ||
|
|
||
| except GitCommandError: | ||
| # If branch/tag clone fails, try full clone and resolve reference | ||
| try: | ||
| if getattr(dep_ref, 'host', None): | ||
| self.github_host = dep_ref.host | ||
| repo = self._clone_with_fallback(dep_ref.repo_url, temp_dir) | ||
|
|
||
| # Try to resolve the reference | ||
| try: | ||
| # Try as branch first | ||
|
|
@@ -236,11 +246,11 @@ def resolve_git_reference(self, repo_ref: str) -> ResolvedReference: | |
| ref_name = ref | ||
| except IndexError: | ||
| raise ValueError(f"Reference '{ref}' not found in repository {dep_ref.repo_url}") | ||
|
|
||
| except Exception as e: | ||
| sanitized_error = self._sanitize_git_error(str(e)) | ||
| raise ValueError(f"Could not resolve reference '{ref}' in repository {dep_ref.repo_url}: {sanitized_error}") | ||
|
|
||
| except GitCommandError as e: | ||
| # Check if this might be a private repository access issue | ||
| if "Authentication failed" in str(e) or "remote: Repository not found" in str(e): | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -2,6 +2,7 @@ | |||||
|
|
||||||
| import re | ||||||
| import urllib.parse | ||||||
| from ..utils.github_host import is_github_hostname, default_host | ||||||
| import yaml | ||||||
| from dataclasses import dataclass | ||||||
| from enum import Enum | ||||||
|
|
@@ -47,6 +48,7 @@ def __str__(self) -> str: | |||||
| class DependencyReference: | ||||||
| """Represents a reference to an APM dependency.""" | ||||||
| repo_url: str # e.g., "user/repo" or "github.com/user/repo" | ||||||
| host: Optional[str] = None # Optional host (github.com or enterprise host) | ||||||
|
||||||
| host: Optional[str] = None # Optional host (github.com or enterprise host) | |
| host: Optional[str] = None # Host (e.g., github.com or enterprise host); extracted from the dependency string if specified, otherwise None. If None, defaults to default_host() when needed. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| """Utilities for handling GitHub and GitHub Enterprise hostnames and URLs.""" | ||
| from typing import Optional | ||
| import os | ||
| import re | ||
| import urllib.parse | ||
|
|
||
|
|
||
def default_host() -> str:
    """Return the default GitHub host.

    The ``GITHUB_HOST`` environment variable overrides the default when it is
    set to a non-blank value; otherwise ``'github.com'`` is returned.

    Returns:
        str: Hostname to use when a dependency does not specify one.
    """
    # An exported-but-empty variable (``GITHUB_HOST=``) must not produce an
    # empty host, which would lead to URLs such as "https:///owner/repo".
    configured = os.environ.get('GITHUB_HOST', '').strip()
    return configured or 'github.com'
|
|
||
|
|
||
def is_github_hostname(hostname: Optional[str]) -> bool:
    """Tell whether ``hostname`` should be treated as a GitHub host.

    A hostname qualifies (case-insensitively) when it is ``github.com``, ends
    with ``.ghe.com``, or equals one of the comma-separated entries of the
    ``APM_GITHUB_HOSTS`` environment variable.

    Args:
        hostname: Hostname to classify; ``None`` or an empty string is never
            a GitHub host.

    Returns:
        bool: True for GitHub cloud, ``*.ghe.com``, or explicitly listed hosts.
    """
    if not hostname:
        return False

    candidate = hostname.lower()
    if candidate == 'github.com' or candidate.endswith('.ghe.com'):
        return True

    # Explicit allow-list: entries are trimmed and lower-cased, blanks ignored.
    configured = os.environ.get('APM_GITHUB_HOSTS', '')
    allowed = {entry.strip().lower() for entry in configured.split(',')}
    allowed.discard('')
    return candidate in allowed
|
|
||
|
|
||
def build_ssh_url(host: str, repo_ref: str) -> str:
    """Return the scp-style SSH clone URL for a repository.

    Args:
        host: Git server hostname (e.g. ``github.com``).
        repo_ref: Repository reference in ``owner/repo`` form.

    Returns:
        str: A URL of the form ``git@<host>:<repo_ref>.git``.
    """
    return "git@{}:{}.git".format(host, repo_ref)
|
|
||
|
|
||
def build_https_clone_url(host: str, repo_ref: str, token: Optional[str] = None) -> str:
    """Build an HTTPS clone URL for ``repo_ref`` on ``host``.

    Args:
        host: Git server hostname (e.g. ``github.com`` or an enterprise host).
        repo_ref: Repository reference in ``owner/repo`` form.
        token: Optional access token. When given, it is embedded using the
            ``x-access-token:<token>@`` userinfo form and the URL gets a
            ``.git`` suffix.

    Returns:
        str: ``https://x-access-token:<token>@<host>/<repo_ref>.git`` when a
        token is supplied, otherwise ``https://<host>/<repo_ref>``.

    Note: callers must avoid logging raw token-bearing URLs.
    """
    if token:
        # Percent-encode the token so characters that are reserved in the
        # userinfo part of a URL ('@', ':', '/', '#', ...) cannot corrupt the
        # authority. Ordinary GitHub tokens (alphanumerics and '_') are
        # left unchanged by this.
        safe_token = urllib.parse.quote(token, safe='')
        return f"https://x-access-token:{safe_token}@{host}/{repo_ref}.git"
    return f"https://{host}/{repo_ref}"
|
|
||
|
|
||
def sanitize_token_url_in_message(message: str, host: Optional[str] = None) -> str:
    """Mask credentials embedded in HTTPS URLs for ``host`` inside ``message``.

    Every occurrence of ``https://<credentials>@<host>`` is rewritten to
    ``https://***@<host>``. Only URLs for the single given host are touched;
    callers working with several hosts must call this once per host.

    Args:
        message: Text (typically a git error message) that may contain
            token-bearing URLs.
        host: Hostname whose URLs should be sanitized. If falsy,
            ``default_host()`` is used.

    Returns:
        str: ``message`` with the credential part of matching URLs masked.
    """
    if not host:
        host = default_host()

    pattern = rf'https://[^@\s]+@{re.escape(host)}'
    masked = f'https://***@{host}'
    # Hostnames are case-insensitive, so match regardless of case; otherwise
    # a token in "https://tok@GitHub.com/..." would slip through unmasked.
    # A callable replacement keeps any backslash in ``host`` literal instead
    # of letting re.sub interpret it as a template escape.
    return re.sub(pattern, lambda _match: masked, message, flags=re.IGNORECASE)
Uh oh!
There was an error while loading. Please reload this page.