Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed

- Remove redundant `seen` set from `_scan_patterns()` discovery walk (#918)
- `apm marketplace build` now respects `GITHUB_HOST` for GitHub Enterprise repos -- ref resolution, token lookup, and metadata fetch all use the configured host instead of hardcoded `github.com`. `git ls-remote` is authenticated so private repos work without separate credential setup. (#1008)
- `apm marketplace build` now accepts multiple Git URL forms (GitHub, GHES, GitLab, Bitbucket, ADO, SSH) for `type: url` sources; URLs are parsed via `DependencyReference.parse()`. Only the repository coordinate is taken from the URL -- host resolution is still driven by `GITHUB_HOST`, so non-`github.com` hosts require `GITHUB_HOST` to be set accordingly. (#1008)

## [0.10.0] - 2026-04-27

Expand Down
11 changes: 11 additions & 0 deletions docs/src/content/docs/guides/marketplace-authoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,17 @@ Run it first when `build` or `publish` fails in an unfamiliar environment.
| `No cached refs (offline)` | First-ever `--offline` build. | Run once online to populate the cache, then retry offline. |
| `git ls-remote` auth failure | Private source without credentials. | Ensure your git credentials (SSH agent or `gh auth login`) can reach the source repo. |

### GitHub Enterprise Server

`apm marketplace build` respects the `GITHUB_HOST` environment variable. Set it before building to resolve packages from a GHES instance:

```bash
export GITHUB_HOST=github.company.com
apm marketplace build
```

Token resolution and metadata fetch use the same host, so existing auth configuration (see [Authentication](../../getting-started/authentication/)) works automatically. `git ls-remote` calls are authenticated with the resolved token, so private GHES repos work without a separate git credential helper. `type: url` sources accept Git-style repository URLs as input, including HTTPS and SSH forms, but only the repository coordinate is taken from the URL: APM resolves refs, auth, and metadata against `GITHUB_HOST`, whatever host the URL names. Do not rely on `type: url` for true cross-host resolution.

## Discovering upgrades

`apm marketplace outdated` compares the currently resolved version of each package (as captured in `marketplace.json`) against the latest tag available in the source repo.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ If `ADO_APM_PAT` is set but ADO returns 401, APM silently retries with the `az`
export GITHUB_HOST=github.company.com
export GITHUB_APM_PAT_MYORG=ghp_ghes_token
apm install myorg/internal-package # resolves to github.company.com
apm marketplace build # also resolves to github.company.com
```

## GHE Cloud data residency (*.ghe.com)
Expand Down
104 changes: 92 additions & 12 deletions src/apm_cli/marketplace/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

if TYPE_CHECKING:
from ..core.auth import HostInfo

import yaml

Expand All @@ -42,6 +45,7 @@
from .semver import SemVer, parse_semver, satisfies_range
from .tag_pattern import build_tag_regex, render_tag
from ..utils.path_security import ensure_path_within
from ..utils.github_host import default_host
from .yml_schema import MarketplaceYml, PackageEntry, load_marketplace_yml

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -151,6 +155,9 @@ def __init__(
self._auth_resolver = auth_resolver
# Resolved once per build, used by worker threads (read-only).
self._github_token: Optional[str] = None
self._host: str = default_host() or "github.com"
self._host_info: Optional["HostInfo"] = None
self._auth_resolved: bool = False

# -- lazy loaders -------------------------------------------------------

Expand All @@ -161,12 +168,32 @@ def _load_yml(self) -> MarketplaceYml:

def _get_resolver(self) -> RefResolver:
if self._resolver is None:
self._ensure_auth()
self._resolver = RefResolver(
timeout_seconds=self._options.timeout_seconds,
offline=self._options.offline,
host=self._host,
token=self._github_token,
)
return self._resolver

def _ensure_auth(self) -> None:
"""Lazily resolve host classification and GitHub token.

Short-circuits when already resolved (even if no token was found)
or when running in offline mode. Offline mode is still marked as
resolved so repeated calls remain idempotent. Called by
``_get_resolver()`` so both ``resolve()`` and ``build()`` benefit
from authenticated ``git ls-remote`` when available.
"""
if self._auth_resolved:
return
if self._options.offline:
self._auth_resolved = True
return
self._github_token = self._resolve_github_token()
self._auth_resolved = True

# -- output path --------------------------------------------------------

def _output_path(self) -> Path:
Expand Down Expand Up @@ -365,6 +392,11 @@ def resolve(self) -> ResolveResult:
results: Dict[int, ResolvedPackage] = {}
errors: List[Tuple[str, str]] = []

# Eagerly resolve auth + create the shared RefResolver before
# spawning workers -- avoids a race on _ensure_auth() and
# matches the pattern used in _prefetch_metadata().
self._get_resolver()

with ThreadPoolExecutor(
max_workers=min(self._options.concurrency, len(entries))
) as pool:
Expand Down Expand Up @@ -413,16 +445,60 @@ def _fetch_remote_metadata(self, pkg: ResolvedPackage) -> Optional[Dict[str, str
When a GitHub token is available (via ``self._github_token``), it
is included as an ``Authorization`` header so private repos can be
accessed.

For non-github.com GitHub-family hosts (GHES, GHE Cloud), uses the
GitHub REST API instead of raw.githubusercontent.com (which is only
available for github.com). For non-GitHub hosts, metadata
enrichment is skipped.
"""
try:
path_prefix = f"{pkg.subdir}/" if pkg.subdir else ""
url = (
f"https://raw.githubusercontent.com/"
f"{pkg.source_repo}/{pkg.sha}/{path_prefix}apm.yml"
)
req = urllib.request.Request(url)
if self._github_token:
req.add_header("Authorization", f"token {self._github_token}")
file_path = f"{path_prefix}apm.yml"

# Determine URL strategy based on host kind
host_kind = self._host_info.kind if self._host_info else "github"

if host_kind not in ("github", "ghe_cloud", "ghes"):
# Non-GitHub hosts -- skip metadata enrichment
logger.debug(
"Skipping metadata fetch for %s (non-GitHub host: %s)",
pkg.name,
self._host,
)
return None

if host_kind == "ghe_cloud" and not self._github_token:
logger.debug(
"Skipping metadata fetch for %s (GHE Cloud requires auth)",
pkg.name,
)
return None

if self._host == "github.com":
# github.com -- use fast raw.githubusercontent.com CDN
url = (
f"https://raw.githubusercontent.com/"
f"{pkg.source_repo}/{pkg.sha}/{file_path}"
)
req = urllib.request.Request(url)
if self._github_token:
req.add_header("Authorization", f"token {self._github_token}")
else:
# GHES / GHE Cloud -- use REST API
api_base = (
self._host_info.api_base
if self._host_info
else None
) or f"https://{self._host}/api/v3"
url = (
f"{api_base}/repos/{pkg.source_repo}/contents/{file_path}"
f"?ref={pkg.sha}"
)
req = urllib.request.Request(url)
req.add_header("Accept", "application/vnd.github.raw")
if self._github_token:
req.add_header("Authorization", f"token {self._github_token}")

with urllib.request.urlopen(req, timeout=5) as resp: # noqa: S310
raw = resp.read().decode("utf-8")
data = yaml.safe_load(raw)
Expand Down Expand Up @@ -460,13 +536,17 @@ def _resolve_github_token(self) -> Optional[str]:
auth failures are logged at debug and silently ignored.
"""
try:
from ..core.auth import AuthResolver # lazy import

resolver = self._auth_resolver
if resolver is None:
from ..core.auth import AuthResolver # lazy import

resolver = AuthResolver()
self._auth_resolver = resolver
ctx = resolver.resolve("github.com") # type: ignore[union-attr]
# Always classify the host, regardless of token availability,
# so _fetch_remote_metadata() can branch on host kind.
if self._host_info is None:
self._host_info = AuthResolver.classify_host(self._host)
ctx = resolver.resolve(self._host) # type: ignore[union-attr]
if ctx.token:
logger.debug("Resolved GitHub token for metadata fetch (source=%s)", ctx.source)
return ctx.token
Expand All @@ -492,7 +572,7 @@ def _prefetch_metadata(
return {}

# Resolve token once -- threads read self._github_token (immutable).
self._github_token = self._resolve_github_token()
self._ensure_auth()

results: Dict[str, Dict[str, str]] = {}
workers = min(self._options.concurrency, len(resolved))
Expand Down
19 changes: 16 additions & 3 deletions src/apm_cli/marketplace/ref_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from .errors import GitLsRemoteError, OfflineMissError
from ._git_utils import redact_token as _redact_token
from .git_stderr import translate_git_stderr
from ..utils.github_host import default_host, build_https_clone_url

__all__ = [
"RemoteRef",
Expand Down Expand Up @@ -136,6 +137,10 @@ class RefResolver:
stderr_translator_enabled:
When ``True`` (default), stderr from failed ``git`` calls is
classified via ``translate_git_stderr``.
token:
Optional GitHub PAT to embed in the ``https://`` URL. When set
the URL uses ``x-access-token`` authentication; when ``None``
(default) git runs unauthenticated.
"""

def __init__(
Expand All @@ -144,10 +149,14 @@ def __init__(
timeout_seconds: float = 10.0,
offline: bool = False,
stderr_translator_enabled: bool = True,
host: Optional[str] = None,
token: Optional[str] = None,
) -> None:
self._timeout = timeout_seconds
self._offline = offline
self._stderr_translator = stderr_translator_enabled
self._host: str = host or default_host() or "github.com"
self._token: Optional[str] = token
self._cache = RefCache()
self._lock = threading.Lock()
# Per-remote locks to serialise calls to the same remote while
Expand All @@ -166,7 +175,7 @@ def _remote_lock(self, owner_repo: str) -> threading.Lock:
return self._remote_locks[owner_repo]

def list_remote_refs(self, owner_repo: str) -> List[RemoteRef]:
"""Fetch all tags and heads from ``https://github.com/<owner_repo>.git``.
"""Fetch all tags and heads from the configured Git host.

Results are cached; subsequent calls for the same remote return
the cached value until the TTL expires.
Expand Down Expand Up @@ -198,7 +207,9 @@ def list_remote_refs(self, owner_repo: str) -> List[RemoteRef]:
if self._offline:
raise OfflineMissError(package="", remote=owner_repo)

url = f"https://github.com/{owner_repo}.git"
url = build_https_clone_url(self._host, owner_repo, token=self._token)
if not url.endswith(".git"):
url += ".git"
env = {**os.environ, "GIT_TERMINAL_PROMPT": "0", "GIT_ASKPASS": "echo"}
try:
result = subprocess.run(
Expand Down Expand Up @@ -273,7 +284,9 @@ def resolve_ref_sha(self, owner_repo: str, ref: str = "HEAD") -> str:
GitLsRemoteError
When the ref does not exist or the subprocess fails.
"""
url = f"https://github.com/{owner_repo}.git"
url = build_https_clone_url(self._host, owner_repo, token=self._token)
if not url.endswith(".git"):
url += ".git"
env = {**os.environ, "GIT_TERMINAL_PROMPT": "0", "GIT_ASKPASS": "echo"}
try:
result = subprocess.run(
Expand Down
38 changes: 21 additions & 17 deletions src/apm_cli/marketplace/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from typing import Callable, Optional, Tuple

from ..utils.path_security import PathTraversalError, validate_path_segments
from ..models.dependency.reference import DependencyReference
from .client import fetch_or_cache
from .errors import MarketplaceFetchError, PluginNotFoundError
from .models import MarketplacePlugin
Expand Down Expand Up @@ -95,25 +96,28 @@ def _resolve_github_source(source: dict) -> str:
def _resolve_url_source(source: dict) -> str:
"""Resolve a ``url`` source type.

APM is Git-native -- URL sources that point to GitHub repos are
resolved to ``owner/repo``. Non-GitHub URLs are rejected.
Delegates to ``DependencyReference.parse()`` to extract the
``owner/repo`` coordinate from any valid Git URL (GitHub, GHES, GitLab,
Bitbucket, ADO, SSH). The URL's host is *not* preserved -- downstream
resolution (``RefResolver``) uses the configured ``GITHUB_HOST`` for
``git ls-remote``. True cross-host resolution is tracked in #1010.
"""
url = source.get("url", "")
# Try to extract owner/repo from common GitHub URL patterns
for prefix in ("https://github.com/", "http://github.com/"):
if url.lower().startswith(prefix):
path = url[len(prefix) :].rstrip("/").split("?")[0]
# Remove .git suffix
if path.endswith(".git"):
path = path[:-4]
parts = path.split("/")
if len(parts) >= 2:
return f"{parts[0]}/{parts[1]}"

raise ValueError(
f"Cannot resolve URL source '{url}' to a Git coordinate. "
f"APM requires Git-based sources (owner/repo format)."
)
if not url:
raise ValueError("URL source requires a non-empty 'url' field")
try:
dep = DependencyReference.parse(url)
except ValueError as exc:
raise ValueError(
f"Cannot resolve URL source '{url}': {exc}"
) from exc
if dep.is_local:
raise ValueError(
f"URL source '{url}' resolves to a local path, not a Git coordinate."
)
if dep.reference:
return f"{dep.repo_url}#{dep.reference}"
return dep.repo_url
Comment thread
sergio-sisternes-epam marked this conversation as resolved.


def _resolve_git_subdir_source(source: dict) -> str:
Expand Down
20 changes: 20 additions & 0 deletions tests/unit/commands/test_marketplace_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,3 +427,23 @@ def test_no_traceback_without_verbose(self, MockBuilder, runner, yml_cwd):
assert result.exit_code == 1
assert "Traceback" not in result.output
assert "Build failed" in result.output


# ---------------------------------------------------------------------------
# GHE host support
# ---------------------------------------------------------------------------


class TestBuildGHEHost:
    """build command -- GHE / custom host scenarios."""

    def test_build_ghe_host_env(
        self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
    ) -> None:
        """MarketplaceBuilder adopts GITHUB_HOST as its target host."""
        monkeypatch.setenv("GITHUB_HOST", "corp.ghe.com")
        # Import inside the test, after GITHUB_HOST has been set.
        from apm_cli.marketplace.builder import MarketplaceBuilder

        yml_path = tmp_path / "marketplace.yml"
        yml_path.write_text(
            "name: test\noutput: marketplace.json\npackages: []\n",
            encoding="utf-8",
        )
        builder = MarketplaceBuilder(yml_path)
        # Only the host selection is checked here; no token lookup is triggered.
        assert builder._host == "corp.ghe.com"
Loading
Loading