diff --git a/pkg/cli/fetch.go b/pkg/cli/fetch.go index 13fcfe53eb8..94fe710eeca 100644 --- a/pkg/cli/fetch.go +++ b/pkg/cli/fetch.go @@ -83,7 +83,7 @@ func fetchRemoteWorkflow(spec *WorkflowSpec, verbose bool) (*FetchedWorkflow, er } // Resolve the ref to a commit SHA for source tracking - commitSHA, err := parser.ResolveRefToSHA(owner, repo, ref) + commitSHA, err := parser.ResolveRefToSHAForHost(owner, repo, ref, spec.Host) if err != nil { remoteWorkflowLog.Printf("Failed to resolve ref to SHA: %v", err) // Continue without SHA - we can still fetch the content @@ -96,7 +96,7 @@ func fetchRemoteWorkflow(spec *WorkflowSpec, verbose bool) (*FetchedWorkflow, er } // Download the workflow file from GitHub - content, err := parser.DownloadFileFromGitHub(owner, repo, spec.WorkflowPath, ref) + content, err := parser.DownloadFileFromGitHubForHost(owner, repo, spec.WorkflowPath, ref, spec.Host) if err != nil { // Try with common workflow directory prefixes if the direct path fails. // This handles short workflow names without path separators (e.g. "my-workflow.md"). 
@@ -107,7 +107,7 @@ func fetchRemoteWorkflow(spec *WorkflowSpec, verbose bool) (*FetchedWorkflow, er altPath += ".md" } remoteWorkflowLog.Printf("Direct path failed, trying: %s", altPath) - if altContent, altErr := parser.DownloadFileFromGitHub(owner, repo, altPath, ref); altErr == nil { + if altContent, altErr := parser.DownloadFileFromGitHubForHost(owner, repo, altPath, ref, spec.Host); altErr == nil { return &FetchedWorkflow{ Content: altContent, CommitSHA: commitSHA, diff --git a/pkg/cli/spec.go b/pkg/cli/spec.go index 5a5a158a4ec..02172136ebd 100644 --- a/pkg/cli/spec.go +++ b/pkg/cli/spec.go @@ -32,6 +32,7 @@ type WorkflowSpec struct { WorkflowPath string // e.g., "workflows/workflow-name.md" WorkflowName string // e.g., "workflow-name" IsWildcard bool // true if this is a wildcard spec (e.g., "owner/repo/*") + Host string // explicit hostname from URL (e.g., "github.com", "myorg.ghe.com"); empty = use configured GH_HOST } // isLocalWorkflowPath checks if a path refers to a local filesystem workflow. 
@@ -148,19 +149,21 @@ func parseRepoSpec(repoSpec string) (*RepoSpec, error) { // - https://raw.githubusercontent.com/owner/repo/refs/heads/branch/path/to/workflow.md // - https://raw.githubusercontent.com/owner/repo/COMMIT_SHA/path/to/workflow.md // - https://raw.githubusercontent.com/owner/repo/refs/tags/tag/path/to/workflow.md +// - https://myorg.ghe.com/owner/repo/blob/branch/path/to/workflow.md (GHE) func parseGitHubURL(spec string) (*WorkflowSpec, error) { specLog.Printf("Parsing GitHub URL: %s", spec) - // First validate that this is a GitHub URL (github.com or raw.githubusercontent.com) parsedURL, err := url.Parse(spec) if err != nil { specLog.Printf("Failed to parse URL: %v", err) return nil, fmt.Errorf("invalid URL: %w", err) } - // Must be a GitHub URL - if parsedURL.Host != "github.com" && parsedURL.Host != "raw.githubusercontent.com" { - specLog.Printf("Invalid host: %s", parsedURL.Host) - return nil, errors.New("URL must be from github.com or raw.githubusercontent.com") + if parsedURL.Host == "" { + return nil, fmt.Errorf("URL must include a host: %s", spec) + } + + if !isGitHubHost(parsedURL.Host) { + return nil, fmt.Errorf("URL must be from github.com or a GitHub Enterprise host (*.ghe.com), got %q", parsedURL.Host) } owner, repo, ref, filePath, err := parser.ParseRepoFileURL(spec) @@ -169,7 +172,7 @@ func parseGitHubURL(spec string) (*WorkflowSpec, error) { return nil, err } - specLog.Printf("Parsed GitHub URL: owner=%s, repo=%s, ref=%s, path=%s", owner, repo, ref, filePath) + specLog.Printf("Parsed GitHub URL: owner=%s, repo=%s, ref=%s, path=%s, host=%s", owner, repo, ref, filePath, parsedURL.Host) // Ensure the file path ends with .md if !strings.HasSuffix(filePath, ".md") { @@ -181,6 +184,13 @@ func parseGitHubURL(spec string) (*WorkflowSpec, error) { return nil, fmt.Errorf("invalid GitHub URL: '%s/%s' does not look like a valid GitHub repository", owner, repo) } + // For raw.githubusercontent.com content, the API host is github.com. 
+ // For all other hosts (github.com, GHE), use the URL's host as-is. + host := parsedURL.Host + if host == "raw.githubusercontent.com" { + host = "github.com" + } + return &WorkflowSpec{ RepoSpec: RepoSpec{ RepoSlug: fmt.Sprintf("%s/%s", owner, repo), @@ -188,6 +198,7 @@ func parseGitHubURL(spec string) (*WorkflowSpec, error) { }, WorkflowPath: filePath, WorkflowName: normalizeWorkflowID(filePath), + Host: host, }, nil } @@ -195,6 +206,15 @@ func parseGitHubURL(spec string) (*WorkflowSpec, error) { // Format: owner/repo/workflows/workflow-name[@version] or owner/repo/workflow-name[@version] // Also supports full GitHub URLs like https://github.com/owner/repo/blob/branch/path/to/workflow.md // Also supports local paths like ./workflows/workflow-name.md + +// isGitHubHost returns true if the given host is a recognized GitHub or GitHub Enterprise host: +// github.com, raw.githubusercontent.com, or any *.ghe.com or *.github.com host. +func isGitHubHost(host string) bool { + return host == "github.com" || + host == "raw.githubusercontent.com" || + strings.HasSuffix(host, ".ghe.com") || + strings.HasSuffix(host, ".github.com") +} func parseWorkflowSpec(spec string) (*WorkflowSpec, error) { specLog.Printf("Parsing workflow spec: %q", spec) diff --git a/pkg/cli/spec_test.go b/pkg/cli/spec_test.go index 1831f7da036..fb920ab0aa1 100644 --- a/pkg/cli/spec_test.go +++ b/pkg/cli/spec_test.go @@ -132,6 +132,7 @@ func TestParseWorkflowSpec(t *testing.T) { wantWorkflowPath string wantWorkflowName string wantVersion string + wantHost string wantErr bool errContains string }{ @@ -142,6 +143,7 @@ func TestParseWorkflowSpec(t *testing.T) { wantWorkflowPath: "workflows/release-issue-linker.md", wantWorkflowName: "release-issue-linker", wantVersion: "main", + wantHost: "github.com", wantErr: false, }, { @@ -181,10 +183,20 @@ func TestParseWorkflowSpec(t *testing.T) { wantErr: false, }, { - name: "GitHub URL - invalid domain", + name: "GitHub URL - GHE.com instance", + spec: 
"https://myorg.ghe.com/owner/repo/blob/main/workflows/test.md", + wantRepo: "owner/repo", + wantWorkflowPath: "workflows/test.md", + wantWorkflowName: "test", + wantVersion: "main", + wantHost: "myorg.ghe.com", + wantErr: false, + }, + { + name: "GitHub URL - non-github.com host is rejected (e.g. gitlab.com)", spec: "https://gitlab.com/owner/repo/blob/main/workflows/test.md", wantErr: true, - errContains: "must be from github.com", + errContains: "github.com", }, { name: "GitHub URL - missing file extension", @@ -328,6 +340,9 @@ func TestParseWorkflowSpec(t *testing.T) { if spec.Version != tt.wantVersion { t.Errorf("parseWorkflowSpec() version = %q, want %q", spec.Version, tt.wantVersion) } + if tt.wantHost != "" && spec.Host != tt.wantHost { + t.Errorf("parseWorkflowSpec() host = %q, want %q", spec.Host, tt.wantHost) + } }) } } diff --git a/pkg/parser/import_remote_nested_test.go b/pkg/parser/import_remote_nested_test.go index 9c8c716b6e9..034ba81d75a 100644 --- a/pkg/parser/import_remote_nested_test.go +++ b/pkg/parser/import_remote_nested_test.go @@ -470,7 +470,7 @@ func TestResolveRemoteSymlinksPathConstruction(t *testing.T) { // GitHub API access, which is tested in integration tests. 
t.Run("single component path returns error", func(t *testing.T) { - _, err := resolveRemoteSymlinks("owner", "repo", "file.md", "main") + _, err := resolveRemoteSymlinks(nil, "owner", "repo", "file.md", "main") assert.Error(t, err, "Single component path has no directories to resolve") }) diff --git a/pkg/parser/remote_fetch.go b/pkg/parser/remote_fetch.go index 7388e7f3d98..8ba11575062 100644 --- a/pkg/parser/remote_fetch.go +++ b/pkg/parser/remote_fetch.go @@ -249,7 +249,7 @@ func downloadIncludeFromWorkflowSpec(spec string, cache *ImportCache) (string, e var sha string if cache != nil { // Only resolve SHA if we're using the cache - resolvedSHA, err := resolveRefToSHA(owner, repo, ref) + resolvedSHA, err := resolveRefToSHA(owner, repo, ref, "") if err != nil { // SHA resolution failure (including auth errors) only means we cannot cache; the // actual file download will be attempted below and may succeed via git fallback for @@ -316,10 +316,15 @@ func downloadIncludeFromWorkflowSpec(spec string, cache *ImportCache) (string, e // resolveRefToSHAViaGit resolves a git ref to SHA using git ls-remote // This is a fallback for when GitHub API authentication fails -func resolveRefToSHAViaGit(owner, repo, ref string) (string, error) { +func resolveRefToSHAViaGit(owner, repo, ref, host string) (string, error) { remoteLog.Printf("Attempting git ls-remote fallback for ref resolution: %s/%s@%s", owner, repo, ref) - githubHost := GetGitHubHostForRepo(owner, repo) + var githubHost string + if host != "" { + githubHost = "https://" + host + } else { + githubHost = GetGitHubHostForRepo(owner, repo) + } repoURL := fmt.Sprintf("%s/%s/%s.git", githubHost, owner, repo) // Try to resolve the ref using git ls-remote @@ -365,7 +370,7 @@ func resolveRefToSHAViaGit(owner, repo, ref string) (string, error) { } // resolveRefToSHA resolves a git ref (branch, tag, or SHA) to its commit SHA -func resolveRefToSHA(owner, repo, ref string) (string, error) { +func resolveRefToSHA(owner, repo, 
ref, host string) (string, error) { // If ref is already a full SHA (40 hex characters), return it as-is if len(ref) == 40 && gitutil.IsHexString(ref) { return ref, nil @@ -374,14 +379,21 @@ func resolveRefToSHA(owner, repo, ref string) (string, error) { // Use gh CLI to get the commit SHA for the ref // This works for branches, tags, and short SHAs // Using go-gh to properly handle enterprise GitHub instances via GH_HOST - stdout, stderr, err := gh.Exec("api", fmt.Sprintf("/repos/%s/%s/commits/%s", owner, repo, ref), "--jq", ".sha") + apiPath := fmt.Sprintf("/repos/%s/%s/commits/%s", owner, repo, ref) + var args []string + if host != "" { + args = []string{"api", "--hostname", host, apiPath, "--jq", ".sha"} + } else { + args = []string{"api", apiPath, "--jq", ".sha"} + } + stdout, stderr, err := gh.Exec(args...) if err != nil { outputStr := stderr.String() if gitutil.IsAuthError(outputStr) { remoteLog.Printf("GitHub API authentication failed, attempting git ls-remote fallback for %s/%s@%s", owner, repo, ref) // Try fallback using git ls-remote for public repositories - sha, gitErr := resolveRefToSHAViaGit(owner, repo, ref) + sha, gitErr := resolveRefToSHAViaGit(owner, repo, ref, host) if gitErr != nil { // If git fallback also fails, return both errors return "", fmt.Errorf("failed to resolve ref via GitHub API (auth error) and git ls-remote: API error: %w, Git error: %w", err, gitErr) @@ -406,20 +418,29 @@ func resolveRefToSHA(owner, repo, ref string) (string, error) { // downloadFileViaGit downloads a file from a Git repository using git commands // This is a fallback for when GitHub API authentication fails -func downloadFileViaGit(owner, repo, path, ref string) ([]byte, error) { +func downloadFileViaGit(owner, repo, path, ref, host string) ([]byte, error) { remoteLog.Printf("Attempting git fallback for %s/%s/%s@%s", owner, repo, path, ref) // First, try via raw.githubusercontent.com — no auth required for public repos and // no dependency on git being 
installed. - content, rawErr := downloadFileViaRawURL(owner, repo, path, ref) - if rawErr == nil { - return content, nil + // Only attempt raw URL for github.com repos (not GHE) since raw.githubusercontent.com + // only serves public GitHub content. + if host == "" || host == "github.com" { + content, rawErr := downloadFileViaRawURL(owner, repo, path, ref) + if rawErr == nil { + return content, nil + } + remoteLog.Printf("Raw URL download failed for %s/%s/%s@%s, trying git archive: %v", owner, repo, path, ref, rawErr) } - remoteLog.Printf("Raw URL download failed for %s/%s/%s@%s, trying git archive: %v", owner, repo, path, ref, rawErr) // Use git archive to get the file content without cloning // This works for public repositories without authentication - githubHost := GetGitHubHostForRepo(owner, repo) + var githubHost string + if host != "" { + githubHost = "https://" + host + } else { + githubHost = GetGitHubHostForRepo(owner, repo) + } repoURL := fmt.Sprintf("%s/%s/%s.git", githubHost, owner, repo) // git archive command: git archive --remote= @@ -429,11 +450,11 @@ func downloadFileViaGit(owner, repo, path, ref string) ([]byte, error) { archiveOutput, err := cmd.Output() if err != nil { // If git archive fails, try with git clone + git show as a fallback - return downloadFileViaGitClone(owner, repo, path, ref) + return downloadFileViaGitClone(owner, repo, path, ref, host) } // Extract the file from the tar archive using Go's archive/tar (cross-platform) - content, err = fileutil.ExtractFileFromTar(archiveOutput, path) + content, err := fileutil.ExtractFileFromTar(archiveOutput, path) if err != nil { return nil, fmt.Errorf("failed to extract file from git archive: %w", err) } @@ -474,7 +495,7 @@ func downloadFileViaRawURL(owner, repo, filePath, ref string) ([]byte, error) { // downloadFileViaGitClone downloads a file by shallow cloning the repository // This is used as a fallback when git archive doesn't work -func downloadFileViaGitClone(owner, repo, path, ref 
string) ([]byte, error) { +func downloadFileViaGitClone(owner, repo, path, ref, host string) ([]byte, error) { remoteLog.Printf("Attempting git clone fallback for %s/%s/%s@%s", owner, repo, path, ref) // Create a temporary directory for the shallow clone @@ -484,7 +505,12 @@ func downloadFileViaGitClone(owner, repo, path, ref string) ([]byte, error) { } defer os.RemoveAll(tmpDir) - githubHost := GetGitHubHostForRepo(owner, repo) + var githubHost string + if host != "" { + githubHost = "https://" + host + } else { + githubHost = GetGitHubHostForRepo(owner, repo) + } repoURL := fmt.Sprintf("%s/%s/%s.git", githubHost, owner, repo) // Check if ref is a SHA (40 hex characters) @@ -580,26 +606,19 @@ func checkRemoteSymlink(client *api.RESTClient, owner, repo, dirPath, ref string // if .github/workflows/shared is a symlink to ../../gh-agent-workflows/shared, // fetching .github/workflows/shared/elastic-tools.md returns 404. // This function walks the path components and resolves any symlinks found. -func resolveRemoteSymlinks(owner, repo, filePath, ref string) (string, error) { +// The caller must provide a REST client (already authenticated for the correct host). +func resolveRemoteSymlinks(client *api.RESTClient, owner, repo, filePath, ref string) (string, error) { parts := strings.Split(filePath, "/") if len(parts) <= 1 { return "", fmt.Errorf("no directory components to resolve in path: %s", filePath) } - remoteLog.Printf("Attempting symlink resolution for %s/%s/%s@%s (%d path components)", owner, repo, filePath, ref, len(parts)) - - client, err := api.DefaultRESTClient() - if err != nil { - // When auth is unavailable (e.g., running inside an agentic workflow without credentials), - // symlink resolution cannot proceed. Return a descriptive error so the caller can skip - // symlink resolution and proceed without it. 
- if gitutil.IsAuthError(err.Error()) { - remoteLog.Printf("REST client creation failed due to auth error, skipping symlink resolution for %s/%s/%s@%s", owner, repo, filePath, ref) - return "", fmt.Errorf("skipping symlink resolution: no auth available for %s/%s/%s@%s", owner, repo, filePath, ref) - } - return "", fmt.Errorf("failed to create REST client: %w", err) + if client == nil { + return "", fmt.Errorf("no REST client available for symlink resolution of %s/%s/%s@%s", owner, repo, filePath, ref) } + remoteLog.Printf("Attempting symlink resolution for %s/%s/%s@%s (%d path components)", owner, repo, filePath, ref, len(parts)) + // Check each directory prefix (not including the final filename) to find symlinks for i := 1; i < len(parts); i++ { dirPath := strings.Join(parts[:i], "/") @@ -665,30 +684,55 @@ func resolveRemoteSymlinks(owner, repo, filePath, ref string) (string, error) { // - ref: Git reference (branch, tag, or commit SHA) // Returns the file content as bytes or an error if the file cannot be retrieved. func DownloadFileFromGitHub(owner, repo, path, ref string) ([]byte, error) { - return downloadFileFromGitHub(owner, repo, path, ref) + return downloadFileFromGitHubWithDepth(owner, repo, path, ref, 0, "") +} + +// DownloadFileFromGitHubForHost downloads a file from a GitHub repository using the GitHub API, +// targeting a specific GitHub host. Use this when the target repository is on a different host +// than the one configured via GH_HOST (e.g., fetching from github.com while GH_HOST is a GHE instance). +// host is the hostname without scheme (e.g., "github.com", "myorg.ghe.com"). +// An empty host uses the default configured host (GH_HOST or github.com). +func DownloadFileFromGitHubForHost(owner, repo, path, ref, host string) ([]byte, error) { + return downloadFileFromGitHubWithDepth(owner, repo, path, ref, 0, host) } // ResolveRefToSHA resolves a git ref (branch, tag, or short SHA) to its full commit SHA. 
// This is the exported wrapper for resolveRefToSHA. // If the ref is already a 40-character hex SHA, it returns it as-is. func ResolveRefToSHA(owner, repo, ref string) (string, error) { - return resolveRefToSHA(owner, repo, ref) + return resolveRefToSHA(owner, repo, ref, "") +} + +// ResolveRefToSHAForHost resolves a git ref to its full commit SHA on a specific GitHub host. +// Use this when the target repository is on a different host than the one configured via GH_HOST. +// host is the hostname without scheme (e.g., "github.com", "myorg.ghe.com"). +// An empty host uses the default configured host (GH_HOST or github.com). +func ResolveRefToSHAForHost(owner, repo, ref, host string) (string, error) { + return resolveRefToSHA(owner, repo, ref, host) } func downloadFileFromGitHub(owner, repo, path, ref string) ([]byte, error) { - return downloadFileFromGitHubWithDepth(owner, repo, path, ref, 0) + return downloadFileFromGitHubWithDepth(owner, repo, path, ref, 0, "") } -func downloadFileFromGitHubWithDepth(owner, repo, path, ref string, symlinkDepth int) ([]byte, error) { - // Create REST client - client, err := api.DefaultRESTClient() +func downloadFileFromGitHubWithDepth(owner, repo, path, ref string, symlinkDepth int, host string) ([]byte, error) { + // Create a REST client targeting the correct host. + // When host is explicitly specified (e.g., "github.com"), use it directly so that + // cross-host fetches work correctly even when GH_HOST is set to a different instance. + var client *api.RESTClient + var err error + if host != "" { + client, err = api.NewRESTClient(api.ClientOptions{Host: host}) + } else { + client, err = api.DefaultRESTClient() + } if err != nil { // When the REST client cannot be created due to missing auth (e.g., running inside an // agentic workflow without gh CLI credentials), fall back to git-based download so that // public repositories are still accessible without authentication. 
if gitutil.IsAuthError(err.Error()) { remoteLog.Printf("REST client creation failed due to auth error, attempting git fallback for %s/%s/%s@%s: %v", owner, repo, path, ref, err) - content, gitErr := downloadFileViaGit(owner, repo, path, ref) + content, gitErr := downloadFileViaGit(owner, repo, path, ref, host) if gitErr != nil { // Both REST (auth error) and git fallback failed. Return the original auth error // so callers and tests can detect the auth-unavailable condition and skip/handle @@ -717,7 +761,7 @@ func downloadFileFromGitHubWithDepth(owner, repo, path, ref string, symlinkDepth if gitutil.IsAuthError(errStr) { remoteLog.Printf("GitHub API authentication failed, attempting git fallback for %s/%s/%s@%s", owner, repo, path, ref) // Try fallback using git commands for public repositories - content, gitErr := downloadFileViaGit(owner, repo, path, ref) + content, gitErr := downloadFileViaGit(owner, repo, path, ref, host) if gitErr != nil { // If git fallback also fails, return both errors return nil, fmt.Errorf("failed to fetch file content via GitHub API (auth error) and git fallback: API error: %w, Git error: %w", err, gitErr) @@ -728,10 +772,10 @@ func downloadFileFromGitHubWithDepth(owner, repo, path, ref string, symlinkDepth // Check if this is a 404 — the path may traverse a symlink that the API doesn't follow if isNotFoundError(errStr) && symlinkDepth < constants.MaxSymlinkDepth { remoteLog.Printf("File not found at %s/%s/%s@%s, checking for symlinks in path (depth: %d)", owner, repo, path, ref, symlinkDepth) - resolvedPath, resolveErr := resolveRemoteSymlinks(owner, repo, path, ref) + resolvedPath, resolveErr := resolveRemoteSymlinks(client, owner, repo, path, ref) if resolveErr == nil && resolvedPath != path { remoteLog.Printf("Retrying download with symlink-resolved path: %s -> %s", path, resolvedPath) - return downloadFileFromGitHubWithDepth(owner, repo, resolvedPath, ref, symlinkDepth+1) + return downloadFileFromGitHubWithDepth(owner, repo, 
resolvedPath, ref, symlinkDepth+1, host) } } diff --git a/pkg/parser/remote_fetch_integration_test.go b/pkg/parser/remote_fetch_integration_test.go index f8663c06d66..1e8187661fc 100644 --- a/pkg/parser/remote_fetch_integration_test.go +++ b/pkg/parser/remote_fetch_integration_test.go @@ -217,7 +217,12 @@ func TestCheckRemoteSymlink(t *testing.T) { // directory components of a real path and returns "no symlinks found" when none exist. func TestResolveRemoteSymlinksNoSymlinks(t *testing.T) { // "Global/Perl.gitignore" is a real path in github/gitignore with no symlinks - _, err := resolveRemoteSymlinks("github", "gitignore", "Global/Perl.gitignore", "main") + client, err := api.DefaultRESTClient() + if err != nil { + skipOnAuthError(t, err) + return + } + _, err = resolveRemoteSymlinks(client, "github", "gitignore", "Global/Perl.gitignore", "main") require.Error(t, err, "Expected error when no symlinks found") skipOnAuthError(t, err)