diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c08a90acc7c..40bc43c1387 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2304,3 +2304,61 @@ jobs: echo "Test completed with:" echo "- Success count: ${{ steps.add-workflows.outputs.success_count }}" echo "- Failure count: ${{ steps.add-workflows.outputs.failure_count }}" + + integration-unauthenticated-add: + name: Integration Unauthenticated Add (Public Repo) + runs-on: ubuntu-latest + permissions: + contents: read + concurrency: + group: ci-${{ github.ref }}-integration-unauthenticated-add + cancel-in-progress: true + steps: + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Set up Go + id: setup-go + uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # v6 + with: + go-version-file: go.mod + cache: true + + - name: Report Go cache status + run: | + if [ "${{ steps.setup-go.outputs.cache-hit }}" == "true" ]; then + echo "✅ Go cache hit" >> $GITHUB_STEP_SUMMARY + else + echo "⚠️ Go cache miss" >> $GITHUB_STEP_SUMMARY + fi + + - name: Download dependencies + run: go mod download + + - name: Verify dependencies + run: go mod verify + + - name: Build gh-aw binary + run: make build + + - name: Run unauthenticated integration tests + # Explicitly clear all GitHub auth tokens to reproduce the agentic-workflow + # environment where gh CLI is not authenticated. Tests must succeed for public + # repositories via the raw URL / git fallback path. + env: + GITHUB_TOKEN: "" + GH_TOKEN: "" + run: | + set -o pipefail + go test -v -parallel=4 -timeout=10m -tags 'integration' -json \ + -run 'TestAddPublicWorkflowUnauthenticated|TestDownloadFileFromGitHubUnauthenticated' \ + ./pkg/cli/ ./pkg/parser/ \ + | tee test-result-integration-unauthenticated.json + + - name: Upload test results + if: always() + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + with: + name: test-result-integration-unauthenticated + path: test-result-integration-unauthenticated.json + retention-days: 14 diff --git a/pkg/cli/add_integration_test.go b/pkg/cli/add_integration_test.go index c2511f3443b..f4021ee43df 100644 --- a/pkg/cli/add_integration_test.go +++ b/pkg/cli/add_integration_test.go @@ -835,3 +835,54 @@ Please analyze the repository. // Should still have engine: claude (original) assert.Contains(t, contentStr, "engine: claude", "original engine should be preserved") } + +// TestAddPublicWorkflowUnauthenticated verifies that gh aw add works for a public +// repository even when no GitHub auth tokens are present. This tests the raw-URL +// fallback path that is used when api.DefaultRESTClient() fails due to missing auth, +// which is the scenario that occurs when running inside an agentic workflow without +// gh CLI credentials configured. +func TestAddPublicWorkflowUnauthenticated(t *testing.T) { + setup := setupAddIntegrationTest(t) + defer setup.cleanup() + + // Build a minimal environment that deliberately excludes all auth tokens. + // This reproduces the "authentication token not found" failure that occurs + // when gh aw add is invoked inside an agentic workflow without gh auth. + var filteredEnv []string + for _, e := range os.Environ() { + switch { + case strings.HasPrefix(e, "GITHUB_TOKEN="), + strings.HasPrefix(e, "GH_TOKEN="), + strings.HasPrefix(e, "GITHUB_ENTERPRISE_TOKEN="), + strings.HasPrefix(e, "GH_ENTERPRISE_TOKEN="): + // Exclude all GitHub auth tokens to simulate the unauthenticated environment + default: + filteredEnv = append(filteredEnv, e) + } + } + + // Use github/gh-aw with an explicit path spec (owner/repo/path/file.md@version). + // The file exists at v0.45.5 and the github org allows unauthenticated raw URL access + // for public repos (verified by TestDownloadFileFromGitHubUnauthenticated). + workflowSpec := "github/gh-aw/.github/workflows/github-mcp-tools-report.md@v0.45.5" + + cmd := exec.Command(setup.binaryPath, "add", workflowSpec, "--verbose") + cmd.Dir = setup.tempDir + cmd.Env = filteredEnv + output, err := cmd.CombinedOutput() + outputStr := string(output) + + t.Logf("Command output:\n%s", outputStr) + + require.NoError(t, err, "gh aw add should succeed for a public repo without auth tokens: %s", outputStr) + + // Verify the workflow file was downloaded and written + workflowsDir := filepath.Join(setup.tempDir, ".github", "workflows") + info, err := os.Stat(workflowsDir) + require.NoError(t, err, ".github/workflows directory should exist after add") + assert.True(t, info.IsDir(), ".github/workflows should be a directory") + + workflowFile := filepath.Join(workflowsDir, "github-mcp-tools-report.md") + _, err = os.Stat(workflowFile) + require.NoError(t, err, "downloaded workflow file should exist at %s", workflowFile) +} diff --git a/pkg/gitutil/gitutil.go b/pkg/gitutil/gitutil.go index f4f64d5cc59..71f82a33f4c 100644 --- a/pkg/gitutil/gitutil.go +++ b/pkg/gitutil/gitutil.go @@ -21,7 +21,8 @@ func IsAuthError(errMsg string) bool { strings.Contains(lowerMsg, "not logged into") || strings.Contains(lowerMsg, "unauthorized") || strings.Contains(lowerMsg, "forbidden") || - strings.Contains(lowerMsg, "permission denied") + strings.Contains(lowerMsg, "permission denied") || + strings.Contains(lowerMsg, "saml enforcement") if isAuth { log.Print("Detected authentication error") } diff --git a/pkg/parser/remote_fetch.go b/pkg/parser/remote_fetch.go index e29494cdf8e..cc3209a4199 100644 --- a/pkg/parser/remote_fetch.go +++ b/pkg/parser/remote_fetch.go @@ -7,11 +7,14 @@ import ( "encoding/json" "errors" "fmt" + "io" + "net/http" "os" "os/exec" pathpkg "path" "path/filepath" "strings" + "time" "github.com/cli/go-gh/v2" "github.com/cli/go-gh/v2/pkg/api" @@ -238,11 +241,9 @@ func downloadIncludeFromWorkflowSpec(spec string, cache *ImportCache) (string, e // Only resolve SHA if we're using the cache resolvedSHA, err := resolveRefToSHA(owner, repo, ref) if err != nil { - // If the error is an authentication error, propagate it immediately - lowerErr := strings.ToLower(err.Error()) - if strings.Contains(lowerErr, "auth") || strings.Contains(lowerErr, "unauthoriz") || strings.Contains(lowerErr, "forbidden") || strings.Contains(lowerErr, "token") || strings.Contains(lowerErr, "permission denied") { - return "", fmt.Errorf("failed to resolve ref to SHA due to authentication error: %w", err) - } + // SHA resolution failure (including auth errors) only means we cannot cache; the + // actual file download will be attempted below and may succeed via git fallback for + // public repositories. Do not propagate this error - just skip caching. remoteLog.Printf("Failed to resolve ref to SHA, will skip cache: %v", err) // Continue without caching if SHA resolution fails } else { @@ -398,6 +399,14 @@ func resolveRefToSHA(owner, repo, ref string) (string, error) { func downloadFileViaGit(owner, repo, path, ref string) ([]byte, error) { remoteLog.Printf("Attempting git fallback for %s/%s/%s@%s", owner, repo, path, ref) + // First, try via raw.githubusercontent.com — no auth required for public repos and + // no dependency on git being installed. + content, rawErr := downloadFileViaRawURL(owner, repo, path, ref) + if rawErr == nil { + return content, nil + } + remoteLog.Printf("Raw URL download failed for %s/%s/%s@%s, trying git archive: %v", owner, repo, path, ref, rawErr) + // Use git archive to get the file content without cloning // This works for public repositories without authentication githubHost := GetGitHubHostForRepo(owner, repo) @@ -414,7 +423,7 @@ func downloadFileViaGit(owner, repo, path, ref string) ([]byte, error) { } // Extract the file from the tar archive using Go's archive/tar (cross-platform) - content, err := fileutil.ExtractFileFromTar(archiveOutput, path) + content, err = fileutil.ExtractFileFromTar(archiveOutput, path) if err != nil { return nil, fmt.Errorf("failed to extract file from git archive: %w", err) } @@ -423,6 +432,36 @@ func downloadFileViaGit(owner, repo, path, ref string) ([]byte, error) { return content, nil } +// downloadFileViaRawURL fetches a file using the raw.githubusercontent.com URL. +// This requires no authentication for public repositories and no git installation. +func downloadFileViaRawURL(owner, repo, filePath, ref string) ([]byte, error) { + rawURL := fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/%s/%s", owner, repo, ref, filePath) + remoteLog.Printf("Attempting raw URL download: %s", rawURL) + + // Use a client with a timeout to prevent indefinite hangs on slow/unresponsive hosts. + rawClient := &http.Client{Timeout: 30 * time.Second} + + // #nosec G107 -- rawURL is constructed from workflow import configuration authored by + // the developer; the owner, repo, filePath, and ref are user-supplied workflow spec fields. + resp, err := rawClient.Get(rawURL) + if err != nil { + return nil, fmt.Errorf("raw URL request failed for %s: %w", rawURL, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("raw URL returned HTTP %d for %s", resp.StatusCode, rawURL) + } + + content, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read raw URL response body for %s: %w", rawURL, err) + } + + remoteLog.Printf("Successfully downloaded file via raw URL: %s", rawURL) + return content, nil +} + // downloadFileViaGitClone downloads a file by shallow cloning the repository // This is used as a fallback when git archive doesn't work func downloadFileViaGitClone(owner, repo, path, ref string) ([]byte, error) { @@ -541,6 +580,13 @@ func resolveRemoteSymlinks(owner, repo, filePath, ref string) (string, error) { client, err := api.DefaultRESTClient() if err != nil { + // When auth is unavailable (e.g., running inside an agentic workflow without credentials), + // symlink resolution cannot proceed. Return a descriptive error so the caller can skip + // symlink resolution and proceed without it. + if gitutil.IsAuthError(err.Error()) { + remoteLog.Printf("REST client creation failed due to auth error, skipping symlink resolution for %s/%s/%s@%s", owner, repo, filePath, ref) + return "", fmt.Errorf("skipping symlink resolution: no auth available for %s/%s/%s@%s", owner, repo, filePath, ref) + } return "", fmt.Errorf("failed to create REST client: %w", err) } @@ -627,6 +673,21 @@ func downloadFileFromGitHubWithDepth(owner, repo, path, ref string, symlinkDepth // Create REST client client, err := api.DefaultRESTClient() if err != nil { + // When the REST client cannot be created due to missing auth (e.g., running inside an + // agentic workflow without gh CLI credentials), fall back to git-based download so that + // public repositories are still accessible without authentication. + if gitutil.IsAuthError(err.Error()) { + remoteLog.Printf("REST client creation failed due to auth error, attempting git fallback for %s/%s/%s@%s: %v", owner, repo, path, ref, err) + content, gitErr := downloadFileViaGit(owner, repo, path, ref) + if gitErr != nil { + // Both REST (auth error) and git fallback failed. Return the original auth error + // so callers and tests can detect the auth-unavailable condition and skip/handle + // it gracefully (git fails too in unauthenticated environments for private/invalid repos). + remoteLog.Printf("Git fallback also failed for %s/%s/%s@%s: %v", owner, repo, path, ref, gitErr) + return nil, fmt.Errorf("failed to fetch file content: %w", err) + } + return content, nil + } return nil, fmt.Errorf("failed to create REST client: %w", err) } diff --git a/pkg/parser/remote_fetch_integration_test.go b/pkg/parser/remote_fetch_integration_test.go index 834edda3982..f8663c06d66 100644 --- a/pkg/parser/remote_fetch_integration_test.go +++ b/pkg/parser/remote_fetch_integration_test.go @@ -270,3 +270,46 @@ func TestDownloadIncludeFromWorkflowSpecWithCache(t *testing.T) { t.Logf("First download path: %s", path1) t.Logf("Second download path: %s", path2) } + +// TestDownloadFileFromGitHubUnauthenticated verifies that downloadFileFromGitHub +// falls back to raw URL / git-based download when api.DefaultRESTClient() fails because +// no auth token is available. This reproduces the scenario that occurs when running +// gh aw add inside an agentic workflow without gh CLI credentials configured. +func TestDownloadFileFromGitHubUnauthenticated(t *testing.T) { + // Clear all GitHub auth tokens to simulate the agentic-workflow environment + // where gh auth is not configured. + t.Setenv("GITHUB_TOKEN", "") + t.Setenv("GH_TOKEN", "") + t.Setenv("GITHUB_ENTERPRISE_TOKEN", "") + t.Setenv("GH_ENTERPRISE_TOKEN", "") + + owner := "github" + repo := "gitignore" + path := "Go.gitignore" + ref := "main" + + content, err := downloadFileFromGitHub(owner, repo, path, ref) + // If the REST client unexpectedly succeeds (e.g., gh config file has a token), + // that is also fine – the point is that the file is returned without error. + if err != nil { + // Skip only when the network or git executable is genuinely unavailable. + // Avoid matching on "git" alone because it would also match "gitignore". + errStr := err.Error() + if strings.Contains(errStr, `executable file not found`) || + strings.Contains(errStr, "failed to clone repository") || + strings.Contains(errStr, "connection refused") || + strings.Contains(errStr, "no route to host") || + strings.Contains(errStr, "dial tcp") { + t.Skipf("Skipping test: download fallback unavailable (%v)", err) + } + t.Fatalf("Expected successful download via raw URL / git fallback for public repo, got: %v", err) + } + + require.NotEmpty(t, content, "downloaded content should not be empty") + + // Sanity-check: Go.gitignore should contain typical Go patterns + contentStr := string(content) + assert.True(t, + strings.Contains(contentStr, "*.exe") || strings.Contains(contentStr, "# Binaries"), + "Go.gitignore content looks unexpected: %s", contentStr[:min(len(contentStr), 200)]) +}