diff --git a/.github/aw/network.md b/.github/aw/network.md index c20ee3e9ed7..756dbeb78ec 100644 --- a/.github/aw/network.md +++ b/.github/aw/network.md @@ -40,7 +40,7 @@ Each entry in `network.allowed` must be one of: | **Exact domain** | `api.example.com`, `registry.npmjs.org` | Must be a fully-qualified domain name (FQDN) | | **Wildcard subdomain** | `*.example.com` | Matches `sub.example.com`, `deep.nested.example.com`, and `example.com` itself | -> ⚠️ **Bare shorthands like `npm`, `pypi`, or `localhost` are NOT valid** unless they are listed in the ecosystem identifiers table below. Use ecosystem identifiers (`node`, `python`) or explicit FQDNs (`registry.npmjs.org`, `pypi.org`) instead. +> ⚠️ **Bare shorthands like `npm`, `pypi`, or `localhost` are NOT valid** unless they are listed in the ecosystem identifiers table below. Using an unrecognised single-word entry causes a **compile-time error**. Use ecosystem identifiers (`node`, `python`) or explicit FQDNs (`registry.npmjs.org`, `pypi.org`) instead. ## Ecosystem Identifiers @@ -87,7 +87,7 @@ These keywords expand to curated lists of domains maintained by gh-aw: ## Invalid Shorthands -These values look like they might work but are **not valid** ecosystem identifiers and will be passed through as literal domain names (and will almost certainly not match any real host): +These values look like ecosystem identifiers but are **not recognised** — using them in `network.allowed` causes a **compile-time error**: | Invalid value | What you probably meant | Correct value | |---|---|---| diff --git a/pkg/workflow/network_firewall_validation.go b/pkg/workflow/network_firewall_validation.go index d68b683d719..b9409f7a9dd 100644 --- a/pkg/workflow/network_firewall_validation.go +++ b/pkg/workflow/network_firewall_validation.go @@ -15,6 +15,7 @@ package workflow import ( "fmt" "regexp" + "sort" "strings" "github.com/github/gh-aw/pkg/constants" @@ -71,9 +72,24 @@ func (c *Compiler) validateNetworkAllowedDomains(network *NetworkPermissions) er continue } - // Skip ecosystem identifiers - they don't need domain pattern validation + // Check if this looks like an ecosystem identifier (single lowercase word with optional hyphens) if isEcosystemIdentifier(domain) { - networkFirewallValidationLog.Printf("Skipping ecosystem identifier: %s", domain) + // Validate it's a known ecosystem identifier using a direct map lookup to avoid allocations + if isKnownEcosystemIdentifier(domain) { + networkFirewallValidationLog.Printf("Skipping known ecosystem identifier: %s", domain) + continue + } + // Unknown ecosystem identifier - error + networkFirewallValidationLog.Printf("Validation error: unknown ecosystem identifier: %s", domain) + wrappedErr := fmt.Errorf("network.allowed[%d]: %w", i, NewValidationError( + "network.allowed", + domain, + fmt.Sprintf("'%s' is not a valid ecosystem identifier", domain), + "Use a valid ecosystem identifier or a domain name containing a dot (e.g., 'example.com').\n\nValid ecosystem identifiers: "+strings.Join(getValidEcosystemIdentifiers(), ", "), + )) + if returnErr := collector.Add(wrappedErr); returnErr != nil { + return returnErr // Fail-fast mode + } continue } @@ -105,6 +121,31 @@ func isEcosystemIdentifier(domain string) bool { return isEcosystemIdentifierPattern.MatchString(domain) } +// isKnownEcosystemIdentifier reports whether id is a recognised ecosystem identifier. +// It checks the base ecosystemDomains map and the compoundEcosystems map directly, +// avoiding the allocations that getEcosystemDomains incurs. +func isKnownEcosystemIdentifier(id string) bool { + if _, ok := ecosystemDomains[id]; ok { + return true + } + _, ok := compoundEcosystems[id] + return ok +} + +// getValidEcosystemIdentifiers returns a sorted list of all valid ecosystem identifiers, +// including both the base identifiers from ecosystemDomains and compound identifiers. +func getValidEcosystemIdentifiers() []string { + ids := make([]string, 0, len(ecosystemDomains)+len(compoundEcosystems)) + for id := range ecosystemDomains { + ids = append(ids, id) + } + for id := range compoundEcosystems { + ids = append(ids, id) + } + sort.Strings(ids) + return ids +} + // domainPattern validates domain patterns including wildcards // Valid patterns: // - Plain domains: github.com, api.github.com diff --git a/pkg/workflow/network_firewall_validation_test.go b/pkg/workflow/network_firewall_validation_test.go index 870dee7ec67..0e97e45255d 100644 --- a/pkg/workflow/network_firewall_validation_test.go +++ b/pkg/workflow/network_firewall_validation_test.go @@ -151,6 +151,63 @@ func TestValidateNetworkFirewallConfig_AllowURLsRequiresSSLBump(t *testing.T) { }) } +func TestValidateNetworkAllowedDomains_EcosystemIdentifiers(t *testing.T) { + compiler := NewCompiler() + + t.Run("known ecosystem identifiers pass validation", func(t *testing.T) { + // Use getValidEcosystemIdentifiers() to stay in sync with production code + for _, ecosystem := range getValidEcosystemIdentifiers() { + t.Run(ecosystem, func(t *testing.T) { + network := &NetworkPermissions{Allowed: []string{ecosystem}} + err := compiler.validateNetworkAllowedDomains(network) + assert.NoError(t, err, "Known ecosystem identifier '%s' should pass validation", ecosystem) + }) + } + }) + + t.Run("unknown single-word identifiers fail validation", func(t *testing.T) { + invalidEcosystems := []string{ + "rustxxxx", + "defaults2", + "githubx", + "nodexyz", + "unknown", + "fakeecosystem", + "notarealecosystem", + "invalidname123", + } + for _, ecosystem := range invalidEcosystems { + t.Run(ecosystem, func(t *testing.T) { + network := &NetworkPermissions{Allowed: []string{ecosystem}} + err := compiler.validateNetworkAllowedDomains(network) + require.Error(t, err, "Unknown ecosystem identifier '%s' should fail validation", ecosystem) + assert.Contains(t, err.Error(), "not a valid ecosystem identifier", "Error should indicate invalid ecosystem identifier") + }) + } + }) + + t.Run("domain names with dots still pass validation", func(t *testing.T) { + validDomains := []string{"github.com", "api.example.com", "*.example.org"} + for _, domain := range validDomains { + t.Run(domain, func(t *testing.T) { + network := &NetworkPermissions{Allowed: []string{domain}} + err := compiler.validateNetworkAllowedDomains(network) + assert.NoError(t, err, "Valid domain '%s' should pass validation", domain) + }) + } + }) + + t.Run("mixed valid and invalid entries collects all errors", func(t *testing.T) { + network := &NetworkPermissions{ + Allowed: []string{"defaults", "rustxxxx", "github.com", "fakeecosystem"}, + } + err := compiler.validateNetworkAllowedDomains(network) + require.Error(t, err, "Should fail when invalid ecosystem identifiers are present") + assert.Contains(t, err.Error(), "rustxxxx", "Error should mention the invalid identifier") + assert.Contains(t, err.Error(), "fakeecosystem", "Error should mention the other invalid identifier") + }) +} + func TestValidateNetworkFirewallConfig_Integration(t *testing.T) { t.Run("compiler rejects workflow with allow-urls but no ssl-bump", func(t *testing.T) { compiler := NewCompiler()