diff --git a/.changeset/patch-embed-engine-markdown.md b/.changeset/patch-embed-engine-markdown.md new file mode 100644 index 0000000000..48271d34b8 --- /dev/null +++ b/.changeset/patch-embed-engine-markdown.md @@ -0,0 +1,5 @@ +--- +"gh-aw": patch +--- + +Documented that built-in agentic engines now live in embedded Markdown shared workflows, keeping all engine metadata in schema-validated files instead of inline Go structs. diff --git a/pkg/parser/import_field_extractor.go b/pkg/parser/import_field_extractor.go index d2be74fa6b..8b868c5931 100644 --- a/pkg/parser/import_field_extractor.go +++ b/pkg/parser/import_field_extractor.go @@ -78,13 +78,15 @@ func (acc *importAccumulator) extractAllImportFields(content []byte, item import // Track import path for runtime-import macro generation (only if no inputs). // Imports with inputs must be inlined for compile-time substitution. + // Builtin paths (@builtin:…) are pure configuration — they carry no user-visible + // prompt content and must not generate runtime-import macros. importRelPath := computeImportRelPath(item.fullPath, item.importPath) - if len(item.inputs) == 0 { - // No inputs - use runtime-import macro + if len(item.inputs) == 0 && !strings.HasPrefix(importRelPath, BuiltinPathPrefix) { + // No inputs and not a builtin - use runtime-import macro acc.importPaths = append(acc.importPaths, importRelPath) log.Printf("Added import path for runtime-import: %s", importRelPath) - } else { + } else if len(item.inputs) > 0 { // Has inputs - must inline for compile-time substitution log.Printf("Import %s has inputs - will be inlined for compile-time substitution", importRelPath) markdownContent, err := processIncludedFileWithVisited(item.fullPath, item.sectionName, false, visited) diff --git a/pkg/parser/remote_fetch.go b/pkg/parser/remote_fetch.go index cc3209a419..7388e7f3d9 100644 --- a/pkg/parser/remote_fetch.go +++ b/pkg/parser/remote_fetch.go @@ -116,6 +116,16 @@ func isRepositoryImport(importPath string) bool { func ResolveIncludePath(filePath, baseDir string, cache *ImportCache) (string, error) { remoteLog.Printf("Resolving include path: file_path=%s, base_dir=%s", filePath, baseDir) + // Handle builtin paths - these are embedded files that bypass filesystem resolution. + // No security check is needed since the content is compiled into the binary. + if strings.HasPrefix(filePath, BuiltinPathPrefix) { + if !BuiltinVirtualFileExists(filePath) { + return "", fmt.Errorf("builtin file not found: %s", filePath) + } + remoteLog.Printf("Resolved builtin path: %s", filePath) + return filePath, nil + } + // Check if this is a workflowspec (contains owner/repo/path format) // Format: owner/repo/path@ref or owner/repo/path@ref#section if isWorkflowSpec(filePath) { diff --git a/pkg/parser/remote_fetch_wasm.go b/pkg/parser/remote_fetch_wasm.go index 574b629d95..2561295018 100644 --- a/pkg/parser/remote_fetch_wasm.go +++ b/pkg/parser/remote_fetch_wasm.go @@ -56,6 +56,14 @@ func isRepositoryImport(importPath string) bool { } func ResolveIncludePath(filePath, baseDir string, cache *ImportCache) (string, error) { + // Handle builtin paths - these are embedded files that bypass filesystem resolution. + if strings.HasPrefix(filePath, BuiltinPathPrefix) { + if !BuiltinVirtualFileExists(filePath) { + return "", fmt.Errorf("builtin file not found: %s", filePath) + } + return filePath, nil + } + if isWorkflowSpec(filePath) { return "", fmt.Errorf("remote imports not available in Wasm: %s", filePath) } diff --git a/pkg/parser/schemas/main_workflow_schema.json b/pkg/parser/schemas/main_workflow_schema.json index 59fbf41354..612b603c9b 100644 --- a/pkg/parser/schemas/main_workflow_schema.json +++ b/pkg/parser/schemas/main_workflow_schema.json @@ -8076,6 +8076,138 @@ }, "required": ["runtime"], "additionalProperties": false + }, + { + "type": "object", + "description": "Engine definition: full declarative metadata for a named engine entry (used in builtin engine shared workflow files such as @builtin:engines/*.md)", + "properties": { + "id": { + "type": "string", + "description": "Unique engine identifier (e.g. 'copilot', 'claude', 'codex', 'gemini')" + }, + "display-name": { + "type": "string", + "description": "Human-readable display name for the engine" + }, + "description": { + "type": "string", + "description": "Human-readable description of the engine" + }, + "runtime-id": { + "type": "string", + "description": "Runtime adapter identifier. Maps to the CodingAgentEngine registered in the engine registry. Defaults to id when omitted." + }, + "provider": { + "type": "object", + "description": "Provider metadata for the engine", + "properties": { + "name": { + "type": "string", + "description": "Provider name (e.g. 'anthropic', 'github', 'google', 'openai')" + }, + "auth": { + "type": "object", + "description": "Default authentication configuration for the provider", + "properties": { + "secret": { + "type": "string", + "description": "Name of the GitHub Actions secret that contains the API key" + }, + "strategy": { + "type": "string", + "enum": ["api-key", "oauth-client-credentials", "bearer"], + "description": "Authentication strategy" + }, + "token-url": { + "type": "string", + "description": "OAuth 2.0 token endpoint URL" + }, + "client-id": { + "type": "string", + "description": "GitHub Actions secret name for the OAuth client ID" + }, + "client-secret": { + "type": "string", + "description": "GitHub Actions secret name for the OAuth client secret" + }, + "token-field": { + "type": "string", + "description": "JSON field name in the token response containing the access token" + }, + "header-name": { + "type": "string", + "description": "HTTP header name to inject the API key or token into" + } + }, + "additionalProperties": false + }, + "request": { + "type": "object", + "description": "Request shaping configuration", + "properties": { + "path-template": { + "type": "string", + "description": "URL path template with variable placeholders" + }, + "query": { + "type": "object", + "description": "Static query parameters", + "additionalProperties": { "type": "string" } + }, + "body-inject": { + "type": "object", + "description": "Key/value pairs injected into the JSON request body", + "additionalProperties": { "type": "string" } + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + }, + "models": { + "type": "object", + "description": "Model selection configuration for the engine", + "properties": { + "default": { + "type": "string", + "description": "Default model identifier" + }, + "supported": { + "type": "array", + "items": { "type": "string" }, + "description": "List of supported model identifiers" + } + }, + "additionalProperties": false + }, + "auth": { + "type": "array", + "description": "Authentication bindings — maps logical roles (e.g. 'api-key') to GitHub Actions secret names", + "items": { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "Logical authentication role (e.g. 'api-key', 'token')" + }, + "secret": { + "type": "string", + "description": "Name of the GitHub Actions secret that provides credentials for this role" + } + }, + "required": ["role", "secret"], + "additionalProperties": false + } + }, + "options": { + "type": "object", + "description": "Additional engine-specific options", + "additionalProperties": true + } + }, + "required": ["id", "display-name"], + "additionalProperties": false } ] }, diff --git a/pkg/parser/virtual_fs.go b/pkg/parser/virtual_fs.go index 63939d1c18..f66ae0044a 100644 --- a/pkg/parser/virtual_fs.go +++ b/pkg/parser/virtual_fs.go @@ -1,11 +1,68 @@ package parser -import "os" +import ( + "fmt" + "os" + "strings" + "sync" +) + +// builtinVirtualFiles holds embedded built-in files registered at startup. +// Keys use the "@builtin:" path prefix (e.g. "@builtin:engines/copilot.md"). +// The map is populated once and then read-only; concurrent reads are safe. +var ( + builtinVirtualFiles map[string][]byte + builtinVirtualFilesMu sync.RWMutex +) + +// RegisterBuiltinVirtualFile registers an embedded file under a canonical builtin path. +// Paths must start with BuiltinPathPrefix ("@builtin:"); it panics if they do not. +// If the same path is registered twice with identical content the call is a no-op. +// Registering the same path with different content panics to surface configuration errors early. +// This function is safe for concurrent use. +func RegisterBuiltinVirtualFile(path string, content []byte) { + if !strings.HasPrefix(path, BuiltinPathPrefix) { + panic(fmt.Sprintf("RegisterBuiltinVirtualFile: path %q does not start with %q", path, BuiltinPathPrefix)) + } + builtinVirtualFilesMu.Lock() + defer builtinVirtualFilesMu.Unlock() + if builtinVirtualFiles == nil { + builtinVirtualFiles = make(map[string][]byte) + } + if existing, ok := builtinVirtualFiles[path]; ok { + if string(existing) != string(content) { + panic(fmt.Sprintf("RegisterBuiltinVirtualFile: path %q already registered with different content", path)) + } + return // idempotent: same content, no-op + } + builtinVirtualFiles[path] = content +} + +// BuiltinVirtualFileExists returns true if the given path is registered as a builtin virtual file. +func BuiltinVirtualFileExists(path string) bool { + builtinVirtualFilesMu.RLock() + defer builtinVirtualFilesMu.RUnlock() + _, ok := builtinVirtualFiles[path] + return ok +} + +// BuiltinPathPrefix is the path prefix used for embedded builtin files. +// Paths with this prefix bypass filesystem resolution and security checks. +const BuiltinPathPrefix = "@builtin:" // readFileFunc is the function used to read file contents throughout the parser. // In wasm builds, this is overridden to read from a virtual filesystem // populated by the browser via SetVirtualFiles. -var readFileFunc = os.ReadFile +// In native builds, builtin virtual files are checked first, then os.ReadFile. +var readFileFunc = func(path string) ([]byte, error) { + builtinVirtualFilesMu.RLock() + content, ok := builtinVirtualFiles[path] + builtinVirtualFilesMu.RUnlock() + if ok { + return content, nil + } + return os.ReadFile(path) +} // ReadFile reads a file using the parser's file reading function, which // checks the virtual filesystem first in wasm builds. Use this instead of diff --git a/pkg/parser/virtual_fs_wasm.go b/pkg/parser/virtual_fs_wasm.go index 89d9bc1c0c..1bb3074d9e 100644 --- a/pkg/parser/virtual_fs_wasm.go +++ b/pkg/parser/virtual_fs_wasm.go @@ -33,6 +33,13 @@ func VirtualFileExists(path string) bool { func init() { // Override readFileFunc in wasm builds to check virtual files first. readFileFunc = func(path string) ([]byte, error) { + // Check builtin virtual files first (embedded engine .md files etc.) + builtinVirtualFilesMu.RLock() + builtinContent, builtinOK := builtinVirtualFiles[path] + builtinVirtualFilesMu.RUnlock() + if builtinOK { + return builtinContent, nil + } if virtualFiles != nil { if content, ok := virtualFiles[path]; ok { return content, nil diff --git a/pkg/workflow/compiler_orchestrator_engine.go b/pkg/workflow/compiler_orchestrator_engine.go index ae3edb0bf9..f1624176af 100644 --- a/pkg/workflow/compiler_orchestrator_engine.go +++ b/pkg/workflow/compiler_orchestrator_engine.go @@ -98,6 +98,27 @@ func (c *Compiler) setupEngineAndImports(result *parser.FrontmatterResult, clean c.IncrementWarningCount() } engineSetting = c.engineOverride + // Update engineConfig.ID so that downstream code (e.g. generateCreateAwInfo) uses + // the override engine ID, not the one parsed from the frontmatter. + if engineConfig != nil { + engineConfig.ID = c.engineOverride + } + } + + // When the engine is specified in short/string form ("engine: copilot") and no CLI + // override is active, inject the corresponding builtin shared-workflow .md as an + // import. This makes "engine: copilot" syntactic sugar for importing the builtin + // copilot.md, which carries the full engine definition. The engine field is removed + // from the frontmatter so the definition comes entirely from the import. + if c.engineOverride == "" && isStringFormEngine(result.Frontmatter) && engineSetting != "" { + builtinPath := builtinEnginePath(engineSetting) + if parser.BuiltinVirtualFileExists(builtinPath) { + orchestratorEngineLog.Printf("Injecting builtin engine import: %s", builtinPath) + addImportToFrontmatter(result.Frontmatter, builtinPath) + delete(result.Frontmatter, "engine") + engineSetting = "" + engineConfig = nil + } } // Process imports from frontmatter first (before @include directives) @@ -197,6 +218,19 @@ func (c *Compiler) setupEngineAndImports(result *parser.FrontmatterResult, clean return nil, fmt.Errorf("failed to extract engine config from included file: %w", err) } engineConfig = extractedConfig + + // If the imported engine is an inline definition (engine.runtime sub-object), + // validate and register it in the catalog. This mirrors the handling for inline + // definitions declared directly in the main workflow (above). + if engineConfig != nil && engineConfig.IsInlineDefinition { + if err := c.validateEngineInlineDefinition(engineConfig); err != nil { + return nil, err + } + if err := c.validateEngineAuthDefinition(engineConfig); err != nil { + return nil, err + } + c.registerInlineEngineDefinition(engineConfig) + } } // Apply the default AI engine setting if not specified @@ -299,3 +333,43 @@ func (c *Compiler) setupEngineAndImports(result *parser.FrontmatterResult, clean configSteps: configSteps, }, nil } + +// isStringFormEngine reports whether the "engine" field in the given frontmatter is a +// plain string (e.g. "engine: copilot"), as opposed to an object with an "id" or +// "runtime" sub-key. +func isStringFormEngine(frontmatter map[string]any) bool { + engine, exists := frontmatter["engine"] + if !exists { + return false + } + _, isString := engine.(string) + return isString +} + +// addImportToFrontmatter appends importPath to the "imports" slice in frontmatter. +// It handles the case where "imports" may be absent, a []any, a []string, or a +// single string (which is converted to a two-element slice preserving the original value). +// Any other unexpected type is left unchanged and importPath is not injected. +func addImportToFrontmatter(frontmatter map[string]any, importPath string) { + existing, hasImports := frontmatter["imports"] + if !hasImports { + frontmatter["imports"] = []any{importPath} + return + } + switch v := existing.(type) { + case []any: + frontmatter["imports"] = append(v, importPath) + case []string: + newSlice := make([]any, len(v)+1) + for i, s := range v { + newSlice[i] = s + } + newSlice[len(v)] = importPath + frontmatter["imports"] = newSlice + case string: + // Single string import — preserve it and append the new one. + frontmatter["imports"] = []any{v, importPath} + // For any other unexpected type, leave the field untouched so the + // downstream parser can still report its own error for the invalid value. + } +} diff --git a/pkg/workflow/compiler_yaml.go b/pkg/workflow/compiler_yaml.go index 5b2878fb42..86acf0b5ef 100644 --- a/pkg/workflow/compiler_yaml.go +++ b/pkg/workflow/compiler_yaml.go @@ -101,13 +101,22 @@ func (c *Compiler) generateWorkflowHeader(yaml *strings.Builder, data *WorkflowD } // Add manifest of imported/included files if any exist - if len(data.ImportedFiles) > 0 || len(data.IncludedFiles) > 0 { + // Build a user-visible imports list by filtering out internal builtin engine paths + // (e.g. "@builtin:engines/copilot.md") which are implementation details. + var visibleImports []string + for _, file := range data.ImportedFiles { + if !strings.HasPrefix(file, parser.BuiltinPathPrefix) { + visibleImports = append(visibleImports, file) + } + } + + if len(visibleImports) > 0 || len(data.IncludedFiles) > 0 { yaml.WriteString("#\n") yaml.WriteString("# Resolved workflow manifest:\n") - if len(data.ImportedFiles) > 0 { + if len(visibleImports) > 0 { yaml.WriteString("# Imports:\n") - for _, file := range data.ImportedFiles { + for _, file := range visibleImports { cleanFile := stringutil.StripANSI(file) // Normalize to Unix paths (forward slashes) for cross-platform compatibility cleanFile = filepath.ToSlash(cleanFile) diff --git a/pkg/workflow/data/engines/claude.md b/pkg/workflow/data/engines/claude.md new file mode 100644 index 0000000000..4ada605368 --- /dev/null +++ b/pkg/workflow/data/engines/claude.md @@ -0,0 +1,16 @@ +--- +engine: + id: claude + display-name: Claude Code + description: Uses Claude Code with full MCP tool support and allow-listing + runtime-id: claude + provider: + name: anthropic + auth: + - role: api-key + secret: ANTHROPIC_API_KEY +--- + + diff --git a/pkg/workflow/data/engines/codex.md b/pkg/workflow/data/engines/codex.md new file mode 100644 index 0000000000..cad3273a1c --- /dev/null +++ b/pkg/workflow/data/engines/codex.md @@ -0,0 +1,16 @@ +--- +engine: + id: codex + display-name: Codex + description: Uses OpenAI Codex CLI with MCP server support + runtime-id: codex + provider: + name: openai + auth: + - role: api-key + secret: CODEX_API_KEY +--- + + diff --git a/pkg/workflow/data/engines/copilot.md b/pkg/workflow/data/engines/copilot.md new file mode 100644 index 0000000000..cd51746c7b --- /dev/null +++ b/pkg/workflow/data/engines/copilot.md @@ -0,0 +1,16 @@ +--- +engine: + id: copilot + display-name: GitHub Copilot CLI + description: Uses GitHub Copilot CLI with MCP server support + runtime-id: copilot + provider: + name: github + auth: + - role: api-key + secret: COPILOT_GITHUB_TOKEN +--- + + diff --git a/pkg/workflow/data/engines/gemini.md b/pkg/workflow/data/engines/gemini.md new file mode 100644 index 0000000000..88386ae8a4 --- /dev/null +++ b/pkg/workflow/data/engines/gemini.md @@ -0,0 +1,16 @@ +--- +engine: + id: gemini + display-name: Google Gemini CLI + description: Google Gemini CLI with headless mode and LLM gateway support + runtime-id: gemini + provider: + name: google + auth: + - role: api-key + secret: GEMINI_API_KEY +--- + + diff --git a/pkg/workflow/engine_auth_test.go b/pkg/workflow/engine_auth_test.go index e9398d1f5e..30770dabc7 100644 --- a/pkg/workflow/engine_auth_test.go +++ b/pkg/workflow/engine_auth_test.go @@ -399,12 +399,12 @@ func TestBuiltInEngineAuthUnchanged(t *testing.T) { tests := []struct { engineID string - wantAuthSecret string // expected legacy AuthBinding secret (empty = no binding) + wantAuthSecret string // expected legacy AuthBinding secret }{ {"claude", "ANTHROPIC_API_KEY"}, {"codex", "CODEX_API_KEY"}, - {"copilot", ""}, // copilot has no API-key binding - {"gemini", ""}, // gemini has no API-key binding + {"copilot", "COPILOT_GITHUB_TOKEN"}, + {"gemini", "GEMINI_API_KEY"}, } for _, tt := range tests { @@ -416,13 +416,9 @@ func TestBuiltInEngineAuthUnchanged(t *testing.T) { assert.Nil(t, def.Provider.Auth, "built-in engine %s should have no Provider.Auth (uses legacy AuthBinding)", tt.engineID) - if tt.wantAuthSecret != "" { - require.Lenf(t, def.Auth, 1, "engine %s should have exactly one AuthBinding", tt.engineID) - assert.Equal(t, tt.wantAuthSecret, def.Auth[0].Secret, - "engine %s AuthBinding.Secret should be unchanged", tt.engineID) - } else { - assert.Empty(t, def.Auth, "engine %s should have no AuthBinding", tt.engineID) - } + require.Lenf(t, def.Auth, 1, "engine %s should have exactly one AuthBinding", tt.engineID) + assert.Equal(t, tt.wantAuthSecret, def.Auth[0].Secret, + "engine %s AuthBinding.Secret should be unchanged", tt.engineID) }) } } diff --git a/pkg/workflow/engine_catalog_test.go b/pkg/workflow/engine_catalog_test.go index 133a261ed5..d760aff5b0 100644 --- a/pkg/workflow/engine_catalog_test.go +++ b/pkg/workflow/engine_catalog_test.go @@ -111,7 +111,7 @@ func TestEngineCatalog_BuiltInsPresent(t *testing.T) { func TestEngineCatalogMatchesSchema(t *testing.T) { variants := engineSchemaOneOfVariants(t) - require.Len(t, variants, 3, "engine_config oneOf should have exactly 3 variants: string, object-with-id, object-with-runtime") + require.Len(t, variants, 4, "engine_config oneOf should have exactly 4 variants: string, object-with-id, object-with-runtime, engine-definition") // Variant 0: plain string (no enum — allows built-ins and custom named catalog entries) assert.Equal(t, "string", variants[0]["type"], @@ -140,4 +140,13 @@ func TestEngineCatalogMatchesSchema(t *testing.T) { "third variant should have a 'runtime' property for inline engine definitions") assert.Contains(t, props2, "provider", "third variant should have a 'provider' property for inline engine definitions") + + // Variant 3: engine definition form used in builtin engine shared workflow files + assert.Equal(t, "object", variants[3]["type"], + "fourth variant should be type object (engine definition)") + props3, ok := variants[3]["properties"].(map[string]any) + require.True(t, ok, "fourth variant should have properties") + assert.Contains(t, props3, "id", "engine definition variant should have an 'id' property") + assert.Contains(t, props3, "display-name", "engine definition variant should have a 'display-name' property") + assert.Contains(t, props3, "auth", "engine definition variant should have an 'auth' property") } diff --git a/pkg/workflow/engine_definition.go b/pkg/workflow/engine_definition.go index b670886beb..3282de58ab 100644 --- a/pkg/workflow/engine_definition.go +++ b/pkg/workflow/engine_definition.go @@ -57,31 +57,31 @@ const ( type AuthDefinition struct { // Strategy selects the authentication flow (api-key, oauth-client-credentials, bearer). // Defaults to api-key when Secret is non-empty and Strategy is unset. - Strategy AuthStrategy + Strategy AuthStrategy `yaml:"strategy,omitempty"` // Secret is the env-var / GitHub Actions secret name that holds the raw API key or token. // Required for api-key and bearer strategies. - Secret string + Secret string `yaml:"secret,omitempty"` // TokenURL is the OAuth token endpoint (e.g. "https://auth.example.com/oauth/token"). // Required for oauth-client-credentials strategy. - TokenURL string + TokenURL string `yaml:"token-url,omitempty"` // ClientIDRef is the secret name that holds the OAuth client ID. // Required for oauth-client-credentials strategy. - ClientIDRef string + ClientIDRef string `yaml:"client-id-ref,omitempty"` // ClientSecretRef is the secret name that holds the OAuth client secret. // Required for oauth-client-credentials strategy. - ClientSecretRef string + ClientSecretRef string `yaml:"client-secret-ref,omitempty"` // TokenField is the JSON field name in the token response that contains the access token. // Defaults to "access_token" when empty. - TokenField string + TokenField string `yaml:"token-field,omitempty"` // HeaderName is the HTTP header to inject the token into (e.g. "api-key"). // Required when strategy is not bearer (bearer always uses Authorization header). - HeaderName string + HeaderName string `yaml:"header-name,omitempty"` } // RequestShape describes non-standard URL and body transformations applied to each @@ -89,36 +89,36 @@ type AuthDefinition struct { type RequestShape struct { // PathTemplate is a URL path template with {model} and other variable placeholders // (e.g. "/openai/deployments/{model}/chat/completions"). - PathTemplate string + PathTemplate string `yaml:"path-template,omitempty"` // Query holds static or template query-parameter values appended to every request // (e.g. {"api-version": "2024-10-01-preview"}). - Query map[string]string + Query map[string]string `yaml:"query,omitempty"` // BodyInject holds key/value pairs injected into the JSON request body before sending // (e.g. {"appKey": "{APP_KEY_SECRET}"}). - BodyInject map[string]string + BodyInject map[string]string `yaml:"body-inject,omitempty"` } // ProviderSelection identifies the AI provider for an engine (e.g. "anthropic", "openai"). // It optionally carries advanced authentication and request-shaping configuration for // non-standard backends. type ProviderSelection struct { - Name string - Auth *AuthDefinition - Request *RequestShape + Name string `yaml:"name,omitempty"` + Auth *AuthDefinition `yaml:"auth,omitempty"` + Request *RequestShape `yaml:"request,omitempty"` } // ModelSelection specifies the default and supported models for an engine. type ModelSelection struct { - Default string - Supported []string + Default string `yaml:"default,omitempty"` + Supported []string `yaml:"supported,omitempty"` } // AuthBinding maps a logical authentication role to a secret name. type AuthBinding struct { - Role string - Secret string + Role string `yaml:"role"` + Secret string `yaml:"secret"` } // RequiredSecretNames returns the env-var names that must be provided at runtime for @@ -149,14 +149,16 @@ func (a *AuthDefinition) RequiredSecretNames() []string { // It is separate from the runtime adapter (CodingAgentEngine) to allow the catalog // layer to carry identity and provider information without coupling to implementation. type EngineDefinition struct { - ID string - DisplayName string - Description string - RuntimeID string // maps to the CodingAgentEngine registered in EngineRegistry - Provider ProviderSelection - Models ModelSelection - Auth []AuthBinding - Options map[string]any + ID string `yaml:"id"` + DisplayName string `yaml:"display-name,omitempty"` + Description string `yaml:"description,omitempty"` + // RuntimeID maps to the CodingAgentEngine registered in EngineRegistry. + // Defaults to ID when omitted. + RuntimeID string `yaml:"runtime-id,omitempty"` + Provider ProviderSelection `yaml:"provider,omitempty"` + Models ModelSelection `yaml:"models,omitempty"` + Auth []AuthBinding `yaml:"auth,omitempty"` + Options map[string]any `yaml:"options,omitempty"` } // EngineCatalog is a collection of EngineDefinition entries backed by an EngineRegistry @@ -176,50 +178,17 @@ type ResolvedEngineTarget struct { } // NewEngineCatalog creates an EngineCatalog that wraps the given EngineRegistry and -// pre-registers the four built-in engine definitions (claude, codex, copilot, gemini). +// pre-registers the four built-in engine definitions (claude, codex, copilot, gemini) +// loaded from the embedded Markdown files in data/engines/*.md. func NewEngineCatalog(registry *EngineRegistry) *EngineCatalog { catalog := &EngineCatalog{ definitions: make(map[string]*EngineDefinition), registry: registry, } - catalog.Register(&EngineDefinition{ - ID: "claude", - DisplayName: "Claude Code", - Description: "Uses Claude Code with full MCP tool support and allow-listing", - RuntimeID: "claude", - Provider: ProviderSelection{Name: "anthropic"}, - Auth: []AuthBinding{ - {Role: "api-key", Secret: "ANTHROPIC_API_KEY"}, - }, - }) - - catalog.Register(&EngineDefinition{ - ID: "codex", - DisplayName: "Codex", - Description: "Uses OpenAI Codex CLI with MCP server support", - RuntimeID: "codex", - Provider: ProviderSelection{Name: "openai"}, - Auth: []AuthBinding{ - {Role: "api-key", Secret: "CODEX_API_KEY"}, - }, - }) - - catalog.Register(&EngineDefinition{ - ID: "copilot", - DisplayName: "GitHub Copilot CLI", - Description: "Uses GitHub Copilot CLI with MCP server support", - RuntimeID: "copilot", - Provider: ProviderSelection{Name: "github"}, - }) - - catalog.Register(&EngineDefinition{ - ID: "gemini", - DisplayName: "Google Gemini CLI", - Description: "Google Gemini CLI with headless mode and LLM gateway support", - RuntimeID: "gemini", - Provider: ProviderSelection{Name: "google"}, - }) + for _, def := range loadBuiltinEngineDefinitions() { + catalog.Register(def) + } return catalog } diff --git a/pkg/workflow/engine_definition_loader.go b/pkg/workflow/engine_definition_loader.go new file mode 100644 index 0000000000..56a855eb60 --- /dev/null +++ b/pkg/workflow/engine_definition_loader.go @@ -0,0 +1,142 @@ +// This file provides the built-in engine definition loader. +// +// Built-in engine definitions are stored as shared agentic workflow Markdown files +// embedded in the binary. Each file uses YAML frontmatter with a top-level "engine:" +// key. The engine definition form is validated as part of the shared workflow schema +// when files are processed as imports during compilation. +// +// # Embedded Resources +// +// Engine Markdown files live in data/engines/*.md and are embedded at compile time +// via the //go:embed directive below. Adding a new built-in engine requires only a +// new .md file in that directory — no Go code changes are needed. +// +// # Builtin Virtual FS +// +// Each embedded .md file is also registered in the parser's builtin virtual FS under +// the path "@builtin:engines/.md". This allows the compiler to inject the file +// as an import when the short-form "engine: " is encountered. +package workflow + +import ( + "embed" + "errors" + "fmt" + "io/fs" + "path/filepath" + "strings" + + "github.com/github/gh-aw/pkg/logger" + "github.com/github/gh-aw/pkg/parser" + "github.com/goccy/go-yaml" +) + +var engineDefinitionLoaderLog = logger.New("workflow:engine_definition_loader") + +//go:embed data/engines/*.md +var builtinEngineFS embed.FS + +// engineDefinitionFile is the on-disk wrapper that holds the engine definition +// under the top-level "engine" key. +type engineDefinitionFile struct { + Engine EngineDefinition `yaml:"engine"` +} + +// extractMarkdownFrontmatterYAML extracts the YAML content between the first pair of +// "---" delimiters in a Markdown file. Both LF and CRLF line endings are supported. +func extractMarkdownFrontmatterYAML(content []byte) ([]byte, error) { + s := string(content) + const sep = "---" + + // Find the opening delimiter + start := strings.Index(s, sep) + if start == -1 { + return nil, errors.New("no frontmatter opening delimiter found") + } + s = s[start+len(sep):] + + // Find the closing delimiter, supporting both LF and CRLF line endings. + endLF := strings.Index(s, "\n"+sep) + endCRLF := strings.Index(s, "\r\n"+sep) + + end := -1 + switch { + case endLF >= 0 && endCRLF >= 0: + end = min(endLF, endCRLF) + case endLF >= 0: + end = endLF + case endCRLF >= 0: + end = endCRLF + } + + if end == -1 { + return nil, errors.New("no frontmatter closing delimiter found") + } + return []byte(strings.TrimSpace(s[:end])), nil +} + +// builtinEnginePath returns the canonical builtin virtual-FS path for an engine id. +func builtinEnginePath(engineID string) string { + return parser.BuiltinPathPrefix + "engines/" + engineID + ".md" +} + +// loadBuiltinEngineDefinitions reads all *.md files from the embedded data/engines/ +// directory, parses each EngineDefinition from its frontmatter, and registers the file +// content in the parser's builtin virtual FS. +// It panics on parse errors to surface misconfigured built-in definitions early. +func loadBuiltinEngineDefinitions() []*EngineDefinition { + engineDefinitionLoaderLog.Print("Loading built-in engine definitions from embedded Markdown files") + + var definitions []*EngineDefinition + + err := fs.WalkDir(builtinEngineFS, "data/engines", func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + return nil + } + if filepath.Ext(path) != ".md" { + return nil + } + + data, readErr := builtinEngineFS.ReadFile(path) + if readErr != nil { + return fmt.Errorf("failed to read embedded engine file %s: %w", path, readErr) + } + + // Extract the frontmatter YAML from the Markdown file. + frontmatterYAML, fmErr := extractMarkdownFrontmatterYAML(data) + if fmErr != nil { + return fmt.Errorf("failed to extract frontmatter from %s: %w", path, fmErr) + } + + // Parse the engine definition from the frontmatter. + var wrapper engineDefinitionFile + if parseErr := yaml.Unmarshal(frontmatterYAML, &wrapper); parseErr != nil { + return fmt.Errorf("failed to parse embedded engine file %s: %w", path, parseErr) + } + + def := wrapper.Engine + + // Default runtime-id to engine id when omitted. + if def.RuntimeID == "" { + def.RuntimeID = def.ID + } + + // Register the full .md content in the parser's builtin virtual FS so the + // file can be resolved and read during import processing. + parser.RegisterBuiltinVirtualFile(builtinEnginePath(def.ID), data) + + engineDefinitionLoaderLog.Printf("Loaded built-in engine definition: id=%s runtime-id=%s", def.ID, def.RuntimeID) + definitions = append(definitions, &def) + return nil + }) + + if err != nil { + panic(fmt.Sprintf("failed to walk embedded engine definitions directory: %v", err)) + } + + engineDefinitionLoaderLog.Printf("Loaded %d built-in engine definitions", len(definitions)) + return definitions +} diff --git a/pkg/workflow/engine_definition_loader_test.go b/pkg/workflow/engine_definition_loader_test.go new file mode 100644 index 0000000000..f14ac62139 --- /dev/null +++ b/pkg/workflow/engine_definition_loader_test.go @@ -0,0 +1,146 @@ +//go:build !integration + +package workflow + +import ( + "os" + "path/filepath" + "testing" + + "github.com/github/gh-aw/pkg/constants" + "github.com/github/gh-aw/pkg/parser" + "github.com/github/gh-aw/pkg/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestBuiltinEngineMarkdownFiles verifies that every built-in engine has a corresponding +// .md file registered in the parser's builtin virtual FS and that the file's frontmatter +// parses as a valid shared agentic workflow (no on: field, engine.id matches engine id). +func TestBuiltinEngineMarkdownFiles(t *testing.T) { + // Initialise catalog so builtins are registered. + catalog := NewEngineCatalog(NewEngineRegistry()) + require.NotNil(t, catalog, "engine catalog should be created") + + builtinEngineIDs := []string{"claude", "codex", "copilot", "gemini"} + + for _, id := range builtinEngineIDs { + t.Run(id, func(t *testing.T) { + path := builtinEnginePath(id) + + // The file must be registered as a builtin virtual file. + assert.True(t, parser.BuiltinVirtualFileExists(path), + "builtin virtual file should exist for engine %s at path %s", id, path) + + // Read via the parser's ReadFile — exercises the virtual FS. + content, err := parser.ReadFile(path) + require.NoError(t, err, "should read builtin engine file for %s", id) + require.NotEmpty(t, content, "builtin engine file for %s should not be empty", id) + + // Parse the file as a shared agentic workflow. + result, parseErr := parser.ExtractFrontmatterFromContent(string(content)) + require.NoError(t, parseErr, "engine %s .md frontmatter should parse without error", id) + require.NotEmpty(t, result.Frontmatter, "engine %s .md should have frontmatter", id) + + // Must have an engine: key whose id matches the engine id. + engineField, hasEngine := result.Frontmatter["engine"] + require.True(t, hasEngine, "engine %s .md frontmatter must contain an engine: key", id) + engineObj, isMap := engineField.(map[string]any) + require.True(t, isMap, "engine %s .md engine: field must be an object, got %T", id, engineField) + assert.Equal(t, id, engineObj["id"], + "engine %s .md engine.id should match the engine id", id) + + // Must NOT have on: field (shared workflow, not a main workflow). + _, hasOnField := result.Frontmatter["on"] + assert.False(t, hasOnField, + "engine %s .md should be a shared workflow (no on: field)", id) + }) + } +} + +// TestBuiltinEngineStringFormInjection verifies that when a workflow uses the short/string +// form "engine: ", the compiler transparently injects the builtin .md as an import and +// produces a valid lock file with the correct engine ID. +func TestBuiltinEngineStringFormInjection(t *testing.T) { + tests := []struct { + engineID string + engineStep string // distinctive step name in the lock file + }{ + {"copilot", `GH_AW_INFO_ENGINE_ID: "copilot"`}, + {"codex", `GH_AW_INFO_ENGINE_ID: "codex"`}, + {"claude", `GH_AW_INFO_ENGINE_ID: "claude"`}, + {"gemini", `GH_AW_INFO_ENGINE_ID: "gemini"`}, + } + + for _, tt := range tests { + t.Run(tt.engineID, func(t *testing.T) { + tmpDir := testutil.TempDir(t, "test-engine-injection-*") + workflowsDir := filepath.Join(tmpDir, constants.GetWorkflowDir()) + require.NoError(t, os.MkdirAll(workflowsDir, 0755)) + + md := "---\n" + + "name: Test Engine Injection\n" + + "on:\n" + + " issues:\n" + + " types: [opened]\n" + + "permissions:\n" + + " contents: read\n" + + " issues: read\n" + + "engine: " + tt.engineID + "\n" + + "---\n\n" + + "# Task\n\nDo something.\n" + + mainFile := filepath.Join(workflowsDir, "test-engine-injection.md") + require.NoError(t, os.WriteFile(mainFile, []byte(md), 0644)) + + compiler := NewCompiler() + err := compiler.CompileWorkflow(mainFile) + require.NoError(t, err, "compilation should succeed for engine %s (string form)", tt.engineID) + + lockFile := filepath.Join(workflowsDir, "test-engine-injection.lock.yml") + lockContent, err := os.ReadFile(lockFile) + require.NoError(t, err, "lock file should be created for engine %s", tt.engineID) + + assert.Contains(t, string(lockContent), tt.engineStep, + "lock file for engine %s should contain %q", tt.engineID, tt.engineStep) + }) + } +} + +// TestBuiltinEngineStringFormInjection_CLIOverrideNotInjected verifies that when a CLI +// --engine override is active, the builtin .md injection is skipped and the override engine +// is used instead. +func TestBuiltinEngineStringFormInjection_CLIOverrideNotInjected(t *testing.T) { + tmpDir := testutil.TempDir(t, "test-engine-override-*") + workflowsDir := filepath.Join(tmpDir, constants.GetWorkflowDir()) + require.NoError(t, os.MkdirAll(workflowsDir, 0755)) + + md := "---\n" + + "name: Test Engine Override\n" + + "on:\n" + + " issues:\n" + + " types: [opened]\n" + + "permissions:\n" + + " contents: read\n" + + " issues: read\n" + + "engine: copilot\n" + + "---\n\n" + + "# Task\n\nDo something.\n" + + mainFile := filepath.Join(workflowsDir, "test-engine-override.md") + require.NoError(t, os.WriteFile(mainFile, []byte(md), 0644)) + + // Use --engine codex to override the markdown's "engine: copilot". + compiler := NewCompiler(WithEngineOverride("codex")) + err := compiler.CompileWorkflow(mainFile) + require.NoError(t, err, "compilation should succeed with --engine override") + + lockFile := filepath.Join(workflowsDir, "test-engine-override.lock.yml") + lockContent, err := os.ReadFile(lockFile) + require.NoError(t, err, "lock file should be created") + + assert.Contains(t, string(lockContent), `GH_AW_INFO_ENGINE_ID: "codex"`, + "lock file should use the overridden engine, not the markdown's copilot") + assert.NotContains(t, string(lockContent), `GH_AW_INFO_ENGINE_ID: "copilot"`, + "lock file should not contain the overridden copilot engine ID") +} diff --git a/pkg/workflow/engine_includes_test.go b/pkg/workflow/engine_includes_test.go index b2889bd0ca..543b74b339 100644 --- a/pkg/workflow/engine_includes_test.go +++ b/pkg/workflow/engine_includes_test.go @@ -9,6 +9,8 @@ import ( "testing" "github.com/github/gh-aw/pkg/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/github/gh-aw/pkg/constants" ) @@ -630,3 +632,66 @@ func TestExtractEngineConfigFromJSON(t *testing.T) { }) } } + +// TestImportedInlineEngineDefinition tests that an inline engine definition +// (engine.runtime sub-object) in a shared/imported workflow is correctly +// validated and registered when the importing workflow compiles. +func TestImportedInlineEngineDefinition(t *testing.T) { + tmpDir := testutil.TempDir(t, "test-inline-engine-import-*") + workflowsDir := filepath.Join(tmpDir, constants.GetWorkflowDir()) + sharedDir := filepath.Join(workflowsDir, "shared") + require.NoError(t, os.MkdirAll(sharedDir, 0755)) + + // Shared file defines a custom engine via engine.runtime (inline definition) + sharedContent := `--- +engine: + runtime: + id: codex + provider: + id: openai + auth: + secret: CODEX_API_KEY +--- + +# Shared custom engine config +` + sharedFile := filepath.Join(sharedDir, "custom-engine.md") + require.NoError(t, os.WriteFile(sharedFile, []byte(sharedContent), 0644)) + + // Main workflow imports the shared file; it has no engine key of its own + mainContent := `--- +name: Test Imported Inline Engine +on: + issues: + types: [opened] +permissions: + contents: read + issues: read +imports: + - shared/custom-engine.md +--- + +# Test Workflow + +Imports a custom inline engine definition from a shared workflow. +` + mainFile := filepath.Join(workflowsDir, "test-imported-inline-engine.md") + require.NoError(t, os.WriteFile(mainFile, []byte(mainContent), 0644)) + + compiler := NewCompiler() + err := compiler.CompileWorkflow(mainFile) + require.NoError(t, err, "compilation should succeed when inline engine is imported from shared workflow") + + lockFile := filepath.Join(workflowsDir, "test-imported-inline-engine.lock.yml") + lockContent, err := os.ReadFile(lockFile) + require.NoError(t, err, "lock file should be created") + + // Lock file should reference codex execution (inline definition resolved to codex runtime) + lockStr := string(lockContent) + assert.Contains(t, lockStr, "Install Codex", + "lock file should contain Codex installation step") + assert.Contains(t, lockStr, `GH_AW_INFO_ENGINE_ID: "codex"`, + "lock file should set engine ID to codex") + assert.Contains(t, lockStr, "codex ${", + "lock file should contain codex exec invocation") +} diff --git a/pkg/workflow/engine_validation.go b/pkg/workflow/engine_validation.go index e62c879258..a95753a3c4 100644 --- a/pkg/workflow/engine_validation.go +++ b/pkg/workflow/engine_validation.go @@ -282,15 +282,25 @@ func (c *Compiler) validateSingleEngineSpecification(mainEngineSetting string, i if engineStr, ok := firstEngine.(string); ok { return engineStr, nil } else if engineObj, ok := firstEngine.(map[string]any); ok { - // Handle object format - return the ID + // Handle object format: either engine.id (named engine) or engine.runtime.id (inline definition) if id, hasID := engineObj["id"]; hasID { if idStr, ok := id.(string); ok { return idStr, nil } } + // Handle inline definition with 'runtime' sub-object (engine.runtime.id) + if runtime, hasRuntime := engineObj["runtime"]; hasRuntime { + if runtimeObj, ok := runtime.(map[string]any); ok { + if id, hasID := runtimeObj["id"]; hasID { + if idStr, ok := id.(string); ok { + return idStr, nil + } + } + } + } } - return "", fmt.Errorf("invalid engine configuration in included file, missing or invalid 'id' field. Expected string or object with 'id' field.\n\nExample (string):\nengine: copilot\n\nExample (object):\nengine:\n id: copilot\n model: gpt-4\n\nSee: %s", constants.DocsEnginesURL) + return "", fmt.Errorf("invalid engine configuration in included file, missing or invalid 'id' field. Expected string, object with 'id' field, or inline definition with 'runtime.id'.\n\nExample (string):\nengine: copilot\n\nExample (object with id):\nengine:\n id: copilot\n model: gpt-4\n\nExample (inline runtime definition):\nengine:\n runtime:\n id: codex\n\nSee: %s", constants.DocsEnginesURL) } // validatePluginSupport validates that plugins are only used with engines that support them