diff --git a/pkg/parser/frontmatter_content.go b/pkg/parser/frontmatter_content.go index ce3ddf69fb0..45bd0511ee6 100644 --- a/pkg/parser/frontmatter_content.go +++ b/pkg/parser/frontmatter_content.go @@ -176,6 +176,29 @@ func ExtractWorkflowNameFromMarkdown(filePath string) (string, error) { return defaultName, nil } +// ExtractWorkflowNameFromMarkdownBody extracts the workflow name from an already-extracted +// markdown body (i.e. the content after the frontmatter has been stripped). This is more +// efficient than ExtractWorkflowNameFromMarkdown or ExtractWorkflowNameFromContent because it +// avoids the redundant file-read and YAML-parse that those functions perform when the caller +// already holds the parsed FrontmatterResult. +func ExtractWorkflowNameFromMarkdownBody(markdownBody string, virtualPath string) (string, error) { + log.Printf("Extracting workflow name from markdown body: virtualPath=%s, size=%d bytes", virtualPath, len(markdownBody)) + + scanner := bufio.NewScanner(strings.NewReader(markdownBody)) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if strings.HasPrefix(line, "# ") { + workflowName := strings.TrimSpace(line[2:]) + log.Printf("Found workflow name from H1 header: %s", workflowName) + return workflowName, nil + } + } + + defaultName := generateDefaultWorkflowName(virtualPath) + log.Printf("No H1 header found, using default name: %s", defaultName) + return defaultName, nil +} + // ExtractWorkflowNameFromContent extracts the workflow name from markdown content string. // This is the in-memory equivalent of ExtractWorkflowNameFromMarkdown, used by Wasm builds // where filesystem access is unavailable. diff --git a/pkg/parser/schema_utilities.go b/pkg/parser/schema_utilities.go index 1b9f10ff8a9..2a2f0429e4e 100644 --- a/pkg/parser/schema_utilities.go +++ b/pkg/parser/schema_utilities.go @@ -23,6 +23,19 @@ func filterIgnoredFields(frontmatter map[string]any) map[string]any { return frontmatter } + // Check whether any ignored field is actually present before allocating a copy. + // In the common case none of them are present, so we can return as-is. + hasIgnored := false + for _, field := range constants.IgnoredFrontmatterFields { + if _, exists := frontmatter[field]; exists { + hasIgnored = true + break + } + } + if !hasIgnored { + return frontmatter + } + schemaUtilitiesLog.Printf("Filtering ignored frontmatter fields: checking %d ignored field(s) against %d frontmatter keys", len(constants.IgnoredFrontmatterFields), len(frontmatter)) // Create a copy of the frontmatter map without ignored fields diff --git a/pkg/workflow/compiler_orchestrator_tools.go b/pkg/workflow/compiler_orchestrator_tools.go index 3c063ac83a4..5e6db849ad6 100644 --- a/pkg/workflow/compiler_orchestrator_tools.go +++ b/pkg/workflow/compiler_orchestrator_tools.go @@ -9,7 +9,6 @@ import ( "github.com/github/gh-aw/pkg/console" "github.com/github/gh-aw/pkg/logger" "github.com/github/gh-aw/pkg/parser" - "github.com/goccy/go-yaml" ) var orchestratorToolsLog = logger.New("workflow:compiler_orchestrator_tools") @@ -254,7 +253,8 @@ func (c *Compiler) processToolsAndMarkdown(result *parser.FrontmatterResult, cle if c.contentOverride != "" { workflowName, err = parser.ExtractWorkflowNameFromContent(c.contentOverride, cleanPath) } else { - workflowName, err = parser.ExtractWorkflowNameFromMarkdown(cleanPath) + // Use the already-parsed markdown body to avoid a redundant file read and YAML parse. + workflowName, err = parser.ExtractWorkflowNameFromMarkdownBody(result.Markdown, cleanPath) } if err != nil { return nil, fmt.Errorf("failed to extract workflow name: %w", err) @@ -338,47 +338,36 @@ func (c *Compiler) hasContentContext(frontmatter map[string]any) bool { return false } - // Convert the "on" field to YAML string for parsing - onYAML, err := yaml.Marshal(onField) - if err != nil { - orchestratorToolsLog.Printf("Failed to marshal 'on' field: %v", err) + // Only the map form of the "on" field contains individually-keyed event triggers. + // String ("on: issues") and array ("on: [issues]") forms are not inspected because + // GitHub Actions treats them as default-activity-type triggers and the original + // implementation only detected events that appeared as YAML map keys (i.e. "event:"). + onMap, ok := onField.(map[string]any) + if !ok { + orchestratorToolsLog.Printf("No content context detected: 'on' is not a map") return false } - onStr := string(onYAML) - - // Check for content-related event types that provide text/title/body - // These are the same events supported by compute_text.cjs - contentEvents := []string{ - "issues:", - "pull_request:", - "pull_request_target:", - "issue_comment:", - "pull_request_review_comment:", - "pull_request_review:", - "discussion:", - "discussion_comment:", - } - - for _, event := range contentEvents { - if strings.Contains(onStr, event) { - orchestratorToolsLog.Printf("Detected content context: workflow triggered by %s", strings.TrimSuffix(event, ":")) - return true - } - } - - // Check for slash_command trigger (works with comment events that have content) - if strings.Contains(onStr, "slash_command:") { - orchestratorToolsLog.Printf("Detected content context: workflow triggered by slash_command") - return true - } - - // Check for labeled activity type on issues, pull_request, or discussion - // These events provide text content when labeled/unlabeled - if strings.Contains(onStr, "labeled") { - // Ensure it's in the context of an issue, PR, or discussion event - if strings.Contains(onStr, "issues:") || strings.Contains(onStr, "pull_request:") || strings.Contains(onStr, "discussion:") { - orchestratorToolsLog.Printf("Detected content context: workflow triggered by labeled activity type") + // Content-related event types that provide text/title/body outputs via the sanitized step. + // These are the same events supported by compute_text.cjs. + // Note: "issues", "pull_request", and "discussion" are included here, which also covers + // workflows using "labeled"/"unlabeled" activity types on those events — any trigger that + // declares one of these events as a map key is treated as having content context. + contentEventKeys := map[string]bool{ + "issues": true, + "pull_request": true, + "pull_request_target": true, + "issue_comment": true, + "pull_request_review_comment": true, + "pull_request_review": true, + "discussion": true, + "discussion_comment": true, + "slash_command": true, + } + + for eventName := range onMap { + if contentEventKeys[eventName] { + orchestratorToolsLog.Printf("Detected content context: workflow triggered by %s", eventName) return true } }