From f08a91e7ca26083f1c17d8bd7840074c2dcbde63 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 23 Mar 2026 17:21:55 +0000 Subject: [PATCH 1/2] Initial plan From ff54fbc5bfd15eecfabe6b6662c60a38d44e84dc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 23 Mar 2026 17:44:26 +0000 Subject: [PATCH 2/2] perf(parser): fix ParseWorkflow regression by eliminating redundant file read and YAML parse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves the 12% BenchmarkParseWorkflow regression by removing three sources of unnecessary work in the ParseWorkflowFile hot path: 1. ExtractWorkflowNameFromMarkdown was re-reading the workflow file from disk and re-parsing its YAML frontmatter even though the parsed result (result.Markdown) was already available. Added ExtractWorkflowNameFromMarkdownBody() to scan the already-extracted markdown body directly. 2. hasContentContext marshaled the parsed 'on' map to a YAML string just to run strings.Contains checks. Replaced with direct map key lookup — no allocation, same semantics. 3. filterIgnoredFields always copied the frontmatter map even when no ignored fields were present (the common case). Now returns the original map unchanged when no filtering is needed. Benchmark: ~3,472,142 ns/op → ~1,252,809 ns/op (~64% faster) Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> Agent-Logs-Url: https://github.com/github/gh-aw/sessions/94b84a31-b038-466b-bdeb-b8cf9c6ecb27 --- pkg/parser/frontmatter_content.go | 23 +++++++ pkg/parser/schema_utilities.go | 13 ++++ pkg/workflow/compiler_orchestrator_tools.go | 69 +++++++++------------ 3 files changed, 65 insertions(+), 40 deletions(-) diff --git a/pkg/parser/frontmatter_content.go b/pkg/parser/frontmatter_content.go index ce3ddf69fb0..45bd0511ee6 100644 --- a/pkg/parser/frontmatter_content.go +++ b/pkg/parser/frontmatter_content.go @@ -176,6 +176,29 @@ func ExtractWorkflowNameFromMarkdown(filePath string) (string, error) { return defaultName, nil } +// ExtractWorkflowNameFromMarkdownBody extracts the workflow name from an already-extracted +// markdown body (i.e. the content after the frontmatter has been stripped). This is more +// efficient than ExtractWorkflowNameFromMarkdown or ExtractWorkflowNameFromContent because it +// avoids the redundant file-read and YAML-parse that those functions perform when the caller +// already holds the parsed FrontmatterResult. +func ExtractWorkflowNameFromMarkdownBody(markdownBody string, virtualPath string) (string, error) { + log.Printf("Extracting workflow name from markdown body: virtualPath=%s, size=%d bytes", virtualPath, len(markdownBody)) + + scanner := bufio.NewScanner(strings.NewReader(markdownBody)) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if strings.HasPrefix(line, "# ") { + workflowName := strings.TrimSpace(line[2:]) + log.Printf("Found workflow name from H1 header: %s", workflowName) + return workflowName, nil + } + } + + defaultName := generateDefaultWorkflowName(virtualPath) + log.Printf("No H1 header found, using default name: %s", defaultName) + return defaultName, nil +} + // ExtractWorkflowNameFromContent extracts the workflow name from markdown content string. // This is the in-memory equivalent of ExtractWorkflowNameFromMarkdown, used by Wasm builds // where filesystem access is unavailable. diff --git a/pkg/parser/schema_utilities.go b/pkg/parser/schema_utilities.go index 1b9f10ff8a9..2a2f0429e4e 100644 --- a/pkg/parser/schema_utilities.go +++ b/pkg/parser/schema_utilities.go @@ -23,6 +23,19 @@ func filterIgnoredFields(frontmatter map[string]any) map[string]any { return frontmatter } + // Check whether any ignored field is actually present before allocating a copy. + // In the common case none of them are present, so we can return as-is. + hasIgnored := false + for _, field := range constants.IgnoredFrontmatterFields { + if _, exists := frontmatter[field]; exists { + hasIgnored = true + break + } + } + if !hasIgnored { + return frontmatter + } + schemaUtilitiesLog.Printf("Filtering ignored frontmatter fields: checking %d ignored field(s) against %d frontmatter keys", len(constants.IgnoredFrontmatterFields), len(frontmatter)) // Create a copy of the frontmatter map without ignored fields diff --git a/pkg/workflow/compiler_orchestrator_tools.go b/pkg/workflow/compiler_orchestrator_tools.go index 3c063ac83a4..5e6db849ad6 100644 --- a/pkg/workflow/compiler_orchestrator_tools.go +++ b/pkg/workflow/compiler_orchestrator_tools.go @@ -9,7 +9,6 @@ import ( "github.com/github/gh-aw/pkg/console" "github.com/github/gh-aw/pkg/logger" "github.com/github/gh-aw/pkg/parser" - "github.com/goccy/go-yaml" ) var orchestratorToolsLog = logger.New("workflow:compiler_orchestrator_tools") @@ -254,7 +253,8 @@ func (c *Compiler) processToolsAndMarkdown(result *parser.FrontmatterResult, cle if c.contentOverride != "" { workflowName, err = parser.ExtractWorkflowNameFromContent(c.contentOverride, cleanPath) } else { - workflowName, err = parser.ExtractWorkflowNameFromMarkdown(cleanPath) + // Use the already-parsed markdown body to avoid a redundant file read and YAML parse. + workflowName, err = parser.ExtractWorkflowNameFromMarkdownBody(result.Markdown, cleanPath) } if err != nil { return nil, fmt.Errorf("failed to extract workflow name: %w", err) @@ -338,47 +338,36 @@ func (c *Compiler) hasContentContext(frontmatter map[string]any) bool { return false } - // Convert the "on" field to YAML string for parsing - onYAML, err := yaml.Marshal(onField) - if err != nil { - orchestratorToolsLog.Printf("Failed to marshal 'on' field: %v", err) + // Only the map form of the "on" field contains individually-keyed event triggers. + // String ("on: issues") and array ("on: [issues]") forms are not inspected because + // GitHub Actions treats them as default-activity-type triggers and the original + // implementation only detected events that appeared as YAML map keys (i.e. "event:"). + onMap, ok := onField.(map[string]any) + if !ok { + orchestratorToolsLog.Printf("No content context detected: 'on' is not a map") return false } - onStr := string(onYAML) - - // Check for content-related event types that provide text/title/body - // These are the same events supported by compute_text.cjs - contentEvents := []string{ - "issues:", - "pull_request:", - "pull_request_target:", - "issue_comment:", - "pull_request_review_comment:", - "pull_request_review:", - "discussion:", - "discussion_comment:", - } - - for _, event := range contentEvents { - if strings.Contains(onStr, event) { - orchestratorToolsLog.Printf("Detected content context: workflow triggered by %s", strings.TrimSuffix(event, ":")) - return true - } - } - - // Check for slash_command trigger (works with comment events that have content) - if strings.Contains(onStr, "slash_command:") { - orchestratorToolsLog.Printf("Detected content context: workflow triggered by slash_command") - return true - } - - // Check for labeled activity type on issues, pull_request, or discussion - // These events provide text content when labeled/unlabeled - if strings.Contains(onStr, "labeled") { - // Ensure it's in the context of an issue, PR, or discussion event - if strings.Contains(onStr, "issues:") || strings.Contains(onStr, "pull_request:") || strings.Contains(onStr, "discussion:") { - orchestratorToolsLog.Printf("Detected content context: workflow triggered by labeled activity type") + // Content-related event types that provide text/title/body outputs via the sanitized step. + // These are the same events supported by compute_text.cjs. + // Note: "issues", "pull_request", and "discussion" are included here, which also covers + // workflows using "labeled"/"unlabeled" activity types on those events — any trigger that + // declares one of these events as a map key is treated as having content context. + contentEventKeys := map[string]bool{ + "issues": true, + "pull_request": true, + "pull_request_target": true, + "issue_comment": true, + "pull_request_review_comment": true, + "pull_request_review": true, + "discussion": true, + "discussion_comment": true, + "slash_command": true, + } + + for eventName := range onMap { + if contentEventKeys[eventName] { + orchestratorToolsLog.Printf("Detected content context: workflow triggered by %s", eventName) return true } }