-
Notifications
You must be signed in to change notification settings - Fork 352
perf(parser): fix ParseWorkflow regression — eliminate redundant file read and YAML parse #22472
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f08a91e
ff54fbc
f1aa5fe
82f7095
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -176,6 +176,29 @@ func ExtractWorkflowNameFromMarkdown(filePath string) (string, error) { | |
| return defaultName, nil | ||
| } | ||
|
|
||
| // ExtractWorkflowNameFromMarkdownBody extracts the workflow name from an already-extracted | ||
| // markdown body (i.e. the content after the frontmatter has been stripped). This is more | ||
| // efficient than ExtractWorkflowNameFromMarkdown or ExtractWorkflowNameFromContent because it | ||
| // avoids the redundant file-read and YAML-parse that those functions perform when the caller | ||
| // already holds the parsed FrontmatterResult. | ||
| func ExtractWorkflowNameFromMarkdownBody(markdownBody string, virtualPath string) (string, error) { | ||
| log.Printf("Extracting workflow name from markdown body: virtualPath=%s, size=%d bytes", virtualPath, len(markdownBody)) | ||
|
|
||
| scanner := bufio.NewScanner(strings.NewReader(markdownBody)) | ||
| for scanner.Scan() { | ||
| line := strings.TrimSpace(scanner.Text()) | ||
| if strings.HasPrefix(line, "# ") { | ||
| workflowName := strings.TrimSpace(line[2:]) | ||
| log.Printf("Found workflow name from H1 header: %s", workflowName) | ||
| return workflowName, nil | ||
| } | ||
| } | ||
|
|
||
| defaultName := generateDefaultWorkflowName(virtualPath) | ||
| log.Printf("No H1 header found, using default name: %s", defaultName) | ||
| return defaultName, nil | ||
| } | ||
|
Comment on lines
+179
to
+200
|
||
|
|
||
| // ExtractWorkflowNameFromContent extracts the workflow name from markdown content string. | ||
| // This is the in-memory equivalent of ExtractWorkflowNameFromMarkdown, used by Wasm builds | ||
| // where filesystem access is unavailable. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,7 +9,6 @@ import ( | |
| "github.com/github/gh-aw/pkg/console" | ||
| "github.com/github/gh-aw/pkg/logger" | ||
| "github.com/github/gh-aw/pkg/parser" | ||
| "github.com/goccy/go-yaml" | ||
| ) | ||
|
|
||
| var orchestratorToolsLog = logger.New("workflow:compiler_orchestrator_tools") | ||
|
|
@@ -254,7 +253,8 @@ func (c *Compiler) processToolsAndMarkdown(result *parser.FrontmatterResult, cle | |
| if c.contentOverride != "" { | ||
| workflowName, err = parser.ExtractWorkflowNameFromContent(c.contentOverride, cleanPath) | ||
| } else { | ||
| workflowName, err = parser.ExtractWorkflowNameFromMarkdown(cleanPath) | ||
| // Use the already-parsed markdown body to avoid a redundant file read and YAML parse. | ||
| workflowName, err = parser.ExtractWorkflowNameFromMarkdownBody(result.Markdown, cleanPath) | ||
| } | ||
| if err != nil { | ||
| return nil, fmt.Errorf("failed to extract workflow name: %w", err) | ||
|
|
@@ -338,47 +338,36 @@ func (c *Compiler) hasContentContext(frontmatter map[string]any) bool { | |
| return false | ||
| } | ||
|
|
||
| // Convert the "on" field to YAML string for parsing | ||
| onYAML, err := yaml.Marshal(onField) | ||
| if err != nil { | ||
| orchestratorToolsLog.Printf("Failed to marshal 'on' field: %v", err) | ||
| // Only the map form of the "on" field contains individually-keyed event triggers. | ||
| // String ("on: issues") and array ("on: [issues]") forms are not inspected because | ||
| // GitHub Actions treats them as default-activity-type triggers and the original | ||
| // implementation only detected events that appeared as YAML map keys (i.e. "event:"). | ||
| onMap, ok := onField.(map[string]any) | ||
| if !ok { | ||
| orchestratorToolsLog.Printf("No content context detected: 'on' is not a map") | ||
| return false | ||
| } | ||
|
|
||
| onStr := string(onYAML) | ||
|
|
||
| // Check for content-related event types that provide text/title/body | ||
| // These are the same events supported by compute_text.cjs | ||
| contentEvents := []string{ | ||
| "issues:", | ||
| "pull_request:", | ||
| "pull_request_target:", | ||
| "issue_comment:", | ||
| "pull_request_review_comment:", | ||
| "pull_request_review:", | ||
| "discussion:", | ||
| "discussion_comment:", | ||
| } | ||
|
|
||
| for _, event := range contentEvents { | ||
| if strings.Contains(onStr, event) { | ||
| orchestratorToolsLog.Printf("Detected content context: workflow triggered by %s", strings.TrimSuffix(event, ":")) | ||
| return true | ||
| } | ||
| } | ||
|
|
||
| // Check for slash_command trigger (works with comment events that have content) | ||
| if strings.Contains(onStr, "slash_command:") { | ||
| orchestratorToolsLog.Printf("Detected content context: workflow triggered by slash_command") | ||
| return true | ||
| } | ||
|
|
||
| // Check for labeled activity type on issues, pull_request, or discussion | ||
| // These events provide text content when labeled/unlabeled | ||
| if strings.Contains(onStr, "labeled") { | ||
| // Ensure it's in the context of an issue, PR, or discussion event | ||
| if strings.Contains(onStr, "issues:") || strings.Contains(onStr, "pull_request:") || strings.Contains(onStr, "discussion:") { | ||
| orchestratorToolsLog.Printf("Detected content context: workflow triggered by labeled activity type") | ||
| // Content-related event types that provide text/title/body outputs via the sanitized step. | ||
| // These are the same events supported by compute_text.cjs. | ||
| // Note: "issues", "pull_request", and "discussion" are included here, which also covers | ||
| // workflows using "labeled"/"unlabeled" activity types on those events — any trigger that | ||
| // declares one of these events as a map key is treated as having content context. | ||
| contentEventKeys := map[string]bool{ | ||
| "issues": true, | ||
| "pull_request": true, | ||
| "pull_request_target": true, | ||
| "issue_comment": true, | ||
| "pull_request_review_comment": true, | ||
| "pull_request_review": true, | ||
| "discussion": true, | ||
| "discussion_comment": true, | ||
| "slash_command": true, | ||
| } | ||
|
Comment on lines
+356
to
+366
|
||
|
|
||
| for eventName := range onMap { | ||
| if contentEventKeys[eventName] { | ||
| orchestratorToolsLog.Printf("Detected content context: workflow triggered by %s", eventName) | ||
| return true | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ExtractWorkflowNameFromMarkdownBodyusesbufio.Scannerbut never checksscanner.Err(). If the markdown contains a very long line (over Scanner's token limit),Scan()will stop early and this function will silently fall back to the default name instead of returning an error. Consider setting a larger scanner buffer (or using a different reader) and returningscanner.Err()when non-nil so failures are not hidden.