From 7c7522d43f39388e79d397c907752ef224f91eb2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 23 Mar 2026 19:22:49 +0000 Subject: [PATCH 1/2] Initial plan From 23a0ba58632596f6d74acff94ff1218b1f190261 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 23 Mar 2026 19:49:07 +0000 Subject: [PATCH 2/2] fix(performance): Eliminate redundant YAML parsing in extractAllImportFields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The extractAllImportFields function was calling ExtractFrontmatterFromContent ~20 times per imported file (once per field extraction). Each call independently parsed the entire YAML frontmatter, causing O(fields × imports) YAML parse operations instead of O(imports). Fix: parse frontmatter once at the start of extractAllImportFields and reuse the result via new map-based helper functions. Add to content_extractor.go: - extractFieldJSONFromMap: JSON extraction from pre-parsed frontmatter - extractYAMLFieldFromMap: YAML extraction from pre-parsed frontmatter - extractOnSectionFieldFromMap: on: section array field extraction - extractOnSectionAnyFieldFromMap: on: section any-type field extraction Remove now-unused content-string-based helpers: - extractStepsFromContent, extractServicesFromContent - extractPostStepsFromContent, extractOnSectionField - extractOnSectionAnyField, extractOnGitHubToken - extractOnGitHubApp, extractTopLevelGitHubApp (parser pkg versions) Performance improvement (BenchmarkCompileComplexWorkflow): - Speed: 5.6ms → 4.6ms (~18% faster) - Allocations: 33,933 → 24,751 allocs/op (~27% fewer) Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> Agent-Logs-Url: https://github.com/github/gh-aw/sessions/208ce1c0-fe55-48b2-bfb4-f6cd0e8cdb80 --- pkg/parser/content_extractor.go | 135 +++++++-------------------- pkg/parser/import_field_extractor.go | 103 +++++++++----------- 2 files changed, 79 insertions(+), 159 deletions(-) diff --git a/pkg/parser/content_extractor.go b/pkg/parser/content_extractor.go index 4cd08ca98ae..36b18c8a84b 100644 --- a/pkg/parser/content_extractor.go +++ b/pkg/parser/content_extractor.go @@ -54,77 +54,11 @@ func extractToolsFromContent(content string) (string, error) { return strings.TrimSpace(string(extractedJSON)), nil } -// extractStepsFromContent extracts steps section from frontmatter as YAML string -func extractStepsFromContent(content string) (string, error) { - result, err := ExtractFrontmatterFromContent(content) - if err != nil { - return "", nil // Return empty string on error - } - - // Extract steps section - steps, exists := result.Frontmatter["steps"] - if !exists { - return "", nil - } - - // Convert to YAML string (similar to how CustomSteps are handled in compiler) - stepsYAML, err := yaml.Marshal(steps) - if err != nil { - return "", nil - } - - return strings.TrimSpace(string(stepsYAML)), nil -} - -// extractServicesFromContent extracts services section from frontmatter as YAML string -func extractServicesFromContent(content string) (string, error) { - result, err := ExtractFrontmatterFromContent(content) - if err != nil { - return "", nil // Return empty string on error - } - - // Extract services section - services, exists := result.Frontmatter["services"] - if !exists { - return "", nil - } - - // Convert to YAML string (similar to how steps are handled) - servicesYAML, err := yaml.Marshal(services) - if err != nil { - return "", nil - } - - return strings.TrimSpace(string(servicesYAML)), nil -} - // ExtractPermissionsFromContent extracts permissions section from frontmatter as JSON string func ExtractPermissionsFromContent(content string) (string, error) { return extractFrontmatterField(content, "permissions", "{}") } -// extractPostStepsFromContent extracts post-steps section from frontmatter as YAML string -func extractPostStepsFromContent(content string) (string, error) { - result, err := ExtractFrontmatterFromContent(content) - if err != nil { - return "", nil // Return empty string on error - } - - // Extract post-steps section - postSteps, exists := result.Frontmatter["post-steps"] - if !exists { - return "", nil - } - - // Convert to YAML string (similar to how steps are handled) - postStepsYAML, err := yaml.Marshal(postSteps) - if err != nil { - return "", nil - } - - return strings.TrimSpace(string(postStepsYAML)), nil -} - // extractFrontmatterField extracts a specific field from frontmatter as JSON string func extractFrontmatterField(content, fieldName, emptyValue string) (string, error) { contentExtractorLog.Printf("Extracting field: %s", fieldName) @@ -134,8 +68,16 @@ func extractFrontmatterField(content, fieldName, emptyValue string) (string, err return emptyValue, nil // Return empty value on error } + return extractFieldJSONFromMap(result.Frontmatter, fieldName, emptyValue) +} + +// extractFieldJSONFromMap extracts a specific field from an already-parsed frontmatter map as a JSON string. +// This avoids re-parsing YAML when the frontmatter has already been parsed. +func extractFieldJSONFromMap(frontmatter map[string]any, fieldName, emptyValue string) (string, error) { + contentExtractorLog.Printf("Extracting field from map: %s", fieldName) + // Extract the requested field - fieldValue, exists := result.Frontmatter[fieldName] + fieldValue, exists := frontmatter[fieldName] if !exists { contentExtractorLog.Printf("Field %s not found in frontmatter", fieldName) return emptyValue, nil @@ -152,79 +94,75 @@ func extractFrontmatterField(content, fieldName, emptyValue string) (string, err return strings.TrimSpace(string(fieldJSON)), nil } -// extractOnSectionField extracts a specific field from the on: section in frontmatter as JSON string -func extractOnSectionField(content, fieldName string) (string, error) { - contentExtractorLog.Printf("Extracting on: section field: %s", fieldName) - result, err := ExtractFrontmatterFromContent(content) +// extractYAMLFieldFromMap extracts a specific field from an already-parsed frontmatter map as a YAML string. +// This avoids re-parsing YAML when the frontmatter has already been parsed. +func extractYAMLFieldFromMap(frontmatter map[string]any, fieldName string) (string, error) { + contentExtractorLog.Printf("Extracting YAML field from map: %s", fieldName) + + fieldValue, exists := frontmatter[fieldName] + if !exists { + return "", nil + } + + fieldYAML, err := yaml.Marshal(fieldValue) if err != nil { - contentExtractorLog.Printf("Failed to extract frontmatter for field %s: %v", fieldName, err) - return "[]", nil // Return empty array on error + return "", nil } - // Extract the "on" section - onValue, exists := result.Frontmatter["on"] + return strings.TrimSpace(string(fieldYAML)), nil +} + +// extractOnSectionFieldFromMap extracts a specific field from the on: section in an already-parsed +// frontmatter map as a JSON string. This avoids re-parsing YAML when the frontmatter has already been parsed. +func extractOnSectionFieldFromMap(frontmatter map[string]any, fieldName string) (string, error) { + contentExtractorLog.Printf("Extracting on: section field from map: %s", fieldName) + + onValue, exists := frontmatter["on"] if !exists { - contentExtractorLog.Printf("Field 'on' not found in frontmatter") return "[]", nil } - // The on: section should be a map onMap, ok := onValue.(map[string]any) if !ok { - contentExtractorLog.Printf("Field 'on' is not a map: %T", onValue) return "[]", nil } - // Extract the requested field from the on: section fieldValue, exists := onMap[fieldName] if !exists { - contentExtractorLog.Printf("Field %s not found in 'on' section", fieldName) return "[]", nil } - // Normalize field value to an array var normalizedValue []any switch v := fieldValue.(type) { case string: - // Single string value if v != "" { normalizedValue = []any{v} } case []any: - // Already an array normalizedValue = v case []string: - // String array - convert to []any for _, s := range v { normalizedValue = append(normalizedValue, s) } default: - contentExtractorLog.Printf("Unexpected type for field %s: %T", fieldName, fieldValue) return "[]", nil } - // Return JSON string jsonData, err := json.Marshal(normalizedValue) if err != nil { - contentExtractorLog.Printf("Failed to marshal field %s to JSON: %v", fieldName, err) return "[]", nil } - contentExtractorLog.Printf("Successfully extracted field %s from on: section: %d bytes", fieldName, len(jsonData)) return string(jsonData), nil } -// extractOnSectionAnyField extracts a specific field from the on: section in frontmatter as -// a JSON string, handling any value type (string, object, array, etc.). -// Returns "" when the field is absent or an error occurs. -func extractOnSectionAnyField(content, fieldName string) (string, error) { - contentExtractorLog.Printf("Extracting on: section field (any): %s", fieldName) - result, err := ExtractFrontmatterFromContent(content) - if err != nil { - return "", nil - } +// extractOnSectionAnyFieldFromMap extracts a specific field from the on: section in an already-parsed +// frontmatter map as a JSON string, handling any value type. +// This avoids re-parsing YAML when the frontmatter has already been parsed. +func extractOnSectionAnyFieldFromMap(frontmatter map[string]any, fieldName string) (string, error) { + contentExtractorLog.Printf("Extracting on: section field (any) from map: %s", fieldName) - onValue, exists := result.Frontmatter["on"] + onValue, exists := frontmatter["on"] if !exists { return "", nil } @@ -244,6 +182,5 @@ func extractOnSectionAnyField(content, fieldName string) (string, error) { return "", nil } - contentExtractorLog.Printf("Successfully extracted on.%s: %d bytes", fieldName, len(jsonData)) return string(jsonData), nil } diff --git a/pkg/parser/import_field_extractor.go b/pkg/parser/import_field_extractor.go index 7ee988691c7..a253aaad505 100644 --- a/pkg/parser/import_field_extractor.go +++ b/pkg/parser/import_field_extractor.go @@ -108,69 +108,79 @@ func (acc *importAccumulator) extractAllImportFields(content []byte, item import } } + // Parse frontmatter once to avoid redundant YAML parsing for each field extraction. + // All subsequent field extractions use the pre-parsed result. + parsed, err := ExtractFrontmatterFromContent(string(content)) + var fm map[string]any + if err == nil { + fm = parsed.Frontmatter + } else { + fm = make(map[string]any) + } + // Extract engines from imported file - engineContent, err := extractFrontmatterField(string(content), "engine", "") + engineContent, err := extractFieldJSONFromMap(fm, "engine", "") if err == nil && engineContent != "" { log.Printf("Found engine config in import: %s", item.fullPath) acc.engines = append(acc.engines, engineContent) } // Extract mcp-servers from imported file - mcpServersContent, err := extractFrontmatterField(string(content), "mcp-servers", "{}") + mcpServersContent, err := extractFieldJSONFromMap(fm, "mcp-servers", "{}") if err == nil && mcpServersContent != "" && mcpServersContent != "{}" { acc.mcpServersBuilder.WriteString(mcpServersContent + "\n") } // Extract safe-outputs from imported file - safeOutputsContent, err := extractFrontmatterField(string(content), "safe-outputs", "{}") + safeOutputsContent, err := extractFieldJSONFromMap(fm, "safe-outputs", "{}") if err == nil && safeOutputsContent != "" && safeOutputsContent != "{}" { acc.safeOutputs = append(acc.safeOutputs, safeOutputsContent) } // Extract mcp-scripts from imported file - mcpScriptsContent, err := extractFrontmatterField(string(content), "mcp-scripts", "{}") + mcpScriptsContent, err := extractFieldJSONFromMap(fm, "mcp-scripts", "{}") if err == nil && mcpScriptsContent != "" && mcpScriptsContent != "{}" { acc.mcpScripts = append(acc.mcpScripts, mcpScriptsContent) } // Extract steps from imported file - stepsContent, err := extractStepsFromContent(string(content)) + stepsContent, err := extractYAMLFieldFromMap(fm, "steps") if err == nil && stepsContent != "" { acc.stepsBuilder.WriteString(stepsContent + "\n") } // Extract runtimes from imported file - runtimesContent, err := extractFrontmatterField(string(content), "runtimes", "{}") + runtimesContent, err := extractFieldJSONFromMap(fm, "runtimes", "{}") if err == nil && runtimesContent != "" && runtimesContent != "{}" { acc.runtimesBuilder.WriteString(runtimesContent + "\n") } // Extract services from imported file - servicesContent, err := extractServicesFromContent(string(content)) + servicesContent, err := extractYAMLFieldFromMap(fm, "services") if err == nil && servicesContent != "" { acc.servicesBuilder.WriteString(servicesContent + "\n") } // Extract network from imported file - networkContent, err := extractFrontmatterField(string(content), "network", "{}") + networkContent, err := extractFieldJSONFromMap(fm, "network", "{}") if err == nil && networkContent != "" && networkContent != "{}" { acc.networkBuilder.WriteString(networkContent + "\n") } // Extract permissions from imported file - permissionsContent, err := ExtractPermissionsFromContent(string(content)) + permissionsContent, err := extractFieldJSONFromMap(fm, "permissions", "{}") if err == nil && permissionsContent != "" && permissionsContent != "{}" { acc.permissionsBuilder.WriteString(permissionsContent + "\n") } // Extract secret-masking from imported file - secretMaskingContent, err := extractFrontmatterField(string(content), "secret-masking", "{}") + secretMaskingContent, err := extractFieldJSONFromMap(fm, "secret-masking", "{}") if err == nil && secretMaskingContent != "" && secretMaskingContent != "{}" { acc.secretMaskingBuilder.WriteString(secretMaskingContent + "\n") } // Extract and merge bots from imported file (merge into set to avoid duplicates) - botsContent, err := extractFrontmatterField(string(content), "bots", "[]") + botsContent, err := extractFieldJSONFromMap(fm, "bots", "[]") if err == nil && botsContent != "" && botsContent != "[]" { var importedBots []string if jsonErr := json.Unmarshal([]byte(botsContent), &importedBots); jsonErr == nil { @@ -184,7 +194,7 @@ func (acc *importAccumulator) extractAllImportFields(content []byte, item import } // Extract and merge skip-roles from imported file (merge into set to avoid duplicates) - skipRolesContent, err := extractOnSectionField(string(content), "skip-roles") + skipRolesContent, err := extractOnSectionFieldFromMap(fm, "skip-roles") if err == nil && skipRolesContent != "" && skipRolesContent != "[]" { var importedSkipRoles []string if jsonErr := json.Unmarshal([]byte(skipRolesContent), &importedSkipRoles); jsonErr == nil { @@ -198,7 +208,7 @@ func (acc *importAccumulator) extractAllImportFields(content []byte, item import } // Extract and merge skip-bots from imported file (merge into set to avoid duplicates) - skipBotsContent, err := extractOnSectionField(string(content), "skip-bots") + skipBotsContent, err := extractOnSectionFieldFromMap(fm, "skip-bots") if err == nil && skipBotsContent != "" && skipBotsContent != "[]" { var importedSkipBots []string if jsonErr := json.Unmarshal([]byte(skipBotsContent), &importedSkipBots); jsonErr == nil { @@ -213,36 +223,43 @@ func (acc *importAccumulator) extractAllImportFields(content []byte, item import // Extract on.github-token from imported file (first-wins: only set if not yet populated) if acc.activationGitHubToken == "" { - if token := extractOnGitHubToken(string(content)); token != "" { - acc.activationGitHubToken = token - log.Printf("Extracted on.github-token from import: %s", item.fullPath) + if tokenJSON, tokenErr := extractOnSectionAnyFieldFromMap(fm, "github-token"); tokenErr == nil && tokenJSON != "" && tokenJSON != "null" { + var token string + if jsonErr := json.Unmarshal([]byte(tokenJSON), &token); jsonErr == nil && token != "" { + acc.activationGitHubToken = token + log.Printf("Extracted on.github-token from import: %s", item.fullPath) + } } } // Extract on.github-app from imported file (first-wins: only set if not yet populated) if acc.activationGitHubApp == "" { - if appJSON := extractOnGitHubApp(string(content)); appJSON != "" { - acc.activationGitHubApp = appJSON - log.Printf("Extracted on.github-app from import: %s", item.fullPath) + if appJSON, appErr := extractOnSectionAnyFieldFromMap(fm, "github-app"); appErr == nil { + if validated := validateGitHubAppJSON(appJSON); validated != "" { + acc.activationGitHubApp = validated + log.Printf("Extracted on.github-app from import: %s", item.fullPath) + } } } // Extract top-level github-app from imported file (first-wins: only set if not yet populated) if acc.topLevelGitHubApp == "" { - if appJSON := extractTopLevelGitHubApp(string(content)); appJSON != "" { - acc.topLevelGitHubApp = appJSON - log.Printf("Extracted top-level github-app from import: %s", item.fullPath) + if appJSON, appErr := extractFieldJSONFromMap(fm, "github-app", ""); appErr == nil { + if validated := validateGitHubAppJSON(appJSON); validated != "" { + acc.topLevelGitHubApp = validated + log.Printf("Extracted top-level github-app from import: %s", item.fullPath) + } } } // Extract post-steps from imported file (append in order) - postStepsContent, err := extractPostStepsFromContent(string(content)) + postStepsContent, err := extractYAMLFieldFromMap(fm, "post-steps") if err == nil && postStepsContent != "" { acc.postStepsBuilder.WriteString(postStepsContent + "\n") } // Extract labels from imported file (merge into set to avoid duplicates) - labelsContent, err := extractFrontmatterField(string(content), "labels", "[]") + labelsContent, err := extractFieldJSONFromMap(fm, "labels", "[]") if err == nil && labelsContent != "" && labelsContent != "[]" { var importedLabels []string if jsonErr := json.Unmarshal([]byte(labelsContent), &importedLabels); jsonErr == nil { @@ -256,13 +273,13 @@ func (acc *importAccumulator) extractAllImportFields(content []byte, item import } // Extract cache from imported file (append to list of caches) - cacheContent, err := extractFrontmatterField(string(content), "cache", "{}") + cacheContent, err := extractFieldJSONFromMap(fm, "cache", "{}") if err == nil && cacheContent != "" && cacheContent != "{}" { acc.caches = append(acc.caches, cacheContent) } // Extract features from imported file (parse as map structure) - featuresContent, err := extractFrontmatterField(string(content), "features", "{}") + featuresContent, err := extractFieldJSONFromMap(fm, "features", "{}") if err == nil && featuresContent != "" && featuresContent != "{}" { var featuresMap map[string]any if jsonErr := json.Unmarshal([]byte(featuresContent), &featuresMap); jsonErr == nil { @@ -335,20 +352,6 @@ func computeImportRelPath(fullPath, importPath string) string { return importPath } -// extractOnGitHubToken returns the on.github-token string value from workflow content. -// Returns "" if the field is absent or not a non-empty string. -func extractOnGitHubToken(content string) string { - tokenJSON, err := extractOnSectionAnyField(content, "github-token") - if err != nil || tokenJSON == "" || tokenJSON == "null" { - return "" - } - var token string - if err := json.Unmarshal([]byte(tokenJSON), &token); err != nil { - return "" - } - return token -} - // validateGitHubAppJSON validates that a JSON-encoded GitHub App configuration has the required // fields (app-id and private-key). Returns the input JSON if valid, or "" otherwise. func validateGitHubAppJSON(appJSON string) string { @@ -367,23 +370,3 @@ func validateGitHubAppJSON(appJSON string) string { } return appJSON } - -// extractOnGitHubApp returns the JSON-encoded on.github-app object from workflow content. -// Returns "" if the field is absent, not a valid object, or missing required fields. -func extractOnGitHubApp(content string) string { - appJSON, err := extractOnSectionAnyField(content, "github-app") - if err != nil { - return "" - } - return validateGitHubAppJSON(appJSON) -} - -// extractTopLevelGitHubApp returns the JSON-encoded top-level github-app object from workflow content. -// Returns "" if the field is absent, not a valid object, or missing required fields. -func extractTopLevelGitHubApp(content string) string { - appJSON, err := extractFrontmatterField(content, "github-app", "") - if err != nil { - return "" - } - return validateGitHubAppJSON(appJSON) -}