diff --git a/pkg/workflow/expression_safety_validation.go b/pkg/workflow/expression_safety_validation.go new file mode 100644 index 00000000000..777d5754edd --- /dev/null +++ b/pkg/workflow/expression_safety_validation.go @@ -0,0 +1,294 @@ +// This file provides GitHub Actions expression security validation. +// It enforces an allowlist of approved expressions to prevent injection attacks. +// For syntax helpers, see expression_syntax_validation.go. +// For runtime-import validation, see runtime_import_validation.go. + +package workflow + +import ( + "fmt" + "regexp" + "slices" + "strings" + + "github.com/github/gh-aw/pkg/constants" + "github.com/github/gh-aw/pkg/parser" +) + +var expressionValidationLog = newValidationLogger("expression") + +// maxFuzzyMatchSuggestions is the maximum number of similar expressions to suggest +// when an unauthorized expression is found +const maxFuzzyMatchSuggestions = 7 + +// Pre-compiled regexes for expression safety validation (performance optimization) +var ( + expressionRegex = regexp.MustCompile(`(?s)\$\{\{(.*?)\}\}`) + needsStepsRegex = regexp.MustCompile(`^(needs|steps)\.[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)*$`) + inputsRegex = regexp.MustCompile(`^github\.event\.inputs\.[a-zA-Z0-9_-]+$`) + workflowCallInputsRegex = regexp.MustCompile(`^inputs\.[a-zA-Z0-9_-]+$`) + awInputsRegex = regexp.MustCompile(`^github\.aw\.inputs\.[a-zA-Z0-9_-]+$`) + envRegex = regexp.MustCompile(`^env\.[a-zA-Z0-9_-]+$`) + // comparisonExtractionRegex extracts property accesses from comparison expressions + // Matches patterns like "github.workflow == 'value'" and extracts "github.workflow" + comparisonExtractionRegex = regexp.MustCompile(`([a-zA-Z_][a-zA-Z0-9_.]*)\s*(?:==|!=|<|>|<=|>=)\s*`) + // orExpressionPattern matches "left || right" for fallback literal/expression checking + orExpressionPattern = regexp.MustCompile(`^(.+?)\s*\|\|\s*(.+)$`) +) + +// validateExpressionSafety checks that all GitHub Actions expressions in the markdown content +// are in the allowed list and returns an error if any unauthorized expressions are found +func validateExpressionSafety(markdownContent string) error { + expressionValidationLog.Print("Validating expression safety in markdown content") + + matches := expressionRegex.FindAllStringSubmatch(markdownContent, -1) + expressionValidationLog.Printf("Found %d expressions to validate", len(matches)) + + var unauthorizedExpressions []string + + for _, match := range matches { + if len(match) < 2 { + continue + } + + // Extract the expression content (everything between ${{ and }}) + expression := strings.TrimSpace(match[1]) + + // Reject expressions that span multiple lines (contain newlines) + if strings.Contains(match[1], "\n") { + unauthorizedExpressions = append(unauthorizedExpressions, expression) + continue + } + + // Try to parse the expression using the parser + parsed, parseErr := ParseExpression(expression) + if parseErr == nil { + // If we can parse it, validate each literal expression in the tree + validationErr := VisitExpressionTree(parsed, func(expr *ExpressionNode) error { + return validateSingleExpression(expr.Expression, ExpressionValidationOptions{ + NeedsStepsRe: needsStepsRegex, + InputsRe: inputsRegex, + WorkflowCallInputsRe: workflowCallInputsRegex, + AwInputsRe: awInputsRegex, + EnvRe: envRegex, + UnauthorizedExpressions: &unauthorizedExpressions, + }) + }) + if validationErr != nil { + return validationErr + } + } else { + // If parsing fails, fall back to validating the whole expression as a literal + err := validateSingleExpression(expression, ExpressionValidationOptions{ + NeedsStepsRe: needsStepsRegex, + InputsRe: inputsRegex, + WorkflowCallInputsRe: workflowCallInputsRegex, + AwInputsRe: awInputsRegex, + EnvRe: envRegex, + UnauthorizedExpressions: &unauthorizedExpressions, + }) + if err != nil { + return err + } + } + } + + if len(unauthorizedExpressions) > 0 { + expressionValidationLog.Printf("Expression safety validation failed: %d unauthorized expressions found", len(unauthorizedExpressions)) + var unauthorizedList strings.Builder + unauthorizedList.WriteString("\n") + for _, expr := range unauthorizedExpressions { + unauthorizedList.WriteString(" - ") + unauthorizedList.WriteString(expr) + + // Find closest matches using fuzzy string matching + closestMatches := parser.FindClosestMatches(expr, constants.AllowedExpressions, maxFuzzyMatchSuggestions) + if len(closestMatches) > 0 { + unauthorizedList.WriteString(" (did you mean: ") + unauthorizedList.WriteString(strings.Join(closestMatches, ", ")) + unauthorizedList.WriteString("?)") + } + + unauthorizedList.WriteString("\n") + } + + var allowedList strings.Builder + allowedList.WriteString("\n") + for _, expr := range constants.AllowedExpressions { + allowedList.WriteString(" - ") + allowedList.WriteString(expr) + allowedList.WriteString("\n") + } + allowedList.WriteString(" - needs.*\n") + allowedList.WriteString(" - steps.*\n") + allowedList.WriteString(" - github.event.inputs.*\n") + allowedList.WriteString(" - github.aw.inputs.* (shared workflow inputs)\n") + allowedList.WriteString(" - inputs.* (workflow_call)\n") + allowedList.WriteString(" - env.*\n") + + return NewValidationError( + "expressions", + fmt.Sprintf("%d unauthorized expressions found", len(unauthorizedExpressions)), + "expressions are not in the allowed list:"+unauthorizedList.String(), + fmt.Sprintf("Use only allowed expressions:%s\nFor more details, see the expression security documentation.", allowedList.String()), + ) + } + + expressionValidationLog.Print("Expression safety validation passed") + return nil +} + +// ExpressionValidationOptions contains the options for validating a single expression +type ExpressionValidationOptions struct { + NeedsStepsRe *regexp.Regexp + InputsRe *regexp.Regexp + WorkflowCallInputsRe *regexp.Regexp + AwInputsRe *regexp.Regexp + EnvRe *regexp.Regexp + UnauthorizedExpressions *[]string +} + +// validateExpressionForDangerousProps checks if an expression contains dangerous JavaScript +// property names that could be used for prototype pollution or traversal attacks. +// This matches the JavaScript runtime validation in actions/setup/js/runtime_import.cjs +// Returns an error if dangerous properties are found. +func validateExpressionForDangerousProps(expression string) error { + trimmed := strings.TrimSpace(expression) + + // Split expression into parts using both dot and bracket notation; + // filter out numeric indices (e.g., "0" in "assets[0]") + parts := exprPartSplitRe.Split(trimmed, -1) + + for _, part := range parts { + if part == "" || exprNumericPartRe.MatchString(part) { + continue + } + + for _, dangerousProp := range constants.DangerousPropertyNames { + if part == dangerousProp { + return NewValidationError( + "expressions", + fmt.Sprintf("dangerous property name '%s' found in expression", dangerousProp), + fmt.Sprintf("expression '%s' contains the dangerous property name '%s'", expression, dangerousProp), + fmt.Sprintf("Remove the dangerous property '%s' from the expression. Property names like constructor, __proto__, prototype, and similar JavaScript built-ins are blocked to prevent prototype pollution attacks. See PR #14826 for more details.", dangerousProp), + ) + } + } + } + + return nil +} + +// validateSingleExpression validates a single literal expression +func validateSingleExpression(expression string, opts ExpressionValidationOptions) error { + expression = strings.TrimSpace(expression) + + // Allow literal values (string, number, boolean) — safe leaf nodes in compound expressions. + if stringLiteralRegex.MatchString(expression) || + numberLiteralRegex.MatchString(expression) || + expression == "true" || expression == "false" { + return nil + } + + // Check for dangerous JavaScript property names (prototype pollution, PR #14826) + if err := validateExpressionForDangerousProps(expression); err != nil { + return err + } + + // Check if this expression is in the allowed list + allowed := false + + if opts.NeedsStepsRe.MatchString(expression) { + allowed = true + } else if opts.InputsRe.MatchString(expression) { + allowed = true + } else if opts.WorkflowCallInputsRe.MatchString(expression) { + allowed = true + } else if opts.AwInputsRe.MatchString(expression) { + allowed = true + } else if opts.EnvRe.MatchString(expression) { + allowed = true + } else if slices.Contains(constants.AllowedExpressions, expression) { + allowed = true + } + + // Check for OR expressions with literals (e.g., "inputs.repository || 'default'") + if !allowed { + orMatch := orExpressionPattern.FindStringSubmatch(expression) + if len(orMatch) > 2 { + leftExpr := strings.TrimSpace(orMatch[1]) + rightExpr := strings.TrimSpace(orMatch[2]) + + leftErr := validateSingleExpression(leftExpr, opts) + leftIsSafe := leftErr == nil && !containsExpression(opts.UnauthorizedExpressions, leftExpr) + + if leftIsSafe { + // Check if right side is a literal string (single, double, or backtick quotes) + // Note: Using (?:) for non-capturing group and checking each quote type separately + isStringLiteral := stringLiteralRegex.MatchString(rightExpr) + // Check if right side is a number literal + isNumberLiteral := numberLiteralRegex.MatchString(rightExpr) + // Check if right side is a boolean literal + isBooleanLiteral := rightExpr == "true" || rightExpr == "false" + + if isStringLiteral || isNumberLiteral || isBooleanLiteral { + allowed = true + } else { + // If right side is also a safe expression, recursively check it + rightErr := validateSingleExpression(rightExpr, opts) + if rightErr == nil && !containsExpression(opts.UnauthorizedExpressions, rightExpr) { + allowed = true + } + } + } + } + } + + // Try to extract and validate property accesses from comparison expressions + if !allowed { + matches := comparisonExtractionRegex.FindAllStringSubmatch(expression, -1) + if len(matches) > 0 { + allPropertiesAllowed := true + for _, match := range matches { + if len(match) > 1 { + property := strings.TrimSpace(match[1]) + propertyAllowed := false + + if opts.NeedsStepsRe.MatchString(property) { + propertyAllowed = true + } else if opts.InputsRe.MatchString(property) { + propertyAllowed = true + } else if opts.WorkflowCallInputsRe.MatchString(property) { + propertyAllowed = true + } else if opts.AwInputsRe.MatchString(property) { + propertyAllowed = true + } else if opts.EnvRe.MatchString(property) { + propertyAllowed = true + } else if slices.Contains(constants.AllowedExpressions, property) { + propertyAllowed = true + } + + if !propertyAllowed { + allPropertiesAllowed = false + break + } + } + } + + if allPropertiesAllowed && len(matches) > 0 { + allowed = true + } + } + } + + if !allowed { + *opts.UnauthorizedExpressions = append(*opts.UnauthorizedExpressions, expression) + } + + return nil +} + +// containsExpression checks if an expression is in the list +func containsExpression(list *[]string, expr string) bool { + return slices.Contains(*list, expr) +} diff --git a/pkg/workflow/expression_syntax_validation.go b/pkg/workflow/expression_syntax_validation.go new file mode 100644 index 00000000000..2a6d1c67261 --- /dev/null +++ b/pkg/workflow/expression_syntax_validation.go @@ -0,0 +1,236 @@ +// This file provides syntax-level validation for GitHub Actions expressions. +// It validates structural correctness: balanced braces, balanced quotes, +// empty expressions, and parenthesis pairing. +// For security/allowlist validation, see expression_safety_validation.go. +// For runtime-import file validation, see runtime_import_validation.go. + +package workflow + +import ( + "fmt" + "regexp" + "strings" +) + +// expressionBracesPattern matches GitHub Actions ${{ }} expressions for syntax validation. +// Uses [^}]* to match non-closing-brace characters within the expression. +var expressionBracesPattern = regexp.MustCompile(`\$\{\{([^}]*)\}\}`) + +// Pre-compiled regexes shared between syntax and safety validation +var ( + // stringLiteralRegex matches single-quoted, double-quoted, or backtick-quoted string literals. + // Note: escape sequences inside strings are not handled; GitHub Actions uses '' for literal quotes. + stringLiteralRegex = regexp.MustCompile(`^'[^']*'$|^"[^"]*"$|^` + "`[^`]*`$") + // numberLiteralRegex matches integer and decimal number literals (with optional leading minus) + numberLiteralRegex = regexp.MustCompile(`^-?\d+(\.\d+)?$`) + // exprPartSplitRe splits expression strings on dot and bracket characters + exprPartSplitRe = regexp.MustCompile(`[.\[\]]+`) + // exprNumericPartRe matches purely numeric expression parts (array indices) + exprNumericPartRe = regexp.MustCompile(`^\d+$`) +) + +// validateBalancedBraces checks that all ${{ }} braces are balanced and properly closed +func validateBalancedBraces(group string) error { + expressionValidationLog.Print("Checking balanced braces in expression") + openCount := 0 + i := 0 + positions := []int{} // Track positions of opening braces for error reporting + + for i < len(group) { + // Check for opening ${{ + if i+2 < len(group) && group[i:i+3] == "${{" { + openCount++ + positions = append(positions, i) + i += 3 + continue + } + + // Check for closing }} + if i+1 < len(group) && group[i:i+2] == "}}" { + if openCount == 0 { + return NewValidationError( + "expression", + "unbalanced closing braces", + fmt.Sprintf("found '}}' at position %d without matching opening '${{' in expression: %s", i, group), + "Ensure all '}}' have a corresponding opening '${{'. Check for typos or missing opening braces.", + ) + } + openCount-- + if len(positions) > 0 { + positions = positions[:len(positions)-1] + } + i += 2 + continue + } + + i++ + } + + if openCount > 0 { + // Find the position of the first unclosed opening brace + pos := positions[0] + expressionValidationLog.Printf("Found %d unclosed brace(s) starting at position %d", openCount, pos) + return NewValidationError( + "expression", + "unclosed expression braces", + fmt.Sprintf("found opening '${{' at position %d without matching closing '}}' in expression: %s", pos, group), + "Ensure all '${{' have a corresponding closing '}}'. Add the missing closing braces.", + ) + } + + expressionValidationLog.Print("Brace balance check passed") + return nil +} + +// validateExpressionSyntax validates the syntax of expressions within ${{ }} +func validateExpressionSyntax(group string) error { + matches := expressionBracesPattern.FindAllStringSubmatch(group, -1) + + expressionValidationLog.Printf("Found %d expression(s) to validate", len(matches)) + + for _, match := range matches { + if len(match) < 2 { + continue + } + + exprContent := strings.TrimSpace(match[1]) + if exprContent == "" { + return NewValidationError( + "expression", + "empty expression content", + "found empty expression '${{ }}' in: "+group, + "Provide a valid GitHub Actions expression inside '${{ }}'. Example: '${{ github.ref }}'", + ) + } + + // Check for common syntax errors + if err := validateExpressionContent(exprContent, group); err != nil { + return err + } + } + + return nil +} + +// validateExpressionContent validates the content inside ${{ }} +func validateExpressionContent(expr string, fullGroup string) error { + // Check for unbalanced parentheses + parenCount := 0 + for i, ch := range expr { + switch ch { + case '(': + parenCount++ + case ')': + parenCount-- + if parenCount < 0 { + return NewValidationError( + "expression", + "unbalanced parentheses in expression", + fmt.Sprintf("found closing ')' without matching opening '(' at position %d in expression: %s", i, expr), + "Ensure all parentheses are properly balanced in your expression.", + ) + } + } + } + + if parenCount > 0 { + return NewValidationError( + "expression", + "unclosed parentheses in expression", + fmt.Sprintf("found %d unclosed opening '(' in expression: %s", parenCount, expr), + "Add the missing closing ')' to balance parentheses in your expression.", + ) + } + + // Check for unbalanced quotes (single, double, backtick) + if err := validateBalancedQuotes(expr); err != nil { + return err + } + + // Try to parse complex expressions with logical operators + if containsLogicalOperators(expr) { + expressionValidationLog.Print("Expression contains logical operators, performing deep validation") + if _, err := ParseExpression(expr); err != nil { + expressionValidationLog.Printf("Expression parsing failed: %v", err) + return NewValidationError( + "expression", + "invalid expression syntax", + "failed to parse expression: "+err.Error(), + "Fix the syntax error in your expression. Full expression: "+fullGroup, + ) + } + } + + return nil +} + +// validateBalancedQuotes checks for balanced quotes in an expression +func validateBalancedQuotes(expr string) error { + inSingleQuote := false + inDoubleQuote := false + inBacktick := false + escaped := false + + for i, ch := range expr { + if escaped { + escaped = false + continue + } + + if ch == '\\' { + escaped = true + continue + } + + switch ch { + case '\'': + if !inDoubleQuote && !inBacktick { + inSingleQuote = !inSingleQuote + } + case '"': + if !inSingleQuote && !inBacktick { + inDoubleQuote = !inDoubleQuote + } + case '`': + if !inSingleQuote && !inDoubleQuote { + inBacktick = !inBacktick + } + } + + // Check if we reached end of string with unclosed quote + if i == len(expr)-1 { + if inSingleQuote { + return NewValidationError( + "expression", + "unclosed single quote", + "found unclosed single quote in expression: "+expr, + "Add the missing closing single quote (') to your expression.", + ) + } + if inDoubleQuote { + return NewValidationError( + "expression", + "unclosed double quote", + "found unclosed double quote in expression: "+expr, + "Add the missing closing double quote (\") to your expression.", + ) + } + if inBacktick { + return NewValidationError( + "expression", + "unclosed backtick", + "found unclosed backtick in expression: "+expr, + "Add the missing closing backtick (`) to your expression.", + ) + } + } + } + + return nil +} + +// containsLogicalOperators checks if an expression contains logical operators (&&, ||, !). +// Note: '!=' also matches '!' — this is acceptable since the expression parser handles it. +func containsLogicalOperators(expr string) bool { + return strings.Contains(expr, "&&") || strings.Contains(expr, "||") || strings.Contains(expr, "!") +} diff --git a/pkg/workflow/expression_validation.go b/pkg/workflow/expression_validation.go deleted file mode 100644 index 9e1f586f9d5..00000000000 --- a/pkg/workflow/expression_validation.go +++ /dev/null @@ -1,715 +0,0 @@ -// This file provides GitHub Actions expression security validation. -// -// # Expression Safety Validation -// -// This file validates that GitHub Actions expressions used in workflow markdown -// are safe and authorized. It prevents injection attacks and ensures workflows -// only use approved expression patterns. -// -// # Validation Functions -// -// - validateExpressionSafety() - Validates all expressions in markdown content -// - validateSingleExpression() - Validates individual expression syntax -// -// # Validation Pattern: Allowlist Security -// -// Expression validation uses a strict allowlist approach: -// - Only pre-approved GitHub context expressions are allowed -// - Unauthorized expressions cause compilation to fail -// - Prevents injection of secrets or environment variables -// - Uses regex patterns to match allowed expression formats -// -// # Allowed Expression Patterns -// -// Expressions must match one of these patterns: -// - github.event.* (event context properties) -// - github.actor, github.repository, etc. (core GitHub context) -// - needs.*.outputs.* (job dependencies) -// - steps.*.outputs.* (step outputs) -// - github.event.inputs.* (workflow_dispatch inputs) -// -// See pkg/constants for the complete list of allowed expressions. -// -// # When to Add Validation Here -// -// Add validation to this file when: -// - It validates GitHub Actions expression parsing -// - It enforces expression security policies -// - It prevents expression injection attacks -// - It validates expression syntax and structure -// -// For general validation, see validation.go. -// For detailed documentation, see scratchpad/validation-architecture.md - -package workflow - -import ( - "fmt" - "os" - "path/filepath" - "regexp" - "slices" - "strings" - - "github.com/github/gh-aw/pkg/constants" - "github.com/github/gh-aw/pkg/parser" -) - -var expressionValidationLog = newValidationLogger("expression") - -// maxFuzzyMatchSuggestions is the maximum number of similar expressions to suggest -// when an unauthorized expression is found -const maxFuzzyMatchSuggestions = 7 - -// Pre-compiled regexes for expression validation (performance optimization) -var ( - expressionRegex = regexp.MustCompile(`(?s)\$\{\{(.*?)\}\}`) - needsStepsRegex = regexp.MustCompile(`^(needs|steps)\.[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)*$`) - inputsRegex = regexp.MustCompile(`^github\.event\.inputs\.[a-zA-Z0-9_-]+$`) - workflowCallInputsRegex = regexp.MustCompile(`^inputs\.[a-zA-Z0-9_-]+$`) - awInputsRegex = regexp.MustCompile(`^github\.aw\.inputs\.[a-zA-Z0-9_-]+$`) - envRegex = regexp.MustCompile(`^env\.[a-zA-Z0-9_-]+$`) - // comparisonExtractionRegex extracts property accesses from comparison expressions - // Matches patterns like "github.workflow == 'value'" and extracts "github.workflow" - comparisonExtractionRegex = regexp.MustCompile(`([a-zA-Z_][a-zA-Z0-9_.]*)\s*(?:==|!=|<|>|<=|>=)\s*`) - // stringLiteralRegex matches single-quoted, double-quoted, or backtick-quoted string literals. - // Note: escape sequences inside strings are not handled; GitHub Actions uses '' for literal quotes. - stringLiteralRegex = regexp.MustCompile(`^'[^']*'$|^"[^"]*"$|^` + "`[^`]*`$") - // numberLiteralRegex matches integer and decimal number literals (with optional leading minus) - numberLiteralRegex = regexp.MustCompile(`^-?\d+(\.\d+)?$`) - // exprPartSplitRe splits expression strings on dot and bracket characters - exprPartSplitRe = regexp.MustCompile(`[.\[\]]+`) - // exprNumericPartRe matches purely numeric expression parts (array indices) - exprNumericPartRe = regexp.MustCompile(`^\d+$`) -) - -// validateExpressionSafety checks that all GitHub Actions expressions in the markdown content -// are in the allowed list and returns an error if any unauthorized expressions are found -func validateExpressionSafety(markdownContent string) error { - expressionValidationLog.Print("Validating expression safety in markdown content") - - // Regular expression to match GitHub Actions expressions: ${{ ... }} - // Use (?s) flag to enable dotall mode so . matches newlines to capture multiline expressions - // Use non-greedy matching with .*? to handle nested braces properly - - // Find all expressions in the markdown content - matches := expressionRegex.FindAllStringSubmatch(markdownContent, -1) - expressionValidationLog.Printf("Found %d expressions to validate", len(matches)) - - var unauthorizedExpressions []string - - for _, match := range matches { - if len(match) < 2 { - continue - } - - // Extract the expression content (everything between ${{ and }}) - expression := strings.TrimSpace(match[1]) - - // Reject expressions that span multiple lines (contain newlines) - if strings.Contains(match[1], "\n") { - unauthorizedExpressions = append(unauthorizedExpressions, expression) - continue - } - - // Try to parse the expression using the parser - parsed, parseErr := ParseExpression(expression) - if parseErr == nil { - // If we can parse it, validate each literal expression in the tree - validationErr := VisitExpressionTree(parsed, func(expr *ExpressionNode) error { - return validateSingleExpression(expr.Expression, ExpressionValidationOptions{ - NeedsStepsRe: needsStepsRegex, - InputsRe: inputsRegex, - WorkflowCallInputsRe: workflowCallInputsRegex, - AwInputsRe: awInputsRegex, - EnvRe: envRegex, - UnauthorizedExpressions: &unauthorizedExpressions, - }) - }) - if validationErr != nil { - return validationErr - } - } else { - // If parsing fails, fall back to validating the whole expression as a literal - err := validateSingleExpression(expression, ExpressionValidationOptions{ - NeedsStepsRe: needsStepsRegex, - InputsRe: inputsRegex, - WorkflowCallInputsRe: workflowCallInputsRegex, - AwInputsRe: awInputsRegex, - EnvRe: envRegex, - UnauthorizedExpressions: &unauthorizedExpressions, - }) - if err != nil { - return err - } - } - } - - // If we found unauthorized expressions, return an error - if len(unauthorizedExpressions) > 0 { - expressionValidationLog.Printf("Expression safety validation failed: %d unauthorized expressions found", len(unauthorizedExpressions)) - // Format unauthorized expressions list with fuzzy match suggestions - var unauthorizedList strings.Builder - unauthorizedList.WriteString("\n") - for _, expr := range unauthorizedExpressions { - unauthorizedList.WriteString(" - ") - unauthorizedList.WriteString(expr) - - // Find closest matches using fuzzy string matching - closestMatches := parser.FindClosestMatches(expr, constants.AllowedExpressions, maxFuzzyMatchSuggestions) - if len(closestMatches) > 0 { - unauthorizedList.WriteString(" (did you mean: ") - unauthorizedList.WriteString(strings.Join(closestMatches, ", ")) - unauthorizedList.WriteString("?)") - } - - unauthorizedList.WriteString("\n") - } - - // Format allowed expressions list - var allowedList strings.Builder - allowedList.WriteString("\n") - for _, expr := range constants.AllowedExpressions { - allowedList.WriteString(" - ") - allowedList.WriteString(expr) - allowedList.WriteString("\n") - } - allowedList.WriteString(" - needs.*\n") - allowedList.WriteString(" - steps.*\n") - allowedList.WriteString(" - github.event.inputs.*\n") - allowedList.WriteString(" - github.aw.inputs.* (shared workflow inputs)\n") - allowedList.WriteString(" - inputs.* (workflow_call)\n") - allowedList.WriteString(" - env.*\n") - - return NewValidationError( - "expressions", - fmt.Sprintf("%d unauthorized expressions found", len(unauthorizedExpressions)), - "expressions are not in the allowed list:"+unauthorizedList.String(), - fmt.Sprintf("Use only allowed expressions:%s\nFor more details, see the expression security documentation.", allowedList.String()), - ) - } - - expressionValidationLog.Print("Expression safety validation passed") - return nil -} - -// ExpressionValidationOptions contains the options for validating a single expression -type ExpressionValidationOptions struct { - NeedsStepsRe *regexp.Regexp - InputsRe *regexp.Regexp - WorkflowCallInputsRe *regexp.Regexp - AwInputsRe *regexp.Regexp - EnvRe *regexp.Regexp - UnauthorizedExpressions *[]string -} - -// validateExpressionForDangerousProps checks if an expression contains dangerous JavaScript -// property names that could be used for prototype pollution or traversal attacks. -// This matches the JavaScript runtime validation in actions/setup/js/runtime_import.cjs -// Returns an error if dangerous properties are found. -func validateExpressionForDangerousProps(expression string) error { - trimmed := strings.TrimSpace(expression) - - // Split expression into parts handling both dot notation (e.g., "github.event.issue") - // and bracket notation (e.g., "release.assets[0].id") - // Filter out numeric indices (e.g., "0" in "assets[0]") - parts := exprPartSplitRe.Split(trimmed, -1) - - for _, part := range parts { - // Skip empty parts and numeric indices - if part == "" || exprNumericPartRe.MatchString(part) { - continue - } - - // Check if this part is a dangerous property name - for _, dangerousProp := range constants.DangerousPropertyNames { - if part == dangerousProp { - return NewValidationError( - "expressions", - fmt.Sprintf("dangerous property name '%s' found in expression", dangerousProp), - fmt.Sprintf("expression '%s' contains the dangerous property name '%s'", expression, dangerousProp), - fmt.Sprintf("Remove the dangerous property '%s' from the expression. Property names like constructor, __proto__, prototype, and similar JavaScript built-ins are blocked to prevent prototype pollution attacks. See PR #14826 for more details.", dangerousProp), - ) - } - } - } - - return nil -} - -// validateSingleExpression validates a single literal expression -func validateSingleExpression(expression string, opts ExpressionValidationOptions) error { - expression = strings.TrimSpace(expression) - - // Allow literal values (string, number, boolean) without further checks. - // These appear as leaf nodes when the parser decomposes compound expressions - // such as "inputs.devices || 'mobile,tablet,desktop'" and are safe constants. - if stringLiteralRegex.MatchString(expression) || - numberLiteralRegex.MatchString(expression) || - expression == "true" || expression == "false" { - return nil - } - - // First, check for dangerous JavaScript property names that could be used for - // prototype pollution or traversal attacks (PR #14826) - if err := validateExpressionForDangerousProps(expression); err != nil { - return err - } - - // Check if this expression is in the allowed list - allowed := false - - // Check if this expression starts with "needs." or "steps." and is a simple property access - if opts.NeedsStepsRe.MatchString(expression) { - allowed = true - } else if opts.InputsRe.MatchString(expression) { - // Check if this expression matches github.event.inputs.* pattern - allowed = true - } else if opts.WorkflowCallInputsRe.MatchString(expression) { - // Check if this expression matches inputs.* pattern (workflow_call inputs) - allowed = true - } else if opts.AwInputsRe.MatchString(expression) { - // Check if this expression matches github.agentics.inputs.* pattern (shared workflow inputs) - allowed = true - } else if opts.EnvRe.MatchString(expression) { - // check if this expression matches env.* pattern - allowed = true - } else { - if slices.Contains(constants.AllowedExpressions, expression) { - allowed = true - } - } - - // Check for OR expressions with literals (e.g., "inputs.repository || 'default'") - // Pattern: safe_expression || 'literal' or safe_expression || "literal" or safe_expression || `literal` - // Also supports numbers and booleans as literals - if !allowed { - // Match pattern: something || something_else - orPattern := regexp.MustCompile(`^(.+?)\s*\|\|\s*(.+)$`) - orMatch := orPattern.FindStringSubmatch(expression) - if len(orMatch) > 2 { - leftExpr := strings.TrimSpace(orMatch[1]) - rightExpr := strings.TrimSpace(orMatch[2]) - - // Check if left side is safe (recursively validate) - leftErr := validateSingleExpression(leftExpr, opts) - leftIsSafe := leftErr == nil && !containsExpression(opts.UnauthorizedExpressions, leftExpr) - - if leftIsSafe { - // Check if right side is a literal string (single, double, or backtick quotes) - // Note: Using (?:) for non-capturing group and checking each quote type separately - isStringLiteral := stringLiteralRegex.MatchString(rightExpr) - // Check if right side is a number literal - isNumberLiteral := numberLiteralRegex.MatchString(rightExpr) - // Check if right side is a boolean literal - isBooleanLiteral := rightExpr == "true" || rightExpr == "false" - - if isStringLiteral || isNumberLiteral || isBooleanLiteral { - allowed = true - } else { - // If right side is also a safe expression, recursively check it - rightErr := validateSingleExpression(rightExpr, opts) - if rightErr == nil && !containsExpression(opts.UnauthorizedExpressions, rightExpr) { - allowed = true - } - } - } - } - } - - // If not allowed as a whole, try to extract and validate property accesses from comparisons - if !allowed { - // Extract property accesses from comparison expressions (e.g., "github.workflow == 'value'") - matches := comparisonExtractionRegex.FindAllStringSubmatch(expression, -1) - if len(matches) > 0 { - // Assume it's allowed if all extracted properties are allowed - allPropertiesAllowed := true - for _, match := range matches { - if len(match) > 1 { - property := strings.TrimSpace(match[1]) - propertyAllowed := false - - // Check if extracted property is allowed - if opts.NeedsStepsRe.MatchString(property) { - propertyAllowed = true - } else if opts.InputsRe.MatchString(property) { - propertyAllowed = true - } else if opts.WorkflowCallInputsRe.MatchString(property) { - propertyAllowed = true - } else if opts.AwInputsRe.MatchString(property) { - propertyAllowed = true - } else if opts.EnvRe.MatchString(property) { - propertyAllowed = true - } else { - if slices.Contains(constants.AllowedExpressions, property) { - propertyAllowed = true - } - } - - if !propertyAllowed { - allPropertiesAllowed = false - break - } - } - } - - if allPropertiesAllowed && len(matches) > 0 { - allowed = true - } - } - } - - if !allowed { - *opts.UnauthorizedExpressions = append(*opts.UnauthorizedExpressions, expression) - } - - return nil -} - -// containsExpression checks if an expression is in the list -func containsExpression(list *[]string, expr string) bool { - return slices.Contains(*list, expr) -} - -// extractRuntimeImportPaths extracts all runtime-import file paths from markdown content. -// Returns a list of file paths (not URLs) referenced in {{#runtime-import}} macros. -// URLs (http:// or https://) are excluded since they are validated separately. -func extractRuntimeImportPaths(markdownContent string) []string { - if markdownContent == "" { - return nil - } - - var paths []string - seen := make(map[string]bool) - - // Pattern to match {{#runtime-import filepath}} or {{#runtime-import? filepath}} - // Also handles line ranges like filepath:10-20 - macroPattern := `\{\{#runtime-import\??[ \t]+([^\}]+)\}\}` - macroRe := regexp.MustCompile(macroPattern) - matches := macroRe.FindAllStringSubmatch(markdownContent, -1) - - for _, match := range matches { - if len(match) > 1 { - pathWithRange := strings.TrimSpace(match[1]) - - // Remove line range if present (e.g., "file.md:10-20" -> "file.md") - filepath := pathWithRange - if colonIdx := strings.Index(pathWithRange, ":"); colonIdx > 0 { - // Check if what follows colon looks like a line range (digits-digits) - afterColon := pathWithRange[colonIdx+1:] - if regexp.MustCompile(`^\d+-\d+$`).MatchString(afterColon) { - filepath = pathWithRange[:colonIdx] - } - } - - // Skip URLs - they don't need file validation - if strings.HasPrefix(filepath, "http://") || strings.HasPrefix(filepath, "https://") { - continue - } - - // Add to list if not already seen - if !seen[filepath] { - paths = append(paths, filepath) - seen[filepath] = true - } - } - } - - return paths -} - -// validateRuntimeImportFiles validates expressions in all runtime-import files at compile time. -// This catches expression errors early, before the workflow runs. -// workspaceDir should be the root of the repository (containing .github folder). -func validateRuntimeImportFiles(markdownContent string, workspaceDir string) error { - expressionValidationLog.Print("Validating runtime-import files") - - // Extract all runtime-import file paths - paths := extractRuntimeImportPaths(markdownContent) - if len(paths) == 0 { - expressionValidationLog.Print("No runtime-import files to validate") - return nil - } - - expressionValidationLog.Printf("Found %d runtime-import file(s) to validate", len(paths)) - - var validationErrors []string - - for _, filePath := range paths { - // Normalize the path to be relative to .github folder - normalizedPath := filePath - if strings.HasPrefix(normalizedPath, ".github/") { - normalizedPath = normalizedPath[8:] // Remove ".github/" - } else if strings.HasPrefix(normalizedPath, ".github\\") { - normalizedPath = normalizedPath[8:] // Remove ".github\" (Windows) - } - if strings.HasPrefix(normalizedPath, "./") { - normalizedPath = normalizedPath[2:] // Remove "./" - } else if strings.HasPrefix(normalizedPath, ".\\") { - normalizedPath = normalizedPath[2:] // Remove ".\" (Windows) - } - - // Build absolute path to the file - githubFolder := filepath.Join(workspaceDir, ".github") - absolutePath := filepath.Join(githubFolder, normalizedPath) - - // Security check: ensure the resolved path is within the .github folder - // Use filepath.Rel to check if the path escapes the .github folder - normalizedGithubFolder := filepath.Clean(githubFolder) - normalizedAbsolutePath := filepath.Clean(absolutePath) - relativePath, err := filepath.Rel(normalizedGithubFolder, normalizedAbsolutePath) - if err != nil || relativePath == ".." || strings.HasPrefix(relativePath, ".."+string(filepath.Separator)) || filepath.IsAbs(relativePath) { - validationErrors = append(validationErrors, fmt.Sprintf("%s: Security: Path must be within .github folder (resolves to: %s)", filePath, relativePath)) - continue - } - - // Check if file exists - if _, err := os.Stat(absolutePath); os.IsNotExist(err) { - // Skip validation for optional imports ({{#runtime-import? ...}}) - // We can't determine if it's optional here, but missing files will be caught at runtime - expressionValidationLog.Printf("Skipping validation for non-existent file: %s", filePath) - continue - } - - // Read the file content - content, err := os.ReadFile(absolutePath) - if err != nil { - validationErrors = append(validationErrors, fmt.Sprintf("%s: failed to read file: %v", filePath, err)) - continue - } - - // Validate expressions in the imported file - if err := validateExpressionSafety(string(content)); err != nil { - validationErrors = append(validationErrors, fmt.Sprintf("%s: %v", filePath, err)) - } else { - expressionValidationLog.Printf("✓ Validated expressions in %s", filePath) - } - } - - if len(validationErrors) > 0 { - expressionValidationLog.Printf("Runtime-import validation failed: %d file(s) with errors", len(validationErrors)) - return NewValidationError( - "runtime-import", - fmt.Sprintf("%d files with errors", len(validationErrors)), - "runtime-import files contain expression errors:\n\n"+strings.Join(validationErrors, "\n\n"), - "Fix the expression errors in the imported files listed above. Each file must only use allowed GitHub Actions expressions. See expression security documentation for details.", - ) - } - - expressionValidationLog.Print("All runtime-import files validated successfully") - return nil -} - -// expressionBracesPattern matches GitHub Actions ${{ }} expressions for syntax validation. -// Uses [^}]* to match non-closing-brace characters within the expression. -var expressionBracesPattern = regexp.MustCompile(`\$\{\{([^}]*)\}\}`) - -// validateBalancedBraces checks that all ${{ }} braces are balanced and properly closed -func validateBalancedBraces(group string) error { - expressionValidationLog.Print("Checking balanced braces in expression") - openCount := 0 - i := 0 - positions := []int{} // Track positions of opening braces for error reporting - - for i < len(group) { - // Check for opening ${{ - if i+2 < len(group) && group[i:i+3] == "${{" { - openCount++ - positions = append(positions, i) - i += 3 - continue - } - - // Check for closing }} - if i+1 < len(group) && group[i:i+2] == "}}" { - if openCount == 0 { - return NewValidationError( - "expression", - "unbalanced closing braces", - fmt.Sprintf("found '}}' at position %d without matching opening '${{' in expression: %s", i, group), - "Ensure all '}}' have a corresponding opening '${{'. Check for typos or missing opening braces.", - ) - } - openCount-- - if len(positions) > 0 { - positions = positions[:len(positions)-1] - } - i += 2 - continue - } - - i++ - } - - if openCount > 0 { - // Find the position of the first unclosed opening brace - pos := positions[0] - expressionValidationLog.Printf("Found %d unclosed brace(s) starting at position %d", openCount, pos) - return NewValidationError( - "expression", - "unclosed expression braces", - fmt.Sprintf("found opening '${{' at position %d without matching closing '}}' in expression: %s", pos, group), - "Ensure all '${{' have a corresponding closing '}}'. Add the missing closing braces.", - ) - } - - expressionValidationLog.Print("Brace balance check passed") - return nil -} - -// validateExpressionSyntax validates the syntax of expressions within ${{ }} -func validateExpressionSyntax(group string) error { - // Pattern to extract content between ${{ and }} - matches := expressionBracesPattern.FindAllStringSubmatch(group, -1) - - expressionValidationLog.Printf("Found %d expression(s) to validate", len(matches)) - - for _, match := range matches { - if len(match) < 2 { - continue - } - - exprContent := strings.TrimSpace(match[1]) - if exprContent == "" { - return NewValidationError( - "expression", - "empty expression content", - "found empty expression '${{ }}' in: "+group, - "Provide a valid GitHub Actions expression inside '${{ }}'. Example: '${{ github.ref }}'", - ) - } - - // Check for common syntax errors - if err := validateExpressionContent(exprContent, group); err != nil { - return err - } - } - - return nil -} - -// validateExpressionContent validates the content inside ${{ }} -func validateExpressionContent(expr string, fullGroup string) error { - // Check for unbalanced parentheses - parenCount := 0 - for i, ch := range expr { - switch ch { - case '(': - parenCount++ - case ')': - parenCount-- - if parenCount < 0 { - return NewValidationError( - "expression", - "unbalanced parentheses in expression", - fmt.Sprintf("found closing ')' without matching opening '(' at position %d in expression: %s", i, expr), - "Ensure all parentheses are properly balanced in your expression.", - ) - } - } - } - - if parenCount > 0 { - return NewValidationError( - "expression", - "unclosed parentheses in expression", - fmt.Sprintf("found %d unclosed opening '(' in expression: %s", parenCount, expr), - "Add the missing closing ')' to balance parentheses in your expression.", - ) - } - - // Check for unbalanced quotes (single, double, backtick) - if err := validateBalancedQuotes(expr); err != nil { - return err - } - - // Try to parse complex expressions with logical operators - if containsLogicalOperators(expr) { - expressionValidationLog.Print("Expression contains logical operators, performing deep validation") - if _, err := ParseExpression(expr); err != nil { - expressionValidationLog.Printf("Expression parsing failed: %v", err) - return NewValidationError( - "expression", - "invalid expression syntax", - "failed to parse expression: "+err.Error(), - "Fix the syntax error in your expression. Full expression: "+fullGroup, - ) - } - } - - return nil -} - -// validateBalancedQuotes checks for balanced quotes in an expression -func validateBalancedQuotes(expr string) error { - inSingleQuote := false - inDoubleQuote := false - inBacktick := false - escaped := false - - for i, ch := range expr { - if escaped { - escaped = false - continue - } - - if ch == '\\' { - escaped = true - continue - } - - switch ch { - case '\'': - if !inDoubleQuote && !inBacktick { - inSingleQuote = !inSingleQuote - } - case '"': - if !inSingleQuote && !inBacktick { - inDoubleQuote = !inDoubleQuote - } - case '`': - if !inSingleQuote && !inDoubleQuote { - inBacktick = !inBacktick - } - } - - // Check if we reached end of string with unclosed quote - if i == len(expr)-1 { - if inSingleQuote { - return NewValidationError( - "expression", - "unclosed single quote", - "found unclosed single quote in expression: "+expr, - "Add the missing closing single quote (') to your expression.", - ) - } - if inDoubleQuote { - return NewValidationError( - "expression", - "unclosed double quote", - "found unclosed double quote in expression: "+expr, - "Add the missing closing double quote (\") to your expression.", - ) - } - if inBacktick { - return NewValidationError( - "expression", - "unclosed backtick", - "found unclosed backtick in expression: "+expr, - "Add the missing closing backtick (`) to your expression.", - ) - } - } - } - - return nil -} - -// containsLogicalOperators checks if an expression contains logical operators (&&, ||, !) -// Note: This is a simple string-based check that may return true for expressions containing -// '!=' (not equals) since it includes the '!' character. This is acceptable because the -// function is used to decide whether to parse the expression with the expression parser, -// and expressions with '!=' will be successfully parsed by the parser. -func containsLogicalOperators(expr string) bool { - return strings.Contains(expr, "&&") || strings.Contains(expr, "||") || strings.Contains(expr, "!") -} diff --git a/pkg/workflow/runtime_import_validation.go b/pkg/workflow/runtime_import_validation.go new file mode 100644 index 00000000000..7e48d465aaa --- /dev/null +++ b/pkg/workflow/runtime_import_validation.go @@ -0,0 +1,160 @@ +// This file provides validation for runtime-import file paths and their expressions. +// +// # Runtime Import Validation +// +// This file validates runtime-import file references in workflow markdown: +// - Extracts file paths from {{#runtime-import}} macros +// - Validates that imported files contain only allowed expressions +// +// # Validation Functions +// +// - extractRuntimeImportPaths() - Extracts file paths from {{#runtime-import}} macros +// - validateRuntimeImportFiles() - Validates expressions in all runtime-import files +// +// For expression security and allowlist validation, see expression_safety_validation.go. +// For expression syntax validation, see expression_syntax_validation.go. + +package workflow + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "strings" +) + +// runtimeImportMacroRe matches {{#runtime-import filepath}} or {{#runtime-import? filepath}}. +var runtimeImportMacroRe = regexp.MustCompile(`\{\{#runtime-import\??[ \t]+([^\}]+)\}\}`) + +// lineRangeRe matches a line range suffix of the form "digits-digits" (e.g., "10-20"). +var lineRangeRe = regexp.MustCompile(`^\d+-\d+$`) + +// extractRuntimeImportPaths extracts all runtime-import file paths from markdown content. +// Returns a list of file paths (not URLs) referenced in {{#runtime-import}} macros. +// URLs (http:// or https://) are excluded since they are validated separately. +func extractRuntimeImportPaths(markdownContent string) []string { + if markdownContent == "" { + return nil + } + + var paths []string + seen := make(map[string]bool) + + matches := runtimeImportMacroRe.FindAllStringSubmatch(markdownContent, -1) + + for _, match := range matches { + if len(match) > 1 { + pathWithRange := strings.TrimSpace(match[1]) + + // Skip macros with empty or whitespace-only targets + if pathWithRange == "" { + expressionValidationLog.Print("Skipping runtime-import macro with empty target") + continue + } + + // Remove line range if present (e.g., "file.md:10-20" -> "file.md") + importPath := pathWithRange + if colonIdx := strings.Index(pathWithRange, ":"); colonIdx > 0 { + // Check if what follows colon looks like a line range (digits-digits) + afterColon := pathWithRange[colonIdx+1:] + if lineRangeRe.MatchString(afterColon) { + importPath = pathWithRange[:colonIdx] + } + } + + // Skip URLs - they don't need file validation + if strings.HasPrefix(importPath, "http://") || strings.HasPrefix(importPath, "https://") { + continue + } + + // Add to list if not already seen + if !seen[importPath] { + paths = append(paths, importPath) + seen[importPath] = true + } + } + } + + return paths +} + +// validateRuntimeImportFiles validates expressions in all runtime-import files at compile time. +// This catches expression errors early, before the workflow runs. +// workspaceDir should be the root of the repository (containing .github folder). +func validateRuntimeImportFiles(markdownContent string, workspaceDir string) error { + expressionValidationLog.Print("Validating runtime-import files") + + // Extract all runtime-import file paths + paths := extractRuntimeImportPaths(markdownContent) + if len(paths) == 0 { + expressionValidationLog.Print("No runtime-import files to validate") + return nil + } + + expressionValidationLog.Printf("Found %d runtime-import file(s) to validate", len(paths)) + + var validationErrors []string + + for _, filePath := range paths { + // Normalize the path to be relative to .github folder + normalizedPath := filePath + if strings.HasPrefix(normalizedPath, ".github/") { + normalizedPath = normalizedPath[8:] // Remove ".github/" + } else if strings.HasPrefix(normalizedPath, ".github\\") { + normalizedPath = normalizedPath[8:] // Remove ".github\" (Windows) + } + if strings.HasPrefix(normalizedPath, "./") { + normalizedPath = normalizedPath[2:] // Remove "./" + } else if strings.HasPrefix(normalizedPath, ".\\") { + normalizedPath = normalizedPath[2:] // Remove ".\" (Windows) + } + + // Build absolute path to the file + githubFolder := filepath.Join(workspaceDir, ".github") + absolutePath := filepath.Join(githubFolder, normalizedPath) + + // Security check: ensure the resolved path is within the .github folder + // Use filepath.Rel to check if the path escapes the .github folder + normalizedGithubFolder := filepath.Clean(githubFolder) + normalizedAbsolutePath := filepath.Clean(absolutePath) + relativePath, err := filepath.Rel(normalizedGithubFolder, normalizedAbsolutePath) + if err != nil || relativePath == ".." || strings.HasPrefix(relativePath, ".."+string(filepath.Separator)) || filepath.IsAbs(relativePath) { + validationErrors = append(validationErrors, fmt.Sprintf("%s: Security: Path must be within .github folder (resolves to: %s)", filePath, relativePath)) + continue + } + + // Check if file exists; missing files (optional or not) are deferred to runtime + if _, err := os.Stat(absolutePath); os.IsNotExist(err) { + expressionValidationLog.Printf("Skipping validation for non-existent file: %s", filePath) + continue + } + + // Read the file content + content, err := os.ReadFile(absolutePath) + if err != nil { + validationErrors = append(validationErrors, fmt.Sprintf("%s: failed to read file: %v", filePath, err)) + continue + } + + // Validate expressions in the imported file + if err := validateExpressionSafety(string(content)); err != nil { + validationErrors = append(validationErrors, fmt.Sprintf("%s: %v", filePath, err)) + } else { + expressionValidationLog.Printf("✓ Validated expressions in %s", filePath) + } + } + + if len(validationErrors) > 0 { + expressionValidationLog.Printf("Runtime-import validation failed: %d file(s) with errors", len(validationErrors)) + return NewValidationError( + "runtime-import", + fmt.Sprintf("%d files with errors", len(validationErrors)), + "runtime-import files contain expression errors:\n\n"+strings.Join(validationErrors, "\n\n"), + "Fix the expression errors in the imported files listed above. Each file must only use allowed GitHub Actions expressions. See expression security documentation for details.", + ) + } + + expressionValidationLog.Print("All runtime-import files validated successfully") + return nil +}