diff --git a/DOCUMENTATION.md b/DOCUMENTATION.md index 7a2bf857..e2beb773 100644 --- a/DOCUMENTATION.md +++ b/DOCUMENTATION.md @@ -1760,9 +1760,29 @@ triggers: Both approaches work with `wfctl template validate --config` for validation. -## Visual Workflow Builder (UI) +## Engine Validation Config + +Control the engine's startup validation behaviour via the `engine.validation` block: + +```yaml +engine: + validation: + templateRefs: warn # "off" | "warn" | "error" (default: "warn") +``` + +| Value | Behaviour | +|-------|-----------| +| `warn` | (default) Log warnings for suspicious pipeline template references at startup. Engine starts normally. | +| `error` | Refuse to start if any pipeline template reference issues are detected (e.g. dangling step refs, unknown output fields). | +| `off` | Skip template reference validation entirely. Preserves the previous engine behavior. | -A React-based visual editor for composing workflow configurations (`ui/` directory). +The validation checks performed at startup match those run by `wfctl template validate`, including: +- Step name references (`{{ .steps.NAME.field }}`) against declared steps in the same pipeline +- Forward references (referencing a step that appears later in the pipeline) +- Output field validation against each step type's declared output schema +- SQL column validation for `step.db_query` steps with a static `query` + +## Visual Workflow Builder (UI) **Technology stack:** React, ReactFlow, Zustand, TypeScript, Vite diff --git a/cmd/wfctl/api_extract.go b/cmd/wfctl/api_extract.go index d4a12639..074f4ed7 100644 --- a/cmd/wfctl/api_extract.go +++ b/cmd/wfctl/api_extract.go @@ -12,6 +12,7 @@ import ( "github.com/GoCodeAlone/workflow/config" "github.com/GoCodeAlone/workflow/module" + "github.com/GoCodeAlone/workflow/validation" "gopkg.in/yaml.v3" ) @@ -502,7 +503,7 @@ func inferBodyFromSchema(bodyFrom string, steps []map[string]any) *module.OpenAP if query == "" { break } - columns := extractSQLColumns(query) + columns := validation.ExtractSQLColumns(query) if len(columns) == 0 { break } @@ -530,80 +531,6 @@ func inferBodyFromSchema(bodyFrom string, steps []map[string]any) *module.OpenAP return nil } -// extractSQLColumns parses a SQL SELECT statement and returns the column names -// (or aliases) from the SELECT clause. -func extractSQLColumns(query string) []string { - // Normalize whitespace - query = strings.Join(strings.Fields(query), " ") - - // Find SELECT ... FROM - upper := strings.ToUpper(query) - selectIdx := strings.Index(upper, "SELECT ") - fromIdx := strings.Index(upper, " FROM ") - if selectIdx < 0 || fromIdx < 0 || fromIdx <= selectIdx { - return nil - } - - selectClause := query[selectIdx+7 : fromIdx] - - // Handle DISTINCT - if strings.HasPrefix(strings.ToUpper(strings.TrimSpace(selectClause)), "DISTINCT ") { - selectClause = strings.TrimSpace(selectClause)[9:] - } - - // Split by comma, handling parenthesized subexpressions - var columns []string - depth := 0 - current := "" - for _, ch := range selectClause { - switch ch { - case '(': - depth++ - current += string(ch) - case ')': - depth-- - current += string(ch) - case ',': - if depth == 0 { - if col := extractColumnName(strings.TrimSpace(current)); col != "" { - columns = append(columns, col) - } - current = "" - } else { - current += string(ch) - } - default: - current += string(ch) - } - } - if col := extractColumnName(strings.TrimSpace(current)); col != "" { - columns = append(columns, col) - } - return columns -} - -// extractColumnName extracts the effective column name from a SELECT expression. -// Handles: "col", "table.col", "expr AS alias", "COALESCE(...) AS alias". -func extractColumnName(expr string) string { - if expr == "" || expr == "*" { - return "" - } - // Check for AS alias (case-insensitive) - upper := strings.ToUpper(expr) - if asIdx := strings.LastIndex(upper, " AS "); asIdx >= 0 { - alias := strings.TrimSpace(expr[asIdx+4:]) - // Remove quotes if present - alias = strings.Trim(alias, "\"'`") - return alias - } - // Check for table.column - if dotIdx := strings.LastIndex(expr, "."); dotIdx >= 0 { - return strings.TrimSpace(expr[dotIdx+1:]) - } - // Simple column name - return strings.TrimSpace(expr) -} - // userCredentialsSchema returns a schema for email+password request bodies. func userCredentialsSchema() *module.OpenAPISchema { return &module.OpenAPISchema{ diff --git a/cmd/wfctl/template_validate.go b/cmd/wfctl/template_validate.go index 18f0a997..c6a63660 100644 --- a/cmd/wfctl/template_validate.go +++ b/cmd/wfctl/template_validate.go @@ -12,6 +12,7 @@ import ( "github.com/GoCodeAlone/workflow/config" "github.com/GoCodeAlone/workflow/schema" + "github.com/GoCodeAlone/workflow/validation" "gopkg.in/yaml.v3" ) @@ -443,7 +444,9 @@ func validateWorkflowConfig(name string, cfg *config.WorkflowConfig, knownModule } // 4. Validate template expressions in pipeline steps - validatePipelineTemplates(pipelineName, stepsRaw, &result) + vr := validation.ValidatePipelineTemplateRefs(map[string]any{pipelineName: pipelineRaw}) + result.Warnings = append(result.Warnings, vr.Warnings...) + result.Errors = append(result.Errors, vr.Errors...) // 5. Validate trigger types if triggerRaw, ok := pipelineMap["trigger"]; ok { @@ -559,380 +562,3 @@ func printTemplateValidationResults(results []templateValidationResult, summary // templateFSReader allows reading from the embedded templateFS for validation. // It wraps around the existing templateFS embed.FS. var _ fs.FS = templateFS - -// --- Pipeline template expression linting --- - -// templateExprRe matches template actions {{ ... }}. -var templateExprRe = regexp.MustCompile(`\{\{(.*?)\}\}`) - -// stepRefDotRe matches .steps.STEP_NAME and captures an optional field path. -// Group 1: step name (may contain hyphens). -// Group 2: remaining dot-path (e.g. ".row.auth_token"), field names without hyphens. -var stepRefDotRe = regexp.MustCompile(`\.steps\.([a-zA-Z_][a-zA-Z0-9_-]*)((?:\.[a-zA-Z_][a-zA-Z0-9_]*)*)`) - -// stepFieldDotRe matches .steps.STEP_NAME.FIELD_NAME (captures step and first field). -var stepFieldDotRe = regexp.MustCompile(`\.steps\.([a-zA-Z_][a-zA-Z0-9_-]*)\.([a-zA-Z_][a-zA-Z0-9_-]*)`) - -// stepRefIndexRe matches index .steps "STEP_NAME" patterns. -var stepRefIndexRe = regexp.MustCompile(`index\s+\.steps\s+"([^"]+)"`) - -// stepRefFuncRe matches step "STEP_NAME" function calls at the start of an -// action, after a pipe, or after an opening parenthesis. -var stepRefFuncRe = regexp.MustCompile(`(?:^|\||\()\s*step\s+"([^"]+)"`) - -// stepFuncFieldRe matches step "STEP_NAME" "FIELD_NAME" capturing both arguments, -// when used as a function call at the start of an action, after a pipe, or after -// an opening parenthesis. -var stepFuncFieldRe = regexp.MustCompile(`(?:^|\||\()\s*step\s+"([^"]+)"\s+"([^"]+)"`) - -// hyphenDotRe matches dot-access chains with hyphens (e.g., .steps.my-step.field), -// including continuation segments after the hyphenated part. -var hyphenDotRe = regexp.MustCompile(`\.[a-zA-Z_][a-zA-Z0-9_]*-[a-zA-Z0-9_-]*(?:\.[a-zA-Z_][a-zA-Z0-9_-]*)*`) - -// pipelineStepMeta holds the type and config of a pipeline step for static analysis. -type pipelineStepMeta struct { - typ string - config map[string]any -} - -// plainStepPathRe matches bare step context-key references such as -// "steps.STEP_NAME.field.subfield" used in plain-string config values (no {{ }}). -var plainStepPathRe = regexp.MustCompile(`^steps\.([a-zA-Z_][a-zA-Z0-9_-]*)((?:\.[a-zA-Z_][a-zA-Z0-9_]*)*)`) - -// stepBuildInfo holds the type and config of a pipeline step, used for output field validation. -type stepBuildInfo struct { - stepType string - stepConfig map[string]any -} - -// dbQueryStepTypes is the set of step types that produce a "row" or "rows" output -// from a SQL query and support SQL alias extraction. -var dbQueryStepTypes = map[string]bool{ - "step.db_query": true, - "step.db_query_cached": true, -} - -// isDBQueryStep reports whether a step type is a DB query step. -func isDBQueryStep(t string) bool { return dbQueryStepTypes[t] } - -// joinOutputKeys returns a comma-joined list of output key names for error messages, -// omitting placeholder/wildcard entries like "(key)", "(dynamic)", "(nested)". -func joinOutputKeys(outputs []schema.InferredOutput) string { - keys := make([]string, 0, len(outputs)) - for _, o := range outputs { - if !isPlaceholderOutputKey(o.Key) { - keys = append(keys, o.Key) - } - } - return strings.Join(keys, ", ") -} - -// isPlaceholderOutputKey reports whether an output key is a dynamic/wildcard -// placeholder (e.g. "(key)", "(dynamic)", "(nested)"). Steps that expose -// such placeholders produce outputs whose field names cannot be statically -// determined, so field-path validation should be skipped for them. -func isPlaceholderOutputKey(key string) bool { - return len(key) >= 2 && key[0] == '(' && key[len(key)-1] == ')' -} - -// hasDynamicOutputs reports whether any output in the list is a wildcard -// placeholder, meaning the step emits fields that are not statically known. -func hasDynamicOutputs(outputs []schema.InferredOutput) bool { - for _, o := range outputs { - if isPlaceholderOutputKey(o.Key) { - return true - } - } - return false -} - -// validatePipelineTemplates checks template expressions in pipeline step configs for -// references to nonexistent or forward-declared steps and common template pitfalls. -// It also warns when a template references a field that is not in the step type's -// declared output schema (Phase 1 static analysis). -func validatePipelineTemplates(pipelineName string, stepsRaw []any, result *templateValidationResult) { - // Build ordered step name list and step metadata for schema validation. - stepNames := make(map[string]int) // step name -> index in pipeline - stepMeta := make(map[string]pipelineStepMeta) // step name -> type+config (used by validateStepOutputField) - stepInfos := make(map[string]stepBuildInfo) // step name -> type and config (used by validateStepRef) - - reg := schema.NewStepSchemaRegistry() - - for i, stepRaw := range stepsRaw { - stepMap, ok := stepRaw.(map[string]any) - if !ok { - continue - } - name, _ := stepMap["name"].(string) - if name == "" { - continue - } - - stepNames[name] = i - typ, _ := stepMap["type"].(string) - cfg, _ := stepMap["config"].(map[string]any) - if cfg == nil { - cfg = map[string]any{} - } - - stepInfos[name] = stepBuildInfo{stepType: typ, stepConfig: cfg} - stepMeta[name] = pipelineStepMeta{typ: typ, config: cfg} - } - - // Check each step's config for template expressions - for i, stepRaw := range stepsRaw { - stepMap, ok := stepRaw.(map[string]any) - if !ok { - continue - } - stepName, _ := stepMap["name"].(string) - if stepName == "" { - stepName = fmt.Sprintf("step[%d]", i) - } - - // Collect all string values from the step config recursively - templates := collectTemplateStrings(stepMap) - - for _, tmpl := range templates { - // Find all template actions - actions := templateExprRe.FindAllStringSubmatch(tmpl, -1) - for _, action := range actions { - if len(action) < 2 { - continue - } - actionContent := action[1] - - // Skip comments - trimmed := strings.TrimSpace(actionContent) - if strings.HasPrefix(trimmed, "/*") { - continue - } - - // Check for step name references via dot-access (captures optional field path) - dotMatches := stepRefDotRe.FindAllStringSubmatch(actionContent, -1) - for _, m := range dotMatches { - refName := m[1] - fieldPath := "" - if len(m) > 2 { - fieldPath = m[2] - } - validateStepRef(pipelineName, stepName, refName, fieldPath, i, stepNames, stepInfos, reg, result) - } - - // Check for step output field references via dot-access (.steps.NAME.FIELD) - // Skip when the action contains hyphenated dot-access, which is not valid - // Go-template syntax and is already flagged separately by hyphenDotRe. - if !hyphenDotRe.MatchString(actionContent) { - fieldDotMatches := stepFieldDotRe.FindAllStringSubmatch(actionContent, -1) - for _, m := range fieldDotMatches { - refStepName, refField := m[1], m[2] - validateStepOutputField(pipelineName, stepName, refStepName, refField, stepMeta, reg, result) - } - } - - // Check for step name references via index (no field path resolvable) - indexMatches := stepRefIndexRe.FindAllStringSubmatch(actionContent, -1) - for _, m := range indexMatches { - refName := m[1] - validateStepRef(pipelineName, stepName, refName, "", i, stepNames, stepInfos, reg, result) - } - - // Check for step name references via step function (no field path resolvable) - funcMatches := stepRefFuncRe.FindAllStringSubmatch(actionContent, -1) - for _, m := range funcMatches { - refName := m[1] - validateStepRef(pipelineName, stepName, refName, "", i, stepNames, stepInfos, reg, result) - } - - // Check for step output field references via step function (step "NAME" "FIELD") - funcFieldMatches := stepFuncFieldRe.FindAllStringSubmatch(actionContent, -1) - for _, m := range funcFieldMatches { - refStepName, refField := m[1], m[2] - validateStepOutputField(pipelineName, stepName, refStepName, refField, stepMeta, reg, result) - } - - // Warn on hyphenated dot-access (auto-fixed but suggest preferred syntax) - if hyphenDotRe.MatchString(actionContent) { - result.Warnings = append(result.Warnings, - fmt.Sprintf("pipeline %q step %q: template uses hyphenated dot-access which is auto-fixed; prefer step \"name\" \"field\" syntax", pipelineName, stepName)) - } - } - } - - // Validate plain-string step references in specific config fields - // (e.g. secret_from, backend_url_key, field in conditional/branch). - if stepCfg, ok := stepMap["config"].(map[string]any); ok { - validatePlainStepRefs(pipelineName, stepName, i, stepCfg, stepNames, stepInfos, reg, result) - } - } -} - -// validateStepOutputField checks that a referenced output field exists in the -// step type's declared output schema. It emits a warning when the step type is -// known, has declared outputs, and none of them match the referenced field. -func validateStepOutputField(pipelineName, currentStep, refStepName, refField string, stepMeta map[string]pipelineStepMeta, reg *schema.StepSchemaRegistry, result *templateValidationResult) { - meta, ok := stepMeta[refStepName] - if !ok || meta.typ == "" { - return // step name unknown or no type — already caught by validateStepRef - } - - outputs := reg.InferStepOutputs(meta.typ, meta.config) - if len(outputs) == 0 { - return // no declared outputs for this step type — nothing to check - } - - // If any output key is a placeholder (wrapped in parentheses), the step - // has dynamic/config-dependent outputs and we cannot validate statically. - for _, o := range outputs { - if len(o.Key) > 1 && o.Key[0] == '(' && o.Key[len(o.Key)-1] == ')' { - return - } - } - - for _, o := range outputs { - if o.Key == refField { - return // field found — all good - } - } - - // Build a suggestion list from declared outputs - keys := make([]string, 0, len(outputs)) - for _, o := range outputs { - keys = append(keys, o.Key) - } - result.Warnings = append(result.Warnings, - fmt.Sprintf("pipeline %q step %q: references %s.%s but step %q (%s) declares outputs: %s", - pipelineName, currentStep, refStepName, refField, refStepName, meta.typ, strings.Join(keys, ", "))) -} - -// validateStepRef checks that a referenced step name exists and appears before the -// current step in the pipeline execution order. When fieldPath is non-empty it -// also validates the first output field name against the step's known outputs, and -// for db_query steps it performs best-effort SQL alias checking for "row." paths. -func validateStepRef(pipelineName, currentStep, refName, fieldPath string, currentIdx int, stepNames map[string]int, stepInfos map[string]stepBuildInfo, reg *schema.StepSchemaRegistry, result *templateValidationResult) { - refIdx, exists := stepNames[refName] - switch { - case !exists: - result.Warnings = append(result.Warnings, - fmt.Sprintf("pipeline %q step %q: references step %q which does not exist in this pipeline", pipelineName, currentStep, refName)) - return - case refIdx == currentIdx: - result.Warnings = append(result.Warnings, - fmt.Sprintf("pipeline %q step %q: references itself; a step cannot use its own outputs because they are not available until after execution", pipelineName, currentStep)) - return - case refIdx > currentIdx: - result.Warnings = append(result.Warnings, - fmt.Sprintf("pipeline %q step %q: references step %q which has not executed yet (appears later in pipeline)", pipelineName, currentStep, refName)) - return - } - - // Step exists and precedes the current step — validate the output field path. - if fieldPath == "" { - return - } - - info, ok := stepInfos[refName] - if !ok || info.stepType == "" { - return - } - - outputs := reg.InferStepOutputs(info.stepType, info.stepConfig) - if len(outputs) == 0 { - return // no schema information available; skip - } - - // If any output key is a placeholder (e.g. "(key)", "(dynamic)", "(nested)"), - // the step emits dynamic fields whose names cannot be statically determined. - // Skip field-path validation for such steps to avoid false positives. - if hasDynamicOutputs(outputs) { - return - } - - // Split ".row.auth_token" → ["row", "auth_token"] - parts := strings.Split(strings.TrimPrefix(fieldPath, "."), ".") - if len(parts) == 0 || parts[0] == "" { - return - } - firstField := parts[0] - - // Check the first field against known output keys. - var matchedOutput *schema.InferredOutput - for i := range outputs { - if outputs[i].Key == firstField { - matchedOutput = &outputs[i] - break - } - } - if matchedOutput == nil { - result.Warnings = append(result.Warnings, - fmt.Sprintf("pipeline %q step %q: references step %q output field %q which is not a known output of step type %q (known outputs: %s)", - pipelineName, currentStep, refName, firstField, info.stepType, joinOutputKeys(outputs))) - return - } - - // For db_query/db_query_cached steps, try SQL alias validation on "row." paths. - if firstField == "row" && len(parts) > 1 && isDBQueryStep(info.stepType) { - columnName := parts[1] - query, _ := info.stepConfig["query"].(string) - if query != "" { - sqlCols := extractSQLColumns(query) - if len(sqlCols) > 0 { - found := false - for _, col := range sqlCols { - if col == columnName { - found = true - break - } - } - if !found { - result.Warnings = append(result.Warnings, - fmt.Sprintf("pipeline %q step %q: references step %q output field \"row.%s\" but the SQL query does not select column %q (available: %s)", - pipelineName, currentStep, refName, columnName, columnName, strings.Join(sqlCols, ", "))) - } - } - } - } -} - -// validatePlainStepRefs checks plain-string config values that contain bare step -// context-key references (e.g. "steps.STEP_NAME.field") in config fields known to -// accept such paths: secret_from, backend_url_key, and field (conditional/branch). -func validatePlainStepRefs(pipelineName, stepName string, stepIdx int, stepCfg map[string]any, stepNames map[string]int, stepInfos map[string]stepBuildInfo, reg *schema.StepSchemaRegistry, result *templateValidationResult) { - // Config keys that are documented to accept a bare "steps.X.y" context path. - plainRefKeys := []string{"secret_from", "backend_url_key", "field"} - for _, key := range plainRefKeys { - val, ok := stepCfg[key].(string) - if !ok || val == "" { - continue - } - m := plainStepPathRe.FindStringSubmatch(val) - if m == nil { - continue - } - refName := m[1] - fieldPath := m[2] // already in ".field.subfield" form - validateStepRef(pipelineName, stepName, refName, fieldPath, stepIdx, stepNames, stepInfos, reg, result) - } -} - -// collectTemplateStrings recursively finds all strings containing {{ in a value tree. -// This intentionally scans all fields (not just "config") because template expressions -// can appear in conditions, names, and other step fields. -func collectTemplateStrings(data any) []string { - var results []string - switch v := data.(type) { - case string: - if strings.Contains(v, "{{") { - results = append(results, v) - } - case map[string]any: - for _, val := range v { - results = append(results, collectTemplateStrings(val)...) - } - case []any: - for _, item := range v { - results = append(results, collectTemplateStrings(item)...) - } - } - return results -} diff --git a/cmd/wfctl/template_validate_test.go b/cmd/wfctl/template_validate_test.go index 56a817d9..fa9f8630 100644 --- a/cmd/wfctl/template_validate_test.go +++ b/cmd/wfctl/template_validate_test.go @@ -1,6 +1,7 @@ package main import ( + "fmt" "os" "path/filepath" "strings" @@ -8,6 +9,7 @@ import ( "github.com/GoCodeAlone/workflow/config" "github.com/GoCodeAlone/workflow/schema" + "github.com/GoCodeAlone/workflow/validation" ) func TestRunTemplateValidateAllTemplates(t *testing.T) { @@ -795,10 +797,9 @@ func TestValidateStepOutputField_NoOutputSchema_NoWarning(t *testing.T) { } } -// TestValidateStepOutputFieldRegistry tests validateStepOutputField directly. +// TestValidateStepOutputFieldRegistry tests output field validation through +// ValidatePipelineTemplateRefs for known and unknown output fields. func TestValidateStepOutputFieldRegistry(t *testing.T) { - reg := schema.NewStepSchemaRegistry() - tests := []struct { name string stepName string @@ -867,14 +868,28 @@ func TestValidateStepOutputFieldRegistry(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - result := &templateValidationResult{} - stepMeta := map[string]pipelineStepMeta{ - tc.stepName: {typ: tc.stepType, config: tc.stepConfig}, + // Test through ValidatePipelineTemplateRefs: build a pipeline where step B + // references step A's output field, and check for warnings. + pipelines := map[string]any{ + "test-pipeline": map[string]any{ + "steps": []any{ + map[string]any{"name": tc.stepName, "type": tc.stepType, "config": tc.stepConfig}, + map[string]any{ + "name": "next-step", + "type": "step.set", + "config": map[string]any{ + "values": map[string]any{ + "x": fmt.Sprintf("{{ .steps.%s.%s }}", tc.stepName, tc.refField), + }, + }, + }, + }, + }, } - validateStepOutputField("pipeline", "current-step", tc.stepName, tc.refField, stepMeta, reg, result) - hasWarn := len(result.Warnings) > 0 + vr := validation.ValidatePipelineTemplateRefs(pipelines) + hasWarn := len(vr.Warnings) > 0 if hasWarn != tc.expectWarn { - t.Errorf("expectWarn=%v but warnings=%v", tc.expectWarn, result.Warnings) + t.Errorf("expectWarn=%v but warnings=%v", tc.expectWarn, vr.Warnings) } }) } diff --git a/config/config.go b/config/config.go index 21cbd20d..c029c442 100644 --- a/config/config.go +++ b/config/config.go @@ -132,9 +132,22 @@ type WorkflowConfig struct { Plugins *PluginsConfig `json:"plugins,omitempty" yaml:"plugins,omitempty"` Sidecars []SidecarConfig `json:"sidecars,omitempty" yaml:"sidecars,omitempty"` Infrastructure *InfrastructureConfig `json:"infrastructure,omitempty" yaml:"infrastructure,omitempty"` + Engine *EngineConfig `json:"engine,omitempty" yaml:"engine,omitempty"` ConfigDir string `json:"-" yaml:"-"` // directory containing the config file, used for relative path resolution } +// EngineConfig holds engine-level runtime settings. +type EngineConfig struct { + Validation *EngineValidationConfig `json:"validation,omitempty" yaml:"validation,omitempty"` +} + +// EngineValidationConfig controls startup and execution-time validation behaviour. +type EngineValidationConfig struct { + // TemplateRefs controls template cross-reference validation at startup. + // Allowed values: "off" (skip), "warn" (log warnings, default), "error" (fail on any validation issues). + TemplateRefs string `json:"templateRefs,omitempty" yaml:"templateRefs,omitempty"` +} + // InfrastructureConfig holds infrastructure resource declarations. type InfrastructureConfig struct { Resources []InfraResourceConfig `json:"resources" yaml:"resources"` diff --git a/engine.go b/engine.go index c55f2a54..055baac8 100644 --- a/engine.go +++ b/engine.go @@ -19,6 +19,7 @@ import ( "github.com/GoCodeAlone/workflow/plugin" "github.com/GoCodeAlone/workflow/schema" "github.com/GoCodeAlone/workflow/secrets" + "github.com/GoCodeAlone/workflow/validation" "gopkg.in/yaml.v3" ) @@ -400,6 +401,38 @@ func (e *StdEngine) BuildFromConfig(cfg *config.WorkflowConfig) error { return fmt.Errorf("config validation failed: %w", err) } + // Run pipeline template cross-reference validation. + // Mode is controlled by engine.validation.templateRefs in the config: + // "off" — skip entirely (preserves previous behaviour) + // "warn" — log warnings for suspicious references (default) + // "error" — return an error when validation finds problems + if len(cfg.Pipelines) > 0 { + mode := "warn" // default + if cfg.Engine != nil && cfg.Engine.Validation != nil && cfg.Engine.Validation.TemplateRefs != "" { + mode = cfg.Engine.Validation.TemplateRefs + } + switch mode { + case "off", "warn", "error": + // valid + default: + return fmt.Errorf("invalid engine.validation.templateRefs value %q, must be one of: off, warn, error", mode) + } + if mode != "off" { + vr := validation.ValidatePipelineTemplateRefs(cfg.Pipelines, e.PluginLoader().StepSchemaRegistry()) + if vr.HasIssues() { + allMessages := make([]string, 0, len(vr.Warnings)+len(vr.Errors)) + allMessages = append(allMessages, vr.Warnings...) + allMessages = append(allMessages, vr.Errors...) + if mode == "error" { + return fmt.Errorf("pipeline template validation failed: %s", strings.Join(allMessages, "; ")) + } + for _, msg := range allMessages { + e.logger.Warn(msg) + } + } + } + } + // Validate plugin requirements if declared if cfg.Requires != nil { if err := e.validateRequirements(cfg.Requires); err != nil { diff --git a/engine_test.go b/engine_test.go index 0795b19d..6efaed48 100644 --- a/engine_test.go +++ b/engine_test.go @@ -1709,3 +1709,33 @@ func TestStdEngine_ConfigHash_Deterministic(t *testing.T) { t.Errorf("different configs produced the same hash: %q", hashA1) } } + +func TestEngine_BuildFromConfig_InvalidTemplateRefsMode(t *testing.T) { + app := newMockApplication() + engine := NewStdEngine(app, app.Logger()) + loadAllPlugins(t, engine) + + cfg := &config.WorkflowConfig{ + Modules: []config.ModuleConfig{}, + Pipelines: map[string]any{ + "test": map[string]any{ + "steps": []any{ + map[string]any{"name": "a", "type": "step.set"}, + }, + }, + }, + Engine: &config.EngineConfig{ + Validation: &config.EngineValidationConfig{ + TemplateRefs: "typo", + }, + }, + } + + err := engine.BuildFromConfig(cfg) + if err == nil { + t.Fatal("expected error for invalid templateRefs value") + } + if !strings.Contains(err.Error(), "invalid engine.validation.templateRefs") { + t.Errorf("unexpected error message: %v", err) + } +} diff --git a/engine_validation_test.go b/engine_validation_test.go new file mode 100644 index 00000000..8ca2aa0a --- /dev/null +++ b/engine_validation_test.go @@ -0,0 +1,211 @@ +package workflow + +import ( + "strings" + "testing" + + "github.com/GoCodeAlone/workflow/config" +) + +// TestEngine_TemplateValidation_WarnMode verifies that with the default "warn" mode +// (no Engine config set), validation warnings are logged but BuildFromConfig succeeds. +func TestEngine_TemplateValidation_WarnMode(t *testing.T) { + app := newMockApplication() + engine := NewStdEngine(app, app.logger) + loadAllPlugins(t, engine) + + cfg := &config.WorkflowConfig{ + Modules: []config.ModuleConfig{}, + Workflows: map[string]any{}, + // Pipeline with a dangling step reference — should warn but not fail. + Pipelines: map[string]any{ + "test-pipeline": map[string]any{ + "steps": []any{ + map[string]any{ + "name": "step-a", + "type": "step.set", + "config": map[string]any{ + "values": map[string]any{ + "x": "{{ .steps.nonexistent.value }}", + }, + }, + }, + }, + }, + }, + // No Engine config → default mode is "warn". + } + + if err := engine.BuildFromConfig(cfg); err != nil { + t.Fatalf("BuildFromConfig should not fail in default warn mode, got: %v", err) + } + + // Confirm a warning was logged about the missing step reference. + found := false + for _, entry := range app.logger.logs { + if strings.Contains(entry, "nonexistent") { + found = true + break + } + } + if !found { + t.Errorf("expected warning log about 'nonexistent' step, log entries: %v", app.logger.logs) + } +} + +// TestEngine_TemplateValidation_ExplicitWarnMode verifies that explicitly setting +// engine.validation.templateRefs to "warn" behaves the same as the default. +func TestEngine_TemplateValidation_ExplicitWarnMode(t *testing.T) { + app := newMockApplication() + engine := NewStdEngine(app, app.logger) + loadAllPlugins(t, engine) + + cfg := &config.WorkflowConfig{ + Modules: []config.ModuleConfig{}, + Workflows: map[string]any{}, + Pipelines: map[string]any{ + "test-pipeline": map[string]any{ + "steps": []any{ + map[string]any{ + "name": "step-a", + "type": "step.set", + "config": map[string]any{ + "values": map[string]any{ + "x": "{{ .steps.missing.value }}", + }, + }, + }, + }, + }, + }, + Engine: &config.EngineConfig{ + Validation: &config.EngineValidationConfig{ + TemplateRefs: "warn", + }, + }, + } + + if err := engine.BuildFromConfig(cfg); err != nil { + t.Fatalf("BuildFromConfig should not fail in warn mode, got: %v", err) + } +} + +// TestEngine_TemplateValidation_ErrorMode verifies that with mode "error", a config +// with a dangling step reference causes BuildFromConfig to return an error. +func TestEngine_TemplateValidation_ErrorMode(t *testing.T) { + app := newMockApplication() + engine := NewStdEngine(app, app.logger) + loadAllPlugins(t, engine) + + cfg := &config.WorkflowConfig{ + Modules: []config.ModuleConfig{}, + Workflows: map[string]any{}, + Pipelines: map[string]any{ + "test-pipeline": map[string]any{ + "steps": []any{ + map[string]any{ + "name": "step-a", + "type": "step.set", + "config": map[string]any{ + "values": map[string]any{ + "x": "{{ .steps.nonexistent.value }}", + }, + }, + }, + }, + }, + }, + Engine: &config.EngineConfig{ + Validation: &config.EngineValidationConfig{ + TemplateRefs: "error", + }, + }, + } + + err := engine.BuildFromConfig(cfg) + if err == nil { + t.Fatal("BuildFromConfig should return an error in error mode for broken pipeline") + } + if !strings.Contains(err.Error(), "pipeline template validation failed") { + t.Errorf("error should mention template validation, got: %v", err) + } +} + +// TestEngine_TemplateValidation_OffMode verifies that with mode "off", no validation +// is performed and no warnings are logged even for broken pipelines. +func TestEngine_TemplateValidation_OffMode(t *testing.T) { + app := newMockApplication() + engine := NewStdEngine(app, app.logger) + loadAllPlugins(t, engine) + + cfg := &config.WorkflowConfig{ + Modules: []config.ModuleConfig{}, + Workflows: map[string]any{}, + Pipelines: map[string]any{ + "test-pipeline": map[string]any{ + "steps": []any{ + map[string]any{ + "name": "step-a", + "type": "step.set", + "config": map[string]any{ + "values": map[string]any{ + "x": "{{ .steps.nonexistent.value }}", + }, + }, + }, + }, + }, + }, + Engine: &config.EngineConfig{ + Validation: &config.EngineValidationConfig{ + TemplateRefs: "off", + }, + }, + } + + if err := engine.BuildFromConfig(cfg); err != nil { + t.Fatalf("BuildFromConfig should not fail in off mode, got: %v", err) + } + for _, entry := range app.logger.logs { + if strings.Contains(entry, "nonexistent") { + t.Errorf("expected no log about 'nonexistent' in off mode, got: %s", entry) + } + } +} + +// TestEngine_TemplateValidation_ValidPipeline verifies that a valid pipeline with +// correct step references does not produce warnings or errors. +func TestEngine_TemplateValidation_ValidPipeline(t *testing.T) { + app := newMockApplication() + engine := NewStdEngine(app, app.logger) + loadAllPlugins(t, engine) + + cfg := &config.WorkflowConfig{ + Modules: []config.ModuleConfig{}, + Workflows: map[string]any{}, + Pipelines: map[string]any{ + "test-pipeline": map[string]any{ + "steps": []any{ + map[string]any{ + "name": "first", + "type": "step.set", + "config": map[string]any{"values": map[string]any{"val": "hello"}}, + }, + map[string]any{ + "name": "second", + "type": "step.set", + "config": map[string]any{ + "values": map[string]any{ + "copy": "{{ .steps.first.val }}", + }, + }, + }, + }, + }, + }, + } + + if err := engine.BuildFromConfig(cfg); err != nil { + t.Fatalf("BuildFromConfig failed for valid pipeline: %v", err) + } +} diff --git a/validation/pipeline_refs.go b/validation/pipeline_refs.go new file mode 100644 index 00000000..b8932853 --- /dev/null +++ b/validation/pipeline_refs.go @@ -0,0 +1,507 @@ +// Package validation provides shared pipeline configuration validation utilities +// that are used by both the workflow engine (at startup) and the wfctl CLI tool +// (as static analysis). This avoids duplicating logic between the two consumers. +package validation + +import ( + "fmt" + "regexp" + "strings" + + "github.com/GoCodeAlone/workflow/schema" +) + +// RefValidationResult holds the outcome of pipeline template reference validation. +// Warnings are suspicious but non-fatal references; Errors are definitively wrong. +type RefValidationResult struct { + Warnings []string + Errors []string +} + +// HasIssues returns true when there are any warnings or errors. +func (r *RefValidationResult) HasIssues() bool { + return len(r.Warnings) > 0 || len(r.Errors) > 0 +} + +// templateExprRe matches template actions {{ ... }}. +var templateExprRe = regexp.MustCompile(`\{\{(.*?)\}\}`) + +// stepRefDotRe matches .steps.STEP_NAME and captures an optional field path. +// Group 1: step name (may contain hyphens). +// Group 2: remaining dot-path (e.g. ".row.auth_token"), field names without hyphens. +var stepRefDotRe = regexp.MustCompile(`\.steps\.([a-zA-Z_][a-zA-Z0-9_-]*)((?:\.[a-zA-Z_][a-zA-Z0-9_]*)*)`) + +// stepFieldDotRe matches .steps.STEP_NAME.FIELD_NAME (captures step and first field). +var stepFieldDotRe = regexp.MustCompile(`\.steps\.([a-zA-Z_][a-zA-Z0-9_-]*)\.([a-zA-Z_][a-zA-Z0-9_-]*)`) + +// stepRefIndexRe matches index .steps "STEP_NAME" patterns. +var stepRefIndexRe = regexp.MustCompile(`index\s+\.steps\s+"([^"]+)"`) + +// stepRefFuncRe matches step "STEP_NAME" function calls at the start of an +// action, after a pipe, or after an opening parenthesis. +var stepRefFuncRe = regexp.MustCompile(`(?:^|\||\()\s*step\s+"([^"]+)"`) + +// stepFuncFieldRe matches step "STEP_NAME" "FIELD_NAME" capturing both arguments, +// when used as a function call at the start of an action, after a pipe, or after +// an opening parenthesis. +var stepFuncFieldRe = regexp.MustCompile(`(?:^|\||\()\s*step\s+"([^"]+)"\s+"([^"]+)"`) + +// hyphenDotRe matches dot-access chains with hyphens (e.g., .steps.my-step.field), +// including continuation segments after the hyphenated part. +var hyphenDotRe = regexp.MustCompile(`\.[a-zA-Z_][a-zA-Z0-9_]*-[a-zA-Z0-9_-]*(?:\.[a-zA-Z_][a-zA-Z0-9_-]*)*`) + +// plainStepPathRe matches bare step context-key references such as +// "steps.STEP_NAME.field.subfield" used in plain-string config values (no {{ }}). +var plainStepPathRe = regexp.MustCompile(`^steps\.([a-zA-Z_][a-zA-Z0-9_-]*)((?:\.[a-zA-Z_][a-zA-Z0-9_]*)*)`) + +// pipelineStepMeta holds the type and config of a pipeline step for static analysis. +type pipelineStepMeta struct { + typ string + config map[string]any +} + +// stepBuildInfo holds the type and config of a pipeline step, used for output field validation. +type stepBuildInfo struct { + stepType string + stepConfig map[string]any +} + +// dbQueryStepTypes is the set of step types that produce a "row" or "rows" output +// from a SQL query and support SQL alias extraction. +var dbQueryStepTypes = map[string]bool{ + "step.db_query": true, + "step.db_query_cached": true, +} + +// isDBQueryStep reports whether a step type is a DB query step. +func isDBQueryStep(t string) bool { return dbQueryStepTypes[t] } + +// ValidatePipelineTemplateRefs validates all pipeline step template expressions in the +// given pipelines map for dangling step references and output field mismatches. +// It performs the same checks as `wfctl template validate` at the pipeline template level. +// +// The pipelines parameter is expected to be a map[string]any where each value is a +// pipeline config map containing a "steps" field (as parsed from YAML). +// +// An optional *schema.StepSchemaRegistry may be provided to supply plugin-registered +// step schemas. When absent, a default built-in registry is created once and reused +// across all pipelines. +func ValidatePipelineTemplateRefs(pipelines map[string]any, reg ...*schema.StepSchemaRegistry) *RefValidationResult { + var r *schema.StepSchemaRegistry + if len(reg) > 0 && reg[0] != nil { + r = reg[0] + } else { + r = schema.NewStepSchemaRegistry() + } + result := &RefValidationResult{} + for pipelineName, pipelineRaw := range pipelines { + pipelineMap, ok := pipelineRaw.(map[string]any) + if !ok { + continue + } + stepsRaw, _ := pipelineMap["steps"].([]any) + if len(stepsRaw) == 0 { + continue + } + validatePipelineTemplateRefs(pipelineName, stepsRaw, r, result) + } + return result +} + +// validatePipelineTemplateRefs checks template expressions in pipeline step configs for +// references to nonexistent or forward-declared steps and common template pitfalls. +// It also warns when a template references a field that is not in the step type's +// declared output schema (Phase 1 static analysis). +func validatePipelineTemplateRefs(pipelineName string, stepsRaw []any, reg *schema.StepSchemaRegistry, result *RefValidationResult) { + // Build ordered step name list and step metadata for schema validation. + stepNames := make(map[string]int) // step name -> index in pipeline + stepMeta := make(map[string]pipelineStepMeta) // step name -> type+config (used by validateStepOutputField) + stepInfos := make(map[string]stepBuildInfo) // step name -> type and config (used by validateStepRef) + + for i, stepRaw := range stepsRaw { + stepMap, ok := stepRaw.(map[string]any) + if !ok { + continue + } + name, _ := stepMap["name"].(string) + if name == "" { + continue + } + + stepNames[name] = i + typ, _ := stepMap["type"].(string) + cfg, _ := stepMap["config"].(map[string]any) + if cfg == nil { + cfg = map[string]any{} + } + + stepInfos[name] = stepBuildInfo{stepType: typ, stepConfig: cfg} + stepMeta[name] = pipelineStepMeta{typ: typ, config: cfg} + } + + // Check each step's config for template expressions + for i, stepRaw := range stepsRaw { + stepMap, ok := stepRaw.(map[string]any) + if !ok { + continue + } + stepName, _ := stepMap["name"].(string) + if stepName == "" { + stepName = fmt.Sprintf("step[%d]", i) + } + + // Collect all string values from the step config recursively + templates := collectTemplateStrings(stepMap) + + for _, tmpl := range templates { + // Find all template actions + actions := templateExprRe.FindAllStringSubmatch(tmpl, -1) + for _, action := range actions { + if len(action) < 2 { + continue + } + actionContent := action[1] + + // Skip comments + trimmed := strings.TrimSpace(actionContent) + if strings.HasPrefix(trimmed, "/*") { + continue + } + + // Check for step name references via dot-access (captures optional field path) + dotMatches := stepRefDotRe.FindAllStringSubmatch(actionContent, -1) + hasHyphen := hyphenDotRe.MatchString(actionContent) + for _, m := range dotMatches { + refName := m[1] + fieldPath := "" + // When the action contains a hyphenated dot-access, skip field-path + // validation to avoid spurious output-field or SQL-column warnings + // (a dedicated hyphen warning is emitted separately below). + if !hasHyphen && len(m) > 2 { + fieldPath = m[2] + } + validateStepRef(pipelineName, stepName, refName, fieldPath, i, stepNames, stepInfos, reg, result) + } + + // Check for step output field references via dot-access (.steps.NAME.FIELD) + // Skip when the action contains hyphenated dot-access, which is not valid + // Go-template syntax and is already flagged separately by hyphenDotRe. + if !hyphenDotRe.MatchString(actionContent) { + fieldDotMatches := stepFieldDotRe.FindAllStringSubmatch(actionContent, -1) + for _, m := range fieldDotMatches { + refStepName, refField := m[1], m[2] + validateStepOutputField(pipelineName, stepName, refStepName, refField, stepMeta, reg, result) + } + } + + // Check for step name references via index (no field path resolvable) + indexMatches := stepRefIndexRe.FindAllStringSubmatch(actionContent, -1) + for _, m := range indexMatches { + refName := m[1] + validateStepRef(pipelineName, stepName, refName, "", i, stepNames, stepInfos, reg, result) + } + + // Check for step name references via step function (no field path resolvable) + funcMatches := stepRefFuncRe.FindAllStringSubmatch(actionContent, -1) + for _, m := range funcMatches { + refName := m[1] + validateStepRef(pipelineName, stepName, refName, "", i, stepNames, stepInfos, reg, result) + } + + // Check for step output field references via step function (step "NAME" "FIELD") + funcFieldMatches := stepFuncFieldRe.FindAllStringSubmatch(actionContent, -1) + for _, m := range funcFieldMatches { + refStepName, refField := m[1], m[2] + validateStepOutputField(pipelineName, stepName, refStepName, refField, stepMeta, reg, result) + } + + // Warn on hyphenated dot-access (auto-fixed but suggest preferred syntax) + if hyphenDotRe.MatchString(actionContent) { + result.Warnings = append(result.Warnings, + fmt.Sprintf("pipeline %q step %q: template uses hyphenated dot-access which is auto-fixed; prefer step \"name\" \"field\" syntax", pipelineName, stepName)) + } + } + } + + // Validate plain-string step references in specific config fields + // (e.g. secret_from, backend_url_key, field in conditional/branch). + if stepCfg, ok := stepMap["config"].(map[string]any); ok { + validatePlainStepRefs(pipelineName, stepName, i, stepCfg, stepNames, stepInfos, reg, result) + } + } +} + +// validateStepOutputField checks that a referenced output field exists in the +// step type's declared output schema. It emits a warning when the step type is +// known, has declared outputs, and none of them match the referenced field. +func validateStepOutputField(pipelineName, currentStep, refStepName, refField string, stepMeta map[string]pipelineStepMeta, reg *schema.StepSchemaRegistry, result *RefValidationResult) { + meta, ok := stepMeta[refStepName] + if !ok || meta.typ == "" { + return // step name unknown or no type — already caught by validateStepRef + } + + outputs := reg.InferStepOutputs(meta.typ, meta.config) + if len(outputs) == 0 { + return // no declared outputs for this step type — nothing to check + } + + // If any output key is a placeholder (wrapped in parentheses), the step + // has dynamic/config-dependent outputs and we cannot validate statically. + for _, o := range outputs { + if len(o.Key) > 1 && o.Key[0] == '(' && o.Key[len(o.Key)-1] == ')' { + return + } + } + + for _, o := range outputs { + if o.Key == refField { + return // field found — all good + } + } + + // Build a suggestion list from declared outputs + keys := make([]string, 0, len(outputs)) + for _, o := range outputs { + keys = append(keys, o.Key) + } + result.Warnings = append(result.Warnings, + fmt.Sprintf("pipeline %q step %q: references %s.%s but step %q (%s) declares outputs: %s", + pipelineName, currentStep, refStepName, refField, refStepName, meta.typ, strings.Join(keys, ", "))) +} + +// validateStepRef checks that a referenced step name exists and appears before the +// current step in the pipeline execution order. When fieldPath is non-empty it +// also validates the first output field name against the step's known outputs, and +// for db_query steps it performs best-effort SQL alias checking for "row." paths. +func validateStepRef(pipelineName, currentStep, refName, fieldPath string, currentIdx int, stepNames map[string]int, stepInfos map[string]stepBuildInfo, reg *schema.StepSchemaRegistry, result *RefValidationResult) { + refIdx, exists := stepNames[refName] + switch { + case !exists: + result.Warnings = append(result.Warnings, + fmt.Sprintf("pipeline %q step %q: references step %q which does not exist in this pipeline", pipelineName, currentStep, refName)) + return + case refIdx == currentIdx: + result.Warnings = append(result.Warnings, + fmt.Sprintf("pipeline %q step %q: references itself; a step cannot use its own outputs because they are not available until after execution", pipelineName, currentStep)) + return + case refIdx > currentIdx: + result.Warnings = append(result.Warnings, + fmt.Sprintf("pipeline %q step %q: references step %q which has not executed yet (appears later in pipeline)", pipelineName, currentStep, refName)) + return + } + + // Step exists and precedes the current step — validate the output field path. + if fieldPath == "" { + return + } + + info, ok := stepInfos[refName] + if !ok || info.stepType == "" { + return + } + + outputs := reg.InferStepOutputs(info.stepType, info.stepConfig) + if len(outputs) == 0 { + return // no schema information available; skip + } + + // If any output key is a placeholder (e.g. "(key)", "(dynamic)", "(nested)"), + // the step emits dynamic fields whose names cannot be statically determined. + // Skip field-path validation for such steps to avoid false positives. + if hasDynamicOutputs(outputs) { + return + } + + // Split ".row.auth_token" → ["row", "auth_token"] + parts := strings.Split(strings.TrimPrefix(fieldPath, "."), ".") + if len(parts) == 0 || parts[0] == "" { + return + } + firstField := parts[0] + + // Check the first field against known output keys. + var matchedOutput *schema.InferredOutput + for i := range outputs { + if outputs[i].Key == firstField { + matchedOutput = &outputs[i] + break + } + } + if matchedOutput == nil { + result.Warnings = append(result.Warnings, + fmt.Sprintf("pipeline %q step %q: references step %q output field %q which is not a known output of step type %q (known outputs: %s)", + pipelineName, currentStep, refName, firstField, info.stepType, joinOutputKeys(outputs))) + return + } + + // For db_query/db_query_cached steps, try SQL alias validation on "row." paths. + if firstField == "row" && len(parts) > 1 && isDBQueryStep(info.stepType) { + columnName := parts[1] + query, _ := info.stepConfig["query"].(string) + if query != "" { + sqlCols := ExtractSQLColumns(query) + if len(sqlCols) > 0 { + found := false + for _, col := range sqlCols { + if col == columnName { + found = true + break + } + } + if !found { + result.Warnings = append(result.Warnings, + fmt.Sprintf("pipeline %q step %q: references step %q output field \"row.%s\" but the SQL query does not select column %q (available: %s)", + pipelineName, currentStep, refName, columnName, columnName, strings.Join(sqlCols, ", "))) + } + } + } + } +} + +// validatePlainStepRefs checks plain-string config values that contain bare step +// context-key references (e.g. "steps.STEP_NAME.field") in config fields known to +// accept such paths: secret_from, backend_url_key, and field (conditional/branch). +func validatePlainStepRefs(pipelineName, stepName string, stepIdx int, stepCfg map[string]any, stepNames map[string]int, stepInfos map[string]stepBuildInfo, reg *schema.StepSchemaRegistry, result *RefValidationResult) { + // Config keys that are documented to accept a bare "steps.X.y" context path. + plainRefKeys := []string{"secret_from", "backend_url_key", "field"} + for _, key := range plainRefKeys { + val, ok := stepCfg[key].(string) + if !ok || val == "" { + continue + } + m := plainStepPathRe.FindStringSubmatch(val) + if m == nil { + continue + } + refName := m[1] + fieldPath := m[2] // already in ".field.subfield" form + validateStepRef(pipelineName, stepName, refName, fieldPath, stepIdx, stepNames, stepInfos, reg, result) + } +} + +// collectTemplateStrings recursively finds all strings containing {{ in a value tree. +// This intentionally scans all fields (not just "config") because template expressions +// can appear in conditions, names, and other step fields. +func collectTemplateStrings(data any) []string { + var results []string + switch v := data.(type) { + case string: + if strings.Contains(v, "{{") { + results = append(results, v) + } + case map[string]any: + for _, val := range v { + results = append(results, collectTemplateStrings(val)...) + } + case []any: + for _, item := range v { + results = append(results, collectTemplateStrings(item)...) + } + } + return results +} + +// joinOutputKeys returns a comma-joined list of output key names for error messages, +// omitting placeholder/wildcard entries like "(key)", "(dynamic)", "(nested)". +func joinOutputKeys(outputs []schema.InferredOutput) string { + keys := make([]string, 0, len(outputs)) + for _, o := range outputs { + if !isPlaceholderOutputKey(o.Key) { + keys = append(keys, o.Key) + } + } + return strings.Join(keys, ", ") +} + +// isPlaceholderOutputKey reports whether an output key is a dynamic/wildcard +// placeholder (e.g. "(key)", "(dynamic)", "(nested)"). Steps that expose +// such placeholders produce outputs whose field names cannot be statically +// determined, so field-path validation should be skipped for them. +func isPlaceholderOutputKey(key string) bool { + return len(key) >= 2 && key[0] == '(' && key[len(key)-1] == ')' +} + +// hasDynamicOutputs reports whether any output in the list is a wildcard +// placeholder, meaning the step emits fields that are not statically known. +func hasDynamicOutputs(outputs []schema.InferredOutput) bool { + for _, o := range outputs { + if isPlaceholderOutputKey(o.Key) { + return true + } + } + return false +} + +// ExtractSQLColumns parses a SQL SELECT statement and returns the column names +// (or aliases) from the SELECT clause. +func ExtractSQLColumns(query string) []string { + // Normalize whitespace + query = strings.Join(strings.Fields(query), " ") + + // Find SELECT ... FROM + upper := strings.ToUpper(query) + selectIdx := strings.Index(upper, "SELECT ") + fromIdx := strings.Index(upper, " FROM ") + if selectIdx < 0 || fromIdx < 0 || fromIdx <= selectIdx { + return nil + } + + selectClause := query[selectIdx+7 : fromIdx] + + // Handle DISTINCT + if strings.HasPrefix(strings.ToUpper(strings.TrimSpace(selectClause)), "DISTINCT ") { + selectClause = strings.TrimSpace(selectClause)[9:] + } + + // Split by comma, handling parenthesized subexpressions + var columns []string + depth := 0 + current := "" + for _, ch := range selectClause { + switch ch { + case '(': + depth++ + current += string(ch) + case ')': + depth-- + current += string(ch) + case ',': + if depth == 0 { + if col := extractColumnName(strings.TrimSpace(current)); col != "" { + columns = append(columns, col) + } + current = "" + } else { + current += string(ch) + } + default: + current += string(ch) + } + } + if col := extractColumnName(strings.TrimSpace(current)); col != "" { + columns = append(columns, col) + } + return columns +} + +// extractColumnName extracts the effective column name from a SELECT expression. +// Handles: "col", "table.col", "expr AS alias", "COALESCE(...) AS alias". +func extractColumnName(expr string) string { + if expr == "" || expr == "*" { + return "" + } + // Check for AS alias (case-insensitive) + upper := strings.ToUpper(expr) + if asIdx := strings.LastIndex(upper, " AS "); asIdx >= 0 { + alias := strings.TrimSpace(expr[asIdx+4:]) + // Remove quotes if present + alias = strings.Trim(alias, "\"'`") + return alias + } + // Check for table.column + if dotIdx := strings.LastIndex(expr, "."); dotIdx >= 0 { + return strings.TrimSpace(expr[dotIdx+1:]) + } + // Simple column name + return strings.TrimSpace(expr) +} diff --git a/validation/pipeline_refs_test.go b/validation/pipeline_refs_test.go new file mode 100644 index 00000000..4a1cf107 --- /dev/null +++ b/validation/pipeline_refs_test.go @@ -0,0 +1,375 @@ +package validation_test + +import ( + "strings" + "testing" + + "github.com/GoCodeAlone/workflow/validation" +) + +// TestValidatePipelineTemplateRefs_ValidRefs ensures no warnings are produced for +// well-formed pipelines where all step references are correct. +func TestValidatePipelineTemplateRefs_ValidRefs(t *testing.T) { + pipelines := map[string]any{ + "api": map[string]any{ + "steps": []any{ + map[string]any{ + "name": "query", + "type": "step.db_query", + "config": map[string]any{ + "database": "db", + "query": "SELECT id, name FROM users WHERE id = $1", + "mode": "single", + }, + }, + map[string]any{ + "name": "respond", + "type": "step.set", + "config": map[string]any{ + "values": map[string]any{ + "user_id": "{{ .steps.query.row.id }}", + "user_name": "{{ .steps.query.row.name }}", + }, + }, + }, + }, + }, + } + result := validation.ValidatePipelineTemplateRefs(pipelines) + if result.HasIssues() { + t.Errorf("expected no issues for valid pipeline, got warnings=%v errors=%v", result.Warnings, result.Errors) + } +} + +// TestValidatePipelineTemplateRefs_MissingStep checks that referencing a step that +// does not exist in the pipeline produces a warning. +func TestValidatePipelineTemplateRefs_MissingStep(t *testing.T) { + pipelines := map[string]any{ + "api": map[string]any{ + "steps": []any{ + map[string]any{ + "name": "respond", + "type": "step.set", + "config": map[string]any{ + "values": map[string]any{ + "x": "{{ .steps.nonexistent.value }}", + }, + }, + }, + }, + }, + } + result := validation.ValidatePipelineTemplateRefs(pipelines) + if len(result.Warnings) == 0 { + t.Error("expected warning for reference to nonexistent step") + } + found := false + for _, w := range result.Warnings { + if strings.Contains(w, "nonexistent") { + found = true + break + } + } + if !found { + t.Errorf("warning should mention 'nonexistent' step, got: %v", result.Warnings) + } +} + +// TestValidatePipelineTemplateRefs_ForwardRef checks that referencing a step that +// appears later in the pipeline produces a warning. +func TestValidatePipelineTemplateRefs_ForwardRef(t *testing.T) { + pipelines := map[string]any{ + "api": map[string]any{ + "steps": []any{ + map[string]any{ + "name": "step-a", + "type": "step.set", + "config": map[string]any{ + // References step-b which comes AFTER step-a + "values": map[string]any{"x": "{{ .steps.step_b.value }}"}, + }, + }, + map[string]any{ + "name": "step_b", + "type": "step.set", + "config": map[string]any{"values": map[string]any{"value": "hello"}}, + }, + }, + }, + } + result := validation.ValidatePipelineTemplateRefs(pipelines) + if len(result.Warnings) == 0 { + t.Error("expected warning for forward reference to later step") + } +} + +// TestValidatePipelineTemplateRefs_UnknownOutputField checks that referencing an +// output field that is not declared in a step's schema produces a warning. +func TestValidatePipelineTemplateRefs_UnknownOutputField(t *testing.T) { + pipelines := map[string]any{ + "api": map[string]any{ + "steps": []any{ + map[string]any{ + "name": "query", + "type": "step.db_query", + "config": map[string]any{"mode": "single"}, + }, + map[string]any{ + "name": "respond", + "type": "step.set", + "config": map[string]any{ + // "rows" is invalid for mode=single (should be "row") + "values": map[string]any{"x": "{{ .steps.query.rows }}"}, + }, + }, + }, + }, + } + result := validation.ValidatePipelineTemplateRefs(pipelines) + if len(result.Warnings) == 0 { + t.Error("expected warning for invalid output field 'rows' on single-mode db_query") + } +} + +// TestValidatePipelineTemplateRefs_KnownOutputField verifies that referencing a +// known output field does NOT produce a warning. +func TestValidatePipelineTemplateRefs_KnownOutputField(t *testing.T) { + pipelines := map[string]any{ + "api": map[string]any{ + "steps": []any{ + map[string]any{ + "name": "query", + "type": "step.db_query", + "config": map[string]any{"mode": "single"}, + }, + map[string]any{ + "name": "respond", + "type": "step.set", + "config": map[string]any{ + "values": map[string]any{"x": "{{ .steps.query.row }}"}, + }, + }, + }, + }, + } + result := validation.ValidatePipelineTemplateRefs(pipelines) + for _, w := range result.Warnings { + if strings.Contains(w, "declares outputs") { + t.Errorf("unexpected output field warning for known field: %s", w) + } + } +} + +// TestValidatePipelineTemplateRefs_SelfReference checks that a step referencing +// its own output produces a warning. +func TestValidatePipelineTemplateRefs_SelfReference(t *testing.T) { + pipelines := map[string]any{ + "api": map[string]any{ + "steps": []any{ + map[string]any{ + "name": "self", + "type": "step.set", + "config": map[string]any{ + "values": map[string]any{"x": "{{ .steps.self.value }}"}, + }, + }, + }, + }, + } + result := validation.ValidatePipelineTemplateRefs(pipelines) + if len(result.Warnings) == 0 { + t.Error("expected warning for self-reference") + } + found := false + for _, w := range result.Warnings { + if strings.Contains(w, "references itself") || strings.Contains(w, "self") { + found = true + break + } + } + if !found { + t.Errorf("warning should mention self-reference, got: %v", result.Warnings) + } +} + +// TestValidatePipelineTemplateRefs_HyphenatedStep ensures that a step with a +// hyphenated name correctly generates a warning when referenced via dot-access, +// and does NOT produce spurious output-field or unknown-step warnings alongside it. +func TestValidatePipelineTemplateRefs_HyphenatedStep(t *testing.T) { + pipelines := map[string]any{ + "api": map[string]any{ + "steps": []any{ + map[string]any{ + "name": "my-step", + "type": "step.set", + "config": map[string]any{"values": map[string]any{"val": "ok"}}, + }, + map[string]any{ + "name": "next", + "type": "step.set", + "config": map[string]any{ + "values": map[string]any{"x": "{{ .steps.my-step.val }}"}, + }, + }, + }, + }, + } + result := validation.ValidatePipelineTemplateRefs(pipelines) + // Should warn about hyphenated dot-access syntax + found := false + for _, w := range result.Warnings { + if strings.Contains(w, "hyphenated") { + found = true + break + } + } + if !found { + t.Errorf("expected hyphenated dot-access warning, got: %v", result.Warnings) + } + // Should NOT produce spurious "does not exist" or output-field warnings — + // the single hyphen warning is sufficient guidance for the user. + for _, w := range result.Warnings { + if strings.Contains(w, "does not exist") || strings.Contains(w, "declares outputs") { + t.Errorf("unexpected spurious warning alongside hyphen warning: %s", w) + } + } +} + +// TestValidatePipelineTemplateRefs_EmptyPipelines ensures no panic on empty input. +func TestValidatePipelineTemplateRefs_EmptyPipelines(t *testing.T) { + result := validation.ValidatePipelineTemplateRefs(nil) + if result == nil { + t.Fatal("result should not be nil") + } + if result.HasIssues() { + t.Errorf("expected no issues for nil pipelines, got: %+v", result) + } + + result = validation.ValidatePipelineTemplateRefs(map[string]any{}) + if result.HasIssues() { + t.Errorf("expected no issues for empty pipelines, got: %+v", result) + } +} + +// TestValidatePipelineTemplateRefs_MultiplePipelines checks that multiple pipelines +// are all validated and results are aggregated. +func TestValidatePipelineTemplateRefs_MultiplePipelines(t *testing.T) { + pipelines := map[string]any{ + "pipeline-a": map[string]any{ + "steps": []any{ + map[string]any{ + "name": "step1", + "type": "step.set", + "config": map[string]any{ + "values": map[string]any{"x": "{{ .steps.missing.val }}"}, + }, + }, + }, + }, + "pipeline-b": map[string]any{ + "steps": []any{ + map[string]any{ + "name": "ok-step", + "type": "step.set", + "config": map[string]any{ + "values": map[string]any{"y": "static"}, + }, + }, + }, + }, + } + result := validation.ValidatePipelineTemplateRefs(pipelines) + if len(result.Warnings) == 0 { + t.Error("expected at least one warning from pipeline-a") + } + // pipeline-b should not contribute warnings + foundPipelineA := false + for _, w := range result.Warnings { + if strings.Contains(w, "pipeline-a") { + foundPipelineA = true + } + if strings.Contains(w, "pipeline-b") { + t.Errorf("unexpected warning for pipeline-b: %s", w) + } + } + if !foundPipelineA { + t.Errorf("expected warning mentioning 'pipeline-a', got: %v", result.Warnings) + } +} + +// TestExtractSQLColumns verifies SQL column extraction from SELECT statements. +func TestExtractSQLColumns(t *testing.T) { + tests := []struct { + name string + query string + expected []string + }{ + { + name: "simple columns", + query: "SELECT id, name FROM users", + expected: []string{"id", "name"}, + }, + { + name: "table.column notation", + query: "SELECT u.id, u.email FROM users u", + expected: []string{"id", "email"}, + }, + { + name: "AS alias", + query: "SELECT id AS user_id, name AS user_name FROM users", + expected: []string{"user_id", "user_name"}, + }, + { + name: "DISTINCT", + query: "SELECT DISTINCT id, name FROM users", + expected: []string{"id", "name"}, + }, + { + name: "function with alias", + query: "SELECT COALESCE(name, 'unknown') AS display_name FROM users", + expected: []string{"display_name"}, + }, + { + name: "no FROM clause", + query: "INSERT INTO users (name) VALUES ('test')", + expected: nil, + }, + { + name: "wildcard", + query: "SELECT * FROM users", + expected: nil, // star is filtered + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := validation.ExtractSQLColumns(tc.query) + if len(got) != len(tc.expected) { + t.Errorf("expected columns %v, got %v", tc.expected, got) + return + } + for i, col := range tc.expected { + if got[i] != col { + t.Errorf("column[%d]: expected %q, got %q", i, col, got[i]) + } + } + }) + } +} + +// TestRefValidationResult_HasIssues checks the HasIssues helper. +func TestRefValidationResult_HasIssues(t *testing.T) { + r := &validation.RefValidationResult{} + if r.HasIssues() { + t.Error("expected HasIssues()=false for empty result") + } + r.Warnings = append(r.Warnings, "some warning") + if !r.HasIssues() { + t.Error("expected HasIssues()=true after adding warning") + } + r2 := &validation.RefValidationResult{} + r2.Errors = append(r2.Errors, "some error") + if !r2.HasIssues() { + t.Error("expected HasIssues()=true after adding error") + } +}