diff --git a/pkg/parser/frontmatter_content.go b/pkg/parser/frontmatter_content.go index fbaa30c9c4b..ce3ddf69fb0 100644 --- a/pkg/parser/frontmatter_content.go +++ b/pkg/parser/frontmatter_content.go @@ -136,35 +136,6 @@ func ExtractMarkdownSection(content, sectionName string) (string, error) { return extractedContent, nil } -// ExtractFrontmatterString extracts only the YAML frontmatter as a string -// This matches the bash extract_frontmatter function -func ExtractFrontmatterString(content string) (string, error) { - log.Printf("Extracting frontmatter string from content: size=%d bytes", len(content)) - result, err := ExtractFrontmatterFromContent(content) - if err != nil { - return "", err - } - - // Convert frontmatter map back to YAML string - if len(result.Frontmatter) == 0 { - log.Print("No frontmatter fields found, returning empty string") - return "", nil - } - - yamlBytes, err := yaml.Marshal(result.Frontmatter) - if err != nil { - return "", fmt.Errorf("failed to marshal frontmatter: %w", err) - } - - // Post-process YAML to ensure cron expressions are quoted - // The YAML library may drop quotes from cron expressions like "0 14 * * 1-5" - // which causes validation errors since they start with numbers but contain spaces - yamlString := string(yamlBytes) - yamlString = QuoteCronExpressions(yamlString) - - return strings.TrimSpace(yamlString), nil -} - // ExtractMarkdownContent extracts only the markdown content (excluding frontmatter) // This matches the bash extract_markdown function func ExtractMarkdownContent(content string) (string, error) { @@ -176,76 +147,6 @@ func ExtractMarkdownContent(content string) (string, error) { return result.Markdown, nil } -// ExtractYamlChunk extracts a specific YAML section with proper indentation handling -// This matches the bash extract_yaml_chunk function exactly -func ExtractYamlChunk(yamlContent, key string) (string, error) { - log.Printf("Extracting YAML chunk: key=%s, content_size=%d bytes", key, len(yamlContent)) - - if yamlContent == "" || key == "" { - return "", nil - } - - scanner := bufio.NewScanner(strings.NewReader(yamlContent)) - var result bytes.Buffer - inSection := false - var keyLevel int - // Match both quoted and unquoted keys - keyPattern := regexp.MustCompile(`^(\s*)(?:"` + regexp.QuoteMeta(key) + `"|` + regexp.QuoteMeta(key) + `):\s*(.*)$`) - - for scanner.Scan() { - line := scanner.Text() - - // Skip empty lines when not in section - if !inSection && strings.TrimSpace(line) == "" { - continue - } - - // Check if this line starts our target key - if matches := keyPattern.FindStringSubmatch(line); matches != nil { - inSection = true - keyLevel = len(matches[1]) // Indentation level - result.WriteString(line + "\n") - - // If it's a single-line value, we're done - if strings.TrimSpace(matches[2]) != "" { - break - } - continue - } - - // If we're in the section, check indentation - if inSection { - // Skip empty lines - if strings.TrimSpace(line) == "" { - continue - } - - // Count leading spaces - spaces := 0 - for _, char := range line { - if char == ' ' { - spaces++ - } else { - break - } - } - - // If indentation is less than or equal to key level, we've left the section - if spaces <= keyLevel { - break - } - - result.WriteString(line + "\n") - } - } - - if !inSection { - return "", nil - } - - return strings.TrimRight(result.String(), "\n"), nil -} - // ExtractWorkflowNameFromMarkdown extracts workflow name from first H1 header // This matches the bash extract_workflow_name_from_markdown function exactly func ExtractWorkflowNameFromMarkdown(filePath string) (string, error) { diff --git a/pkg/parser/frontmatter_extraction_test.go b/pkg/parser/frontmatter_extraction_test.go index 77e342e510d..e8ec1402714 100644 --- a/pkg/parser/frontmatter_extraction_test.go +++ b/pkg/parser/frontmatter_extraction_test.go @@ -3,7 +3,6 @@ package parser import ( - "strings" "testing" ) @@ -103,95 +102,6 @@ This is a test workflow with empty frontmatter.`, } } -func TestExtractYamlChunk(t *testing.T) { - tests := []struct { - name string - yaml string - key string - expected string - }{ - { - name: "simple key-value", - yaml: `title: Test Workflow -on: push -permissions: read`, - key: "on", - expected: "on: push", - }, - { - name: "nested structure", - yaml: `title: Test Workflow -on: - push: - branches: - - main - pull_request: - types: [opened] -permissions: read`, - key: "on", - expected: `on: - push: - branches: - - main - pull_request: - types: [opened]`, - }, - { - name: "deeply nested structure", - yaml: `tools: - bash: - allowed: - - "ls" - - "cat" - github: - allowed: - - "create_issue"`, - key: "tools", - expected: `tools: - bash: - allowed: - - "ls" - - "cat" - github: - allowed: - - "create_issue"`, - }, - { - name: "key not found", - yaml: `title: Test Workflow -on: push`, - key: "nonexistent", - expected: "", - }, - { - name: "empty yaml", - yaml: "", - key: "test", - expected: "", - }, - { - name: "empty key", - yaml: "title: Test", - key: "", - expected: "", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result, err := ExtractYamlChunk(tt.yaml, tt.key) - if err != nil { - t.Errorf("ExtractYamlChunk() error = %v", err) - return - } - - if result != tt.expected { - t.Errorf("ExtractYamlChunk() = %q, want %q", result, tt.expected) - } - }) - } -} - func TestExtractMarkdownSection(t *testing.T) { tests := []struct { name string @@ -332,54 +242,6 @@ func TestGenerateDefaultWorkflowName(t *testing.T) { } } -func TestExtractFrontmatterString(t *testing.T) { - tests := []struct { - name string - content string - expected string - wantErr bool - }{ - { - name: "valid frontmatter", - content: `--- -title: Test Workflow -on: push ---- - -# Content`, - expected: "on: push\ntitle: Test Workflow", - }, - { - name: "no frontmatter", - content: "# Just markdown", - expected: "", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result, err := ExtractFrontmatterString(tt.content) - - if tt.wantErr && err == nil { - t.Errorf("ExtractFrontmatterString() expected error, got nil") - return - } - - if !tt.wantErr && err != nil { - t.Errorf("ExtractFrontmatterString() error = %v", err) - return - } - - // For YAML, order may vary, so check both possible orders - if !strings.Contains(result, "title: Test Workflow") && tt.expected != "" { - if result != tt.expected { - t.Errorf("ExtractFrontmatterString() = %q, want %q", result, tt.expected) - } - } - }) - } -} - func TestExtractMarkdownContent(t *testing.T) { tests := []struct { name string diff --git a/pkg/parser/frontmatter_hash.go b/pkg/parser/frontmatter_hash.go index 9448c07d667..1c06f32380b 100644 --- a/pkg/parser/frontmatter_hash.go +++ b/pkg/parser/frontmatter_hash.go @@ -38,43 +38,6 @@ type FileReader func(filePath string) ([]byte, error) // DefaultFileReader reads files from disk using os.ReadFile var DefaultFileReader FileReader = os.ReadFile -// ComputeFrontmatterHash computes a deterministic SHA-256 hash of frontmatter -// including contributions from all imported workflows. -// -// The hash is computed over a canonical JSON representation that includes: -// - Main workflow frontmatter -// - All imported workflow frontmatter (in BFS processing order) -// - Normalized and sorted for deterministic output -// -// This function follows the Frontmatter Hash Specification (v1.0). -func ComputeFrontmatterHash(frontmatter map[string]any, baseDir string, cache *ImportCache) (string, error) { - frontmatterHashLog.Print("Computing frontmatter hash") - - // Process imports to get merged frontmatter - result, err := ProcessImportsFromFrontmatterWithManifest(frontmatter, baseDir, cache) - if err != nil { - return "", fmt.Errorf("failed to process imports: %w", err) - } - - // Build the canonical frontmatter map - canonical := buildCanonicalFrontmatter(frontmatter, result) - - // Serialize to canonical JSON - canonicalJSON, err := marshalCanonicalJSON(canonical) - if err != nil { - return "", fmt.Errorf("failed to marshal canonical JSON: %w", err) - } - - frontmatterHashLog.Printf("Canonical JSON length: %d bytes", len(canonicalJSON)) - - // Compute SHA-256 hash - hash := sha256.Sum256([]byte(canonicalJSON)) - hashHex := hex.EncodeToString(hash[:]) - - frontmatterHashLog.Printf("Computed hash: %s", hashHex) - return hashHex, nil -} - // buildCanonicalFrontmatter builds a canonical representation of frontmatter // including all fields that should be included in the hash computation. func buildCanonicalFrontmatter(frontmatter map[string]any, result *ImportsResult) map[string]any { @@ -342,44 +305,6 @@ func computeFrontmatterHashFromContent(content string, parsedFrontmatter map[str return computeFrontmatterHashTextBasedWithReader(frontmatterText, fullBody, baseDir, cache, relevantExpressions, fileReader) } -// ComputeFrontmatterHashWithExpressions computes the hash including template expressions -func ComputeFrontmatterHashWithExpressions(frontmatter map[string]any, baseDir string, cache *ImportCache, expressions []string) (string, error) { - frontmatterHashLog.Print("Computing frontmatter hash with template expressions") - - // Process imports to get merged frontmatter - result, err := ProcessImportsFromFrontmatterWithManifest(frontmatter, baseDir, cache) - if err != nil { - return "", fmt.Errorf("failed to process imports: %w", err) - } - - // Build the canonical frontmatter map - canonical := buildCanonicalFrontmatter(frontmatter, result) - - // Add template expressions if present - if len(expressions) > 0 { - // Sort expressions for deterministic output - sortedExpressions := make([]string, len(expressions)) - copy(sortedExpressions, expressions) - sort.Strings(sortedExpressions) - canonical["template-expressions"] = sortedExpressions - } - - // Serialize to canonical JSON - canonicalJSON, err := marshalCanonicalJSON(canonical) - if err != nil { - return "", fmt.Errorf("failed to marshal canonical JSON: %w", err) - } - - frontmatterHashLog.Printf("Canonical JSON length: %d bytes", len(canonicalJSON)) - - // Compute SHA-256 hash - hash := sha256.Sum256([]byte(canonicalJSON)) - hashHex := hex.EncodeToString(hash[:]) - - frontmatterHashLog.Printf("Computed hash: %s", hashHex) - return hashHex, nil -} - // extractRelevantTemplateExpressions extracts template expressions from markdown // that reference env. or vars. contexts func extractRelevantTemplateExpressions(markdown string) []string { diff --git a/pkg/parser/frontmatter_hash_stability_test.go b/pkg/parser/frontmatter_hash_stability_test.go index bf926768119..a3de5faac4a 100644 --- a/pkg/parser/frontmatter_hash_stability_test.go +++ b/pkg/parser/frontmatter_hash_stability_test.go @@ -3,7 +3,6 @@ package parser import ( - "encoding/json" "os" "os/exec" "path/filepath" @@ -177,22 +176,4 @@ Use env: ${{ env.TEST_VAR }} expressions := extractRelevantTemplateExpressions(result.Markdown) require.Len(t, expressions, 1, "Should extract one env expression") assert.Equal(t, "${{ env.TEST_VAR }}", expressions[0], "Should extract correct expression") - - // Build canonical - importsResult := &ImportsResult{} - canonical := buildCanonicalFrontmatter(result.Frontmatter, importsResult) - canonical["template-expressions"] = expressions - - canonicalJSON, err := marshalCanonicalJSON(canonical) - require.NoError(t, err, "Should marshal canonical JSON") - - // Verify the canonical JSON structure - var parsed map[string]any - err = json.Unmarshal([]byte(canonicalJSON), &parsed) - require.NoError(t, err, "Should parse canonical JSON") - - // Verify template expressions are included - exprs, hasExprs := parsed["template-expressions"].([]any) - require.True(t, hasExprs, "Canonical JSON should include template expressions") - assert.Len(t, exprs, 1, "Should have one expression") } diff --git a/pkg/parser/frontmatter_hash_test.go b/pkg/parser/frontmatter_hash_test.go index 6efb73f670c..9af46ef3e93 100644 --- a/pkg/parser/frontmatter_hash_test.go +++ b/pkg/parser/frontmatter_hash_test.go @@ -11,168 +11,6 @@ import ( "github.com/stretchr/testify/require" ) -func TestComputeFrontmatterHash_EmptyFrontmatter(t *testing.T) { - frontmatter := map[string]any{} - baseDir := "." - cache := NewImportCache("") - - hash, err := ComputeFrontmatterHash(frontmatter, baseDir, cache) - require.NoError(t, err, "Should compute hash for empty frontmatter") - assert.Len(t, hash, 64, "Hash should be 64 characters (SHA-256 hex)") - assert.Regexp(t, "^[a-f0-9]{64}$", hash, "Hash should be lowercase hex") -} - -func TestComputeFrontmatterHash_SimpleFrontmatter(t *testing.T) { - frontmatter := map[string]any{ - "engine": "copilot", - "description": "Test workflow", - "on": map[string]any{ - "schedule": "daily", - }, - } - baseDir := "." - cache := NewImportCache("") - - hash, err := ComputeFrontmatterHash(frontmatter, baseDir, cache) - require.NoError(t, err, "Should compute hash for simple frontmatter") - assert.Len(t, hash, 64, "Hash should be 64 characters") - - // Compute again to verify determinism - hash2, err := ComputeFrontmatterHash(frontmatter, baseDir, cache) - require.NoError(t, err, "Should compute hash again") - assert.Equal(t, hash, hash2, "Hash should be deterministic") -} - -func TestComputeFrontmatterHash_KeyOrdering(t *testing.T) { - // Test that different key ordering produces the same hash - frontmatter1 := map[string]any{ - "engine": "copilot", - "description": "Test", - "on": map[string]any{"schedule": "daily"}, - } - - frontmatter2 := map[string]any{ - "on": map[string]any{"schedule": "daily"}, - "description": "Test", - "engine": "copilot", - } - - cache := NewImportCache("") - - hash1, err := ComputeFrontmatterHash(frontmatter1, ".", cache) - require.NoError(t, err, "Should compute hash for frontmatter1") - - hash2, err := ComputeFrontmatterHash(frontmatter2, ".", cache) - require.NoError(t, err, "Should compute hash for frontmatter2") - - assert.Equal(t, hash1, hash2, "Hashes should be identical regardless of key order") -} - -func TestComputeFrontmatterHash_NestedObjects(t *testing.T) { - frontmatter := map[string]any{ - "tools": map[string]any{ - "playwright": map[string]any{ - "version": "v1.41.0", - "domains": []any{"github.com", "example.com"}, - }, - "mcp": map[string]any{ - "server": "remote", - }, - }, - "permissions": map[string]any{ - "contents": "read", - "actions": "write", - }, - } - - cache := NewImportCache("") - - hash, err := ComputeFrontmatterHash(frontmatter, ".", cache) - require.NoError(t, err, "Should compute hash for nested objects") - assert.Len(t, hash, 64, "Hash should be 64 characters") -} - -func TestComputeFrontmatterHash_Arrays(t *testing.T) { - frontmatter := map[string]any{ - "labels": []any{"audit", "automation", "daily"}, - "bots": []any{"copilot"}, - "steps": []any{ - map[string]any{ - "name": "Step 1", - "run": "echo 'test'", - }, - map[string]any{ - "name": "Step 2", - "run": "echo 'test2'", - }, - }, - } - - cache := NewImportCache("") - - hash, err := ComputeFrontmatterHash(frontmatter, ".", cache) - require.NoError(t, err, "Should compute hash with arrays") - assert.Len(t, hash, 64, "Hash should be 64 characters") - - // Array order matters - different order should produce different hash - frontmatter2 := map[string]any{ - "labels": []any{"automation", "audit", "daily"}, // Different order - "bots": []any{"copilot"}, - "steps": []any{ - map[string]any{ - "name": "Step 1", - "run": "echo 'test'", - }, - map[string]any{ - "name": "Step 2", - "run": "echo 'test2'", - }, - }, - } - - hash2, err := ComputeFrontmatterHash(frontmatter2, ".", cache) - require.NoError(t, err, "Should compute hash with reordered arrays") - assert.NotEqual(t, hash, hash2, "Array order should affect hash") -} - -func TestComputeFrontmatterHash_AllFieldTypes(t *testing.T) { - frontmatter := map[string]any{ - "engine": "claude", - "description": "Full workflow", - "tracker-id": "test-workflow", - "timeout-minutes": 30, - "on": map[string]any{ - "schedule": "daily", - "workflow_dispatch": true, - }, - "permissions": map[string]any{ - "contents": "read", - "actions": "read", - }, - "tools": map[string]any{ - "playwright": map[string]any{ - "version": "v1.41.0", - }, - }, - "network": map[string]any{ - "allowed": []any{"api.github.com"}, - }, - "runtimes": map[string]any{ - "node": map[string]any{ - "version": "20", - }, - }, - "labels": []any{"test"}, - "bots": []any{"copilot"}, - } - - cache := NewImportCache("") - - hash, err := ComputeFrontmatterHash(frontmatter, ".", cache) - require.NoError(t, err, "Should compute hash with all field types") - assert.Len(t, hash, 64, "Hash should be 64 characters") -} - func TestMarshalSorted_Primitives(t *testing.T) { tests := []struct { name string @@ -345,40 +183,6 @@ This is the main workflow. assert.Equal(t, hash, hash2, "Hash with imports should be deterministic") } -func TestBuildCanonicalFrontmatter(t *testing.T) { - frontmatter := map[string]any{ - "engine": "copilot", - "description": "Test", - "on": map[string]any{ - "schedule": "daily", - }, - "tools": map[string]any{ - "playwright": map[string]any{ - "version": "v1.41.0", - }, - }, - } - - result := &ImportsResult{ - MergedTools: `{"mcp":{"server":"remote"}}`, - MergedEngines: []string{"claude", "copilot"}, - ImportedFiles: []string{"shared/common.md"}, - } - - canonical := buildCanonicalFrontmatter(frontmatter, result) - - // Verify expected fields are present - assert.Equal(t, "copilot", canonical["engine"], "Should include engine") - assert.Equal(t, "Test", canonical["description"], "Should include description") - assert.NotNil(t, canonical["on"], "Should include on") - assert.NotNil(t, canonical["tools"], "Should include tools") - - // Verify merged content is included - assert.JSONEq(t, `{"mcp":{"server":"remote"}}`, canonical["merged-tools"].(string), "Should include merged tools") - assert.Equal(t, []string{"claude", "copilot"}, canonical["merged-engines"], "Should include merged engines") - assert.Equal(t, []string{"shared/common.md"}, canonical["imports"], "Should include imported files") -} - func TestComputeFrontmatterHashFromFileWithReader_CustomReader(t *testing.T) { // Create in-memory file system mock mockFS := map[string]string{