Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 107 additions & 12 deletions pkg/parser/schema_suggestions.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,13 @@ func generateSchemaBasedSuggestions(schemaJSON, errorMessage, jsonPath, frontmat
if strings.Contains(strings.ToLower(errorMessage), "value must be one of") {
schemaSuggestionsLog.Print("Detected enum constraint violation")
enumValues := extractEnumValuesFromError(errorMessage)
userValue := extractYAMLValueAtPath(frontmatterContent, jsonPath)
// For oneOf errors, the path points to the container (e.g., "/permissions") but
// the enum constraint is on a nested field (e.g., "/permissions/contents").
// Try to extract the actual sub-path from the message.
actualPath := extractEnumConstraintPath(errorMessage, jsonPath)
userValue := extractYAMLValueAtPath(frontmatterContent, actualPath)
if userValue != "" && len(enumValues) > 0 {
closest := FindClosestMatches(userValue, enumValues, maxClosestMatches)
closest := sliceutil.Deduplicate(FindClosestMatches(userValue, enumValues, maxClosestMatches))
Comment on lines +49 to +55
Copy link

Copilot AI Mar 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR’s main behavior change is improving enum "Did you mean?" for nested oneOf errors, but the tests currently only cover extractEnumConstraintPath and extractYAMLValueAtPath separately. Add an end-to-end test for generateSchemaBasedSuggestions where jsonPath is the container (e.g. /permissions), the error message contains a nested enum line (e.g. - at '/permissions/contents': value must be one of ...), and frontmatter has a typo (contents: raed), asserting that the returned suggestion includes Did you mean 'read'?.

Copilot uses AI. Check for mistakes.
if len(closest) == 1 {
return fmt.Sprintf("Did you mean '%s'?", closest[0])
} else if len(closest) > 1 {
Expand Down Expand Up @@ -468,38 +472,129 @@ func extractEnumValuesFromError(errorMessage string) []string {
return values
}

// extractYAMLValueAtPath extracts the scalar value at a simple top-level JSON path
// (e.g., "/engine") from raw YAML frontmatter content.
// Only top-level paths are supported; nested paths return an empty string.
// extractYAMLValueAtPath extracts the scalar value at a JSON path from raw YAML frontmatter.
// Supports top-level paths ("/field") and two-level nested paths ("/parent/child").
// Deeper paths return an empty string.
func extractYAMLValueAtPath(yamlContent, jsonPath string) string {
if yamlContent == "" || jsonPath == "" {
return ""
}
// Only handle simple top-level paths like "/engine" (one slash, one segment)
if strings.Count(jsonPath, "/") != 1 {
segments := strings.SplitN(strings.TrimPrefix(jsonPath, "/"), "/", 3)
switch len(segments) {
case 1:
return extractTopLevelYAMLValue(yamlContent, segments[0])
case 2:
return extractNestedYAMLValue(yamlContent, segments[0], segments[1])
default:
return ""
}
fieldName := strings.TrimPrefix(jsonPath, "/")
}

// extractTopLevelYAMLValue extracts the scalar value of a top-level key from raw YAML.
// Uses horizontal-only whitespace between the colon and value to avoid matching multi-line blocks.
// Only keys at column 0 (no indentation) are matched, preventing false matches against
// nested keys with the same name.
func extractTopLevelYAMLValue(yamlContent, fieldName string) string {
escapedField := regexp.QuoteMeta(fieldName)

// Try single-quoted value: field: 'value'
reSingle := regexp.MustCompile(`(?m)^\s*` + escapedField + `\s*:\s*'([^'\n]+)'`)
// Try single-quoted value: field: 'value' (anchored to column 0, no leading whitespace)
reSingle := regexp.MustCompile(`(?m)^` + escapedField + `[ \t]*:[ \t]*'([^'\n]+)'`)
if match := reSingle.FindStringSubmatch(yamlContent); len(match) >= 2 {
return strings.TrimSpace(match[1])
}
// Try double-quoted value: field: "value"
reDouble := regexp.MustCompile(`(?m)^\s*` + escapedField + `\s*:\s*"([^"\n]+)"`)
reDouble := regexp.MustCompile(`(?m)^` + escapedField + `[ \t]*:[ \t]*"([^"\n]+)"`)
if match := reDouble.FindStringSubmatch(yamlContent); len(match) >= 2 {
return strings.TrimSpace(match[1])
}
// Try unquoted value: field: value
reUnquoted := regexp.MustCompile(`(?m)^\s*` + escapedField + `\s*:\s*([^'"\n#][^\n#]*?)(?:\s*#.*)?$`)
reUnquoted := regexp.MustCompile(`(?m)^` + escapedField + `[ \t]*:[ \t]*([^'"\n#][^\n#]*?)(?:[ \t]*#.*)?$`)
if match := reUnquoted.FindStringSubmatch(yamlContent); len(match) >= 2 {
Comment on lines +493 to 512
Copy link

Copilot AI Mar 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extractTopLevelYAMLValue is intended to read a top-level scalar, but the regexes are anchored with ^\s*, so they will also match nested/indented occurrences of the same key (e.g. parent:\n engine: copilot would satisfy path /engine). Consider anchoring to the start of line with no indentation (or otherwise explicitly restricting to indent=0) so top-level lookups can’t accidentally read nested keys now that nested path support exists.

See below for a potential fix:

	reSingle := regexp.MustCompile(`(?m)^` + escapedField + `[ \t]*:[ \t]*'([^'\n]+)'`)
	if match := reSingle.FindStringSubmatch(yamlContent); len(match) >= 2 {
		return strings.TrimSpace(match[1])
	}
	// Try double-quoted value: field: "value"
	reDouble := regexp.MustCompile(`(?m)^` + escapedField + `[ \t]*:[ \t]*"([^"\n]+)"`)
	if match := reDouble.FindStringSubmatch(yamlContent); len(match) >= 2 {
		return strings.TrimSpace(match[1])
	}
	// Try unquoted value: field: value
	reUnquoted := regexp.MustCompile(`(?m)^` + escapedField + `[ \t]*:[ \t]*([^'"\n#][^\n#]*?)(?:[ \t]*#.*)?$`)

Copilot uses AI. Check for mistakes.
return strings.TrimSpace(match[1])
}
return ""
}

// extractNestedYAMLValue extracts the scalar value of a direct child key under a parent key in raw YAML.
// It finds the parent key's block (by indentation), determines the direct-child indent level from
// the first non-blank line inside the block, and only matches keys at that exact indent level.
// This prevents false matches against grandchildren that share the same key name.
func extractNestedYAMLValue(yamlContent, parentKey, childKey string) string {
lines := strings.Split(yamlContent, "\n")

escapedParent := regexp.QuoteMeta(parentKey)
parentPattern := regexp.MustCompile(`^(\s*)` + escapedParent + `[ \t]*:`)
escapedChild := regexp.QuoteMeta(childKey)

parentIndent := -1
childIndent := -1 // indent of direct children (set on first non-blank line inside the block)
inParentBlock := false

for _, line := range lines {
if !inParentBlock {
if match := parentPattern.FindStringSubmatch(line); match != nil {
parentIndent = len(match[1])
inParentBlock = true
}
continue
}

// Inside parent block: skip blank lines
if strings.TrimSpace(line) == "" {
continue
}
lineIndent := len(line) - len(strings.TrimLeft(line, " \t"))

// Left parent block if indentation returned to parent level or less
if lineIndent <= parentIndent {
break
Comment on lines +546 to +550
Copy link

Copilot AI Mar 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extractNestedYAMLValue will match the child key at any indentation level deeper than the parent because the child regex uses ^\s+... and there’s no check that the key is a direct child of parentKey. This can return the wrong value when the same key appears in a deeper nested mapping within the parent block (grandchild) before the direct child. Consider deriving the direct-child indentation level (e.g., the minimum indent > parentIndent) and only matching keys at that indentation.

This issue also appears on line 549 of the same file.

Copilot uses AI. Check for mistakes.
}

// Establish the direct-child indentation from the first non-blank child line
if childIndent == -1 {
childIndent = lineIndent
}

// Only match keys at the direct-child indent level (not grandchildren deeper)
if lineIndent != childIndent {
continue
}

// Try to match child key with its value (single-quoted, double-quoted, unquoted).
childPrefix := `^\s+` + escapedChild + `[ \t]*:[ \t]*`
reSingle := regexp.MustCompile(childPrefix + `'([^'\n]+)'`)
if match := reSingle.FindStringSubmatch(line); len(match) >= 2 {
return strings.TrimSpace(match[1])
}
reDouble := regexp.MustCompile(childPrefix + `"([^"\n]+)"`)
if match := reDouble.FindStringSubmatch(line); len(match) >= 2 {
return strings.TrimSpace(match[1])
}
reUnquoted := regexp.MustCompile(childPrefix + `([^'"\n#][^\n#]*?)(?:[ \t]*#.*)?$`)
if match := reUnquoted.FindStringSubmatch(line); len(match) >= 2 {
return strings.TrimSpace(match[1])
}
}

return ""
}

// extractEnumConstraintPath finds the JSON path of an enum constraint violation in an error message.
// For simple errors like "value must be one of 'a', 'b'", it returns the provided fallbackPath.
// For oneOf errors that contain a nested sub-path such as:
//
// "- at '/permissions/contents': value must be one of 'read', 'write', 'none'"
//
// it extracts "/permissions/contents" as the actual constraint path.
var enumConstraintPathPattern = regexp.MustCompile(`at '(/[^']+)':\s*value must be one of`)

func extractEnumConstraintPath(errorMessage, fallbackPath string) string {
if match := enumConstraintPathPattern.FindStringSubmatch(errorMessage); len(match) >= 2 {
return match[1]
}
return fallbackPath
}

// collectSchemaPropertyPaths recursively collects all (fieldName, parentPath) pairs from a JSON schema document.
// It traverses properties, oneOf/anyOf/allOf, and items to build a complete picture of valid fields across the schema.
func collectSchemaPropertyPaths(schemaDoc any, currentPath string, depth int) []schemaFieldLocation {
Expand Down
108 changes: 106 additions & 2 deletions pkg/parser/schema_suggestions_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,15 @@ func TestGenerateSchemaBasedSuggestions(t *testing.T) {
frontmatterContent: "engine: xyz123\n",
wantEmpty: true,
},
{
// Full end-to-end: path is the oneOf container, message contains nested path,
// frontmatter has a permission level typo.
name: "nested oneOf enum violation extracts sub-path and suggests Did you mean",
errorMessage: "'oneOf' failed, none matched\n - at '/permissions': got object, want string\n - at '/permissions/contents': value must be one of 'read', 'write', 'none'",
jsonPath: "/permissions",
frontmatterContent: "permissions:\n contents: raed\n",
wantContains: []string{"Did you mean", "read"},
},
}

for _, tt := range tests {
Expand Down Expand Up @@ -409,11 +418,41 @@ func TestExtractYAMLValueAtPath(t *testing.T) {
wantValue: "copilot",
},
{
name: "nested path returns empty",
name: "nested path - child not in yaml returns empty",
yaml: "engine: copilot\n",
path: "/permissions/issues",
wantValue: "",
},
{
name: "nested path - extracts value under parent key",
yaml: "permissions:\n contents: raed\n issues: write\n",
path: "/permissions/contents",
wantValue: "raed",
},
{
name: "nested path - second child key",
yaml: "permissions:\n contents: read\n issues: neno\n",
path: "/permissions/issues",
wantValue: "neno",
},
{
name: "nested path - single-quoted value",
yaml: "permissions:\n contents: 'raed'\n",
path: "/permissions/contents",
wantValue: "raed",
},
{
name: "nested path - double-quoted value",
yaml: "permissions:\n contents: \"raed\"\n",
path: "/permissions/contents",
wantValue: "raed",
},
{
name: "three-level path returns empty",
yaml: "a:\n b:\n c: value\n",
path: "/a/b/c",
wantValue: "",
},
{
name: "empty yaml returns empty",
yaml: "",
Expand All @@ -426,6 +465,27 @@ func TestExtractYAMLValueAtPath(t *testing.T) {
path: "/timeout-minutes",
wantValue: "",
},
{
name: "top-level key with only nested block returns empty - no inline scalar value",
yaml: "permissions:\n contents: raed\n",
path: "/permissions",
wantValue: "",
},
{
// Ensures column-0 anchoring: a nested key with the same name must not
// satisfy a top-level path request.
name: "indented key with same name does not match top-level path",
yaml: "parent:\n engine: nested-value\nengine: top-value\n",
path: "/engine",
wantValue: "top-value",
},
{
// Grandchild key must not be returned for a direct-child path.
name: "nested path - grandchild key not returned for child path",
yaml: "permissions:\n nested:\n contents: grandchild\n contents: direct\n",
path: "/permissions/contents",
wantValue: "direct",
},
}

for _, tt := range tests {
Expand All @@ -438,7 +498,51 @@ func TestExtractYAMLValueAtPath(t *testing.T) {
}
}

// TestGenerateExampleFromSchemaWithExamples tests that schema examples array is preferred over generic fallback
// TestExtractEnumConstraintPath tests that the correct JSON path is extracted from
// enum constraint messages, including nested paths embedded in oneOf error messages.
func TestExtractEnumConstraintPath(t *testing.T) {
tests := []struct {
name string
errorMessage string
fallbackPath string
wantPath string
}{
{
name: "simple enum error uses fallback path",
errorMessage: "value must be one of 'claude', 'copilot'",
fallbackPath: "/engine",
wantPath: "/engine",
},
{
name: "nested enum constraint extracted from oneOf message",
errorMessage: "'oneOf' failed, none matched\n - at '/permissions': got object, want string\n - at '/permissions/contents': value must be one of 'read', 'write', 'none'",
fallbackPath: "/permissions",
wantPath: "/permissions/contents",
},
{
name: "nested path with issues scope",
errorMessage: " - at '/permissions/issues': value must be one of 'read', 'write', 'none'",
fallbackPath: "/permissions",
wantPath: "/permissions/issues",
},
{
name: "no enum path pattern uses fallback",
errorMessage: "got object, want string",
fallbackPath: "/engine",
wantPath: "/engine",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := extractEnumConstraintPath(tt.errorMessage, tt.fallbackPath)
if result != tt.wantPath {
t.Errorf("extractEnumConstraintPath() = %q, want %q", result, tt.wantPath)
}
})
}
}

func TestGenerateExampleFromSchemaWithExamples(t *testing.T) {
tests := []struct {
name string
Expand Down
Loading