diff --git a/pkg/stringutil/README.md b/pkg/stringutil/README.md index 53beb4fbe19..7db65e1ea27 100644 --- a/pkg/stringutil/README.md +++ b/pkg/stringutil/README.md @@ -105,9 +105,13 @@ stringutil.SanitizeErrorMessage("Error: MY_SECRET_TOKEN is invalid") // → "Error: [REDACTED] is invalid" ``` +### `SanitizeIdentifierName(name string, extraAllowed func(rune) bool) string` + +Sanitizes a string for use as a programming-language identifier by replacing invalid characters with underscores and prefixing `_` when the identifier starts with a digit. `extraAllowed` can be used to permit additional runes beyond the normal identifier rules; if `extraAllowed` is `nil`, no extra characters are allowed. + ### `SanitizeParameterName(name string) string` -Sanitizes a parameter name for use as a GitHub Actions output or environment variable name. Replaces non-alphanumeric characters with underscores. +Sanitizes a parameter name for use as a GitHub Actions output or environment variable name. Preserves letters, digits, `$`, and `_`, and replaces all other characters with underscores. ### `SanitizePythonVariableName(name string) string` diff --git a/pkg/stringutil/sanitize.go b/pkg/stringutil/sanitize.go index 72f42a79701..674b1a7ed5d 100644 --- a/pkg/stringutil/sanitize.go +++ b/pkg/stringutil/sanitize.go @@ -80,11 +80,20 @@ func SanitizeErrorMessage(message string) string { return sanitized } -// sanitizeIdentifierName converts a name to a valid identifier by replacing -// disallowed characters with underscores. The extraAllowed function determines -// which additional runes (beyond a-z, A-Z, 0-9, _) are permitted. -// If the resulting name starts with a digit, an underscore is prepended. -func sanitizeIdentifierName(name string, extraAllowed func(rune) bool) string { +// SanitizeIdentifierName sanitizes a name for use as a programming-language identifier +// by replacing disallowed characters with underscores. +// +// Use this function for code identifiers (for example JavaScript and Python variable +// names). It preserves [a-zA-Z0-9_] plus optional extraAllowed runes and prepends +// an underscore if the result would otherwise start with a digit. +// +// This function enforces only character-level sanitization. In particular, it returns +// the empty string unchanged for empty input and does not check language-specific +// constraints such as reserved keywords. +// +// For workflow artifact and user-agent identifiers, use workflow.SanitizeArtifactIdentifier +// instead, which produces hyphen-separated lowercase output. +func SanitizeIdentifierName(name string, extraAllowed func(rune) bool) string { result := strings.Map(func(r rune) rune { if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '_' { return r @@ -121,7 +130,7 @@ func sanitizeIdentifierName(name string, extraAllowed func(rune) bool) string { // SanitizeParameterName("valid_name") // returns "valid_name" // SanitizeParameterName("$special") // returns "$special" func SanitizeParameterName(name string) string { - return sanitizeIdentifierName(name, func(r rune) bool { return r == '$' }) + return SanitizeIdentifierName(name, func(r rune) bool { return r == '$' }) } // SanitizePythonVariableName converts a parameter name to a valid Python identifier @@ -142,7 +151,7 @@ func SanitizeParameterName(name string) string { // SanitizePythonVariableName("123param") // returns "_123param" // SanitizePythonVariableName("valid_name") // returns "valid_name" func SanitizePythonVariableName(name string) string { - return sanitizeIdentifierName(name, nil) + return SanitizeIdentifierName(name, nil) } // SanitizeToolID removes common MCP prefixes and suffixes from tool IDs. diff --git a/pkg/stringutil/sanitize_test.go b/pkg/stringutil/sanitize_test.go index c469f6fedf5..660cfcfed55 100644 --- a/pkg/stringutil/sanitize_test.go +++ b/pkg/stringutil/sanitize_test.go @@ -304,6 +304,41 @@ func BenchmarkSanitizeErrorMessage_ManySecrets(b *testing.B) { } } +func TestSanitizeIdentifierName(t *testing.T) { + tests := []struct { + name string + input string + extraAllowed func(rune) bool + expected string + }{ + { + name: "default behavior uses underscores", + input: "my-workflow.name", + expected: "my_workflow_name", + }, + { + name: "prefix underscore when starting with number", + input: "123name", + expected: "_123name", + }, + { + name: "allows extra characters when provided", + input: "$param", + extraAllowed: func(r rune) bool { return r == '$' }, + expected: "$param", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := SanitizeIdentifierName(tt.input, tt.extraAllowed) + if result != tt.expected { + t.Errorf("SanitizeIdentifierName(%q) = %q; want %q", tt.input, result, tt.expected) + } + }) + } +} + func TestSanitizeParameterName(t *testing.T) { tests := []struct { name string diff --git a/pkg/workflow/codex_engine_test.go b/pkg/workflow/codex_engine_test.go index d4680f3388c..3848c3755ba 100644 --- a/pkg/workflow/codex_engine_test.go +++ b/pkg/workflow/codex_engine_test.go @@ -471,7 +471,7 @@ func TestCodexEngineRenderMCPConfigUserAgentFromConfig(t *testing.T) { } } -func TestSanitizeIdentifier(t *testing.T) { +func TestSanitizeArtifactIdentifier(t *testing.T) { tests := []struct { name string input string @@ -531,9 +531,9 @@ func TestSanitizeIdentifier(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result := SanitizeIdentifier(tt.input) + result := SanitizeArtifactIdentifier(tt.input) if result != tt.expected { - t.Errorf("SanitizeIdentifier(%q) = %q, expected %q", tt.input, result, tt.expected) + t.Errorf("SanitizeArtifactIdentifier(%q) = %q, expected %q", tt.input, result, tt.expected) } }) } diff --git a/pkg/workflow/mcp_renderer_github.go b/pkg/workflow/mcp_renderer_github.go index 80746a823fa..5971fead2b6 100644 --- a/pkg/workflow/mcp_renderer_github.go +++ b/pkg/workflow/mcp_renderer_github.go @@ -121,8 +121,8 @@ func (r *MCPConfigRendererUnified) renderGitHubTOML(yaml *strings.Builder, githu if workflowData.EngineConfig != nil && workflowData.EngineConfig.UserAgent != "" { userAgent = workflowData.EngineConfig.UserAgent } else if workflowData.Name != "" { - // Fall back to sanitizing workflow name to identifier - userAgent = SanitizeIdentifier(workflowData.Name) + // Fall back to sanitizing the workflow name as an artifact/user-agent identifier + userAgent = SanitizeArtifactIdentifier(workflowData.Name) } } yaml.WriteString(" user_agent = \"" + userAgent + "\"\n") diff --git a/pkg/workflow/strings.go b/pkg/workflow/strings.go index bd8907c59e8..3750969adeb 100644 --- a/pkg/workflow/strings.go +++ b/pkg/workflow/strings.go @@ -16,7 +16,7 @@ // - SanitizeWorkflowIDForCacheKey: Sanitizes workflow ID for use in cache keys (removes hyphens) // - sanitizeJobName: Sanitizes workflow name to a valid GitHub Actions job name // - sanitizeRefForPath: Sanitizes a git ref for use in a file path -// - SanitizeIdentifier: Creates clean identifiers for user agents +// - SanitizeArtifactIdentifier: Creates clean identifiers for artifacts and user agents // // Example: // @@ -411,7 +411,7 @@ func sanitizeRefForPath(ref string) string { return sanitized } -// SanitizeIdentifier sanitizes a workflow name to create a safe identifier +// SanitizeArtifactIdentifier sanitizes a workflow name to create a safe identifier // suitable for use as a user agent string or similar context. // // This is a SANITIZE function (character validity pattern). Use this when creating @@ -429,23 +429,23 @@ func sanitizeRefForPath(ref string) string { // // Example inputs and outputs: // -// SanitizeIdentifier("My Workflow") // returns "my-workflow" -// SanitizeIdentifier("test_workflow") // returns "test-workflow" -// SanitizeIdentifier("@@@") // returns "github-agentic-workflow" (default) -// SanitizeIdentifier("Weekly v2.0") // returns "weekly-v2-0" +// SanitizeArtifactIdentifier("My Workflow") // returns "my-workflow" +// SanitizeArtifactIdentifier("test_workflow") // returns "test-workflow" +// SanitizeArtifactIdentifier("@@@") // returns "github-agentic-workflow" (default) +// SanitizeArtifactIdentifier("Weekly v2.0") // returns "weekly-v2-0" // // This function uses the unified SanitizeName function with options configured // to trim leading/trailing hyphens and return a default value for empty results. // Hyphens are preserved by default in SanitizeName, not via PreserveSpecialChars. // -// Note: Do not confuse with stringutil.sanitizeIdentifierName (private), which uses +// Note: Do not confuse with stringutil.SanitizeIdentifierName, which uses // a different algorithm — it keeps [a-zA-Z0-9_] and replaces others with underscores, // making it suitable for programming language identifiers (e.g. JavaScript, Python). -// SanitizeIdentifier instead produces hyphen-separated lowercase identifiers for +// SanitizeArtifactIdentifier instead produces hyphen-separated lowercase identifiers for // workflow artifacts, job names, and user agent strings. // // See package documentation for guidance on when to use sanitize vs normalize patterns. -func SanitizeIdentifier(name string) string { +func SanitizeArtifactIdentifier(name string) string { stringsLog.Printf("Sanitizing identifier: %s", name) result := SanitizeName(name, &SanitizeOptions{ PreserveSpecialChars: []rune{},