From eb4867e32ed8cf511dfb0e45afbca74e3ac16944 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 21 Apr 2026 13:46:03 +0000 Subject: [PATCH 1/5] Initial plan From a7edd1d89924ad576672915287fe048358a26417 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 21 Apr 2026 14:24:43 +0000 Subject: [PATCH 2/5] refactor: clarify workflow and code identifier sanitizers Agent-Logs-Url: https://github.com/github/gh-aw/sessions/b76bdebf-72b0-48e3-8f17-92d521a5e130 Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/stringutil/README.md | 4 ++++ pkg/stringutil/sanitize.go | 19 ++++++++++------ pkg/stringutil/sanitize_test.go | 35 +++++++++++++++++++++++++++++ pkg/workflow/codex_engine_test.go | 6 ++--- pkg/workflow/mcp_renderer_github.go | 2 +- pkg/workflow/strings.go | 18 +++++++-------- 6 files changed, 64 insertions(+), 20 deletions(-) diff --git a/pkg/stringutil/README.md b/pkg/stringutil/README.md index 53beb4fbe19..b4898d86316 100644 --- a/pkg/stringutil/README.md +++ b/pkg/stringutil/README.md @@ -105,6 +105,10 @@ stringutil.SanitizeErrorMessage("Error: MY_SECRET_TOKEN is invalid") // → "Error: [REDACTED] is invalid" ``` +### `SanitizeIdentifierName(name string, extraAllowed func(rune) bool) string` + +Sanitizes a string for use as a programming-language identifier by replacing invalid characters with underscores and prefixing `_` when the identifier starts with a digit. + ### `SanitizeParameterName(name string) string` Sanitizes a parameter name for use as a GitHub Actions output or environment variable name. Replaces non-alphanumeric characters with underscores. diff --git a/pkg/stringutil/sanitize.go b/pkg/stringutil/sanitize.go index 72f42a79701..cd57128c12a 100644 --- a/pkg/stringutil/sanitize.go +++ b/pkg/stringutil/sanitize.go @@ -80,11 +80,16 @@ func SanitizeErrorMessage(message string) string { return sanitized } -// sanitizeIdentifierName converts a name to a valid identifier by replacing -// disallowed characters with underscores. The extraAllowed function determines -// which additional runes (beyond a-z, A-Z, 0-9, _) are permitted. -// If the resulting name starts with a digit, an underscore is prepended. -func sanitizeIdentifierName(name string, extraAllowed func(rune) bool) string { +// SanitizeIdentifierName converts a name to a valid programming-language identifier +// by replacing disallowed characters with underscores. +// +// Use this function for code identifiers (for example JavaScript and Python variable +// names). It preserves [a-zA-Z0-9_] plus optional extraAllowed runes and prepends +// an underscore if the result would start with a digit. +// +// For workflow artifact and user-agent identifiers, use workflow.SanitizeArtifactIdentifier +// instead, which produces hyphen-separated lowercase output. +func SanitizeIdentifierName(name string, extraAllowed func(rune) bool) string { result := strings.Map(func(r rune) rune { if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '_' { return r @@ -121,7 +126,7 @@ func sanitizeIdentifierName(name string, extraAllowed func(rune) bool) string { // SanitizeParameterName("valid_name") // returns "valid_name" // SanitizeParameterName("$special") // returns "$special" func SanitizeParameterName(name string) string { - return sanitizeIdentifierName(name, func(r rune) bool { return r == '$' }) + return SanitizeIdentifierName(name, func(r rune) bool { return r == '$' }) } // SanitizePythonVariableName converts a parameter name to a valid Python identifier @@ -142,7 +147,7 @@ func SanitizeParameterName(name string) string { // SanitizePythonVariableName("123param") // returns "_123param" // SanitizePythonVariableName("valid_name") // returns "valid_name" func SanitizePythonVariableName(name string) string { - return sanitizeIdentifierName(name, nil) + return SanitizeIdentifierName(name, nil) } // SanitizeToolID removes common MCP prefixes and suffixes from tool IDs. diff --git a/pkg/stringutil/sanitize_test.go b/pkg/stringutil/sanitize_test.go index c469f6fedf5..660cfcfed55 100644 --- a/pkg/stringutil/sanitize_test.go +++ b/pkg/stringutil/sanitize_test.go @@ -304,6 +304,41 @@ func BenchmarkSanitizeErrorMessage_ManySecrets(b *testing.B) { } } +func TestSanitizeIdentifierName(t *testing.T) { + tests := []struct { + name string + input string + extraAllowed func(rune) bool + expected string + }{ + { + name: "default behavior uses underscores", + input: "my-workflow.name", + expected: "my_workflow_name", + }, + { + name: "prefix underscore when starting with number", + input: "123name", + expected: "_123name", + }, + { + name: "allows extra characters when provided", + input: "$param", + extraAllowed: func(r rune) bool { return r == '$' }, + expected: "$param", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := SanitizeIdentifierName(tt.input, tt.extraAllowed) + if result != tt.expected { + t.Errorf("SanitizeIdentifierName(%q) = %q; want %q", tt.input, result, tt.expected) + } + }) + } +} + func TestSanitizeParameterName(t *testing.T) { tests := []struct { name string diff --git a/pkg/workflow/codex_engine_test.go b/pkg/workflow/codex_engine_test.go index d4680f3388c..3848c3755ba 100644 --- a/pkg/workflow/codex_engine_test.go +++ b/pkg/workflow/codex_engine_test.go @@ -471,7 +471,7 @@ func TestCodexEngineRenderMCPConfigUserAgentFromConfig(t *testing.T) { } } -func TestSanitizeIdentifier(t *testing.T) { +func TestSanitizeArtifactIdentifier(t *testing.T) { tests := []struct { name string input string @@ -531,9 +531,9 @@ func TestSanitizeIdentifier(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result := SanitizeIdentifier(tt.input) + result := SanitizeArtifactIdentifier(tt.input) if result != tt.expected { - t.Errorf("SanitizeIdentifier(%q) = %q, expected %q", tt.input, result, tt.expected) + t.Errorf("SanitizeArtifactIdentifier(%q) = %q, expected %q", tt.input, result, tt.expected) } }) } diff --git a/pkg/workflow/mcp_renderer_github.go b/pkg/workflow/mcp_renderer_github.go index 80746a823fa..557c4281a7f 100644 --- a/pkg/workflow/mcp_renderer_github.go +++ b/pkg/workflow/mcp_renderer_github.go @@ -122,7 +122,7 @@ func (r *MCPConfigRendererUnified) renderGitHubTOML(yaml *strings.Builder, githu userAgent = workflowData.EngineConfig.UserAgent } else if workflowData.Name != "" { // Fall back to sanitizing workflow name to identifier - userAgent = SanitizeIdentifier(workflowData.Name) + userAgent = SanitizeArtifactIdentifier(workflowData.Name) } } yaml.WriteString(" user_agent = \"" + userAgent + "\"\n") diff --git a/pkg/workflow/strings.go b/pkg/workflow/strings.go index bd8907c59e8..3750969adeb 100644 --- a/pkg/workflow/strings.go +++ b/pkg/workflow/strings.go @@ -16,7 +16,7 @@ // - SanitizeWorkflowIDForCacheKey: Sanitizes workflow ID for use in cache keys (removes hyphens) // - sanitizeJobName: Sanitizes workflow name to a valid GitHub Actions job name // - sanitizeRefForPath: Sanitizes a git ref for use in a file path -// - SanitizeIdentifier: Creates clean identifiers for user agents +// - SanitizeArtifactIdentifier: Creates clean identifiers for artifacts and user agents // // Example: // @@ -411,7 +411,7 @@ func sanitizeRefForPath(ref string) string { return sanitized } -// SanitizeIdentifier sanitizes a workflow name to create a safe identifier +// SanitizeArtifactIdentifier sanitizes a workflow name to create a safe identifier // suitable for use as a user agent string or similar context. // // This is a SANITIZE function (character validity pattern). Use this when creating @@ -429,23 +429,23 @@ func sanitizeRefForPath(ref string) string { // // Example inputs and outputs: // -// SanitizeIdentifier("My Workflow") // returns "my-workflow" -// SanitizeIdentifier("test_workflow") // returns "test-workflow" -// SanitizeIdentifier("@@@") // returns "github-agentic-workflow" (default) -// SanitizeIdentifier("Weekly v2.0") // returns "weekly-v2-0" +// SanitizeArtifactIdentifier("My Workflow") // returns "my-workflow" +// SanitizeArtifactIdentifier("test_workflow") // returns "test-workflow" +// SanitizeArtifactIdentifier("@@@") // returns "github-agentic-workflow" (default) +// SanitizeArtifactIdentifier("Weekly v2.0") // returns "weekly-v2-0" // // This function uses the unified SanitizeName function with options configured // to trim leading/trailing hyphens and return a default value for empty results. // Hyphens are preserved by default in SanitizeName, not via PreserveSpecialChars. // -// Note: Do not confuse with stringutil.sanitizeIdentifierName (private), which uses +// Note: Do not confuse with stringutil.SanitizeIdentifierName, which uses // a different algorithm — it keeps [a-zA-Z0-9_] and replaces others with underscores, // making it suitable for programming language identifiers (e.g. JavaScript, Python). -// SanitizeIdentifier instead produces hyphen-separated lowercase identifiers for +// SanitizeArtifactIdentifier instead produces hyphen-separated lowercase identifiers for // workflow artifacts, job names, and user agent strings. // // See package documentation for guidance on when to use sanitize vs normalize patterns. -func SanitizeIdentifier(name string) string { +func SanitizeArtifactIdentifier(name string) string { stringsLog.Printf("Sanitizing identifier: %s", name) result := SanitizeName(name, &SanitizeOptions{ PreserveSpecialChars: []rune{}, From d70513a67d89c8a905a487b09ed85d02e8d419f2 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Tue, 21 Apr 2026 08:12:28 -0700 Subject: [PATCH 3/5] Update mcp_renderer_github.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pkg/workflow/mcp_renderer_github.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/workflow/mcp_renderer_github.go b/pkg/workflow/mcp_renderer_github.go index 557c4281a7f..5971fead2b6 100644 --- a/pkg/workflow/mcp_renderer_github.go +++ b/pkg/workflow/mcp_renderer_github.go @@ -121,7 +121,7 @@ func (r *MCPConfigRendererUnified) renderGitHubTOML(yaml *strings.Builder, githu if workflowData.EngineConfig != nil && workflowData.EngineConfig.UserAgent != "" { userAgent = workflowData.EngineConfig.UserAgent } else if workflowData.Name != "" { - // Fall back to sanitizing workflow name to identifier + // Fall back to sanitizing the workflow name as an artifact/user-agent identifier userAgent = SanitizeArtifactIdentifier(workflowData.Name) } } From fd02f18fbd77134d9fee5c20614e9fb30bf7b256 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Tue, 21 Apr 2026 08:12:36 -0700 Subject: [PATCH 4/5] Update README.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pkg/stringutil/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/stringutil/README.md b/pkg/stringutil/README.md index b4898d86316..7db65e1ea27 100644 --- a/pkg/stringutil/README.md +++ b/pkg/stringutil/README.md @@ -107,11 +107,11 @@ stringutil.SanitizeErrorMessage("Error: MY_SECRET_TOKEN is invalid") ### `SanitizeIdentifierName(name string, extraAllowed func(rune) bool) string` -Sanitizes a string for use as a programming-language identifier by replacing invalid characters with underscores and prefixing `_` when the identifier starts with a digit. +Sanitizes a string for use as a programming-language identifier by replacing invalid characters with underscores and prefixing `_` when the identifier starts with a digit. `extraAllowed` can be used to permit additional runes beyond the normal identifier rules; if `extraAllowed` is `nil`, no extra characters are allowed. ### `SanitizeParameterName(name string) string` -Sanitizes a parameter name for use as a GitHub Actions output or environment variable name. Replaces non-alphanumeric characters with underscores. +Sanitizes a parameter name for use as a GitHub Actions output or environment variable name. Preserves letters, digits, `$`, and `_`, and replaces all other characters with underscores. ### `SanitizePythonVariableName(name string) string` From 1ee89f4ed3db30ee1f452a1cc838bcce3530943d Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Tue, 21 Apr 2026 08:12:44 -0700 Subject: [PATCH 5/5] Update sanitize.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pkg/stringutil/sanitize.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pkg/stringutil/sanitize.go b/pkg/stringutil/sanitize.go index cd57128c12a..674b1a7ed5d 100644 --- a/pkg/stringutil/sanitize.go +++ b/pkg/stringutil/sanitize.go @@ -80,12 +80,16 @@ func SanitizeErrorMessage(message string) string { return sanitized } -// SanitizeIdentifierName converts a name to a valid programming-language identifier +// SanitizeIdentifierName sanitizes a name for use as a programming-language identifier // by replacing disallowed characters with underscores. // // Use this function for code identifiers (for example JavaScript and Python variable // names). It preserves [a-zA-Z0-9_] plus optional extraAllowed runes and prepends -// an underscore if the result would start with a digit. +// an underscore if the result would otherwise start with a digit. +// +// This function enforces only character-level sanitization. In particular, it returns +// the empty string unchanged for empty input and does not check language-specific +// constraints such as reserved keywords. // // For workflow artifact and user-agent identifiers, use workflow.SanitizeArtifactIdentifier // instead, which produces hyphen-separated lowercase output.