From b5b750a89959c7e9541293d7aa739a61a1fc255c Mon Sep 17 00:00:00 2001 From: Landon Cox Date: Wed, 1 Apr 2026 15:52:34 -0700 Subject: [PATCH 1/2] refactor: jq middleware improvements from gojq module review - Remove duplicate integrationPayloadMetadataToMap test helper; use the shared payloadMetadataToMap across both test files - Document why a custom walk function is used instead of gojq's built-in walk(f) (structural array collapse + leaf type replacement) - Document single-result iterator contract (walk(.) produces exactly one output) - Add UTF-8 safety comment on preview truncation (json.Marshal output is ASCII-clean, so byte-level slicing is safe) - Skip redundant JSON unmarshal in WrapToolHandler when data is already a native Go type (map[string]interface{} or []interface{}) Closes #2985 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- internal/middleware/jqschema.go | 27 ++++++++++++++++--- .../middleware/jqschema_integration_test.go | 17 +----------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/internal/middleware/jqschema.go b/internal/middleware/jqschema.go index 7820aa97..7f5c47c3 100644 --- a/internal/middleware/jqschema.go +++ b/internal/middleware/jqschema.go @@ -54,6 +54,15 @@ type PayloadMetadata struct { // // For arrays, only the first element's schema is retained to represent the array structure. // Empty arrays are preserved as []. +// +// NOTE: This defines a custom walk function rather than using gojq's built-in walk(f). +// The built-in walk(f) applies f to every node but preserves the original structure. +// Our custom walk does two things the built-in cannot: +// 1. Replaces leaf values with their type name (e.g., "test" → "string") +// 2. Collapses arrays to only the first element for schema inference +// +// These behaviors are incompatible with standard walk(f) semantics, which would +// apply f post-recursion without structural changes to arrays. const jqSchemaFilter = ` def walk(f): . as $in | @@ -143,6 +152,8 @@ func applyJqSchema(ctx context.Context, jsonData interface{}) (interface{}, erro } // Run the pre-compiled query with context support (much faster than Parse+Run) + // The iterator is consumed only once because the walk(.) filter produces exactly + // one output value (the fully-transformed schema). There is no need to drain it. iter := jqSchemaCode.RunWithContext(ctx, jsonData) v, ok := iter.Next() if !ok { @@ -331,10 +342,16 @@ func WrapToolHandler( logger.LogDebug("payload", "Applying jq schema transformation: tool=%s, queryID=%s", toolName, queryID) var schemaObj interface{} if schemaErr := func() error { - // Unmarshal to interface{} for jq processing + // Prepare data for jq processing. If data is already a native Go type + // (map or slice), use it directly to avoid a redundant JSON round-trip. var jsonData interface{} - if err := json.Unmarshal(payloadJSON, &jsonData); err != nil { - return fmt.Errorf("failed to unmarshal for schema: %w", err) + switch data.(type) { + case map[string]interface{}, []interface{}: + jsonData = data + default: + if err := json.Unmarshal(payloadJSON, &jsonData); err != nil { + return fmt.Errorf("failed to unmarshal for schema: %w", err) + } } schema, err := applyJqSchema(ctx, jsonData) @@ -359,6 +376,10 @@ func WrapToolHandler( // Build the transformed response: first PayloadPreviewSize chars + schema. // Slice the bytes before converting to string to avoid allocating a full copy of the // (potentially multi-MB) payload when only the first PayloadPreviewSize bytes are needed. + // + // Byte-level slicing is safe here because json.Marshal produces ASCII-clean output: + // non-ASCII runes are escaped as \uXXXX sequences, so every byte boundary is a + // valid UTF-8 boundary. payloadLen := len(payloadJSON) var preview string truncated := payloadLen > PayloadPreviewSize diff --git a/internal/middleware/jqschema_integration_test.go b/internal/middleware/jqschema_integration_test.go index 97e4ea73..e69a24c4 100644 --- a/internal/middleware/jqschema_integration_test.go +++ b/internal/middleware/jqschema_integration_test.go @@ -13,21 +13,6 @@ import ( "github.com/stretchr/testify/require" ) -// integrationPayloadMetadataToMap converts PayloadMetadata to map[string]interface{} for test assertions -func integrationPayloadMetadataToMap(t *testing.T, data interface{}) map[string]interface{} { - t.Helper() - pm, ok := data.(PayloadMetadata) - if !ok { - t.Fatalf("expected PayloadMetadata, got %T", data) - } - jsonBytes, err := json.Marshal(pm) - require.NoError(t, err) - var result map[string]interface{} - err = json.Unmarshal(jsonBytes, &result) - require.NoError(t, err) - return result -} - // TestMiddlewareIntegration tests the complete middleware flow func TestMiddlewareIntegration(t *testing.T) { // Create temporary directory for test @@ -214,7 +199,7 @@ func TestMiddlewareWithLargePayload(t *testing.T) { } // Also check data return value - dataMap := integrationPayloadMetadataToMap(t, data) + dataMap := payloadMetadataToMap(t, data) // Verify preview truncation (check if it ends with ...) preview := dataMap["payloadPreview"].(string) From faff0b8b185b3f5d3916328a4eafbb7b56eee4e3 Mon Sep 17 00:00:00 2001 From: Landon Cox Date: Wed, 1 Apr 2026 16:00:14 -0700 Subject: [PATCH 2/2] fix: use rune-boundary-safe UTF-8 truncation for preview json.Marshal emits raw UTF-8 (not \uXXXX escapes) for non-ASCII runes, so byte-level slicing can split multi-byte sequences. Adjust the cut point backward to the nearest valid rune boundary using utf8.RuneStart. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- internal/middleware/jqschema.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/internal/middleware/jqschema.go b/internal/middleware/jqschema.go index 7f5c47c3..c211b38d 100644 --- a/internal/middleware/jqschema.go +++ b/internal/middleware/jqschema.go @@ -10,6 +10,7 @@ import ( "path/filepath" "strings" "time" + "unicode/utf8" "github.com/github/gh-aw-mcpg/internal/logger" "github.com/itchyny/gojq" @@ -373,18 +374,22 @@ func WrapToolHandler( logger.LogDebug("payload", "Schema transformation completed: tool=%s, queryID=%s, schemaSize=%d bytes", toolName, queryID, len(schemaBytes)) - // Build the transformed response: first PayloadPreviewSize chars + schema. + // Build the transformed response: first PayloadPreviewSize bytes + schema. // Slice the bytes before converting to string to avoid allocating a full copy of the - // (potentially multi-MB) payload when only the first PayloadPreviewSize bytes are needed. + // (potentially multi-MB) payload when only a short preview is needed. // - // Byte-level slicing is safe here because json.Marshal produces ASCII-clean output: - // non-ASCII runes are escaped as \uXXXX sequences, so every byte boundary is a - // valid UTF-8 boundary. + // json.Marshal emits raw UTF-8 for non-ASCII runes, so a naive byte slice could + // split a multi-byte sequence. We adjust the cut point backward to the nearest + // valid rune boundary to guarantee the preview is valid UTF-8. payloadLen := len(payloadJSON) var preview string truncated := payloadLen > PayloadPreviewSize if truncated { - preview = string(payloadJSON[:PayloadPreviewSize]) + "..." + cutPoint := PayloadPreviewSize + for cutPoint > 0 && !utf8.RuneStart(payloadJSON[cutPoint]) { + cutPoint-- + } + preview = string(payloadJSON[:cutPoint]) + "..." logger.LogInfo("payload", "Payload truncated for preview: tool=%s, queryID=%s, originalSize=%d bytes, previewSize=%d bytes", toolName, queryID, payloadLen, PayloadPreviewSize) } else {