diff --git a/internal/middleware/jqschema.go b/internal/middleware/jqschema.go index 7820aa97..c211b38d 100644 --- a/internal/middleware/jqschema.go +++ b/internal/middleware/jqschema.go @@ -10,6 +10,7 @@ import ( "path/filepath" "strings" "time" + "unicode/utf8" "github.com/github/gh-aw-mcpg/internal/logger" "github.com/itchyny/gojq" @@ -54,6 +55,15 @@ type PayloadMetadata struct { // // For arrays, only the first element's schema is retained to represent the array structure. // Empty arrays are preserved as []. +// +// NOTE: This defines a custom walk function rather than using gojq's built-in walk(f). +// The built-in walk(f) applies f to every node but preserves the original structure. +// Our custom walk does two things the built-in cannot: +// 1. Replaces leaf values with their type name (e.g., "test" → "string") +// 2. Collapses arrays to only the first element for schema inference +// +// These behaviors are incompatible with standard walk(f) semantics, which would +// apply f post-recursion without structural changes to arrays. const jqSchemaFilter = ` def walk(f): . as $in | @@ -143,6 +153,8 @@ func applyJqSchema(ctx context.Context, jsonData interface{}) (interface{}, erro } // Run the pre-compiled query with context support (much faster than Parse+Run) + // The iterator is consumed only once because the walk(.) filter produces exactly + // one output value (the fully-transformed schema). There is no need to drain it. iter := jqSchemaCode.RunWithContext(ctx, jsonData) v, ok := iter.Next() if !ok { @@ -331,10 +343,16 @@ func WrapToolHandler( logger.LogDebug("payload", "Applying jq schema transformation: tool=%s, queryID=%s", toolName, queryID) var schemaObj interface{} if schemaErr := func() error { - // Unmarshal to interface{} for jq processing + // Prepare data for jq processing. If data is already a native Go type + // (map or slice), use it directly to avoid a redundant JSON round-trip. var jsonData interface{} - if err := json.Unmarshal(payloadJSON, &jsonData); err != nil { - return fmt.Errorf("failed to unmarshal for schema: %w", err) + switch data.(type) { + case map[string]interface{}, []interface{}: + jsonData = data + default: + if err := json.Unmarshal(payloadJSON, &jsonData); err != nil { + return fmt.Errorf("failed to unmarshal for schema: %w", err) + } } schema, err := applyJqSchema(ctx, jsonData) @@ -356,14 +374,22 @@ func WrapToolHandler( logger.LogDebug("payload", "Schema transformation completed: tool=%s, queryID=%s, schemaSize=%d bytes", toolName, queryID, len(schemaBytes)) - // Build the transformed response: first PayloadPreviewSize chars + schema. + // Build the transformed response: first PayloadPreviewSize bytes + schema. // Slice the bytes before converting to string to avoid allocating a full copy of the - // (potentially multi-MB) payload when only the first PayloadPreviewSize bytes are needed. + // (potentially multi-MB) payload when only a short preview is needed. + // + // json.Marshal emits raw UTF-8 for non-ASCII runes, so a naive byte slice could + // split a multi-byte sequence. We adjust the cut point backward to the nearest + // valid rune boundary to guarantee the preview is valid UTF-8. payloadLen := len(payloadJSON) var preview string truncated := payloadLen > PayloadPreviewSize if truncated { - preview = string(payloadJSON[:PayloadPreviewSize]) + "..." + cutPoint := PayloadPreviewSize + for cutPoint > 0 && !utf8.RuneStart(payloadJSON[cutPoint]) { + cutPoint-- + } + preview = string(payloadJSON[:cutPoint]) + "..." logger.LogInfo("payload", "Payload truncated for preview: tool=%s, queryID=%s, originalSize=%d bytes, previewSize=%d bytes", toolName, queryID, payloadLen, PayloadPreviewSize) } else { diff --git a/internal/middleware/jqschema_integration_test.go b/internal/middleware/jqschema_integration_test.go index 97e4ea73..e69a24c4 100644 --- a/internal/middleware/jqschema_integration_test.go +++ b/internal/middleware/jqschema_integration_test.go @@ -13,21 +13,6 @@ import ( "github.com/stretchr/testify/require" ) -// integrationPayloadMetadataToMap converts PayloadMetadata to map[string]interface{} for test assertions -func integrationPayloadMetadataToMap(t *testing.T, data interface{}) map[string]interface{} { - t.Helper() - pm, ok := data.(PayloadMetadata) - if !ok { - t.Fatalf("expected PayloadMetadata, got %T", data) - } - jsonBytes, err := json.Marshal(pm) - require.NoError(t, err) - var result map[string]interface{} - err = json.Unmarshal(jsonBytes, &result) - require.NoError(t, err) - return result -} - // TestMiddlewareIntegration tests the complete middleware flow func TestMiddlewareIntegration(t *testing.T) { // Create temporary directory for test @@ -214,7 +199,7 @@ func TestMiddlewareWithLargePayload(t *testing.T) { } // Also check data return value - dataMap := integrationPayloadMetadataToMap(t, data) + dataMap := payloadMetadataToMap(t, data) // Verify preview truncation (check if it ends with ...) preview := dataMap["payloadPreview"].(string)