diff --git a/go.mod b/go.mod index 0c9db388..f7782280 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( ) require ( - github.com/itchyny/gojq v0.12.18 + github.com/itchyny/gojq v0.12.19 github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 github.com/stretchr/testify v1.11.1 github.com/tetratelabs/wazero v1.11.0 @@ -30,7 +30,7 @@ require ( github.com/google/uuid v1.6.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/itchyny/timefmt-go v0.1.7 // indirect + github.com/itchyny/timefmt-go v0.1.8 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/segmentio/asm v1.1.3 // indirect github.com/segmentio/encoding v0.5.4 // indirect diff --git a/go.sum b/go.sum index da2d956b..8c290f57 100644 --- a/go.sum +++ b/go.sum @@ -26,10 +26,10 @@ github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/itchyny/gojq v0.12.18 h1:gFGHyt/MLbG9n6dqnvlliiya2TaMMh6FFaR2b1H6Drc= -github.com/itchyny/gojq v0.12.18/go.mod h1:4hPoZ/3lN9fDL1D+aK7DY1f39XZpY9+1Xpjz8atrEkg= -github.com/itchyny/timefmt-go v0.1.7 h1:xyftit9Tbw+Dc/huSSPJaEmX1TVL8lw5vxjJLK4GMMA= -github.com/itchyny/timefmt-go v0.1.7/go.mod h1:5E46Q+zj7vbTgWY8o5YkMeYb4I6GeWLFnetPy5oBrAI= +github.com/itchyny/gojq v0.12.19 h1:ttXA0XCLEMoaLOz5lSeFOZ6u6Q3QxmG46vfgI4O0DEs= +github.com/itchyny/gojq v0.12.19/go.mod h1:5galtVPDywX8SPSOrqjGxkBeDhSxEW1gSxoy7tn1iZY= +github.com/itchyny/timefmt-go v0.1.8 h1:1YEo1JvfXeAHKdjelbYr/uCuhkybaHCeTkH8Bo791OI= +github.com/itchyny/timefmt-go v0.1.8/go.mod h1:5E46Q+zj7vbTgWY8o5YkMeYb4I6GeWLFnetPy5oBrAI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= diff --git a/internal/middleware/README.md b/internal/middleware/README.md index 72eac895..0e40ce68 100644 --- a/internal/middleware/README.md +++ b/internal/middleware/README.md @@ -96,19 +96,19 @@ with open(payload_path) as f: The middleware uses the same jq filter logic as the gh-aw jqschema utility: ```jq -def walk(f): +def walk_schema: . as $in | if type == "object" then - reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))}) + reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk_schema)}) elif type == "array" then - if length == 0 then [] else [.[0] | walk(f)] end + if length == 0 then [] else [.[0] | walk_schema] end else type end; -walk(.) +walk_schema ``` -This recursively walks the JSON structure and replaces values with their type names. +This recursively walks the JSON structure and replaces values with their type names. The function is named `walk_schema` to avoid shadowing gojq's built-in `walk/1`. ### Go Implementation diff --git a/internal/middleware/jqschema.go b/internal/middleware/jqschema.go index 45c29e47..9f62ff04 100644 --- a/internal/middleware/jqschema.go +++ b/internal/middleware/jqschema.go @@ -42,7 +42,7 @@ type PayloadMetadata struct { } // jqSchemaFilter is the jq filter that transforms JSON to schema -// This filter leverages gojq v0.12.18 features including: +// This filter leverages gojq v0.12.19 features including: // - Enhanced array handling (supports up to 536,870,912 elements / 2^29) // - Improved concurrent execution performance // - Better error messages for type errors @@ -55,25 +55,26 @@ type PayloadMetadata struct { // For arrays, only the first element's schema is retained to represent the array structure. // Empty arrays are preserved as []. // -// NOTE: This defines a custom walk function rather than using gojq's built-in walk(f). +// NOTE: This defines a custom walk_schema function rather than using gojq's built-in walk(f). // The built-in walk(f) applies f to every node but preserves the original structure. -// Our custom walk does two things the built-in cannot: +// Our custom walk_schema does two things the built-in cannot: // 1. Replaces leaf values with their type name (e.g., "test" → "string") // 2. Collapses arrays to only the first element for schema inference // // These behaviors are incompatible with standard walk(f) semantics, which would // apply f post-recursion without structural changes to arrays. +// Using a distinct name avoids shadowing gojq's built-in walk/1. const jqSchemaFilter = ` -def walk(f): +def walk_schema: . as $in | if type == "object" then - reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))}) + reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk_schema)}) elif type == "array" then - if length == 0 then [] else [.[0] | walk(f)] end + if length == 0 then [] else [.[0] | walk_schema] end else type end; -walk(.) +walk_schema ` // Pre-compiled jq query code for performance @@ -107,7 +108,7 @@ func init() { return } - logMiddleware.Printf("Successfully compiled jq schema filter at init (gojq v0.12.18)") + logMiddleware.Printf("Successfully compiled jq schema filter at init") logger.LogInfo("startup", "jq schema filter compiled successfully - array limit: 2^29 elements, timeout: %v", DefaultJqTimeout) } @@ -135,7 +136,7 @@ func generateRandomID() string { // Error handling: // - Returns compilation errors if init() failed // - Returns context.DeadlineExceeded if query times out -// - Returns enhanced error messages for type errors (gojq v0.12.18+) +// - Returns enhanced error messages for type errors (gojq v0.12.19+) // - Properly handles gojq.HaltError for clean halt conditions func applyJqSchema(ctx context.Context, jsonData interface{}) (interface{}, error) { // Check if compilation succeeded at init time @@ -152,7 +153,7 @@ func applyJqSchema(ctx context.Context, jsonData interface{}) (interface{}, erro } // Run the pre-compiled query with context support (much faster than Parse+Run) - // The iterator is consumed only once because the walk(.) filter produces exactly + // The iterator is consumed only once because the walk_schema filter produces exactly // one output value (the fully-transformed schema). There is no need to drain it. iter := jqSchemaCode.RunWithContext(ctx, jsonData) v, ok := iter.Next() @@ -177,7 +178,7 @@ func applyJqSchema(ctx context.Context, jsonData interface{}) (interface{}, erro return nil, fmt.Errorf("jq schema filter halted with error (exit code %d): %w", haltErr.ExitCode(), err) } - // Generic error case (includes enhanced v0.12.18+ type error messages) + // Generic error case (includes enhanced v0.12.19+ type error messages) return nil, fmt.Errorf("jq schema filter error: %w", err) } @@ -209,19 +210,27 @@ func savePayload(baseDir, pathPrefix, sessionID, queryID string, payload []byte) logger.LogInfo("payload", "Writing large payload to filesystem: path=%s, size=%d bytes (%.2f KB, %.2f MB)", filePath, payloadSize, float64(payloadSize)/1024, float64(payloadSize)/(1024*1024)) - if err := os.WriteFile(filePath, payload, 0644); err != nil { + if err := os.WriteFile(filePath, payload, 0600); err != nil { logger.LogError("payload", "Failed to write payload file: path=%s, size=%d bytes, error=%v", filePath, payloadSize, err) return "", fmt.Errorf("failed to write payload file: %w", err) } - logger.LogInfo("payload", "Successfully saved large payload to filesystem: path=%s, size=%d bytes, permissions=0644", - filePath, payloadSize) + // Enforce permissions even if the file already existed (WriteFile only sets mode on create) + if err := os.Chmod(filePath, 0600); err != nil { + logger.LogError("payload", "Failed to enforce payload file permissions: path=%s, size=%d bytes, error=%v", + filePath, payloadSize, err) + return "", fmt.Errorf("failed to set payload file permissions: %w", err) + } - // Verify file was written correctly + // Verify file was written correctly and log actual resulting mode if stat, err := os.Stat(filePath); err != nil { logger.LogWarn("payload", "Could not verify payload file after write: path=%s, error=%v", filePath, err) + logger.LogInfo("payload", "Successfully saved large payload to filesystem: path=%s, size=%d bytes", + filePath, payloadSize) } else { + logger.LogInfo("payload", "Successfully saved large payload to filesystem: path=%s, size=%d bytes, permissions=%#o", + filePath, payloadSize, stat.Mode().Perm()) logger.LogDebug("payload", "Payload file verified: path=%s, size=%d bytes, mode=%s", filePath, stat.Size(), stat.Mode()) } diff --git a/internal/middleware/jqschema_test.go b/internal/middleware/jqschema_test.go index 2eb91932..b5396839 100644 --- a/internal/middleware/jqschema_test.go +++ b/internal/middleware/jqschema_test.go @@ -108,6 +108,36 @@ func TestApplyJqSchema(t *testing.T) { } } +// TestApplyJqSchema_SingleOutputContract verifies that the walk_schema filter produces +// exactly one output value. This documents the invariant that the iterator yields a single +// result, catching any future filter changes that accidentally produce multiple outputs. +func TestApplyJqSchema_SingleOutputContract(t *testing.T) { + require := require.New(t) + + require.Nil(jqSchemaCompileErr, "jq schema filter must compile without error") + require.NotNil(jqSchemaCode, "jq schema compiled code must not be nil") + + inputs := []interface{}{ + map[string]interface{}{"name": "test", "count": 42}, + []interface{}{map[string]interface{}{"id": 1}}, + map[string]interface{}{"nested": map[string]interface{}{"a": []interface{}{1, 2, 3}}}, + } + + for _, input := range inputs { + iter := jqSchemaCode.RunWithContext(context.Background(), input) + + // First call must return a value + v, ok := iter.Next() + require.True(ok, "walk_schema should produce at least one output") + _, isErr := v.(error) + require.False(isErr, "walk_schema should not produce an error: %v", v) + + // Second call must signal exhaustion (no more values) + v2, ok2 := iter.Next() + require.False(ok2, "walk_schema should produce exactly one output, got second value: %v", v2) + } +} + func TestSavePayload(t *testing.T) { // Create temporary directory for test baseDir := filepath.Join(os.TempDir(), "test-jq-payloads") @@ -517,7 +547,7 @@ func TestPayloadStorage_FilePermissions(t *testing.T) { // Check file permissions fileInfo, err := os.Stat(filePath) require.NoError(t, err) - assert.Equal(t, os.FileMode(0644), fileInfo.Mode().Perm(), "File should have 0644 permissions") + assert.Equal(t, os.FileMode(0600), fileInfo.Mode().Perm(), "File should have 0600 permissions") } // TestPayloadStorage_DefaultSessionID verifies behavior when session ID is empty