diff --git a/.github/agentics/large-payload-tester.md b/.github/agentics/large-payload-tester.md index cce3f794..36e9b497 100644 --- a/.github/agentics/large-payload-tester.md +++ b/.github/agentics/large-payload-tester.md @@ -9,6 +9,8 @@ You are an AI agent trying to verify a secret in a file hosted by the filesystem Use the filesystem MCP server to access a file called `large-test-file.json`, which contains a field `secret_reference`. Verify that the secret value matches the content of file `secret.txt` which should also be accessed from the filesystem MCP server. +If a payload is too large to return over MCP, the server will return a path in the local filesystem to the payload file instead. Use the path in the local filesystem to access the full payload and extract the secret. + ## Important Notes - **Keep all outputs concise** - Use brief, factual statements diff --git a/.github/workflows/large-payload-tester-README.md b/.github/workflows/large-payload-tester-README.md deleted file mode 100644 index 7426f509..00000000 --- a/.github/workflows/large-payload-tester-README.md +++ /dev/null @@ -1,181 +0,0 @@ -# Large Payload Tester Workflow - -## Purpose - -This agentic workflow tests the MCP Gateway's large payload handling feature, specifically: -1. **Payload Storage**: Verifies that large responses (>500 chars) are automatically stored to disk -2. **Metadata Response**: Confirms the gateway returns correct metadata including `payloadPath`, `schema`, `preview`, etc. -3. **Agent Access**: Tests that agents can successfully read the payload files from their mounted directories -4. **Session Isolation**: Validates that payload files are organized by session ID for multi-agent isolation - -## How It Works - -### Test Architecture - -``` -┌─────────────────┐ ┌──────────────────┐ ┌────────────────┐ -│ Agent │ │ MCP Gateway │ │ Filesystem │ -│ (via Copilot) │◄────────►│ Container │◄────────►│ MCP Server │ -└─────────────────┘ └──────────────────┘ └────────────────┘ - │ │ │ - │ │ │ - Reads via Stores payload Reads test file - filesystem MCP to /tmp/jq-payloads from /tmp/mcp-test-fs - │ │ │ - ▼ ▼ ▼ - /workspace/mcp-payloads/ /tmp/jq-payloads/ /workspace/test-data/ - (mounted from runner) (gateway writes) (mounted from runner) - {sessionID}/{queryID}/ large-test-file.json - payload.json (contains secret) - -Runner Filesystem: -/tmp/mcp-test-fs/ → Only accessible to filesystem MCP server -/tmp/jq-payloads/ → Shared between gateway (writes) and filesystem server (reads) -``` - -**Flow**: -1. Agent requests file via gateway → filesystem MCP server -2. Filesystem server reads from its `/workspace/test-data/` (mounted from `/tmp/mcp-test-fs`) -3. Gateway intercepts large response -4. Gateway stores to `/tmp/jq-payloads/{sessionID}/{queryID}/payload.json` -5. Agent reads payload via filesystem server's `/workspace/mcp-payloads/` mount - -### Test Protocol - -The workflow uses a **secret-based verification** approach: - -1. **Setup Phase** (bash step): - - Generate a unique UUID secret - - Create `/tmp/mcp-test-fs/test-secret.txt` containing just the secret - - Create `/tmp/mcp-test-fs/large-test-file.json` (~500KB) with: - - The secret embedded in JSON data - - Array of 2000 items each referencing the secret - - 400KB of padding to ensure size ~500KB - -2. **Test Phase** (agent): - - **Step 1**: Read `/workspace/test-data/test-secret.txt` to get the expected secret - - **Step 2**: Call filesystem MCP server to read `/workspace/test-data/large-test-file.json` - - **Step 3**: Gateway intercepts large response, stores to disk, returns metadata - - **Step 4**: Extract `payloadPath` and `queryID` from metadata - - **Step 5**: Translate path and read from `/workspace/mcp-payloads/{sessionID}/{queryID}/payload.json` - - **Step 6**: Extract secret from payload and verify it matches expected secret - -3. **Report Phase** (safe-output): - - Create GitHub issue with test results - - Report pass/fail for each step - - Include secret comparison results - -### Volume Mounts - -### Volume Mounts - -The workflow uses a carefully structured mount configuration to ensure proper isolation: - -1. **Test Data Mount** (filesystem MCP server ONLY): - ```yaml - /tmp/mcp-test-fs:/workspace/test-data:ro - ``` - - Contains the control secret file and large test file on the actions runner - - Mounted ONLY to the filesystem MCP server container (NOT to the gateway) - - Read-only access for safety - - Accessible to agent via filesystem MCP server at `/workspace/test-data/` - - Gateway does NOT have direct access to test files - -2. **Payload Mount** (filesystem MCP server): - ```yaml - /tmp/jq-payloads:/workspace/mcp-payloads:ro - ``` - - Allows agent to read stored payloads through filesystem MCP server - - Read-only to prevent accidental corruption - - Accessible to agent via `/workspace/mcp-payloads/` - - Initially empty/non-existent until gateway stores first payload - -3. **Gateway Payload Mount** (MCP gateway container): - ```yaml - /tmp/jq-payloads:/tmp/jq-payloads:rw - ``` - - Allows gateway to write payload files - - Read-write for payload storage - - Gateway creates directory structure on-demand - - This is the ONLY directory the gateway container has mounted - -**Key Design Principle**: The test data directory (`/tmp/mcp-test-fs`) is isolated from the gateway. The gateway only has access to the payload directory (`/tmp/jq-payloads`). This ensures that the gateway cannot directly access test files and must retrieve them through the filesystem MCP server, properly testing the MCP protocol flow. - -### Path Translation - -The agent must translate paths between gateway and agent perspectives: - -- **Gateway reports**: `/tmp/jq-payloads/{sessionID}/{queryID}/payload.json` -- **Agent uses**: `/workspace/mcp-payloads/{sessionID}/{queryID}/payload.json` -- **Translation rule**: Replace `/tmp/jq-payloads` → `/workspace/mcp-payloads` - -## Expected Behavior - -### Success Scenario - -When working correctly: -1. Gateway intercepts the large file read response -2. Gateway stores payload to disk with structure: `{payloadDir}/{sessionID}/{queryID}/payload.json` -3. Gateway returns metadata with `truncated: true` and `payloadPath` -4. Agent translates path and successfully reads payload file -5. Agent extracts secret from payload -6. Secret matches the expected value -7. Test reports **PASS** - -### Failure Scenarios - -The test is designed to detect: -- **Gateway not intercepting**: No `payloadPath` in response -- **Wrong path structure**: Agent can't find the file -- **Permission issues**: Agent can't read the payload file -- **Mount problems**: Volume mounts not configured correctly -- **Data corruption**: Secret in payload doesn't match expected -- **Session isolation broken**: Wrong session directory used - -## Files - -- `.github/workflows/large-payload-tester.md` - Workflow definition with frontmatter and setup steps -- `.github/agentics/large-payload-tester.md` - Agent prompt with detailed test instructions -- `.github/workflows/large-payload-tester.lock.yml` - Compiled GitHub Actions workflow - -## Triggering - -- **Manual**: `workflow_dispatch` - Can be triggered manually from GitHub UI -- **Scheduled**: Runs daily at a scattered time (around 1:12 AM UTC) - -## Configuration - -Key configuration in frontmatter: -- `strict: true` - Enforces security best practices -- `timeout-minutes: 10` - Reasonable timeout for the test -- `network.allowed: [defaults]` - Minimal network access -- `tools.bash: ["*"]` - Full bash access for setup steps -- `mcp-servers.filesystem` - Configured with two volume mounts - -## Related Features - -This workflow tests the jqschema middleware feature. The related implementation files are: -- `internal/middleware/jqschema.go` - Middleware that intercepts large responses -- `internal/middleware/README.md` - Documentation of the jqschema middleware -- `internal/config/config_payload.go` - Payload directory configuration -- `test/integration/large_payload_test.go` - Unit/integration tests for payload handling - -These files already exist in the repository and implement the feature being tested by this workflow. - -## Security Considerations - -- **Read-only mounts**: Agent has read-only access to payload directory -- **Session isolation**: Each session gets its own subdirectory -- **Payload cleanup**: Old payloads are not automatically cleaned up (manual cleanup needed) -- **File permissions**: Payload files created with `0600` (owner read/write only) -- **Secret handling**: Test secret is only used for this test and is not sensitive - -## Future Enhancements - -Potential improvements: -1. Test with multiple concurrent sessions -2. Test with very large payloads (>10MB) -3. Test payload cleanup mechanisms -4. Add performance metrics (storage time, read time) -5. Test error handling (disk full, permission denied) -6. Verify jq schema accuracy against complex data structures diff --git a/internal/middleware/jqschema.go b/internal/middleware/jqschema.go index 7c22410f..162b0635 100644 --- a/internal/middleware/jqschema.go +++ b/internal/middleware/jqschema.go @@ -17,6 +17,22 @@ import ( var logMiddleware = logger.New("middleware:jqschema") +// PayloadTruncatedInstructions is the message returned to clients when a payload +// has been truncated and saved to the filesystem +const PayloadTruncatedInstructions = "The payload was too large for an MCP response. The full response can be accessed through the local file system at the payloadPath." + +// PayloadMetadata represents the metadata response returned when a payload is too large +// and has been saved to the filesystem +type PayloadMetadata struct { + QueryID string `json:"queryID"` + PayloadPath string `json:"payloadPath"` + Preview string `json:"preview"` + Schema interface{} `json:"schema"` + OriginalSize int `json:"originalSize"` + Truncated bool `json:"truncated"` + Instructions string `json:"instructions"` +} + // jqSchemaFilter is the jq filter that transforms JSON to schema // This is the same logic as in gh-aw shared/jqschema.md const jqSchemaFilter = ` @@ -71,17 +87,18 @@ func generateRandomID() string { // applyJqSchema applies the jq schema transformation to JSON data // Uses pre-compiled query code for better performance (3-10x faster than parsing on each request) // Accepts a context for timeout and cancellation support -func applyJqSchema(ctx context.Context, jsonData interface{}) (string, error) { +// Returns the schema as an interface{} object (not a JSON string) +func applyJqSchema(ctx context.Context, jsonData interface{}) (interface{}, error) { // Check if compilation succeeded at init time if jqSchemaCompileErr != nil { - return "", jqSchemaCompileErr + return nil, jqSchemaCompileErr } // Run the pre-compiled query with context support (much faster than Parse+Run) iter := jqSchemaCode.RunWithContext(ctx, jsonData) v, ok := iter.Next() if !ok { - return "", fmt.Errorf("jq schema filter returned no results") + return nil, fmt.Errorf("jq schema filter returned no results") } // Check for errors with type-specific handling @@ -90,22 +107,17 @@ func applyJqSchema(ctx context.Context, jsonData interface{}) (string, error) { if haltErr, ok := err.(*gojq.HaltError); ok { // HaltError with nil value means clean halt (not an error) if haltErr.Value() == nil { - return "", fmt.Errorf("jq schema filter halted cleanly with no output") + return nil, fmt.Errorf("jq schema filter halted cleanly with no output") } // HaltError with non-nil value is an actual error - return "", fmt.Errorf("jq schema filter halted with error (exit code %d): %w", haltErr.ExitCode(), err) + return nil, fmt.Errorf("jq schema filter halted with error (exit code %d): %w", haltErr.ExitCode(), err) } // Generic error case - return "", fmt.Errorf("jq schema filter error: %w", err) - } - - // Convert result to JSON - schemaJSON, err := json.Marshal(v) - if err != nil { - return "", fmt.Errorf("failed to marshal schema result: %w", err) + return nil, fmt.Errorf("jq schema filter error: %w", err) } - return string(schemaJSON), nil + // Return the schema object directly (no JSON marshaling needed here) + return v, nil } // savePayload saves the payload to disk and returns the file path @@ -234,7 +246,7 @@ func WrapToolHandler( // Apply jq schema transformation logger.LogDebug("payload", "Applying jq schema transformation: tool=%s, queryID=%s", toolName, queryID) - var schemaJSON string + var schemaObj interface{} if schemaErr := func() error { // Unmarshal to interface{} for jq processing var jsonData interface{} @@ -246,7 +258,7 @@ func WrapToolHandler( if err != nil { return err } - schemaJSON = schema + schemaObj = schema return nil }(); schemaErr != nil { logMiddleware.Printf("Failed to apply jq schema: tool=%s, queryID=%s, sessionID=%s, error=%v", toolName, queryID, sessionID, schemaErr) @@ -256,8 +268,10 @@ func WrapToolHandler( return result, data, err } + // Calculate schema size for logging (marshal temporarily) + schemaBytes, _ := json.Marshal(schemaObj) logger.LogDebug("payload", "Schema transformation completed: tool=%s, queryID=%s, schemaSize=%d bytes", - toolName, queryID, len(schemaJSON)) + toolName, queryID, len(schemaBytes)) // Build the transformed response: first 500 chars + schema payloadStr := string(payloadJSON) @@ -273,14 +287,15 @@ func WrapToolHandler( toolName, queryID, len(payloadStr)) } - // Create rewritten response - rewrittenResponse := map[string]interface{}{ - "queryID": queryID, - "payloadPath": filePath, - "preview": preview, - "schema": schemaJSON, - "originalSize": len(payloadJSON), - "truncated": truncated, + // Create rewritten response using the PayloadMetadata struct + rewrittenResponse := PayloadMetadata{ + QueryID: queryID, + PayloadPath: filePath, + Preview: preview, + Schema: schemaObj, + OriginalSize: len(payloadJSON), + Truncated: truncated, + Instructions: PayloadTruncatedInstructions, } logMiddleware.Printf("Rewritten response: tool=%s, queryID=%s, sessionID=%s, originalSize=%d, truncated=%v", @@ -288,12 +303,6 @@ func WrapToolHandler( logger.LogInfo("payload", "Created metadata response for client: tool=%s, queryID=%s, session=%s, payloadPath=%s, originalSize=%d bytes, truncated=%v", toolName, queryID, sessionID, filePath, len(payloadJSON), truncated) - // Parse the schema JSON string back to an object for cleaner display - var schemaObj interface{} - if err := json.Unmarshal([]byte(schemaJSON), &schemaObj); err == nil { - rewrittenResponse["schema"] = schemaObj - } - // Marshal the rewritten response to JSON for the Content field rewrittenJSON, marshalErr := json.Marshal(rewrittenResponse) if marshalErr != nil { diff --git a/internal/middleware/jqschema_integration_test.go b/internal/middleware/jqschema_integration_test.go index 689b203d..864186f3 100644 --- a/internal/middleware/jqschema_integration_test.go +++ b/internal/middleware/jqschema_integration_test.go @@ -12,6 +12,21 @@ import ( "github.com/stretchr/testify/require" ) +// integrationPayloadMetadataToMap converts PayloadMetadata to map[string]interface{} for test assertions +func integrationPayloadMetadataToMap(t *testing.T, data interface{}) map[string]interface{} { + t.Helper() + pm, ok := data.(PayloadMetadata) + if !ok { + t.Fatalf("expected PayloadMetadata, got %T", data) + } + jsonBytes, err := json.Marshal(pm) + require.NoError(t, err) + var result map[string]interface{} + err = json.Unmarshal(jsonBytes, &result) + require.NoError(t, err) + return result +} + // TestMiddlewareIntegration tests the complete middleware flow func TestMiddlewareIntegration(t *testing.T) { // Create temporary directory for test @@ -88,8 +103,7 @@ func TestMiddlewareIntegration(t *testing.T) { assert.Len(t, queryIDFromContent, 32, "QueryID should be 32 hex characters") // Verify response structure in data return value (for internal use) - dataMap, ok := data.(map[string]interface{}) - require.True(t, ok, "Response should be a map") + dataMap := integrationPayloadMetadataToMap(t, data) // Check all required fields exist assert.Contains(t, dataMap, "queryID") @@ -170,7 +184,7 @@ func TestMiddlewareIntegration(t *testing.T) { assert.False(t, dataMap["truncated"].(bool), "Should not be truncated for small payloads") // Verify originalSize - originalSize := dataMap["originalSize"].(int) + originalSize := int(dataMap["originalSize"].(float64)) assert.Greater(t, originalSize, 0, "Original size should be positive") } @@ -222,7 +236,7 @@ func TestMiddlewareWithLargePayload(t *testing.T) { } // Also check data return value - dataMap := data.(map[string]interface{}) + dataMap := integrationPayloadMetadataToMap(t, data) // Verify truncation occurred truncated := dataMap["truncated"].(bool) @@ -277,7 +291,7 @@ func TestMiddlewareDirectoryCreation(t *testing.T) { queryIDFromContent := contentMap["queryID"].(string) // Also check data return value - dataMap := data.(map[string]interface{}) + dataMap := integrationPayloadMetadataToMap(t, data) queryID := dataMap["queryID"].(string) // Both should match diff --git a/internal/middleware/jqschema_test.go b/internal/middleware/jqschema_test.go index c531443a..e8e893cd 100644 --- a/internal/middleware/jqschema_test.go +++ b/internal/middleware/jqschema_test.go @@ -32,6 +32,22 @@ func TestGenerateRandomID(t *testing.T) { } } +// payloadMetadataToMap converts PayloadMetadata to map[string]interface{} for test assertions +// This allows tests to remain unchanged while working with the new struct type +func payloadMetadataToMap(t *testing.T, data interface{}) map[string]interface{} { + t.Helper() + pm, ok := data.(PayloadMetadata) + if !ok { + t.Fatalf("expected PayloadMetadata, got %T", data) + } + jsonBytes, err := json.Marshal(pm) + require.NoError(t, err) + var result map[string]interface{} + err = json.Unmarshal(jsonBytes, &result) + require.NoError(t, err) + return result +} + func TestApplyJqSchema(t *testing.T) { tests := []struct { name string @@ -83,7 +99,10 @@ func TestApplyJqSchema(t *testing.T) { t.Run(tt.name, func(t *testing.T) { result, err := applyJqSchema(context.Background(), tt.input) require.NoError(t, err, "applyJqSchema should not return error") - assert.JSONEq(t, tt.expected, result, "Schema should match expected") + // Convert result to JSON string for comparison + resultJSON, err := json.Marshal(result) + require.NoError(t, err, "Should marshal result to JSON") + assert.JSONEq(t, tt.expected, string(resultJSON), "Schema should match expected") }) } } @@ -169,8 +188,7 @@ func TestWrapToolHandler(t *testing.T) { assert.NotNil(t, schema, "Schema should not be nil") // Also verify rewritten response in data return value (for internal use) - dataMap, ok := data.(map[string]interface{}) - require.True(t, ok, "Data should be a map") + dataMap := payloadMetadataToMap(t, data) assert.Contains(t, dataMap, "queryID", "Data should contain queryID") assert.Contains(t, dataMap, "payloadPath", "Data should contain payloadPath") @@ -247,8 +265,7 @@ func TestWrapToolHandler_LongPayload(t *testing.T) { assert.True(t, strings.HasSuffix(preview, "..."), "Preview should end with '...'") // Also verify in data return value - dataMap, ok := data.(map[string]interface{}) - require.True(t, ok, "Data should be a map") + dataMap := payloadMetadataToMap(t, data) assert.True(t, dataMap["truncated"].(bool), "Should indicate truncation in data") } @@ -283,8 +300,8 @@ func TestPayloadStorage_SessionIsolation(t *testing.T) { require.NoError(t, err) // Extract payload paths - dataMap1 := data1.(map[string]interface{}) - dataMap2 := data2.(map[string]interface{}) + dataMap1 := payloadMetadataToMap(t, data1) + dataMap2 := payloadMetadataToMap(t, data2) payloadPath1 := dataMap1["payloadPath"].(string) payloadPath2 := dataMap2["payloadPath"].(string) @@ -341,7 +358,7 @@ func TestPayloadStorage_LargePayloadPreserved(t *testing.T) { _, data, err := wrapped(context.Background(), &sdk.CallToolRequest{}, map[string]interface{}{}) require.NoError(t, err) - dataMap := data.(map[string]interface{}) + dataMap := payloadMetadataToMap(t, data) // Verify truncation occurred assert.True(t, dataMap["truncated"].(bool), "Large payload should be truncated") @@ -374,7 +391,7 @@ func TestPayloadStorage_LargePayloadPreserved(t *testing.T) { assert.Equal(t, "test-author", metadata["author"], "Metadata author should be preserved") // Verify originalSize matches stored content size - originalSize := dataMap["originalSize"].(int) + originalSize := int(dataMap["originalSize"].(float64)) assert.Equal(t, len(storedContent), originalSize, "originalSize should match stored content size") } @@ -393,7 +410,7 @@ func TestPayloadStorage_DirectoryStructure(t *testing.T) { _, data, err := wrapped(context.Background(), &sdk.CallToolRequest{}, map[string]interface{}{}) require.NoError(t, err) - dataMap := data.(map[string]interface{}) + dataMap := payloadMetadataToMap(t, data) payloadPath := dataMap["payloadPath"].(string) queryID := dataMap["queryID"].(string) @@ -434,7 +451,7 @@ func TestPayloadStorage_MultipleRequestsSameSession(t *testing.T) { _, data, err := wrapped(context.Background(), &sdk.CallToolRequest{}, map[string]interface{}{}) require.NoError(t, err) - dataMap := data.(map[string]interface{}) + dataMap := payloadMetadataToMap(t, data) payloadPaths = append(payloadPaths, dataMap["payloadPath"].(string)) queryIDs = append(queryIDs, dataMap["queryID"].(string)) } @@ -502,7 +519,7 @@ func TestPayloadStorage_DefaultSessionID(t *testing.T) { _, data, err := wrapped(context.Background(), &sdk.CallToolRequest{}, map[string]interface{}{}) require.NoError(t, err) - dataMap := data.(map[string]interface{}) + dataMap := payloadMetadataToMap(t, data) payloadPath := dataMap["payloadPath"].(string) // Verify the payload is stored in "default" session directory @@ -561,12 +578,11 @@ func TestApplyJqSchema_ErrorCases(t *testing.T) { result, err := applyJqSchema(context.Background(), input) require.NoError(t, err, "Should handle deeply nested structures") - assert.NotEmpty(t, result, "Result should not be empty") + assert.NotNil(t, result, "Result should not be nil") // Verify the schema is correctly nested - var schema map[string]interface{} - err = json.Unmarshal([]byte(result), &schema) - require.NoError(t, err, "Should be valid JSON") + schema, ok := result.(map[string]interface{}) + require.True(t, ok, "Result should be a map") assert.Contains(t, schema, "level1", "Should contain level1") })