From 52a803a1f7e092efe9fd2b3ed6414556686e07de Mon Sep 17 00:00:00 2001 From: Landon Cox Date: Tue, 31 Mar 2026 16:17:16 -0700 Subject: [PATCH 1/2] perf: add wazero compilation cache and wasm guard improvements Implement all findings from Go Fan report (#2829): 1. Process-level compilation cache: shared globalCompilationCache eliminates redundant JIT compilation across WasmGuard instances. 2. CompilationCache field in WasmGuardOptions: allows injection of custom caches for testing or per-deployment tuning. 3. TestMain with cache cleanup: properly closes the global cache after the test suite runs. 4. Consistent Close() error handling: use errors.Join to surface both module and runtime close errors instead of silently swallowing module errors. 5. Buffer retry strategy comment: documents the -2 error code convention and adaptive buffer growth protocol. 6. Remove redundant ExportedFunction("label_agent") check in LabelAgent: already verified at construction time and again inside callWasmFunction. Closes #2829 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- internal/guard/wasm.go | 45 ++++++++++++++++++++++++++++--------- internal/guard/wasm_test.go | 33 +++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 11 deletions(-) diff --git a/internal/guard/wasm.go b/internal/guard/wasm.go index d36795ae..4443a6db 100644 --- a/internal/guard/wasm.go +++ b/internal/guard/wasm.go @@ -3,6 +3,7 @@ package guard import ( "context" "encoding/json" + "errors" "fmt" "io" "os" @@ -18,12 +19,20 @@ import ( var logWasm = logger.New("guard:wasm") +// globalCompilationCache is a process-level compilation cache shared across all +// WasmGuard instances. wazero's cache is goroutine-safe and eliminates redundant +// JIT compilation when multiple guards load the same WASM binary. +var globalCompilationCache = wazero.NewCompilationCache() + // WasmGuardOptions configures optional settings for WASM guard creation type WasmGuardOptions struct { // Stdout is the writer for WASM stdout output. Defaults to os.Stdout if nil. Stdout io.Writer // Stderr is the writer for WASM stderr output. Defaults to os.Stderr if nil. Stderr io.Writer + // CompilationCache overrides the process-level compilation cache. + // If nil, the shared globalCompilationCache is used. + CompilationCache wazero.CompilationCache } // WasmGuard implements Guard interface by executing a WASM module in-process @@ -79,9 +88,17 @@ func NewWasmGuardFromBytes(ctx context.Context, name string, wasmBytes []byte, b func NewWasmGuardWithOptions(ctx context.Context, name string, wasmBytes []byte, backend BackendCaller, opts *WasmGuardOptions) (*WasmGuard, error) { logWasm.Printf("Creating WASM guard from bytes: name=%s, size=%d", name, len(wasmBytes)) + // Select compilation cache: use injected cache if provided, else shared global. + cache := globalCompilationCache + if opts != nil && opts.CompilationCache != nil { + cache = opts.CompilationCache + } + // Create WASM runtime with explicit compiler config and context-based cancellation // WithCloseOnContextDone enables request-scoped timeouts to propagate into guard execution - runtimeConfig := wazero.NewRuntimeConfigCompiler().WithCloseOnContextDone(true) + runtimeConfig := wazero.NewRuntimeConfigCompiler(). + WithCloseOnContextDone(true). + WithCompilationCache(cache) runtime := wazero.NewRuntimeWithConfig(ctx, runtimeConfig) // Instantiate WASI @@ -624,10 +641,6 @@ func (g *WasmGuard) callWasmGuardFunction(ctx context.Context, funcName string, func (g *WasmGuard) LabelAgent(ctx context.Context, policy interface{}, backend BackendCaller, caps *difc.Capabilities) (*LabelAgentResult, error) { logWasm.Printf("LabelAgent called: guard=%s", g.name) - if g.module.ExportedFunction("label_agent") == nil { - return nil, fmt.Errorf("WASM guard does not export label_agent") - } - // Normalisation and payload-build operate only on the caller-supplied `policy` // argument and do not access any g.* fields, so they are safe to run outside // the lock that callWasmGuardFunction acquires. @@ -810,7 +823,18 @@ func (g *WasmGuard) callWasmFunction(ctx context.Context, funcName string, input return nil, fmt.Errorf("input too large: %d bytes (max %d)", len(inputJSON), maxInputSize) } - // Try with initial buffer size, retry with larger buffer if needed + // Adaptive output buffer strategy: + // + // WASM guards communicate buffer-too-small via a return code convention: + // -2 → buffer too small; first 4 bytes of the output buffer MAY contain the + // required size as a little-endian uint32. If present and > 0, we use + // that size for the next attempt; otherwise we double the buffer. + // < 0 → other error (returned as-is to the caller). + // >= 0 → success; value is the number of bytes written to the output buffer. + // + // We retry up to maxRetries times, growing from 4MB toward the 16MB ceiling. + // A WASM trap (e.g. "wasm error: unreachable" from a Rust panic) permanently + // marks the guard as failed because the module's internal state may be corrupt. outputSize := initialOutputSize const maxRetries = 3 @@ -1130,13 +1154,12 @@ func parseCollectionLabeledData(items []interface{}) (*difc.CollectionLabeledDat // Close releases WASM runtime resources func (g *WasmGuard) Close(ctx context.Context) error { + var moduleErr, runtimeErr error if g.module != nil { - if err := g.module.Close(ctx); err != nil { - logWasm.Printf("Error closing module: %v", err) - } + moduleErr = g.module.Close(ctx) } if g.runtime != nil { - return g.runtime.Close(ctx) + runtimeErr = g.runtime.Close(ctx) } - return nil + return errors.Join(moduleErr, runtimeErr) } diff --git a/internal/guard/wasm_test.go b/internal/guard/wasm_test.go index 543aee0e..4a5fd257 100644 --- a/internal/guard/wasm_test.go +++ b/internal/guard/wasm_test.go @@ -6,6 +6,7 @@ import ( "encoding/binary" "encoding/json" "errors" + "os" "testing" "time" @@ -16,6 +17,12 @@ import ( "github.com/tetratelabs/wazero" ) +func TestMain(m *testing.M) { + code := m.Run() + globalCompilationCache.Close(context.Background()) + os.Exit(code) +} + type ctxKey string const testCtxKey ctxKey = "test-key" @@ -1081,3 +1088,29 @@ func TestWasmGuardFailedState(t *testing.T) { assert.ErrorIs(t, err, originalErr) }) } + +func TestWasmGuardCompilationCache(t *testing.T) { + t.Run("global compilation cache is not nil", func(t *testing.T) { + assert.NotNil(t, globalCompilationCache) + }) + + t.Run("custom cache is used when provided via options", func(t *testing.T) { + customCache := wazero.NewCompilationCache() + defer customCache.Close(context.Background()) + + opts := &WasmGuardOptions{ + CompilationCache: customCache, + } + + // Instantiation will fail (minimal WASM), but the code path + // that selects the cache runs before module compilation. + ctx := context.Background() + _, _ = NewWasmGuardWithOptions(ctx, "cache-test", minimalGuardWasm, &mockBackendCaller{}, opts) + }) + + t.Run("global cache is used when options cache is nil", func(t *testing.T) { + ctx := context.Background() + // nil opts → global cache path + _, _ = NewWasmGuardWithOptions(ctx, "cache-test", minimalGuardWasm, &mockBackendCaller{}, nil) + }) +} From d75033cde010d8ef00840da56007b9a3c7270c07 Mon Sep 17 00:00:00 2001 From: Landon Cox Date: Tue, 31 Mar 2026 16:26:11 -0700 Subject: [PATCH 2/2] fix: address PR review feedback on compilation cache - Add DisableCompilationCache bool to WasmGuardOptions for explicit opt-out of caching (avoids unbounded memory in long-lived processes) - Check and surface error from globalCompilationCache.Close() in TestMain - Replace unobservable cache tests with disk-backed assertions that verify cache artifacts are actually written to the expected directory Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- internal/guard/wasm.go | 25 ++++++++------- internal/guard/wasm_test.go | 61 ++++++++++++++++++++++++++++++++----- 2 files changed, 68 insertions(+), 18 deletions(-) diff --git a/internal/guard/wasm.go b/internal/guard/wasm.go index 4443a6db..bc05b2ed 100644 --- a/internal/guard/wasm.go +++ b/internal/guard/wasm.go @@ -31,8 +31,13 @@ type WasmGuardOptions struct { // Stderr is the writer for WASM stderr output. Defaults to os.Stderr if nil. Stderr io.Writer // CompilationCache overrides the process-level compilation cache. - // If nil, the shared globalCompilationCache is used. + // If nil and DisableCompilationCache is false, the shared globalCompilationCache is used. CompilationCache wazero.CompilationCache + // DisableCompilationCache, when true, prevents any compilation cache from being used + // even if a global or per-instance cache is available. This can be useful to avoid + // unbounded memory growth when many distinct WASM binaries are loaded over the + // lifetime of a long-lived process. + DisableCompilationCache bool } // WasmGuard implements Guard interface by executing a WASM module in-process @@ -88,17 +93,15 @@ func NewWasmGuardFromBytes(ctx context.Context, name string, wasmBytes []byte, b func NewWasmGuardWithOptions(ctx context.Context, name string, wasmBytes []byte, backend BackendCaller, opts *WasmGuardOptions) (*WasmGuard, error) { logWasm.Printf("Creating WASM guard from bytes: name=%s, size=%d", name, len(wasmBytes)) - // Select compilation cache: use injected cache if provided, else shared global. - cache := globalCompilationCache - if opts != nil && opts.CompilationCache != nil { - cache = opts.CompilationCache + // Select compilation cache: explicit opt-out, injected cache, or shared global. + runtimeConfig := wazero.NewRuntimeConfigCompiler().WithCloseOnContextDone(true) + if opts != nil && opts.DisableCompilationCache { + // Caller explicitly disabled caching + } else if opts != nil && opts.CompilationCache != nil { + runtimeConfig = runtimeConfig.WithCompilationCache(opts.CompilationCache) + } else { + runtimeConfig = runtimeConfig.WithCompilationCache(globalCompilationCache) } - - // Create WASM runtime with explicit compiler config and context-based cancellation - // WithCloseOnContextDone enables request-scoped timeouts to propagate into guard execution - runtimeConfig := wazero.NewRuntimeConfigCompiler(). - WithCloseOnContextDone(true). - WithCompilationCache(cache) runtime := wazero.NewRuntimeWithConfig(ctx, runtimeConfig) // Instantiate WASI diff --git a/internal/guard/wasm_test.go b/internal/guard/wasm_test.go index 4a5fd257..d069f42d 100644 --- a/internal/guard/wasm_test.go +++ b/internal/guard/wasm_test.go @@ -6,6 +6,7 @@ import ( "encoding/binary" "encoding/json" "errors" + "fmt" "os" "testing" "time" @@ -19,7 +20,12 @@ import ( func TestMain(m *testing.M) { code := m.Run() - globalCompilationCache.Close(context.Background()) + if err := globalCompilationCache.Close(context.Background()); err != nil { + fmt.Fprintf(os.Stderr, "failed to close global compilation cache: %v\n", err) + if code == 0 { + code = 1 + } + } os.Exit(code) } @@ -1095,22 +1101,63 @@ func TestWasmGuardCompilationCache(t *testing.T) { }) t.Run("custom cache is used when provided via options", func(t *testing.T) { - customCache := wazero.NewCompilationCache() - defer customCache.Close(context.Background()) + ctx := context.Background() + + // Use a disk-backed cache in a temp dir so we can observe that it was used. + cacheDir := t.TempDir() + customCache, err := wazero.NewCompilationCacheWithDir(cacheDir) + require.NoError(t, err) + defer customCache.Close(ctx) opts := &WasmGuardOptions{ CompilationCache: customCache, } // Instantiation will fail (minimal WASM), but the code path - // that selects the cache runs before module compilation. - ctx := context.Background() - _, _ = NewWasmGuardWithOptions(ctx, "cache-test", minimalGuardWasm, &mockBackendCaller{}, opts) + // that selects the cache runs before module compilation, which + // should populate the disk-backed cache. + _, err = NewWasmGuardWithOptions(ctx, "cache-test", minimalGuardWasm, &mockBackendCaller{}, opts) + require.Error(t, err) + + entries, readErr := os.ReadDir(cacheDir) + require.NoError(t, readErr) + assert.NotEmpty(t, entries, "expected compilation artifacts in custom cache directory") }) t.Run("global cache is used when options cache is nil", func(t *testing.T) { ctx := context.Background() + + // Swap in a disk-backed global cache pointing at a temp dir so we can + // observe that the global cache path is actually exercised. + cacheDir := t.TempDir() + tmpCache, err := wazero.NewCompilationCacheWithDir(cacheDir) + require.NoError(t, err) + + origCache := globalCompilationCache + globalCompilationCache = tmpCache + defer func() { + globalCompilationCache = origCache + tmpCache.Close(ctx) + }() + // nil opts → global cache path - _, _ = NewWasmGuardWithOptions(ctx, "cache-test", minimalGuardWasm, &mockBackendCaller{}, nil) + _, err = NewWasmGuardWithOptions(ctx, "cache-test", minimalGuardWasm, &mockBackendCaller{}, nil) + require.Error(t, err) + + entries, readErr := os.ReadDir(cacheDir) + require.NoError(t, readErr) + assert.NotEmpty(t, entries, "expected compilation artifacts in global cache directory") + }) + + t.Run("cache is disabled when DisableCompilationCache is true", func(t *testing.T) { + ctx := context.Background() + + opts := &WasmGuardOptions{ + DisableCompilationCache: true, + } + + // Should still work (fail on minimal WASM) but without caching + _, err := NewWasmGuardWithOptions(ctx, "no-cache-test", minimalGuardWasm, &mockBackendCaller{}, opts) + require.Error(t, err) }) }