From f8fd9f67185ff9f88316723ff617b5070be55ca0 Mon Sep 17 00:00:00 2001
From: JY Tan <jy8230@gmail.com>
Date: Fri, 24 Apr 2026 02:07:16 -0700
Subject: [PATCH 1/5] Pass service execution model and replay override path to fence

---
 go.mod                            |  7 +++-
 go.sum                            |  8 ++--
 internal/runner/compose_replay.go |  6 +++
 internal/runner/server.go         | 19 ++++++++--
 internal/runner/server_test.go    | 13 ++++---
 internal/runner/service.go        | 62 ++++++++++++++++++++++++-------
 6 files changed, 87 insertions(+), 28 deletions(-)

diff --git a/go.mod b/go.mod
index 249a190..145f8fb 100644
--- a/go.mod
+++ b/go.mod
@@ -28,7 +28,7 @@ require (
 	github.com/stretchr/testify v1.11.1
 	github.com/zricethezav/gitleaks/v8 v8.30.1
 	golang.org/x/mod v0.29.0
-	golang.org/x/term v0.41.0
+	golang.org/x/term v0.42.0
 	google.golang.org/protobuf v1.36.9
 	gopkg.in/yaml.v3 v3.0.1
 	mvdan.cc/sh/v3 v3.12.0
@@ -119,7 +119,10 @@ require (
 	golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa // indirect
 	golang.org/x/net v0.47.0 // indirect
 	golang.org/x/sync v0.18.0 // indirect
-	golang.org/x/sys v0.42.0 // indirect
+	golang.org/x/sys v0.43.0 // indirect
 	golang.org/x/text v0.31.0 // indirect
 	gopkg.in/ini.v1 v1.67.0 // indirect
 )
+
+// TODO: remove before publishing.
+replace github.com/Use-Tusk/fence => ../fence
diff --git a/go.sum b/go.sum
index 1d18344..8283621 100644
--- a/go.sum
+++ b/go.sum
@@ -437,13 +437,13 @@ golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
-golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
+golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
+golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
 golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
-golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU=
-golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A=
+golang.org/x/term v0.42.0 h1:UiKe+zDFmJobeJ5ggPwOshJIVt6/Ft0rcfrXZDLWAWY=
+golang.org/x/term v0.42.0/go.mod h1:Dq/D+snpsbazcBG5+F9Q1n2rXV8Ma+71xEjTRufARgY=
 golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
diff --git a/internal/runner/compose_replay.go b/internal/runner/compose_replay.go
index 828e232..ef0d4e7 100644
--- a/internal/runner/compose_replay.go
+++ b/internal/runner/compose_replay.go
@@ -67,6 +67,12 @@ func createReplayComposeOverrideFile(envVars map[string]string, groupName string
 	if safeGroup == "" {
 		safeGroup = "default"
 	}
+	// The override file lives in the OS temp dir (/tmp on Linux). Fence
+	// tmpfs-overmounts /tmp inside its Linux sandbox, so a naive `docker
+	// compose -f /tmp/...` inside the sandbox can't see this file. Callers
+	// that pass this path into a sandboxed command must register it via
+	// fence.Manager.ExposeHostPath before launching the sandbox — see
+	// StartService in service.go, which does this automatically.
 	tempFile, err := os.CreateTemp("", fmt.Sprintf("tusk-replay-env-override-%s-*.yml", safeGroup))
 	if err != nil {
 		return "", fmt.Errorf("failed to create temporary replay compose override file: %w", err)
diff --git a/internal/runner/server.go b/internal/runner/server.go
index 8684fd8..59f8760 100644
--- a/internal/runner/server.go
+++ b/internal/runner/server.go
@@ -119,7 +119,20 @@ type MockNotFoundEvent struct {
 	ReplaySpan  *core.Span `json:"replaySpan"` // The outbound span that failed to find a mock
 }
 
-func isDockerCommand(cmd string) bool {
+// serviceDelegatesToHostDaemon reports whether the configured service start
+// command delegates port binding / process execution to an external daemon
+// whose network listener lives on the host (outside any sandbox netns that
+// fence might set up). Today this covers the docker / docker-compose family;
+// extending to podman, nerdctl, or systemctl is a one-liner.
+//
+// This predicate is consulted in two places:
+//   - determineCommunicationType: daemon-delegated services cannot reach a
+//     Unix socket on the host filesystem from inside a container, so we must
+//     use TCP for the mock server ↔ SDK channel.
+//   - StartService: daemon-delegated services bind the host port via the
+//     daemon's own bind/iptables, so fence's reverse bridge would collide;
+//     fence is told ServiceBindsOnHost and skips it.
+func serviceDelegatesToHostDaemon(cmd string) bool {
 	cmd = strings.ToLower(cmd)
 	cmd = strings.Join(strings.Fields(cmd), " ")
 
@@ -134,8 +147,8 @@ func determineCommunicationType(cfg *config.ServiceConfig) CommunicationType {
 
 	// Auto-detect based on start command
 	if commType == "auto" {
-		if isDockerCommand(cfg.Start.Command) {
-			log.Debug("Auto-detected Docker command, using TCP communication")
+		if serviceDelegatesToHostDaemon(cfg.Start.Command) {
+			log.Debug("Auto-detected host-daemon-delegated service, using TCP communication")
 			return CommunicationTCP
 		}
 		return CommunicationUnix
diff --git a/internal/runner/server_test.go b/internal/runner/server_test.go
index 8053faf..2fa6403 100644
--- a/internal/runner/server_test.go
+++ b/internal/runner/server_test.go
@@ -302,12 +302,12 @@ func TestDetermineCommunicationType(t *testing.T) {
 	}
 }
 
-func TestIsDockerCommand(t *testing.T) {
+func TestServiceDelegatesToHostDaemon(t *testing.T) {
 	tests := []struct {
 		command  string
 		expected bool
 	}{
-		// Docker commands
+		// Docker commands (host-daemon-delegated)
 		{"docker", true},
 		{"docker-compose", true},
 		{"docker compose up", true},
@@ -315,19 +315,20 @@ func TestIsDockerCommand(t *testing.T) {
 		{"docker run myimage", true},
 		{"ENV=test docker compose up", true},
 
-		// Non-Docker commands
+		// Directly-executed services (bind in-process)
 		{"npm run start", false},
 		{"node server.js", false},
 		{"python app.py", false},
 
-		// This is likely a docker-related script, but we don't make further assumptions.
-		// Users can explicitly set the communication type in the config.
+		// Likely a docker-related script, but we don't make further assumptions.
+		// Users can explicitly set the communication type / execution model
+		// in the config.
 		{"./start-docker.sh", false},
 	}
 
 	for _, tt := range tests {
 		t.Run(tt.command, func(t *testing.T) {
-			result := isDockerCommand(tt.command)
+			result := serviceDelegatesToHostDaemon(tt.command)
 			assert.Equal(t, tt.expected, result, "Command: %s", tt.command)
 		})
 	}
diff --git a/internal/runner/service.go b/internal/runner/service.go
index e65d6f1..86c9c33 100644
--- a/internal/runner/service.go
+++ b/internal/runner/service.go
@@ -87,32 +87,68 @@ func (e *Executor) StartService() error {
 				log.ServiceLog(fmt.Sprintf("🔧 Merged custom Fence config into replay sandbox: %s", utils.ResolveTuskPath(sandboxConfigPath)))
 			}
 			e.fenceManager = fence.NewManager(fenceCfg, e.debug, false)
-			e.fenceManager.SetExposedPorts([]int{cfg.Service.Port})
 
-			if err := e.fenceManager.Initialize(); err != nil {
+			// Tell fence how the sandboxed service binds its port. For
+			// docker / docker-compose / podman commands, the daemon binds
+			// the host port outside the sandbox netns, so fence must NOT
+			// set up a reverse bridge (it would collide with the daemon's
+			// bind). For everything else, fence proxies inbound traffic
+			// into the sandbox netns as usual.
+			executionModel := fence.ServiceBindsInSandbox
+			if serviceDelegatesToHostDaemon(cfg.Service.Start.Command) {
+				executionModel = fence.ServiceBindsOnHost
+			}
+			e.fenceManager.SetService(fence.ServiceOptions{
+				ExposedPorts:   []int{cfg.Service.Port},
+				ExecutionModel: executionModel,
+			})
+
+			// Hand any caller-generated host files the sandboxed process
+			// needs to see (e.g. the replay compose env-override YAML)
+			// to fence. Without this, a file created via
+			// os.CreateTemp("", ...) lives under /tmp, which fence
+			// tmpfs-overmounts — invisible to the sandboxed docker client.
+			exposeErr := error(nil)
+			if replayOverridePath != "" {
+				if err := e.fenceManager.ExposeHostPath(replayOverridePath, false); err != nil {
+					exposeErr = err
+				}
+			}
+			if exposeErr != nil {
 				if requireSandbox {
 					e.fenceManager = nil
-					return fmt.Errorf("strict replay sandbox unavailable: %s", friendlySandboxError(err))
+					return fmt.Errorf("strict replay sandbox unavailable: failed to expose replay override file to sandbox: %w", exposeErr)
 				}
-				log.UserWarn(fmt.Sprintf("⚠️  Sandbox unavailable: %s", friendlySandboxError(err)))
-				log.UserWarn("   Tests will run without network isolation (real connections allowed)\n")
+				log.UserWarn(fmt.Sprintf("⚠️  Sandbox: failed to expose replay override file (%v); proceeding without sandbox", exposeErr))
 				e.fenceManager = nil
-			} else {
-				wrappedCmd, err := e.fenceManager.WrapCommand(command)
-				if err != nil {
+			}
+
+			if e.fenceManager != nil {
+				if err := e.fenceManager.Initialize(); err != nil {
 					if requireSandbox {
-						e.fenceManager.Cleanup()
 						e.fenceManager = nil
 						return fmt.Errorf("strict replay sandbox unavailable: %s", friendlySandboxError(err))
 					}
 					log.UserWarn(fmt.Sprintf("⚠️  Sandbox unavailable: %s", friendlySandboxError(err)))
 					log.UserWarn("   Tests will run without network isolation (real connections allowed)\n")
-					e.fenceManager.Cleanup()
 					e.fenceManager = nil
 				} else {
-					command = wrappedCmd
-					e.lastServiceSandboxed = true
-					log.ServiceLog("🔒 Service sandboxed (localhost outbound blocked for replay isolation)")
+					wrappedCmd, err := e.fenceManager.WrapCommand(command)
+					if err != nil {
+						if requireSandbox {
+							e.fenceManager.Cleanup()
+							e.fenceManager = nil
+							return fmt.Errorf("strict replay sandbox unavailable: %s", friendlySandboxError(err))
+						}
+						log.UserWarn(fmt.Sprintf("⚠️  Sandbox unavailable: %s", friendlySandboxError(err)))
+						log.UserWarn("   Tests will run without network isolation (real connections allowed)\n")
+						e.fenceManager.Cleanup()
+						e.fenceManager = nil
+					} else {
+						command = wrappedCmd
+						e.lastServiceSandboxed = true
+						log.ServiceLog("🔒 Service sandboxed (localhost outbound blocked for replay isolation)")
+					}
 				}
 			}
 		}

From 60efd38b45a86ef35fc92d926df66bdbb76b5647 Mon Sep 17 00:00:00 2001
From: JY Tan <jy8230@gmail.com>
Date: Fri, 24 Apr 2026 13:31:57 -0700
Subject: [PATCH 2/5] Bump fence to v0.1.51

---
 go.mod | 5 +----
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/go.mod b/go.mod
index 145f8fb..06f47c5 100644
--- a/go.mod
+++ b/go.mod
@@ -3,7 +3,7 @@ module github.com/Use-Tusk/tusk-cli
 go 1.25.0
 
 require (
-	github.com/Use-Tusk/fence v0.1.36
+	github.com/Use-Tusk/fence v0.1.51
 	github.com/Use-Tusk/tusk-drift-schemas v0.1.36
 	github.com/agnivade/levenshtein v1.0.3
 	github.com/aymanbagabas/go-osc52/v2 v2.0.1
@@ -123,6 +123,3 @@ require (
 	golang.org/x/text v0.31.0 // indirect
 	gopkg.in/ini.v1 v1.67.0 // indirect
 )
-
-// TODO: remove before publishing.
-replace github.com/Use-Tusk/fence => ../fence
diff --git a/go.sum b/go.sum
index 8283621..933eca8 100644
--- a/go.sum
+++ b/go.sum
@@ -31,8 +31,8 @@ github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe
 github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
 github.com/STARRY-S/zip v0.2.3 h1:luE4dMvRPDOWQdeDdUxUoZkzUIpTccdKdhHHsQJ1fm4=
 github.com/STARRY-S/zip v0.2.3/go.mod h1:lqJ9JdeRipyOQJrYSOtpNAiaesFO6zVDsE8GIGFaoSk=
-github.com/Use-Tusk/fence v0.1.36 h1:8S15y8cp3X+xXukx6AN0Ky/aX9/dZyW3fLw5XOQ8YtE=
-github.com/Use-Tusk/fence v0.1.36/go.mod h1:YkowBDzXioVKJE16vg9z3gSVC6vhzkIZZw2dFf7MW/o=
+github.com/Use-Tusk/fence v0.1.51 h1:GGr4bx/eFYYA3WNNIIE7RAkJJu5zlW6nsTdrAqEzTQc=
+github.com/Use-Tusk/fence v0.1.51/go.mod h1:ADX3cEerqZumoA+RXDtLC1p+8vUqcNaaaXEK33vHnVs=
 github.com/Use-Tusk/tusk-drift-schemas v0.1.36 h1:baojaWiEFEdRU61CLYAbFievXxDLlWTFW/ijL4IpdiE=
 github.com/Use-Tusk/tusk-drift-schemas v0.1.36/go.mod h1:pa3EvTj9kKxl9f904RVFkj9YK1zB75QogboKi70zalM=
 github.com/agnivade/levenshtein v1.0.3 h1:M5ZnqLOoZR8ygVq0FfkXsNOKzMCk0xRiow0R5+5VkQ0=

From a6280a084d688a957b97d1008b652c79e8b313d5 Mon Sep 17 00:00:00 2001
From: JY Tan <jy8230@gmail.com>
Date: Fri, 24 Apr 2026 13:57:10 -0700
Subject: [PATCH 3/5] Make sandboxing a no-op on Windows

---
 internal/runner/executor.go          |   5 +-
 internal/runner/sandbox.go           |  27 ++++
 internal/runner/sandbox_test.go      |   4 +-
 internal/runner/sandbox_unix.go      | 185 ++++++++++++++++++++++
 internal/runner/sandbox_unix_test.go |  52 +++++++
 internal/runner/sandbox_windows.go   |  18 +++
 internal/runner/service.go           | 222 ++++++---------------------
 internal/runner/service_test.go      |  40 -----
 8 files changed, 330 insertions(+), 223 deletions(-)
 create mode 100644 internal/runner/sandbox.go
 create mode 100644 internal/runner/sandbox_unix.go
 create mode 100644 internal/runner/sandbox_unix_test.go
 create mode 100644 internal/runner/sandbox_windows.go

diff --git a/internal/runner/executor.go b/internal/runner/executor.go
index 2d2851d..63d2b32 100644
--- a/internal/runner/executor.go
+++ b/internal/runner/executor.go
@@ -15,7 +15,6 @@ import (
 	"sync"
 	"time"
 
-	"github.com/Use-Tusk/fence/pkg/fence"
 	"github.com/Use-Tusk/tusk-cli/internal/config"
 	"github.com/Use-Tusk/tusk-cli/internal/log"
 	"github.com/Use-Tusk/tusk-cli/internal/utils"
@@ -87,7 +86,7 @@ type Executor struct {
 	sandboxMode             string
 	lastServiceSandboxed    bool
 	debug                   bool
-	fenceManager            *fence.Manager
+	sandbox                 sandboxManager
 	requireInboundReplay    bool
 	replayComposeOverride   string
 	replayEnvVars           map[string]string
@@ -142,7 +141,7 @@ func (e *Executor) GetEffectiveSandboxMode() string {
 	if e.sandboxMode != "" {
 		return e.sandboxMode
 	}
-	if fence.IsSupported() {
+	if isSandboxSupported() {
 		return SandboxModeStrict
 	}
 	return SandboxModeAuto
diff --git a/internal/runner/sandbox.go b/internal/runner/sandbox.go
new file mode 100644
index 0000000..b7b58c7
--- /dev/null
+++ b/internal/runner/sandbox.go
@@ -0,0 +1,27 @@
+package runner
+
+// Platform-split sandbox adapter. Real implementation lives in sandbox_unix.go
+// (fence-backed); Windows gets a no-op stub in sandbox_windows.go because
+// fence doesn't cross-compile there.
+
+// sandboxManager wraps whatever sandbox backs replay isolation on the current
+// platform. Nil means no sandbox configured.
+type sandboxManager interface {
+	WrapCommand(command string) (string, error)
+	Cleanup()
+}
+
+type replaySandboxOptions struct {
+	UserConfigPath string // optional fence config override (e.g. .tusk/replay.fence.json)
+	Debug          bool
+	ExposedPort    int
+	// BindsOnHost signals that an external daemon (docker, podman) binds
+	// ExposedPort outside the sandbox netns; skips the reverse bridge.
+	BindsOnHost      bool
+	ExposedHostPaths []exposedHostPath
+}
+
+type exposedHostPath struct {
+	Path     string
+	Writable bool
+}
diff --git a/internal/runner/sandbox_test.go b/internal/runner/sandbox_test.go
index 549f474..ee61508 100644
--- a/internal/runner/sandbox_test.go
+++ b/internal/runner/sandbox_test.go
@@ -2,8 +2,6 @@ package runner
 
 import (
 	"testing"
-
-	"github.com/Use-Tusk/fence/pkg/fence"
 )
 
 // newExecutorForServiceLifecycleTests keeps generic lifecycle tests focused on
@@ -16,7 +14,7 @@ func newExecutorForServiceLifecycleTests() *Executor {
 
 func TestGetEffectiveSandboxMode(t *testing.T) {
 	e := NewExecutor()
-	if fence.IsSupported() {
+	if isSandboxSupported() {
 		if got := e.GetEffectiveSandboxMode(); got != SandboxModeStrict {
 			t.Fatalf("expected default sandbox mode %q on supported platform, got %q", SandboxModeStrict, got)
 		}
diff --git a/internal/runner/sandbox_unix.go b/internal/runner/sandbox_unix.go
new file mode 100644
index 0000000..a730691
--- /dev/null
+++ b/internal/runner/sandbox_unix.go
@@ -0,0 +1,185 @@
+//go:build darwin || linux || freebsd
+
+package runner
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/Use-Tusk/fence/pkg/fence"
+	"github.com/Use-Tusk/tusk-cli/internal/utils"
+)
+
+// isSandboxSupported reports whether the current platform can actually
+// isolate replay service startup (i.e. fence is available).
+func isSandboxSupported() bool {
+	return fence.IsSupported()
+}
+
+// fenceSandbox is the Unix-platform implementation of sandboxManager,
+// backed by github.com/Use-Tusk/fence.
+type fenceSandbox struct {
+	mgr *fence.Manager
+}
+
+// WrapCommand delegates to the underlying fence.Manager.
+func (s *fenceSandbox) WrapCommand(command string) (string, error) {
+	return s.mgr.WrapCommand(command)
+}
+
+// Cleanup releases fence's socat bridges, proxies, and temp sockets.
+func (s *fenceSandbox) Cleanup() {
+	if s.mgr != nil {
+		s.mgr.Cleanup()
+	}
+}
+
+// newReplaySandboxManager builds the effective fence config for replay
+// mode, creates the fence.Manager, applies the requested service
+// execution model + exposed host paths, and initializes the manager.
+// On error it returns a nil sandboxManager; mgr.Cleanup is not called here.
+func newReplaySandboxManager(opts replaySandboxOptions) (sandboxManager, error) {
+	fenceCfg, err := createReplayFenceConfig(opts.UserConfigPath)
+	if err != nil {
+		return nil, fmt.Errorf("prepare replay sandbox config: %w", err)
+	}
+
+	mgr := fence.NewManager(fenceCfg, opts.Debug, false)
+
+	executionModel := fence.ServiceBindsInSandbox
+	if opts.BindsOnHost {
+		executionModel = fence.ServiceBindsOnHost
+	}
+	mgr.SetService(fence.ServiceOptions{
+		ExposedPorts:   []int{opts.ExposedPort},
+		ExecutionModel: executionModel,
+	})
+
+	for _, ehp := range opts.ExposedHostPaths {
+		if err := mgr.ExposeHostPath(ehp.Path, ehp.Writable); err != nil {
+			return nil, fmt.Errorf("expose host path %q to sandbox: %w", ehp.Path, err)
+		}
+	}
+
+	if err := mgr.Initialize(); err != nil {
+		return nil, fmt.Errorf("initialize replay sandbox: %w", err)
+	}
+
+	return &fenceSandbox{mgr: mgr}, nil
+}
+
+// createReplayFenceConfig creates the effective fence config for replay mode.
+// This blocks localhost outbound connections to force the service to use SDK
+// mocks.
+//
+// Unexported; kept as a standalone function so the Unix-only tests in
+// sandbox_unix_test.go can verify user-config merging behavior. Not part
+// of the package's cross-platform surface.
+func createReplayFenceConfig(userConfigPath string) (*fence.Config, error) {
+	cfg := baseReplayFenceConfig()
+	if userConfigPath == "" {
+		return cfg, nil
+	}
+
+	resolvedPath := utils.ResolveTuskPath(userConfigPath)
+	userCfg, err := fence.LoadConfigResolved(resolvedPath)
+	if err != nil {
+		return nil, fmt.Errorf("load custom fence config %q: %w", resolvedPath, err)
+	}
+	if userCfg == nil {
+		return nil, fmt.Errorf("custom fence config not found: %s", resolvedPath)
+	}
+	if err := validateReplayFenceConfig(userCfg); err != nil {
+		return nil, err
+	}
+
+	merged := fence.MergeConfigs(cfg, userCfg)
+	applyReplayFenceInvariants(merged)
+	return merged, nil
+}
+
+func baseReplayFenceConfig() *fence.Config {
+	f := false
+	return &fence.Config{
+		Network: fence.NetworkConfig{
+			AllowedDomains: []string{
+				// Allow localhost for the service's own health checks
+				"localhost",
+				"127.0.0.1",
+			},
+			AllowLocalBinding:   true, // Allow service to bind to its port
+			AllowLocalOutbound:  &f,   // Block outbound to localhost (Postgres, Redis, etc.)
+			AllowAllUnixSockets: true, // Allow SDK to connect to mock server via Unix socket
+		},
+		Filesystem: fence.FilesystemConfig{
+			AllowWrite: getAllowedWriteDirs(),
+		},
+	}
+}
+
+func validateReplayFenceConfig(cfg *fence.Config) error {
+	if cfg == nil {
+		return nil
+	}
+
+	requiredDomains := []string{"localhost", "127.0.0.1"}
+	for _, deniedDomain := range cfg.Network.DeniedDomains {
+		for _, requiredDomain := range requiredDomains {
+			if strings.EqualFold(deniedDomain, requiredDomain) {
+				return fmt.Errorf("custom replay fence config cannot deny %q because replay health checks require it", requiredDomain)
+			}
+		}
+	}
+
+	return nil
+}
+
+func applyReplayFenceInvariants(cfg *fence.Config) {
+	if cfg == nil {
+		return
+	}
+
+	f := false
+	cfg.Network.AllowedDomains = mergeUniqueStrings(
+		cfg.Network.AllowedDomains,
+		[]string{"localhost", "127.0.0.1"},
+	)
+	cfg.Network.AllowLocalBinding = true
+	cfg.Network.AllowLocalOutbound = &f
+	cfg.Network.AllowAllUnixSockets = true
+	cfg.Filesystem.AllowWrite = mergeUniqueStrings(cfg.Filesystem.AllowWrite, getAllowedWriteDirs())
+}
+
+func mergeUniqueStrings(existing, required []string) []string {
+	if len(required) == 0 {
+		return existing
+	}
+
+	seen := make(map[string]struct{}, len(existing)+len(required))
+	merged := make([]string, 0, len(existing)+len(required))
+	for _, value := range existing {
+		if _, ok := seen[value]; ok {
+			continue
+		}
+		seen[value] = struct{}{}
+		merged = append(merged, value)
+	}
+	for _, value := range required {
+		if _, ok := seen[value]; ok {
+			continue
+		}
+		seen[value] = struct{}{}
+		merged = append(merged, value)
+	}
+	return merged
+}
+
+// getAllowedWriteDirs returns the default writable paths for replay mode.
+// We allow broad local writes by default. Note that Fence still enforces
+// mandatory dangerous-path protections (see
+// https://github.com/Use-Tusk/fence/blob/main/internal/sandbox/dangerous.go).
+func getAllowedWriteDirs() []string {
+	return []string{
+		"/",
+	}
+}
diff --git a/internal/runner/sandbox_unix_test.go b/internal/runner/sandbox_unix_test.go
new file mode 100644
index 0000000..463958a
--- /dev/null
+++ b/internal/runner/sandbox_unix_test.go
@@ -0,0 +1,52 @@
+//go:build darwin || linux || freebsd
+
+package runner
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestCreateReplayFenceConfigMergesCustomConfig(t *testing.T) {
+	customConfigPath := filepath.Join(t.TempDir(), "replay.fence.json")
+	err := os.WriteFile(customConfigPath, []byte(`{
+  "network": {
+    "allowedDomains": ["api.example.com"]
+  },
+  "filesystem": {
+    "allowWrite": ["custom-cache"]
+  }
+}`), 0o600)
+	require.NoError(t, err)
+
+	cfg, err := createReplayFenceConfig(customConfigPath)
+	require.NoError(t, err)
+	require.NotNil(t, cfg)
+	require.NotNil(t, cfg.Network.AllowLocalOutbound)
+
+	assert.Contains(t, cfg.Network.AllowedDomains, "localhost")
+	assert.Contains(t, cfg.Network.AllowedDomains, "127.0.0.1")
+	assert.Contains(t, cfg.Network.AllowedDomains, "api.example.com")
+	assert.True(t, cfg.Network.AllowLocalBinding)
+	assert.False(t, *cfg.Network.AllowLocalOutbound)
+	assert.True(t, cfg.Network.AllowAllUnixSockets)
+	assert.Contains(t, cfg.Filesystem.AllowWrite, "custom-cache")
+	assert.Contains(t, cfg.Filesystem.AllowWrite, "/")
+}
+
+func TestCreateReplayFenceConfigRejectsDeniedLocalhost(t *testing.T) {
+	customConfigPath := filepath.Join(t.TempDir(), "replay.fence.json")
+	err := os.WriteFile(customConfigPath, []byte(`{
+  "network": {
+    "deniedDomains": ["localhost"]
+  }
+}`), 0o600)
+	require.NoError(t, err)
+
+	_, err = createReplayFenceConfig(customConfigPath)
+	require.ErrorContains(t, err, `cannot deny "localhost"`)
+}
diff --git a/internal/runner/sandbox_windows.go b/internal/runner/sandbox_windows.go
new file mode 100644
index 0000000..811f11b
--- /dev/null
+++ b/internal/runner/sandbox_windows.go
@@ -0,0 +1,18 @@
+//go:build windows
+
+package runner
+
+import "errors"
+
+// Fence only supports Linux and macOS; on Windows the replay sandbox is a
+// no-op. Callers treat the error the same as "sandbox not available on this
+// platform" on an unsupported Unix.
+var errSandboxUnsupportedOnWindows = errors.New("replay sandbox not supported on Windows")
+
+func isSandboxSupported() bool {
+	return false
+}
+
+func newReplaySandboxManager(_ replaySandboxOptions) (sandboxManager, error) {
+	return nil, errSandboxUnsupportedOnWindows
+}
diff --git a/internal/runner/service.go b/internal/runner/service.go
index 86c9c33..8ed0e30 100644
--- a/internal/runner/service.go
+++ b/internal/runner/service.go
@@ -9,7 +9,6 @@ import (
 	"strings"
 	"time"
 
-	"github.com/Use-Tusk/fence/pkg/fence"
 	"github.com/Use-Tusk/tusk-cli/internal/config"
 	"github.com/Use-Tusk/tusk-cli/internal/log"
 	"github.com/Use-Tusk/tusk-cli/internal/utils"
@@ -68,7 +67,7 @@ func (e *Executor) StartService() error {
 
 	requireSandbox := effectiveSandboxMode == SandboxModeStrict
 	if effectiveSandboxMode != SandboxModeOff && !e.sandboxBypass {
-		if !fence.IsSupported() {
+		if !isSandboxSupported() {
 			if requireSandbox {
 				return fmt.Errorf("strict replay sandbox unavailable: sandbox not supported on this platform")
 			}
@@ -79,76 +78,56 @@ func (e *Executor) StartService() error {
 			if e.getReplaySandboxConfigPath() != "" {
 				sandboxConfigPath = e.getReplaySandboxConfigPath()
 			}
-			fenceCfg, err := createReplayFenceConfig(sandboxConfigPath)
-			if err != nil {
-				return fmt.Errorf("failed to prepare replay sandbox config: %w", err)
-			}
 			if sandboxConfigPath != "" {
 				log.ServiceLog(fmt.Sprintf("🔧 Merged custom Fence config into replay sandbox: %s", utils.ResolveTuskPath(sandboxConfigPath)))
 			}
-			e.fenceManager = fence.NewManager(fenceCfg, e.debug, false)
-
-			// Tell fence how the sandboxed service binds its port. For
-			// docker / docker-compose / podman commands, the daemon binds
-			// the host port outside the sandbox netns, so fence must NOT
-			// set up a reverse bridge (it would collide with the daemon's
-			// bind). For everything else, fence proxies inbound traffic
-			// into the sandbox netns as usual.
-			executionModel := fence.ServiceBindsInSandbox
-			if serviceDelegatesToHostDaemon(cfg.Service.Start.Command) {
-				executionModel = fence.ServiceBindsOnHost
-			}
-			e.fenceManager.SetService(fence.ServiceOptions{
-				ExposedPorts:   []int{cfg.Service.Port},
-				ExecutionModel: executionModel,
-			})
 
-			// Hand any caller-generated host files the sandboxed process
-			// needs to see (e.g. the replay compose env-override YAML)
-			// to fence. Without this, a file created via
-			// os.CreateTemp("", ...) lives under /tmp, which fence
-			// tmpfs-overmounts — invisible to the sandboxed docker client.
-			exposeErr := error(nil)
+			// Build the list of host paths that must be visible inside
+			// the sandbox. The replay compose env-override (when present)
+			// is the main case: it lives under /tmp on the host, which
+			// fence tmpfs-overmounts — without this exposure the sandboxed
+			// docker client can't see the file passed to `-f`.
+			var exposedHostPaths []exposedHostPath
 			if replayOverridePath != "" {
-				if err := e.fenceManager.ExposeHostPath(replayOverridePath, false); err != nil {
-					exposeErr = err
-				}
+				exposedHostPaths = append(exposedHostPaths, exposedHostPath{
+					Path:     replayOverridePath,
+					Writable: false,
+				})
 			}
-			if exposeErr != nil {
+
+			// For docker / docker-compose commands (the only family
+			// serviceDelegatesToHostDaemon detects today), the daemon
+			// binds the host port outside the sandbox netns, so the
+			// sandbox must NOT set up a reverse bridge on that port (it
+			// would collide with the daemon's own bind). For everything
+			// else, the sandbox proxies inbound traffic into its netns.
+			sbx, sbxErr := newReplaySandboxManager(replaySandboxOptions{
+				UserConfigPath:   sandboxConfigPath,
+				Debug:            e.debug,
+				ExposedPort:      cfg.Service.Port,
+				BindsOnHost:      serviceDelegatesToHostDaemon(cfg.Service.Start.Command),
+				ExposedHostPaths: exposedHostPaths,
+			})
+			if sbxErr != nil {
 				if requireSandbox {
-					e.fenceManager = nil
-					return fmt.Errorf("strict replay sandbox unavailable: failed to expose replay override file to sandbox: %w", exposeErr)
+					return fmt.Errorf("strict replay sandbox unavailable: %s", friendlySandboxError(sbxErr))
 				}
-				log.UserWarn(fmt.Sprintf("⚠️  Sandbox: failed to expose replay override file (%v); proceeding without sandbox", exposeErr))
-				e.fenceManager = nil
-			}
-
-			if e.fenceManager != nil {
-				if err := e.fenceManager.Initialize(); err != nil {
+				log.UserWarn(fmt.Sprintf("⚠️  Sandbox unavailable: %s", friendlySandboxError(sbxErr)))
+				log.UserWarn("   Tests will run without network isolation (real connections allowed)\n")
+			} else {
+				wrappedCmd, wrapErr := sbx.WrapCommand(command)
+				if wrapErr != nil {
+					sbx.Cleanup()
 					if requireSandbox {
-						e.fenceManager = nil
-						return fmt.Errorf("strict replay sandbox unavailable: %s", friendlySandboxError(err))
+						return fmt.Errorf("strict replay sandbox unavailable: %s", friendlySandboxError(wrapErr))
 					}
-					log.UserWarn(fmt.Sprintf("⚠️  Sandbox unavailable: %s", friendlySandboxError(err)))
+					log.UserWarn(fmt.Sprintf("⚠️  Sandbox unavailable: %s", friendlySandboxError(wrapErr)))
 					log.UserWarn("   Tests will run without network isolation (real connections allowed)\n")
-					e.fenceManager = nil
 				} else {
-					wrappedCmd, err := e.fenceManager.WrapCommand(command)
-					if err != nil {
-						if requireSandbox {
-							e.fenceManager.Cleanup()
-							e.fenceManager = nil
-							return fmt.Errorf("strict replay sandbox unavailable: %s", friendlySandboxError(err))
-						}
-						log.UserWarn(fmt.Sprintf("⚠️  Sandbox unavailable: %s", friendlySandboxError(err)))
-						log.UserWarn("   Tests will run without network isolation (real connections allowed)\n")
-						e.fenceManager.Cleanup()
-						e.fenceManager = nil
-					} else {
-						command = wrappedCmd
-						e.lastServiceSandboxed = true
-						log.ServiceLog("🔒 Service sandboxed (localhost outbound blocked for replay isolation)")
-					}
+					e.sandbox = sbx
+					command = wrappedCmd
+					e.lastServiceSandboxed = true
+					log.ServiceLog("🔒 Service sandboxed (localhost outbound blocked for replay isolation)")
 				}
 			}
 		}
@@ -223,9 +202,9 @@ func (e *Executor) StartService() error {
 	}
 
 	if err := e.serviceCmd.Start(); err != nil {
-		if e.fenceManager != nil {
-			e.fenceManager.Cleanup()
-			e.fenceManager = nil
+		if e.sandbox != nil {
+			e.sandbox.Cleanup()
+			e.sandbox = nil
 		}
 		return fmt.Errorf("failed to start service: %w", err)
 	}
@@ -250,125 +229,14 @@ func (e *Executor) StartService() error {
 	return nil
 }
 
-// createReplayFenceConfig creates the effective fence config for replay mode.
-// This blocks localhost outbound connections to force the service to use SDK mocks.
-func createReplayFenceConfig(userConfigPath string) (*fence.Config, error) {
-	cfg := baseReplayFenceConfig()
-	if userConfigPath == "" {
-		return cfg, nil
-	}
-
-	resolvedPath := utils.ResolveTuskPath(userConfigPath)
-	userCfg, err := fence.LoadConfigResolved(resolvedPath)
-	if err != nil {
-		return nil, fmt.Errorf("load custom fence config %q: %w", resolvedPath, err)
-	}
-	if userCfg == nil {
-		return nil, fmt.Errorf("custom fence config not found: %s", resolvedPath)
-	}
-	if err := validateReplayFenceConfig(userCfg); err != nil {
-		return nil, err
-	}
-
-	merged := fence.MergeConfigs(cfg, userCfg)
-	applyReplayFenceInvariants(merged)
-	return merged, nil
-}
-
-func baseReplayFenceConfig() *fence.Config {
-	f := false
-	return &fence.Config{
-		Network: fence.NetworkConfig{
-			AllowedDomains: []string{
-				// Allow localhost for the service's own health checks
-				"localhost",
-				"127.0.0.1",
-			},
-			AllowLocalBinding:   true, // Allow service to bind to its port
-			AllowLocalOutbound:  &f,   // Block outbound to localhost (Postgres, Redis, etc.)
-			AllowAllUnixSockets: true, // Allow SDK to connect to mock server via Unix socket
-		},
-		Filesystem: fence.FilesystemConfig{
-			AllowWrite: getAllowedWriteDirs(),
-		},
-	}
-}
-
-func validateReplayFenceConfig(cfg *fence.Config) error {
-	if cfg == nil {
-		return nil
-	}
-
-	requiredDomains := []string{"localhost", "127.0.0.1"}
-	for _, deniedDomain := range cfg.Network.DeniedDomains {
-		for _, requiredDomain := range requiredDomains {
-			if strings.EqualFold(deniedDomain, requiredDomain) {
-				return fmt.Errorf("custom replay fence config cannot deny %q because replay health checks require it", requiredDomain)
-			}
-		}
-	}
-
-	return nil
-}
-
-func applyReplayFenceInvariants(cfg *fence.Config) {
-	if cfg == nil {
-		return
-	}
-
-	f := false
-	cfg.Network.AllowedDomains = mergeUniqueStrings(
-		cfg.Network.AllowedDomains,
-		[]string{"localhost", "127.0.0.1"},
-	)
-	cfg.Network.AllowLocalBinding = true
-	cfg.Network.AllowLocalOutbound = &f
-	cfg.Network.AllowAllUnixSockets = true
-	cfg.Filesystem.AllowWrite = mergeUniqueStrings(cfg.Filesystem.AllowWrite, getAllowedWriteDirs())
-}
-
-func mergeUniqueStrings(existing, required []string) []string {
-	if len(required) == 0 {
-		return existing
-	}
-
-	seen := make(map[string]struct{}, len(existing)+len(required))
-	merged := make([]string, 0, len(existing)+len(required))
-	for _, value := range existing {
-		if _, ok := seen[value]; ok {
-			continue
-		}
-		seen[value] = struct{}{}
-		merged = append(merged, value)
-	}
-	for _, value := range required {
-		if _, ok := seen[value]; ok {
-			continue
-		}
-		seen[value] = struct{}{}
-		merged = append(merged, value)
-	}
-	return merged
-}
-
-// getAllowedWriteDirs returns the default writable paths for replay mode.
-// We allow broad local writes by default. Note that Fence still enforces
-// mandatory dangerous-path protections (see
-// https://github.com/Use-Tusk/fence/blob/main/internal/sandbox/dangerous.go).
-func getAllowedWriteDirs() []string {
-	return []string{
-		"/",
-	}
-}
-
 func (e *Executor) StopService() error {
 	cfg, _ := config.Get()
 
 	defer func() {
 		e.cleanupLogFiles()
-		if e.fenceManager != nil {
-			e.fenceManager.Cleanup()
-			e.fenceManager = nil
+		if e.sandbox != nil {
+			e.sandbox.Cleanup()
+			e.sandbox = nil
 		}
 		// Clean up V8 coverage temp directory
 		if e.coverageTempDir != "" {
diff --git a/internal/runner/service_test.go b/internal/runner/service_test.go
index 36d7e72..d82f760 100644
--- a/internal/runner/service_test.go
+++ b/internal/runner/service_test.go
@@ -1027,43 +1027,3 @@ func TestConcurrentServiceOperations(t *testing.T) {
 	// At least the stop should succeed
 	assert.NoError(t, errors[0])
 }
-
-func TestCreateReplayFenceConfigMergesCustomConfig(t *testing.T) {
-	customConfigPath := filepath.Join(t.TempDir(), "replay.fence.json")
-	err := os.WriteFile(customConfigPath, []byte(`{
-  "network": {
-    "allowedDomains": ["api.example.com"]
-  },
-  "filesystem": {
-    "allowWrite": ["custom-cache"]
-  }
-}`), 0o600)
-	require.NoError(t, err)
-
-	cfg, err := createReplayFenceConfig(customConfigPath)
-	require.NoError(t, err)
-	require.NotNil(t, cfg)
-	require.NotNil(t, cfg.Network.AllowLocalOutbound)
-
-	assert.Contains(t, cfg.Network.AllowedDomains, "localhost")
-	assert.Contains(t, cfg.Network.AllowedDomains, "127.0.0.1")
-	assert.Contains(t, cfg.Network.AllowedDomains, "api.example.com")
-	assert.True(t, cfg.Network.AllowLocalBinding)
-	assert.False(t, *cfg.Network.AllowLocalOutbound)
-	assert.True(t, cfg.Network.AllowAllUnixSockets)
-	assert.Contains(t, cfg.Filesystem.AllowWrite, "custom-cache")
-	assert.Contains(t, cfg.Filesystem.AllowWrite, "/")
-}
-
-func TestCreateReplayFenceConfigRejectsDeniedLocalhost(t *testing.T) {
-	customConfigPath := filepath.Join(t.TempDir(), "replay.fence.json")
-	err := os.WriteFile(customConfigPath, []byte(`{
-  "network": {
-    "deniedDomains": ["localhost"]
-  }
-}`), 0o600)
-	require.NoError(t, err)
-
-	_, err = createReplayFenceConfig(customConfigPath)
-	require.ErrorContains(t, err, `cannot deny "localhost"`)
-}

From d35b3bf9428ce7defb0a2eaba259715535f17ac1 Mon Sep 17 00:00:00 2001
From: JY Tan <jy8230@gmail.com>
Date: Fri, 24 Apr 2026 14:26:50 -0700
Subject: [PATCH 4/5] Fixes

---
 internal/runner/sandbox.go           | 11 ++++++
 internal/runner/sandbox_unix.go      | 16 +++++++--
 internal/runner/sandbox_unix_test.go | 50 ++++++++++++++++++++++++++++
 internal/runner/service.go           |  9 +++++
 4 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/internal/runner/sandbox.go b/internal/runner/sandbox.go
index b7b58c7..6b5f49f 100644
--- a/internal/runner/sandbox.go
+++ b/internal/runner/sandbox.go
@@ -11,6 +11,17 @@ type sandboxManager interface {
 	Cleanup()
 }
 
+// sandboxConfigError marks errors that stem from invalid user sandbox config
+// (bad JSON, denied localhost, missing file). These are always fatal
+// regardless of sandbox mode — a user who supplied a broken config asked for
+// sandboxing and shouldn't silently get unisolated execution. Distinct from
+// runtime-availability errors (missing bwrap/socat, Initialize failure), which
+// auto mode treats as "fall back to no sandbox".
+type sandboxConfigError struct{ err error }
+
+func (e *sandboxConfigError) Error() string { return e.err.Error() }
+func (e *sandboxConfigError) Unwrap() error { return e.err }
+
 type replaySandboxOptions struct {
 	UserConfigPath string // optional fence config override (e.g. .tusk/replay.fence.json)
 	Debug          bool
diff --git a/internal/runner/sandbox_unix.go b/internal/runner/sandbox_unix.go
index a730691..b4d15e2 100644
--- a/internal/runner/sandbox_unix.go
+++ b/internal/runner/sandbox_unix.go
@@ -37,14 +37,25 @@ func (s *fenceSandbox) Cleanup() {
 // newReplaySandboxManager builds the effective fence config for replay
 // mode, creates the fence.Manager, applies the requested service
 // execution model + exposed host paths, and initializes the manager.
-// On error, any partial state is cleaned up before returning.
+// On any error after fence.NewManager returns, the manager's Cleanup is
+// invoked before returning so no fence-allocated resources leak.
 func newReplaySandboxManager(opts replaySandboxOptions) (sandboxManager, error) {
 	fenceCfg, err := createReplayFenceConfig(opts.UserConfigPath)
 	if err != nil {
-		return nil, fmt.Errorf("prepare replay sandbox config: %w", err)
+		return nil, &sandboxConfigError{err: fmt.Errorf("prepare replay sandbox config: %w", err)}
 	}
 
 	mgr := fence.NewManager(fenceCfg, opts.Debug, false)
+	// Defensive: Cleanup is idempotent and fence's Initialize already
+	// unwinds its own partial state on failure, but this guards against
+	// future fence changes that add allocating steps between NewManager
+	// and Initialize (or between error returns inside Initialize).
+	success := false
+	defer func() {
+		if !success {
+			mgr.Cleanup()
+		}
+	}()
 
 	executionModel := fence.ServiceBindsInSandbox
 	if opts.BindsOnHost {
@@ -65,6 +76,7 @@ func newReplaySandboxManager(opts replaySandboxOptions) (sandboxManager, error)
 		return nil, fmt.Errorf("initialize replay sandbox: %w", err)
 	}
 
+	success = true
 	return &fenceSandbox{mgr: mgr}, nil
 }
 
diff --git a/internal/runner/sandbox_unix_test.go b/internal/runner/sandbox_unix_test.go
index 463958a..10c2b12 100644
--- a/internal/runner/sandbox_unix_test.go
+++ b/internal/runner/sandbox_unix_test.go
@@ -3,6 +3,7 @@
 package runner
 
 import (
+	"errors"
 	"os"
 	"path/filepath"
 	"testing"
@@ -50,3 +51,52 @@ func TestCreateReplayFenceConfigRejectsDeniedLocalhost(t *testing.T) {
 	_, err = createReplayFenceConfig(customConfigPath)
 	require.ErrorContains(t, err, `cannot deny "localhost"`)
 }
+
+// Invalid user sandbox config must surface as *sandboxConfigError so
+// service.go keeps it fatal in auto mode (a broken user config shouldn't
+// silently degrade to "no sandbox").
+func TestNewReplaySandboxManagerWrapsConfigErrors(t *testing.T) {
+	tests := []struct {
+		name       string
+		configBody string
+	}{
+		{
+			name:       "denies_localhost",
+			configBody: `{"network": {"deniedDomains": ["localhost"]}}`,
+		},
+		{
+			name:       "malformed_json",
+			configBody: `{"network": {`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			cfgPath := filepath.Join(t.TempDir(), "replay.fence.json")
+			require.NoError(t, os.WriteFile(cfgPath, []byte(tt.configBody), 0o600))
+
+			_, err := newReplaySandboxManager(replaySandboxOptions{
+				UserConfigPath: cfgPath,
+				ExposedPort:    3000,
+			})
+			require.Error(t, err)
+
+			var cfgErr *sandboxConfigError
+			require.Truef(t, errors.As(err, &cfgErr),
+				"expected error to be *sandboxConfigError so service.go treats it as fatal in auto mode; got %T: %v", err, err)
+		})
+	}
+}
+
+// A user-supplied config path that doesn't resolve is also a config mistake
+// (the user pointed at nothing), so it must be *sandboxConfigError too.
+func TestNewReplaySandboxManagerMissingUserConfigIsConfigError(t *testing.T) {
+	_, err := newReplaySandboxManager(replaySandboxOptions{
+		UserConfigPath: filepath.Join(t.TempDir(), "does-not-exist.json"),
+		ExposedPort:    3000,
+	})
+	require.Error(t, err)
+	var cfgErr *sandboxConfigError
+	require.Truef(t, errors.As(err, &cfgErr),
+		"missing user config should be *sandboxConfigError; got %T: %v", err, err)
+}
diff --git a/internal/runner/service.go b/internal/runner/service.go
index 8ed0e30..7cd1e07 100644
--- a/internal/runner/service.go
+++ b/internal/runner/service.go
@@ -2,6 +2,7 @@ package runner
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"net"
 	"os"
@@ -109,6 +110,14 @@ func (e *Executor) StartService() error {
 				ExposedHostPaths: exposedHostPaths,
 			})
 			if sbxErr != nil {
+				// User-config errors (bad JSON, denied localhost, ...) are
+				// always fatal. The user asked for a sandbox but got the config
+				// wrong; silently proceeding without isolation would violate
+				// that intent.
+				var cfgErr *sandboxConfigError
+				if errors.As(sbxErr, &cfgErr) {
+					return fmt.Errorf("failed to prepare replay sandbox config: %w", sbxErr)
+				}
 				if requireSandbox {
 					return fmt.Errorf("strict replay sandbox unavailable: %s", friendlySandboxError(sbxErr))
 				}

From 061f367a0b4f156e8e973c93e5d142028e8fe4ad Mon Sep 17 00:00:00 2001
From: JY Tan <jy8230@gmail.com>
Date: Fri, 24 Apr 2026 14:34:44 -0700
Subject: [PATCH 5/5] Fix error message

---
 internal/runner/sandbox_unix.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/internal/runner/sandbox_unix.go b/internal/runner/sandbox_unix.go
index b4d15e2..9148fce 100644
--- a/internal/runner/sandbox_unix.go
+++ b/internal/runner/sandbox_unix.go
@@ -42,7 +42,9 @@ func (s *fenceSandbox) Cleanup() {
 func newReplaySandboxManager(opts replaySandboxOptions) (sandboxManager, error) {
 	fenceCfg, err := createReplayFenceConfig(opts.UserConfigPath)
 	if err != nil {
-		return nil, &sandboxConfigError{err: fmt.Errorf("prepare replay sandbox config: %w", err)}
+		// No prefix here: service.go adds the user-facing
+		// "failed to prepare replay sandbox config:" framing.
+		return nil, &sandboxConfigError{err: err}
 	}
 
 	mgr := fence.NewManager(fenceCfg, opts.Debug, false)