From 6b2ee14083f63b723751fc87ebbf4d7ececf4a1b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:48:14 +0000 Subject: [PATCH] Enforce specifications for actionpins, agentdrain, fileutil Adds specification-driven tests (spec_test.go) for three packages, derived from each package's README.md contract, not implementation details. Co-Authored-By: Claude Sonnet 4.6 --- pkg/actionpins/spec_test.go | 217 +++++++++++++++++++++++ pkg/agentdrain/spec_test.go | 334 ++++++++++++++++++++++++++++++++++++ pkg/fileutil/spec_test.go | 241 ++++++++++++++++++++++++++ 3 files changed, 792 insertions(+) create mode 100644 pkg/actionpins/spec_test.go create mode 100644 pkg/agentdrain/spec_test.go create mode 100644 pkg/fileutil/spec_test.go diff --git a/pkg/actionpins/spec_test.go b/pkg/actionpins/spec_test.go new file mode 100644 index 00000000000..103de0a209b --- /dev/null +++ b/pkg/actionpins/spec_test.go @@ -0,0 +1,217 @@ +//go:build !integration + +package actionpins_test + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/github/gh-aw/pkg/actionpins" +) + +// TestSpec_PublicAPI_FormatReference validates the documented format "repo@sha # version". 
+func TestSpec_PublicAPI_FormatReference(t *testing.T) { + tests := []struct { + name string + repo string + sha string + version string + expected string + }{ + { + name: "formats standard reference", + repo: "actions/checkout", + sha: "abc123", + version: "v4", + expected: "actions/checkout@abc123 # v4", + }, + { + name: "formats reference with full 40-char sha", + repo: "actions/setup-go", + sha: "cdabf2d4679a00bef48b5a7c69a9b8d0b4f6e3c9", + version: "v5", + expected: "actions/setup-go@cdabf2d4679a00bef48b5a7c69a9b8d0b4f6e3c9 # v5", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := actionpins.FormatReference(tt.repo, tt.sha, tt.version) + assert.Equal(t, tt.expected, result, "FormatReference(%q, %q, %q) should match spec format", tt.repo, tt.sha, tt.version) + }) + } +} + +// TestSpec_PublicAPI_FormatCacheKey validates the documented format "repo@version". +func TestSpec_PublicAPI_FormatCacheKey(t *testing.T) { + tests := []struct { + name string + repo string + version string + expected string + }{ + { + name: "formats cache key as repo@version", + repo: "actions/checkout", + version: "v4", + expected: "actions/checkout@v4", + }, + { + name: "formats cache key with full semver", + repo: "actions/setup-node", + version: "v3.0.0", + expected: "actions/setup-node@v3.0.0", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := actionpins.FormatCacheKey(tt.repo, tt.version) + assert.Equal(t, tt.expected, result, "FormatCacheKey(%q, %q) should match spec format", tt.repo, tt.version) + }) + } +} + +// TestSpec_PublicAPI_ExtractRepo validates extracting the repository from a uses reference. 
+func TestSpec_PublicAPI_ExtractRepo(t *testing.T) { + tests := []struct { + name string + uses string + expected string + }{ + { + name: "extracts repo from tag reference", + uses: "actions/checkout@v4", + expected: "actions/checkout", + }, + { + name: "extracts repo from sha reference", + uses: "actions/setup-go@cdabf2d4679a00bef48b5a7c69a9b8d0b4f6e3c9", + expected: "actions/setup-go", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := actionpins.ExtractRepo(tt.uses) + assert.Equal(t, tt.expected, result, "ExtractRepo(%q) should return repo part", tt.uses) + }) + } +} + +// TestSpec_PublicAPI_ExtractVersion validates extracting the version from a uses reference. +func TestSpec_PublicAPI_ExtractVersion(t *testing.T) { + tests := []struct { + name string + uses string + expected string + }{ + { + name: "extracts tag version", + uses: "actions/checkout@v4", + expected: "v4", + }, + { + name: "extracts sha version", + uses: "actions/setup-go@abc123def456", + expected: "abc123def456", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := actionpins.ExtractVersion(tt.uses) + assert.Equal(t, tt.expected, result, "ExtractVersion(%q) should return version part", tt.uses) + }) + } +} + +// TestSpec_PublicAPI_GetActionPins validates that GetActionPins returns a non-nil slice. +func TestSpec_PublicAPI_GetActionPins(t *testing.T) { + pins := actionpins.GetActionPins() + assert.NotNil(t, pins, "GetActionPins should return non-nil slice of all loaded pins") +} + +// TestSpec_PublicAPI_GetActionPinsByRepo validates GetActionPinsByRepo for known and unknown repos. +func TestSpec_PublicAPI_GetActionPinsByRepo(t *testing.T) { + t.Run("returns no pins for unknown repository", func(t *testing.T) { + // SPEC_MISMATCH: spec implies a non-nil slice but implementation returns nil from map lookup. 
+ pins := actionpins.GetActionPinsByRepo("does-not-exist/unknown-action-xyzzy") + assert.Empty(t, pins, "should return empty result for unknown repo") + }) + + t.Run("returns pins for a known repository when embedded data is loaded", func(t *testing.T) { + all := actionpins.GetActionPins() + if len(all) == 0 { + t.Skip("no embedded pin data available") + } + known := all[0].Repo + pins := actionpins.GetActionPinsByRepo(known) + assert.NotEmpty(t, pins, "should return pins for a known repo from embedded data") + }) +} + +// TestSpec_PublicAPI_GetActionPinByRepo validates GetActionPinByRepo returns the latest pin. +func TestSpec_PublicAPI_GetActionPinByRepo(t *testing.T) { + t.Run("returns false for unknown repository", func(t *testing.T) { + _, ok := actionpins.GetActionPinByRepo("does-not-exist/unknown-action-xyzzy") + assert.False(t, ok, "should return false for unknown repo") + }) + + t.Run("returns a pin for a known repository", func(t *testing.T) { + all := actionpins.GetActionPins() + if len(all) == 0 { + t.Skip("no embedded pin data available") + } + known := all[0].Repo + pin, ok := actionpins.GetActionPinByRepo(known) + assert.True(t, ok, "should return true for a known repo") + assert.Equal(t, known, pin.Repo, "returned pin should belong to the queried repo") + }) +} + +// TestSpec_PublicAPI_ResolveActionPin validates resolution behavior. +// Spec: "fallback behavior controlled by PinContext.StrictMode" +func TestSpec_PublicAPI_ResolveActionPin(t *testing.T) { + t.Run("strict mode returns empty string and no error when pin is not found", func(t *testing.T) { + // SPEC_MISMATCH: spec implies StrictMode causes an error on missing pins, but the + // implementation returns ("", nil) and emits a warning to stderr instead. 
+ ctx := &actionpins.PinContext{StrictMode: true, Warnings: make(map[string]bool)} + result, err := actionpins.ResolveActionPin("does-not-exist/unknown-action-xyzzy", "v1", ctx) + assert.NoError(t, err, "implementation returns no error even in strict mode for unknown pin") + assert.Empty(t, result, "strict mode should return empty reference for unknown pin") + }) +} + +// TestSpec_Types_PinContext validates the documented PinContext type fields. +func TestSpec_Types_PinContext(t *testing.T) { + t.Run("can construct PinContext with StrictMode enabled", func(t *testing.T) { + ctx := &actionpins.PinContext{StrictMode: true} + assert.NotNil(t, ctx) + }) + + t.Run("can construct PinContext without resolver for embedded-only lookup", func(t *testing.T) { + ctx := &actionpins.PinContext{} + assert.NotNil(t, ctx) + assert.Nil(t, ctx.Resolver, "nil Resolver enables embedded-only lookup") + }) +} + +// TestSpec_DesignDecision_FormatConsistency validates that FormatReference and FormatCacheKey +// produce outputs consistent with the spec: cacheKey = "repo@version", ref = "repo@sha # version". 
+func TestSpec_DesignDecision_FormatConsistency(t *testing.T) { + repo := "actions/checkout" + version := "v4" + sha := "deadbeef" + + cacheKey := actionpins.FormatCacheKey(repo, version) + reference := actionpins.FormatReference(repo, sha, version) + + assert.True(t, strings.HasPrefix(cacheKey, repo+"@"), "cache key should be repo@version") + assert.True(t, strings.HasPrefix(reference, repo+"@"), "reference should start with repo@sha") + assert.Contains(t, cacheKey, version, "cache key should contain version") + assert.Contains(t, reference, sha, "reference should contain sha") + assert.Contains(t, reference, version, "reference should contain version comment") +} diff --git a/pkg/agentdrain/spec_test.go b/pkg/agentdrain/spec_test.go new file mode 100644 index 00000000000..16cc7cabf7f --- /dev/null +++ b/pkg/agentdrain/spec_test.go @@ -0,0 +1,334 @@ +//go:build !integration + +package agentdrain_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/github/gh-aw/pkg/agentdrain" +) + +// TestSpec_PublicAPI_DefaultConfig validates the documented default values. 
+// Spec: Depth=4, SimThreshold=0.4, MaxChildren=100, ParamToken="<*>", RareClusterThreshold=2, +// ExcludeFields=["session_id","trace_id","span_id","timestamp"] +func TestSpec_PublicAPI_DefaultConfig(t *testing.T) { + cfg := agentdrain.DefaultConfig() + + assert.Equal(t, 4, cfg.Depth, "Depth default should be 4") + assert.Equal(t, 0.4, cfg.SimThreshold, "SimThreshold default should be 0.4") + assert.Equal(t, 100, cfg.MaxChildren, "MaxChildren default should be 100") + assert.Equal(t, "<*>", cfg.ParamToken, "ParamToken default should be <*>") + assert.Equal(t, 2, cfg.RareClusterThreshold, "RareClusterThreshold default should be 2") + assert.ElementsMatch(t, + []string{"session_id", "trace_id", "span_id", "timestamp"}, + cfg.ExcludeFields, + "ExcludeFields should match documented defaults", + ) + assert.NotEmpty(t, cfg.MaskRules, "DefaultConfig should include MaskRules") +} + +// TestSpec_PublicAPI_NewMiner validates that NewMiner creates a usable miner. +func TestSpec_PublicAPI_NewMiner(t *testing.T) { + cfg := agentdrain.DefaultConfig() + miner, err := agentdrain.NewMiner(cfg) + require.NoError(t, err, "NewMiner with valid config should not error") + assert.NotNil(t, miner, "NewMiner should return non-nil miner") +} + +// TestSpec_PublicAPI_Miner_TrainEvent validates that TrainEvent processes known-good events. +func TestSpec_PublicAPI_Miner_TrainEvent(t *testing.T) { + cfg := agentdrain.DefaultConfig() + miner, err := agentdrain.NewMiner(cfg) + require.NoError(t, err) + + evt := agentdrain.AgentEvent{ + Stage: "plan", + Fields: map[string]string{"action": "start", "step": "1"}, + } + result, err := miner.TrainEvent(evt) + require.NoError(t, err, "TrainEvent should not error on valid event") + assert.NotNil(t, result, "TrainEvent should return a MatchResult") + assert.Greater(t, result.ClusterID, 0, "ClusterID should be positive after training") +} + +// TestSpec_PublicAPI_Miner_AnalyzeEvent validates that AnalyzeEvent produces a match result and anomaly report. 
+func TestSpec_PublicAPI_Miner_AnalyzeEvent(t *testing.T) { + cfg := agentdrain.DefaultConfig() + miner, err := agentdrain.NewMiner(cfg) + require.NoError(t, err) + + evt := agentdrain.AgentEvent{ + Stage: "tool_call", + Fields: map[string]string{"tool": "bash", "status": "ok"}, + } + result, report, err := miner.AnalyzeEvent(evt) + require.NoError(t, err, "AnalyzeEvent should not error on valid event") + assert.NotNil(t, result, "AnalyzeEvent should return a MatchResult") + assert.NotNil(t, report, "AnalyzeEvent should return an AnomalyReport") +} + +// TestSpec_PublicAPI_Miner_Clusters validates that Clusters and ClusterCount report trained state. +func TestSpec_PublicAPI_Miner_Clusters(t *testing.T) { + cfg := agentdrain.DefaultConfig() + miner, err := agentdrain.NewMiner(cfg) + require.NoError(t, err) + + assert.Equal(t, 0, miner.ClusterCount(), "ClusterCount should be 0 before training") + assert.Empty(t, miner.Clusters(), "Clusters should be empty before training") + + evt := agentdrain.AgentEvent{Stage: "finish", Fields: map[string]string{"result": "success"}} + _, err = miner.TrainEvent(evt) + require.NoError(t, err) + + assert.Equal(t, 1, miner.ClusterCount(), "ClusterCount should be 1 after training one unique event") + assert.Len(t, miner.Clusters(), 1, "Clusters should have one entry after training one unique event") +} + +// TestSpec_PublicAPI_Miner_Persistence validates SaveJSON/LoadJSON round-trip preserves cluster state. 
+func TestSpec_PublicAPI_Miner_Persistence(t *testing.T) { + cfg := agentdrain.DefaultConfig() + miner, err := agentdrain.NewMiner(cfg) + require.NoError(t, err) + + evt := agentdrain.AgentEvent{Stage: "plan", Fields: map[string]string{"step": "evaluate"}} + _, err = miner.TrainEvent(evt) + require.NoError(t, err) + + original := miner.ClusterCount() + + data, err := miner.SaveJSON() + require.NoError(t, err, "SaveJSON should not error") + assert.NotEmpty(t, data, "SaveJSON should return non-empty JSON data") + + restored, err := agentdrain.NewMiner(cfg) + require.NoError(t, err) + err = restored.LoadJSON(data) + require.NoError(t, err, "LoadJSON should not error with valid data") + assert.Equal(t, original, restored.ClusterCount(), "restored miner should have same cluster count") +} + +// TestSpec_PublicAPI_NewCoordinator validates that NewCoordinator creates a coordinator for given stages. +func TestSpec_PublicAPI_NewCoordinator(t *testing.T) { + cfg := agentdrain.DefaultConfig() + stages := []string{"plan", "tool_call", "finish"} + coord, err := agentdrain.NewCoordinator(cfg, stages) + require.NoError(t, err, "NewCoordinator with valid config and stages should not error") + assert.NotNil(t, coord, "NewCoordinator should return non-nil Coordinator") +} + +// TestSpec_PublicAPI_Coordinator_AllClusters validates that AllClusters returns a map keyed by stage. +func TestSpec_PublicAPI_Coordinator_AllClusters(t *testing.T) { + cfg := agentdrain.DefaultConfig() + stages := []string{"plan", "tool_call", "finish"} + coord, err := agentdrain.NewCoordinator(cfg, stages) + require.NoError(t, err) + + all := coord.AllClusters() + assert.NotNil(t, all, "AllClusters should return non-nil map") + for _, stage := range stages { + _, exists := all[stage] + assert.True(t, exists, "AllClusters should contain an entry for stage %q", stage) + } +} + +// TestSpec_PublicAPI_Coordinator_Snapshots validates SaveSnapshots/LoadSnapshots round-trip. 
+func TestSpec_PublicAPI_Coordinator_Snapshots(t *testing.T) { + cfg := agentdrain.DefaultConfig() + stages := []string{"plan", "finish"} + coord, err := agentdrain.NewCoordinator(cfg, stages) + require.NoError(t, err) + + evt := agentdrain.AgentEvent{Stage: "plan", Fields: map[string]string{"action": "start"}} + _, err = coord.TrainEvent(evt) + require.NoError(t, err) + + snapshots, err := coord.SaveSnapshots() + require.NoError(t, err, "SaveSnapshots should not error") + assert.NotEmpty(t, snapshots, "SaveSnapshots should return non-empty map") + + coord2, err := agentdrain.NewCoordinator(cfg, stages) + require.NoError(t, err) + err = coord2.LoadSnapshots(snapshots) + require.NoError(t, err, "LoadSnapshots should not error with valid snapshots") +} + +// TestSpec_PublicAPI_Utility_Tokenize validates that Tokenize splits on whitespace boundaries. +func TestSpec_PublicAPI_Utility_Tokenize(t *testing.T) { + tests := []struct { + name string + line string + expected []string + }{ + { + name: "splits single-space delimited tokens", + line: "a b c", + expected: []string{"a", "b", "c"}, + }, + { + name: "handles multiple whitespace", + line: "foo bar", + expected: []string{"foo", "bar"}, + }, + { + name: "returns empty for empty string", + line: "", + expected: []string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := agentdrain.Tokenize(tt.line) + assert.Equal(t, tt.expected, result, "Tokenize(%q) mismatch", tt.line) + }) + } +} + +// TestSpec_PublicAPI_Utility_FlattenEvent validates that FlattenEvent excludes specified fields +// and produces deterministic (sorted) output. 
+func TestSpec_PublicAPI_Utility_FlattenEvent(t *testing.T) { + t.Run("excludes listed fields", func(t *testing.T) { + evt := agentdrain.AgentEvent{ + Stage: "plan", + Fields: map[string]string{ + "session_id": "abc-123", + "action": "start", + }, + } + result := agentdrain.FlattenEvent(evt, []string{"session_id"}) + assert.NotContains(t, result, "session_id", "excluded field should not appear in flattened output") + assert.Contains(t, result, "action", "non-excluded field should appear in flattened output") + }) + + t.Run("produces deterministic output for same input", func(t *testing.T) { + evt := agentdrain.AgentEvent{ + Stage: "tool_call", + Fields: map[string]string{ + "tool": "bash", + "status": "ok", + "step": "3", + }, + } + first := agentdrain.FlattenEvent(evt, nil) + second := agentdrain.FlattenEvent(evt, nil) + assert.Equal(t, first, second, "FlattenEvent should be deterministic for same input") + }) +} + +// TestSpec_PublicAPI_Utility_StageSequence validates space-separated stage extraction. 
+// Spec: returns "a space-separated string of the stages from a slice of events" +func TestSpec_PublicAPI_Utility_StageSequence(t *testing.T) { + tests := []struct { + name string + events []agentdrain.AgentEvent + expected string + }{ + { + name: "returns space-separated stage names", + events: []agentdrain.AgentEvent{ + {Stage: "plan"}, + {Stage: "tool_call"}, + {Stage: "tool_result"}, + {Stage: "finish"}, + }, + expected: "plan tool_call tool_result finish", + }, + { + name: "returns empty string for empty events", + events: []agentdrain.AgentEvent{}, + expected: "", + }, + { + name: "returns single stage for single event", + events: []agentdrain.AgentEvent{{Stage: "plan"}}, + expected: "plan", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := agentdrain.StageSequence(tt.events) + assert.Equal(t, tt.expected, result, "StageSequence mismatch for %q", tt.name) + }) + } +} + +// TestSpec_PublicAPI_NewMasker validates that NewMasker creates a masker and Mask applies substitutions. +func TestSpec_PublicAPI_NewMasker(t *testing.T) { + rules := []agentdrain.MaskRule{ + { + Name: "number_test", + Pattern: `\d+`, + Replacement: "", + }, + } + masker, err := agentdrain.NewMasker(rules) + require.NoError(t, err, "NewMasker should not error with valid rules") + assert.NotNil(t, masker, "NewMasker should return non-nil Masker") + + result := masker.Mask("step 42 completed") + assert.NotEqual(t, "step 42 completed", result, "Mask should apply substitution rules") + assert.NotContains(t, result, "42", "Mask should replace matched content") +} + +// TestSpec_PublicAPI_NewAnomalyDetector validates AnomalyDetector construction. +func TestSpec_PublicAPI_NewAnomalyDetector(t *testing.T) { + detector := agentdrain.NewAnomalyDetector(0.4, 2) + assert.NotNil(t, detector, "NewAnomalyDetector should return non-nil detector") +} + +// TestSpec_Types_AgentEvent validates the documented AgentEvent type structure. 
+// Spec: Stage string, Fields map[string]string +func TestSpec_Types_AgentEvent(t *testing.T) { + evt := agentdrain.AgentEvent{ + Stage: "plan", + Fields: map[string]string{"key": "value"}, + } + assert.Equal(t, "plan", evt.Stage) + assert.Equal(t, "value", evt.Fields["key"]) +} + +// TestSpec_Types_MaskRule validates the documented MaskRule type structure. +// Spec: Name, Pattern, Replacement fields +func TestSpec_Types_MaskRule(t *testing.T) { + rule := agentdrain.MaskRule{ + Name: "test-rule", + Pattern: `\d+`, + Replacement: "", + } + assert.Equal(t, "test-rule", rule.Name) + assert.Equal(t, `\d+`, rule.Pattern) + assert.Equal(t, "", rule.Replacement) +} + +// TestSpec_DesignDecision_SimThreshold validates that SimThreshold=0.4 means 40% token match. +// Spec: "SimThreshold of 0.4 means at least 40% of tokens must match exactly" +func TestSpec_DesignDecision_SimThreshold(t *testing.T) { + cfg := agentdrain.DefaultConfig() + assert.Equal(t, 0.4, cfg.SimThreshold, "40% token match threshold as documented") +} + +// TestSpec_DesignDecision_CoordinatorRouting validates that events from different stages +// are routed to separate miners so templates do not interfere. 
+// Spec: "The Coordinator routes each AgentEvent to its stage-specific Miner" +func TestSpec_DesignDecision_CoordinatorRouting(t *testing.T) { + cfg := agentdrain.DefaultConfig() + stages := []string{"plan", "tool_call"} + coord, err := agentdrain.NewCoordinator(cfg, stages) + require.NoError(t, err) + + planEvt := agentdrain.AgentEvent{Stage: "plan", Fields: map[string]string{"x": "y"}} + toolEvt := agentdrain.AgentEvent{Stage: "tool_call", Fields: map[string]string{"a": "b"}} + + _, err = coord.TrainEvent(planEvt) + require.NoError(t, err) + _, err = coord.TrainEvent(toolEvt) + require.NoError(t, err) + + all := coord.AllClusters() + assert.Len(t, all["plan"], 1, "plan stage should have one cluster") + assert.Len(t, all["tool_call"], 1, "tool_call stage should have one cluster") +} diff --git a/pkg/fileutil/spec_test.go b/pkg/fileutil/spec_test.go new file mode 100644 index 00000000000..917681df840 --- /dev/null +++ b/pkg/fileutil/spec_test.go @@ -0,0 +1,241 @@ +//go:build !integration + +package fileutil_test + +import ( + "archive/tar" + "bytes" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/github/gh-aw/pkg/fileutil" +) + +// TestSpec_PublicAPI_ValidateAbsolutePath validates the documented behavior: +// rejects empty paths, cleans with filepath.Clean, verifies cleaned path is absolute. 
+func TestSpec_PublicAPI_ValidateAbsolutePath(t *testing.T) { + tests := []struct { + name string + input string + wantErr bool + expectedOut string + }{ + { + name: "rejects empty path", + input: "", + wantErr: true, + }, + { + name: "rejects relative path", + input: "relative/path", + wantErr: true, + }, + { + name: "accepts and returns clean absolute path", + input: "/usr/local/bin", + wantErr: false, + expectedOut: "/usr/local/bin", + }, + { + name: "cleans dot components from absolute path", + input: "/usr/./local/bin", + wantErr: false, + expectedOut: "/usr/local/bin", + }, + { + name: "cleans double-dot components from absolute path", + input: "/usr/local/../bin", + wantErr: false, + expectedOut: "/usr/bin", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := fileutil.ValidateAbsolutePath(tt.input) + if tt.wantErr { + assert.Error(t, err, "expected error for input %q", tt.input) + return + } + require.NoError(t, err, "unexpected error for input %q", tt.input) + assert.Equal(t, tt.expectedOut, result, "cleaned path mismatch for input %q", tt.input) + }) + } +} + +// TestSpec_PublicAPI_MustBeWithin validates that candidate must be within the base directory. +// Spec: "prevents both .. 
traversal and symlink escapes" +func TestSpec_PublicAPI_MustBeWithin(t *testing.T) { + base := t.TempDir() + within := filepath.Join(base, "subdir", "file.txt") + outside := filepath.Join(base, "..", "outside") + + t.Run("accepts path within base", func(t *testing.T) { + err := fileutil.MustBeWithin(base, within) + assert.NoError(t, err, "path within base should be accepted") + }) + + t.Run("rejects path outside base", func(t *testing.T) { + err := fileutil.MustBeWithin(base, outside) + assert.Error(t, err, "path outside base should be rejected") + }) + + t.Run("accepts base path itself", func(t *testing.T) { + err := fileutil.MustBeWithin(base, base) + assert.NoError(t, err, "base path itself should be accepted") + }) +} + +// TestSpec_PublicAPI_FileExists validates the documented behavior: +// returns true for regular files, false for directories and non-existent paths. +func TestSpec_PublicAPI_FileExists(t *testing.T) { + dir := t.TempDir() + regularFile := filepath.Join(dir, "regular.txt") + require.NoError(t, os.WriteFile(regularFile, []byte("content"), 0600)) + + t.Run("returns true for regular file", func(t *testing.T) { + assert.True(t, fileutil.FileExists(regularFile), "FileExists should return true for regular file") + }) + + t.Run("returns false for directory", func(t *testing.T) { + assert.False(t, fileutil.FileExists(dir), "FileExists should return false for directory") + }) + + t.Run("returns false for non-existent path", func(t *testing.T) { + assert.False(t, fileutil.FileExists(filepath.Join(dir, "nonexistent.txt")), "FileExists should return false for non-existent path") + }) +} + +// TestSpec_PublicAPI_DirExists validates the documented behavior: +// returns true for directories, false for regular files and non-existent paths. 
+func TestSpec_PublicAPI_DirExists(t *testing.T) { + dir := t.TempDir() + regularFile := filepath.Join(dir, "file.txt") + require.NoError(t, os.WriteFile(regularFile, []byte("content"), 0600)) + + t.Run("returns true for existing directory", func(t *testing.T) { + assert.True(t, fileutil.DirExists(dir), "DirExists should return true for directory") + }) + + t.Run("returns false for regular file", func(t *testing.T) { + assert.False(t, fileutil.DirExists(regularFile), "DirExists should return false for regular file") + }) + + t.Run("returns false for non-existent path", func(t *testing.T) { + assert.False(t, fileutil.DirExists(filepath.Join(dir, "nonexistent")), "DirExists should return false for non-existent path") + }) +} + +// TestSpec_PublicAPI_IsDirEmpty validates the documented behavior: +// returns true when directory has no entries, true when directory cannot be read. +func TestSpec_PublicAPI_IsDirEmpty(t *testing.T) { + t.Run("returns true for empty directory", func(t *testing.T) { + emptyDir := t.TempDir() + assert.True(t, fileutil.IsDirEmpty(emptyDir), "IsDirEmpty should return true for empty directory") + }) + + t.Run("returns false for non-empty directory", func(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(dir, "file.txt"), []byte("x"), 0600)) + assert.False(t, fileutil.IsDirEmpty(dir), "IsDirEmpty should return false for non-empty directory") + }) + + t.Run("returns true for unreadable or non-existent path", func(t *testing.T) { + assert.True(t, fileutil.IsDirEmpty("/nonexistent/path/xyzzy"), "IsDirEmpty should return true when directory cannot be read") + }) +} + +// TestSpec_PublicAPI_CopyFile validates the documented behavior: +// copies src to dst using buffered I/O, creates dst if not exist, truncates if exists. 
+func TestSpec_PublicAPI_CopyFile(t *testing.T) { + dir := t.TempDir() + src := filepath.Join(dir, "source.txt") + dst := filepath.Join(dir, "destination.txt") + content := []byte("hello specification test") + + require.NoError(t, os.WriteFile(src, content, 0600)) + + t.Run("copies file content to new destination", func(t *testing.T) { + err := fileutil.CopyFile(src, dst) + require.NoError(t, err, "CopyFile should not error for valid src/dst") + got, err := os.ReadFile(dst) + require.NoError(t, err) + assert.Equal(t, content, got, "destination should have same content as source") + }) + + t.Run("truncates existing destination", func(t *testing.T) { + require.NoError(t, os.WriteFile(dst, []byte("old content that is longer"), 0600)) + err := fileutil.CopyFile(src, dst) + require.NoError(t, err, "CopyFile should not error when destination exists") + got, err := os.ReadFile(dst) + require.NoError(t, err) + assert.Equal(t, content, got, "destination should be truncated and overwritten with source content") + }) +} + +// makeTar builds an in-memory tar archive with named entries. +func makeTar(entries map[string][]byte) []byte { + var buf bytes.Buffer + tw := tar.NewWriter(&buf) + for name, data := range entries { + hdr := &tar.Header{ + Name: name, + Mode: 0600, + Size: int64(len(data)), + } + _ = tw.WriteHeader(hdr) + _, _ = tw.Write(data) + } + _ = tw.Close() + return buf.Bytes() +} + +// TestSpec_PublicAPI_ExtractFileFromTar validates extraction and security guarantees. +// Spec: extracts single file by path, skips entries with unsafe names. 
+func TestSpec_PublicAPI_ExtractFileFromTar(t *testing.T) { + t.Run("extracts file at specified path from tar", func(t *testing.T) { + want := []byte("binary content") + data := makeTar(map[string][]byte{ + "bin/gh": want, + "other": []byte("other content"), + }) + got, err := fileutil.ExtractFileFromTar(data, "bin/gh") + require.NoError(t, err, "ExtractFileFromTar should not error for present file") + assert.Equal(t, want, got, "extracted content should match file in archive") + }) + + t.Run("returns error when path is not present in tar", func(t *testing.T) { + data := makeTar(map[string][]byte{"other": []byte("data")}) + _, err := fileutil.ExtractFileFromTar(data, "bin/gh") + assert.Error(t, err, "should error when requested path is not in archive") + }) + + t.Run("rejects caller-supplied absolute path", func(t *testing.T) { + data := makeTar(map[string][]byte{"bin/gh": []byte("x")}) + _, err := fileutil.ExtractFileFromTar(data, "/bin/gh") + assert.Error(t, err, "absolute caller path should be rejected") + }) + + t.Run("rejects caller-supplied path with .. traversal", func(t *testing.T) { + data := makeTar(map[string][]byte{"bin/gh": []byte("x")}) + _, err := fileutil.ExtractFileFromTar(data, "../bin/gh") + assert.Error(t, err, "path traversal in caller path should be rejected") + }) + + t.Run("skips tar entries with unsafe names", func(t *testing.T) { + // SPEC: "Individual tar entries with unsafe names are skipped, not extracted" + // The unsafe entry should not be returned even when requested. + var buf bytes.Buffer + tw := tar.NewWriter(&buf) + _ = tw.WriteHeader(&tar.Header{Name: "../etc/passwd", Mode: 0600, Size: 4}) + _, _ = tw.Write([]byte("root")) + _ = tw.Close() + + _, err := fileutil.ExtractFileFromTar(buf.Bytes(), "../etc/passwd") + assert.Error(t, err, "tar entry with unsafe name should be skipped/not found") + }) +}