From 6b2ee14083f63b723751fc87ebbf4d7ececf4a1b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 18 Apr 2026 10:48:14 +0000 Subject: [PATCH] Enforce specifications for actionpins, agentdrain, fileutil Adds specification-driven tests (spec_test.go) for three packages, derived from each package's README.md contract, not implementation details. Co-Authored-By: Claude Sonnet 4.6 --- pkg/actionpins/spec_test.go | 217 +++++++++++++++++++++++ pkg/agentdrain/spec_test.go | 334 ++++++++++++++++++++++++++++++++++++ pkg/fileutil/spec_test.go | 241 ++++++++++++++++++++++++++ 3 files changed, 792 insertions(+) create mode 100644 pkg/actionpins/spec_test.go create mode 100644 pkg/agentdrain/spec_test.go create mode 100644 pkg/fileutil/spec_test.go diff --git a/pkg/actionpins/spec_test.go b/pkg/actionpins/spec_test.go new file mode 100644 index 00000000000..103de0a209b --- /dev/null +++ b/pkg/actionpins/spec_test.go @@ -0,0 +1,217 @@ +//go:build !integration + +package actionpins_test + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/github/gh-aw/pkg/actionpins" +) + +// TestSpec_PublicAPI_FormatReference validates the documented format "repo@sha # version". 
+func TestSpec_PublicAPI_FormatReference(t *testing.T) { + tests := []struct { + name string + repo string + sha string + version string + expected string + }{ + { + name: "formats standard reference", + repo: "actions/checkout", + sha: "abc123", + version: "v4", + expected: "actions/checkout@abc123 # v4", + }, + { + name: "formats reference with full 40-char sha", + repo: "actions/setup-go", + sha: "cdabf2d4679a00bef48b5a7c69a9b8d0b4f6e3c9", + version: "v5", + expected: "actions/setup-go@cdabf2d4679a00bef48b5a7c69a9b8d0b4f6e3c9 # v5", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := actionpins.FormatReference(tt.repo, tt.sha, tt.version) + assert.Equal(t, tt.expected, result, "FormatReference(%q, %q, %q) should match spec format", tt.repo, tt.sha, tt.version) + }) + } +} + +// TestSpec_PublicAPI_FormatCacheKey validates the documented format "repo@version". +func TestSpec_PublicAPI_FormatCacheKey(t *testing.T) { + tests := []struct { + name string + repo string + version string + expected string + }{ + { + name: "formats cache key as repo@version", + repo: "actions/checkout", + version: "v4", + expected: "actions/checkout@v4", + }, + { + name: "formats cache key with full semver", + repo: "actions/setup-node", + version: "v3.0.0", + expected: "actions/setup-node@v3.0.0", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := actionpins.FormatCacheKey(tt.repo, tt.version) + assert.Equal(t, tt.expected, result, "FormatCacheKey(%q, %q) should match spec format", tt.repo, tt.version) + }) + } +} + +// TestSpec_PublicAPI_ExtractRepo validates extracting the repository from a uses reference. 
+func TestSpec_PublicAPI_ExtractRepo(t *testing.T) { + tests := []struct { + name string + uses string + expected string + }{ + { + name: "extracts repo from tag reference", + uses: "actions/checkout@v4", + expected: "actions/checkout", + }, + { + name: "extracts repo from sha reference", + uses: "actions/setup-go@cdabf2d4679a00bef48b5a7c69a9b8d0b4f6e3c9", + expected: "actions/setup-go", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := actionpins.ExtractRepo(tt.uses) + assert.Equal(t, tt.expected, result, "ExtractRepo(%q) should return repo part", tt.uses) + }) + } +} + +// TestSpec_PublicAPI_ExtractVersion validates extracting the version from a uses reference. +func TestSpec_PublicAPI_ExtractVersion(t *testing.T) { + tests := []struct { + name string + uses string + expected string + }{ + { + name: "extracts tag version", + uses: "actions/checkout@v4", + expected: "v4", + }, + { + name: "extracts sha version", + uses: "actions/setup-go@abc123def456", + expected: "abc123def456", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := actionpins.ExtractVersion(tt.uses) + assert.Equal(t, tt.expected, result, "ExtractVersion(%q) should return version part", tt.uses) + }) + } +} + +// TestSpec_PublicAPI_GetActionPins validates that GetActionPins returns a non-nil slice. +func TestSpec_PublicAPI_GetActionPins(t *testing.T) { + pins := actionpins.GetActionPins() + assert.NotNil(t, pins, "GetActionPins should return non-nil slice of all loaded pins") +} + +// TestSpec_PublicAPI_GetActionPinsByRepo validates GetActionPinsByRepo for known and unknown repos. +func TestSpec_PublicAPI_GetActionPinsByRepo(t *testing.T) { + t.Run("returns no pins for unknown repository", func(t *testing.T) { + // SPEC_MISMATCH: spec implies a non-nil slice but implementation returns nil from map lookup. 
+ pins := actionpins.GetActionPinsByRepo("does-not-exist/unknown-action-xyzzy") + assert.Empty(t, pins, "should return empty result for unknown repo") + }) + + t.Run("returns pins for a known repository when embedded data is loaded", func(t *testing.T) { + all := actionpins.GetActionPins() + if len(all) == 0 { + t.Skip("no embedded pin data available") + } + known := all[0].Repo + pins := actionpins.GetActionPinsByRepo(known) + assert.NotEmpty(t, pins, "should return pins for a known repo from embedded data") + }) +} + +// TestSpec_PublicAPI_GetActionPinByRepo validates GetActionPinByRepo returns the latest pin. +func TestSpec_PublicAPI_GetActionPinByRepo(t *testing.T) { + t.Run("returns false for unknown repository", func(t *testing.T) { + _, ok := actionpins.GetActionPinByRepo("does-not-exist/unknown-action-xyzzy") + assert.False(t, ok, "should return false for unknown repo") + }) + + t.Run("returns a pin for a known repository", func(t *testing.T) { + all := actionpins.GetActionPins() + if len(all) == 0 { + t.Skip("no embedded pin data available") + } + known := all[0].Repo + pin, ok := actionpins.GetActionPinByRepo(known) + assert.True(t, ok, "should return true for a known repo") + assert.Equal(t, known, pin.Repo, "returned pin should belong to the queried repo") + }) +} + +// TestSpec_PublicAPI_ResolveActionPin validates resolution behavior. +// Spec: "fallback behavior controlled by PinContext.StrictMode" +func TestSpec_PublicAPI_ResolveActionPin(t *testing.T) { + t.Run("strict mode returns empty string and no error when pin is not found", func(t *testing.T) { + // SPEC_MISMATCH: spec implies StrictMode causes an error on missing pins, but the + // implementation returns ("", nil) and emits a warning to stderr instead. 
+ ctx := &actionpins.PinContext{StrictMode: true, Warnings: make(map[string]bool)} + result, err := actionpins.ResolveActionPin("does-not-exist/unknown-action-xyzzy", "v1", ctx) + assert.NoError(t, err, "implementation returns no error even in strict mode for unknown pin") + assert.Empty(t, result, "strict mode should return empty reference for unknown pin") + }) +} + +// TestSpec_Types_PinContext validates the documented PinContext type fields. +func TestSpec_Types_PinContext(t *testing.T) { + t.Run("can construct PinContext with StrictMode enabled", func(t *testing.T) { + ctx := &actionpins.PinContext{StrictMode: true} + assert.NotNil(t, ctx) + }) + + t.Run("can construct PinContext without resolver for embedded-only lookup", func(t *testing.T) { + ctx := &actionpins.PinContext{} + assert.NotNil(t, ctx) + assert.Nil(t, ctx.Resolver, "nil Resolver enables embedded-only lookup") + }) +} + +// TestSpec_DesignDecision_FormatConsistency validates that FormatReference and FormatCacheKey +// produce outputs consistent with the spec: cacheKey = "repo@version", ref = "repo@sha # version". 
+func TestSpec_DesignDecision_FormatConsistency(t *testing.T) { + repo := "actions/checkout" + version := "v4" + sha := "deadbeef" + + cacheKey := actionpins.FormatCacheKey(repo, version) + reference := actionpins.FormatReference(repo, sha, version) + + assert.True(t, strings.HasPrefix(cacheKey, repo+"@"), "cache key should be repo@version") + assert.True(t, strings.HasPrefix(reference, repo+"@"), "reference should start with repo@sha") + assert.Contains(t, cacheKey, version, "cache key should contain version") + assert.Contains(t, reference, sha, "reference should contain sha") + assert.Contains(t, reference, version, "reference should contain version comment") +} diff --git a/pkg/agentdrain/spec_test.go b/pkg/agentdrain/spec_test.go new file mode 100644 index 00000000000..16cc7cabf7f --- /dev/null +++ b/pkg/agentdrain/spec_test.go @@ -0,0 +1,334 @@ +//go:build !integration + +package agentdrain_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/github/gh-aw/pkg/agentdrain" +) + +// TestSpec_PublicAPI_DefaultConfig validates the documented default values. 
+// Spec: Depth=4, SimThreshold=0.4, MaxChildren=100, ParamToken="<*>", RareClusterThreshold=2, +// ExcludeFields=["session_id","trace_id","span_id","timestamp"] +func TestSpec_PublicAPI_DefaultConfig(t *testing.T) { + cfg := agentdrain.DefaultConfig() + + assert.Equal(t, 4, cfg.Depth, "Depth default should be 4") + assert.Equal(t, 0.4, cfg.SimThreshold, "SimThreshold default should be 0.4") + assert.Equal(t, 100, cfg.MaxChildren, "MaxChildren default should be 100") + assert.Equal(t, "<*>", cfg.ParamToken, "ParamToken default should be <*>") + assert.Equal(t, 2, cfg.RareClusterThreshold, "RareClusterThreshold default should be 2") + assert.ElementsMatch(t, + []string{"session_id", "trace_id", "span_id", "timestamp"}, + cfg.ExcludeFields, + "ExcludeFields should match documented defaults", + ) + assert.NotEmpty(t, cfg.MaskRules, "DefaultConfig should include MaskRules") +} + +// TestSpec_PublicAPI_NewMiner validates that NewMiner creates a usable miner. +func TestSpec_PublicAPI_NewMiner(t *testing.T) { + cfg := agentdrain.DefaultConfig() + miner, err := agentdrain.NewMiner(cfg) + require.NoError(t, err, "NewMiner with valid config should not error") + assert.NotNil(t, miner, "NewMiner should return non-nil miner") +} + +// TestSpec_PublicAPI_Miner_TrainEvent validates that TrainEvent processes known-good events. +func TestSpec_PublicAPI_Miner_TrainEvent(t *testing.T) { + cfg := agentdrain.DefaultConfig() + miner, err := agentdrain.NewMiner(cfg) + require.NoError(t, err) + + evt := agentdrain.AgentEvent{ + Stage: "plan", + Fields: map[string]string{"action": "start", "step": "1"}, + } + result, err := miner.TrainEvent(evt) + require.NoError(t, err, "TrainEvent should not error on valid event") + assert.NotNil(t, result, "TrainEvent should return a MatchResult") + assert.Greater(t, result.ClusterID, 0, "ClusterID should be positive after training") +} + +// TestSpec_PublicAPI_Miner_AnalyzeEvent validates that AnalyzeEvent produces a match result and anomaly report. 
+func TestSpec_PublicAPI_Miner_AnalyzeEvent(t *testing.T) { + cfg := agentdrain.DefaultConfig() + miner, err := agentdrain.NewMiner(cfg) + require.NoError(t, err) + + evt := agentdrain.AgentEvent{ + Stage: "tool_call", + Fields: map[string]string{"tool": "bash", "status": "ok"}, + } + result, report, err := miner.AnalyzeEvent(evt) + require.NoError(t, err, "AnalyzeEvent should not error on valid event") + assert.NotNil(t, result, "AnalyzeEvent should return a MatchResult") + assert.NotNil(t, report, "AnalyzeEvent should return an AnomalyReport") +} + +// TestSpec_PublicAPI_Miner_Clusters validates that Clusters and ClusterCount report trained state. +func TestSpec_PublicAPI_Miner_Clusters(t *testing.T) { + cfg := agentdrain.DefaultConfig() + miner, err := agentdrain.NewMiner(cfg) + require.NoError(t, err) + + assert.Equal(t, 0, miner.ClusterCount(), "ClusterCount should be 0 before training") + assert.Empty(t, miner.Clusters(), "Clusters should be empty before training") + + evt := agentdrain.AgentEvent{Stage: "finish", Fields: map[string]string{"result": "success"}} + _, err = miner.TrainEvent(evt) + require.NoError(t, err) + + assert.Equal(t, 1, miner.ClusterCount(), "ClusterCount should be 1 after training one unique event") + assert.Len(t, miner.Clusters(), 1, "Clusters should have one entry after training one unique event") +} + +// TestSpec_PublicAPI_Miner_Persistence validates SaveJSON/LoadJSON round-trip preserves cluster state. 
+func TestSpec_PublicAPI_Miner_Persistence(t *testing.T) { + cfg := agentdrain.DefaultConfig() + miner, err := agentdrain.NewMiner(cfg) + require.NoError(t, err) + + evt := agentdrain.AgentEvent{Stage: "plan", Fields: map[string]string{"step": "evaluate"}} + _, err = miner.TrainEvent(evt) + require.NoError(t, err) + + original := miner.ClusterCount() + + data, err := miner.SaveJSON() + require.NoError(t, err, "SaveJSON should not error") + assert.NotEmpty(t, data, "SaveJSON should return non-empty JSON data") + + restored, err := agentdrain.NewMiner(cfg) + require.NoError(t, err) + err = restored.LoadJSON(data) + require.NoError(t, err, "LoadJSON should not error with valid data") + assert.Equal(t, original, restored.ClusterCount(), "restored miner should have same cluster count") +} + +// TestSpec_PublicAPI_NewCoordinator validates that NewCoordinator creates a coordinator for given stages. +func TestSpec_PublicAPI_NewCoordinator(t *testing.T) { + cfg := agentdrain.DefaultConfig() + stages := []string{"plan", "tool_call", "finish"} + coord, err := agentdrain.NewCoordinator(cfg, stages) + require.NoError(t, err, "NewCoordinator with valid config and stages should not error") + assert.NotNil(t, coord, "NewCoordinator should return non-nil Coordinator") +} + +// TestSpec_PublicAPI_Coordinator_AllClusters validates that AllClusters returns a map keyed by stage. +func TestSpec_PublicAPI_Coordinator_AllClusters(t *testing.T) { + cfg := agentdrain.DefaultConfig() + stages := []string{"plan", "tool_call", "finish"} + coord, err := agentdrain.NewCoordinator(cfg, stages) + require.NoError(t, err) + + all := coord.AllClusters() + assert.NotNil(t, all, "AllClusters should return non-nil map") + for _, stage := range stages { + _, exists := all[stage] + assert.True(t, exists, "AllClusters should contain an entry for stage %q", stage) + } +} + +// TestSpec_PublicAPI_Coordinator_Snapshots validates SaveSnapshots/LoadSnapshots round-trip. 
+func TestSpec_PublicAPI_Coordinator_Snapshots(t *testing.T) { + cfg := agentdrain.DefaultConfig() + stages := []string{"plan", "finish"} + coord, err := agentdrain.NewCoordinator(cfg, stages) + require.NoError(t, err) + + evt := agentdrain.AgentEvent{Stage: "plan", Fields: map[string]string{"action": "start"}} + _, err = coord.TrainEvent(evt) + require.NoError(t, err) + + snapshots, err := coord.SaveSnapshots() + require.NoError(t, err, "SaveSnapshots should not error") + assert.NotEmpty(t, snapshots, "SaveSnapshots should return non-empty map") + + coord2, err := agentdrain.NewCoordinator(cfg, stages) + require.NoError(t, err) + err = coord2.LoadSnapshots(snapshots) + require.NoError(t, err, "LoadSnapshots should not error with valid snapshots") +} + +// TestSpec_PublicAPI_Utility_Tokenize validates that Tokenize splits on whitespace boundaries. +func TestSpec_PublicAPI_Utility_Tokenize(t *testing.T) { + tests := []struct { + name string + line string + expected []string + }{ + { + name: "splits single-space delimited tokens", + line: "a b c", + expected: []string{"a", "b", "c"}, + }, + { + name: "handles multiple whitespace", + line: "foo bar", + expected: []string{"foo", "bar"}, + }, + { + name: "returns empty for empty string", + line: "", + expected: []string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := agentdrain.Tokenize(tt.line) + assert.Equal(t, tt.expected, result, "Tokenize(%q) mismatch", tt.line) + }) + } +} + +// TestSpec_PublicAPI_Utility_FlattenEvent validates that FlattenEvent excludes specified fields +// and produces deterministic (sorted) output. 
+func TestSpec_PublicAPI_Utility_FlattenEvent(t *testing.T) { + t.Run("excludes listed fields", func(t *testing.T) { + evt := agentdrain.AgentEvent{ + Stage: "plan", + Fields: map[string]string{ + "session_id": "abc-123", + "action": "start", + }, + } + result := agentdrain.FlattenEvent(evt, []string{"session_id"}) + assert.NotContains(t, result, "session_id", "excluded field should not appear in flattened output") + assert.Contains(t, result, "action", "non-excluded field should appear in flattened output") + }) + + t.Run("produces deterministic output for same input", func(t *testing.T) { + evt := agentdrain.AgentEvent{ + Stage: "tool_call", + Fields: map[string]string{ + "tool": "bash", + "status": "ok", + "step": "3", + }, + } + first := agentdrain.FlattenEvent(evt, nil) + second := agentdrain.FlattenEvent(evt, nil) + assert.Equal(t, first, second, "FlattenEvent should be deterministic for same input") + }) +} + +// TestSpec_PublicAPI_Utility_StageSequence validates space-separated stage extraction. 
+// Spec: returns "a space-separated string of the stages from a slice of events" +func TestSpec_PublicAPI_Utility_StageSequence(t *testing.T) { + tests := []struct { + name string + events []agentdrain.AgentEvent + expected string + }{ + { + name: "returns space-separated stage names", + events: []agentdrain.AgentEvent{ + {Stage: "plan"}, + {Stage: "tool_call"}, + {Stage: "tool_result"}, + {Stage: "finish"}, + }, + expected: "plan tool_call tool_result finish", + }, + { + name: "returns empty string for empty events", + events: []agentdrain.AgentEvent{}, + expected: "", + }, + { + name: "returns single stage for single event", + events: []agentdrain.AgentEvent{{Stage: "plan"}}, + expected: "plan", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := agentdrain.StageSequence(tt.events) + assert.Equal(t, tt.expected, result, "StageSequence mismatch for %q", tt.name) + }) + } +} + +// TestSpec_PublicAPI_NewMasker validates that NewMasker creates a masker and Mask applies substitutions. +func TestSpec_PublicAPI_NewMasker(t *testing.T) { + rules := []agentdrain.MaskRule{ + { + Name: "number_test", + Pattern: `\d+`, + Replacement: "", + }, + } + masker, err := agentdrain.NewMasker(rules) + require.NoError(t, err, "NewMasker should not error with valid rules") + assert.NotNil(t, masker, "NewMasker should return non-nil Masker") + + result := masker.Mask("step 42 completed") + assert.NotEqual(t, "step 42 completed", result, "Mask should apply substitution rules") + assert.NotContains(t, result, "42", "Mask should replace matched content") +} + +// TestSpec_PublicAPI_NewAnomalyDetector validates AnomalyDetector construction. +func TestSpec_PublicAPI_NewAnomalyDetector(t *testing.T) { + detector := agentdrain.NewAnomalyDetector(0.4, 2) + assert.NotNil(t, detector, "NewAnomalyDetector should return non-nil detector") +} + +// TestSpec_Types_AgentEvent validates the documented AgentEvent type structure. 
+// Spec: Stage string, Fields map[string]string +func TestSpec_Types_AgentEvent(t *testing.T) { + evt := agentdrain.AgentEvent{ + Stage: "plan", + Fields: map[string]string{"key": "value"}, + } + assert.Equal(t, "plan", evt.Stage) + assert.Equal(t, "value", evt.Fields["key"]) +} + +// TestSpec_Types_MaskRule validates the documented MaskRule type structure. +// Spec: Name, Pattern, Replacement fields +func TestSpec_Types_MaskRule(t *testing.T) { + rule := agentdrain.MaskRule{ + Name: "test-rule", + Pattern: `\d+`, + Replacement: "", + } + assert.Equal(t, "test-rule", rule.Name) + assert.Equal(t, `\d+`, rule.Pattern) + assert.Equal(t, "", rule.Replacement) +} + +// TestSpec_DesignDecision_SimThreshold validates that SimThreshold=0.4 means 40% token match. +// Spec: "SimThreshold of 0.4 means at least 40% of tokens must match exactly" +func TestSpec_DesignDecision_SimThreshold(t *testing.T) { + cfg := agentdrain.DefaultConfig() + assert.Equal(t, 0.4, cfg.SimThreshold, "40% token match threshold as documented") +} + +// TestSpec_DesignDecision_CoordinatorRouting validates that events from different stages +// are routed to separate miners so templates do not interfere. 
+// Spec: "The Coordinator routes each AgentEvent to its stage-specific Miner" +func TestSpec_DesignDecision_CoordinatorRouting(t *testing.T) { + cfg := agentdrain.DefaultConfig() + stages := []string{"plan", "tool_call"} + coord, err := agentdrain.NewCoordinator(cfg, stages) + require.NoError(t, err) + + planEvt := agentdrain.AgentEvent{Stage: "plan", Fields: map[string]string{"x": "y"}} + toolEvt := agentdrain.AgentEvent{Stage: "tool_call", Fields: map[string]string{"a": "b"}} + + _, err = coord.TrainEvent(planEvt) + require.NoError(t, err) + _, err = coord.TrainEvent(toolEvt) + require.NoError(t, err) + + all := coord.AllClusters() + assert.Len(t, all["plan"], 1, "plan stage should have one cluster") + assert.Len(t, all["tool_call"], 1, "tool_call stage should have one cluster") +} diff --git a/pkg/fileutil/spec_test.go b/pkg/fileutil/spec_test.go new file mode 100644 index 00000000000..917681df840 --- /dev/null +++ b/pkg/fileutil/spec_test.go @@ -0,0 +1,241 @@ +//go:build !integration + +package fileutil_test + +import ( + "archive/tar" + "bytes" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/github/gh-aw/pkg/fileutil" +) + +// TestSpec_PublicAPI_ValidateAbsolutePath validates the documented behavior: +// rejects empty paths, cleans with filepath.Clean, verifies cleaned path is absolute. 
+func TestSpec_PublicAPI_ValidateAbsolutePath(t *testing.T) { + tests := []struct { + name string + input string + wantErr bool + expectedOut string + }{ + { + name: "rejects empty path", + input: "", + wantErr: true, + }, + { + name: "rejects relative path", + input: "relative/path", + wantErr: true, + }, + { + name: "accepts and returns clean absolute path", + input: "/usr/local/bin", + wantErr: false, + expectedOut: "/usr/local/bin", + }, + { + name: "cleans dot components from absolute path", + input: "/usr/./local/bin", + wantErr: false, + expectedOut: "/usr/local/bin", + }, + { + name: "cleans double-dot components from absolute path", + input: "/usr/local/../bin", + wantErr: false, + expectedOut: "/usr/bin", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := fileutil.ValidateAbsolutePath(tt.input) + if tt.wantErr { + assert.Error(t, err, "expected error for input %q", tt.input) + return + } + require.NoError(t, err, "unexpected error for input %q", tt.input) + assert.Equal(t, tt.expectedOut, result, "cleaned path mismatch for input %q", tt.input) + }) + } +} + +// TestSpec_PublicAPI_MustBeWithin validates that candidate must be within the base directory. +// Spec: "prevents both .. 
traversal and symlink escapes" +func TestSpec_PublicAPI_MustBeWithin(t *testing.T) { + base := t.TempDir() + within := filepath.Join(base, "subdir", "file.txt") + outside := filepath.Join(base, "..", "outside") + + t.Run("accepts path within base", func(t *testing.T) { + err := fileutil.MustBeWithin(base, within) + assert.NoError(t, err, "path within base should be accepted") + }) + + t.Run("rejects path outside base", func(t *testing.T) { + err := fileutil.MustBeWithin(base, outside) + assert.Error(t, err, "path outside base should be rejected") + }) + + t.Run("accepts base path itself", func(t *testing.T) { + err := fileutil.MustBeWithin(base, base) + assert.NoError(t, err, "base path itself should be accepted") + }) +} + +// TestSpec_PublicAPI_FileExists validates the documented behavior: +// returns true for regular files, false for directories and non-existent paths. +func TestSpec_PublicAPI_FileExists(t *testing.T) { + dir := t.TempDir() + regularFile := filepath.Join(dir, "regular.txt") + require.NoError(t, os.WriteFile(regularFile, []byte("content"), 0600)) + + t.Run("returns true for regular file", func(t *testing.T) { + assert.True(t, fileutil.FileExists(regularFile), "FileExists should return true for regular file") + }) + + t.Run("returns false for directory", func(t *testing.T) { + assert.False(t, fileutil.FileExists(dir), "FileExists should return false for directory") + }) + + t.Run("returns false for non-existent path", func(t *testing.T) { + assert.False(t, fileutil.FileExists(filepath.Join(dir, "nonexistent.txt")), "FileExists should return false for non-existent path") + }) +} + +// TestSpec_PublicAPI_DirExists validates the documented behavior: +// returns true for directories, false for regular files and non-existent paths. 
+func TestSpec_PublicAPI_DirExists(t *testing.T) { + dir := t.TempDir() + regularFile := filepath.Join(dir, "file.txt") + require.NoError(t, os.WriteFile(regularFile, []byte("content"), 0600)) + + t.Run("returns true for existing directory", func(t *testing.T) { + assert.True(t, fileutil.DirExists(dir), "DirExists should return true for directory") + }) + + t.Run("returns false for regular file", func(t *testing.T) { + assert.False(t, fileutil.DirExists(regularFile), "DirExists should return false for regular file") + }) + + t.Run("returns false for non-existent path", func(t *testing.T) { + assert.False(t, fileutil.DirExists(filepath.Join(dir, "nonexistent")), "DirExists should return false for non-existent path") + }) +} + +// TestSpec_PublicAPI_IsDirEmpty validates the documented behavior: +// returns true when directory has no entries, true when directory cannot be read. +func TestSpec_PublicAPI_IsDirEmpty(t *testing.T) { + t.Run("returns true for empty directory", func(t *testing.T) { + emptyDir := t.TempDir() + assert.True(t, fileutil.IsDirEmpty(emptyDir), "IsDirEmpty should return true for empty directory") + }) + + t.Run("returns false for non-empty directory", func(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(dir, "file.txt"), []byte("x"), 0600)) + assert.False(t, fileutil.IsDirEmpty(dir), "IsDirEmpty should return false for non-empty directory") + }) + + t.Run("returns true for unreadable or non-existent path", func(t *testing.T) { + assert.True(t, fileutil.IsDirEmpty("/nonexistent/path/xyzzy"), "IsDirEmpty should return true when directory cannot be read") + }) +} + +// TestSpec_PublicAPI_CopyFile validates the documented behavior: +// copies src to dst using buffered I/O, creates dst if not exist, truncates if exists. 
+func TestSpec_PublicAPI_CopyFile(t *testing.T) { + dir := t.TempDir() + src := filepath.Join(dir, "source.txt") + dst := filepath.Join(dir, "destination.txt") + content := []byte("hello specification test") + + require.NoError(t, os.WriteFile(src, content, 0600)) + + t.Run("copies file content to new destination", func(t *testing.T) { + err := fileutil.CopyFile(src, dst) + require.NoError(t, err, "CopyFile should not error for valid src/dst") + got, err := os.ReadFile(dst) + require.NoError(t, err) + assert.Equal(t, content, got, "destination should have same content as source") + }) + + t.Run("truncates existing destination", func(t *testing.T) { + require.NoError(t, os.WriteFile(dst, []byte("old content that is longer"), 0600)) + err := fileutil.CopyFile(src, dst) + require.NoError(t, err, "CopyFile should not error when destination exists") + got, err := os.ReadFile(dst) + require.NoError(t, err) + assert.Equal(t, content, got, "destination should be truncated and overwritten with source content") + }) +} + +// makeTar builds an in-memory tar archive with named entries. +func makeTar(entries map[string][]byte) []byte { + var buf bytes.Buffer + tw := tar.NewWriter(&buf) + for name, data := range entries { + hdr := &tar.Header{ + Name: name, + Mode: 0600, + Size: int64(len(data)), + } + _ = tw.WriteHeader(hdr) + _, _ = tw.Write(data) + } + _ = tw.Close() + return buf.Bytes() +} + +// TestSpec_PublicAPI_ExtractFileFromTar validates extraction and security guarantees. +// Spec: extracts single file by path, skips entries with unsafe names. 
+func TestSpec_PublicAPI_ExtractFileFromTar(t *testing.T) { + t.Run("extracts file at specified path from tar", func(t *testing.T) { + want := []byte("binary content") + data := makeTar(map[string][]byte{ + "bin/gh": want, + "other": []byte("other content"), + }) + got, err := fileutil.ExtractFileFromTar(data, "bin/gh") + require.NoError(t, err, "ExtractFileFromTar should not error for present file") + assert.Equal(t, want, got, "extracted content should match file in archive") + }) + + t.Run("returns error when path is not present in tar", func(t *testing.T) { + data := makeTar(map[string][]byte{"other": []byte("data")}) + _, err := fileutil.ExtractFileFromTar(data, "bin/gh") + assert.Error(t, err, "should error when requested path is not in archive") + }) + + t.Run("rejects caller-supplied absolute path", func(t *testing.T) { + data := makeTar(map[string][]byte{"bin/gh": []byte("x")}) + _, err := fileutil.ExtractFileFromTar(data, "/bin/gh") + assert.Error(t, err, "absolute caller path should be rejected") + }) + + t.Run("rejects caller-supplied path with .. traversal", func(t *testing.T) { + data := makeTar(map[string][]byte{"bin/gh": []byte("x")}) + _, err := fileutil.ExtractFileFromTar(data, "../bin/gh") + assert.Error(t, err, "path traversal in caller path should be rejected") + }) + + t.Run("skips tar entries with unsafe names", func(t *testing.T) { + // SPEC: "Individual tar entries with unsafe names are skipped, not extracted" + // The unsafe entry should not be returned even when requested. + var buf bytes.Buffer + tw := tar.NewWriter(&buf) + _ = tw.WriteHeader(&tar.Header{Name: "../etc/passwd", Mode: 0600, Size: 4}) + _, _ = tw.Write([]byte("root")) + _ = tw.Close() + + _, err := fileutil.ExtractFileFromTar(buf.Bytes(), "../etc/passwd") + assert.Error(t, err, "tar entry with unsafe name should be skipped/not found") + }) +}