diff --git a/allowedsymbols/symbols_interp.go b/allowedsymbols/symbols_interp.go index b688a9a0..41cda17f 100644 --- a/allowedsymbols/symbols_interp.go +++ b/allowedsymbols/symbols_interp.go @@ -60,6 +60,7 @@ var interpAllowedSymbols = []string{ "sync.Mutex", // 🟒 mutual exclusion lock; concurrency primitive, no I/O. "sync.Once", // 🟒 ensures a function runs exactly once; concurrency primitive, no I/O. "sync.WaitGroup", // 🟒 waits for goroutines to finish; concurrency primitive, no I/O. + "sync/atomic.Int64", // 🟒 atomic int64 counter; concurrency primitive, no I/O. "time.Duration", // 🟒 numeric duration type; pure type, no side effects. "time.Now", // 🟠 returns current time; read-only, no mutation. "time.Time", // 🟒 time value type; pure data, no side effects. diff --git a/interp/api.go b/interp/api.go index 3a30d3db..5acccc25 100644 --- a/interp/api.go +++ b/interp/api.go @@ -19,6 +19,7 @@ import ( "io" "os" "strings" + "sync/atomic" "time" "mvdan.cc/sh/v3/expand" @@ -131,6 +132,12 @@ type runnerState struct { // startTime is captured once at the beginning of Run() and passed to // all builtin invocations so they share a consistent time reference. startTime time.Time + + // globReadDirCount tracks the total number of ReadDirForGlob calls + // across the entire Run() invocation. It is shared with subshells + // (including concurrent pipe subshells) via pointer, and must be + // accessed atomically. + globReadDirCount *atomic.Int64 } // A Runner interprets shell programs. It can be reused, but it is not safe for @@ -409,6 +416,7 @@ func (r *Runner) Run(ctx context.Context, node syntax.Node) (retErr error) { } } r.startTime = time.Now() + r.globReadDirCount = &atomic.Int64{} r.fillExpandConfig(ctx) if err := validateNode(node); err != nil { fmt.Fprintln(r.stderr, err) @@ -536,15 +544,16 @@ func (r *Runner) subshell(background bool) *Runner { r2 := &Runner{ runnerConfig: r.runnerConfig, runnerState: runnerState{ - Dir: r.Dir, - Params: r.Params, - stdin: r.stdin, - stdout: r.stdout, - stderr: r.stderr, - filename: r.filename, - exit: r.exit, - lastExit: r.lastExit, - startTime: r.startTime, + Dir: r.Dir, + Params: r.Params, + stdin: r.stdin, + stdout: r.stdout, + stderr: r.stderr, + filename: r.filename, + exit: r.exit, + lastExit: r.lastExit, + startTime: r.startTime, + globReadDirCount: r.globReadDirCount, }, } r2.writeEnv = newOverlayEnviron(r.writeEnv, background) diff --git a/interp/glob_bench_test.go b/interp/glob_bench_test.go index 2bdeac5c..07b36d03 100644 --- a/interp/glob_bench_test.go +++ b/interp/glob_bench_test.go @@ -8,10 +8,13 @@ package interp_test import ( + "context" "fmt" "os" "path/filepath" + "strings" "testing" + "time" "github.com/DataDog/rshell/builtins/testutil" "github.com/DataDog/rshell/interp" @@ -239,3 +242,251 @@ func TestGlobNestedMemoryBounded(t *testing.T) { t.Errorf("glob echo */* allocated %d bytes/op on 20Γ—50 nested dir; want < %d", bpo, maxBytesPerOp) } } + +// --------------------------------------------------------------------------- +// Safety edge-case tests: pathological glob patterns that could cause +// crashes, excessive memory usage, or exponential blowup. +// --------------------------------------------------------------------------- + +// TestGlobManyConsecutiveStars verifies that a pattern with many consecutive +// stars (e.g. "echo ****...****") does not cause exponential blowup. +// In a naΓ―ve implementation, N consecutive stars could cause O(2^N) matching. +func TestGlobManyConsecutiveStars(t *testing.T) { + dir := createGlobDir(t, 100) + + // 50 consecutive stars β€” should collapse to a single star internally. + pattern := "echo " + strings.Repeat("*", 50) + result := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + runGlob(b, pattern, dir) + } + }) + + const maxBytesPerOp = 10 << 20 // 10 MB ceiling + if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp { + t.Errorf("glob with 50 consecutive stars allocated %d bytes/op; want < %d", bpo, maxBytesPerOp) + } +} + +// TestGlobManyStarSegments verifies that a pattern like "a*b*c*d*...*z" with +// many star-separated single-char segments doesn't cause exponential +// backtracking when matching against filenames. +func TestGlobManyStarSegments(t *testing.T) { + dir := t.TempDir() + // Create a file that forces maximum backtracking: a long name with + // repeated characters that partially match each segment. + longName := strings.Repeat("a", 200) + ".txt" + f, err := os.Create(filepath.Join(dir, longName)) + if err != nil { + t.Fatal(err) + } + f.Close() + + // Pattern: a*a*a*a*...*a*a*b (20 star-separated 'a' segments ending with 'b'). + // The file has no 'b', so every segment match attempt must backtrack. + segments := make([]string, 21) + for i := 0; i < 20; i++ { + segments[i] = "a" + } + segments[20] = "b" + pattern := "echo " + strings.Join(segments, "*") + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + stderr, exitCode := testutil.RunScriptDiscardCtx(ctx, t, pattern, dir, interp.AllowedPaths([]string{dir})) + if ctx.Err() != nil { + t.Fatal("glob pattern with many star segments timed out (possible exponential backtracking)") + } + _ = stderr + _ = exitCode +} + +// TestGlobHugeNumberOfStarArgs verifies that many independent star arguments +// in a single command don't cause excessive resource consumption. +func TestGlobHugeNumberOfStarArgs(t *testing.T) { + dir := createGlobDir(t, 50) + + // 100 separate "*" arguments β€” each expands the full directory listing. + args := make([]string, 100) + for i := range args { + args[i] = "*" + } + script := "echo " + strings.Join(args, " ") + + result := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + runGlob(b, script, dir) + } + }) + + const maxBytesPerOp = 50 << 20 // 50 MB ceiling + if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp { + t.Errorf("glob with 100 star args allocated %d bytes/op; want < %d", bpo, maxBytesPerOp) + } +} + +// TestGlobDeepNestedStarSlash verifies that deeply nested "*/" patterns +// (e.g. "*/*/*/*/*") don't cause resource exhaustion. +func TestGlobDeepNestedStarSlash(t *testing.T) { + // Create a 5-level deep tree: 3 dirs at each level = 3^5 = 243 leaf dirs. + dir := t.TempDir() + createNestedTree(t, dir, 5, 3) + + pattern := "echo */*/*/*/*" + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + stderr, exitCode := testutil.RunScriptDiscardCtx(ctx, t, pattern, dir, interp.AllowedPaths([]string{dir})) + if ctx.Err() != nil { + t.Fatal("deep nested glob timed out") + } + _ = stderr + _ = exitCode +} + +// TestGlobBacktrackingWorstCase constructs the worst-case scenario for glob +// backtracking: a pattern like "*a*a*a*a*a*b" against a file named "aaa...aaa" +// (all a's, no b). A naΓ―ve backtracking matcher would try O(n^k) combinations +// where n is the filename length and k is the number of star segments. +func TestGlobBacktrackingWorstCase(t *testing.T) { + dir := t.TempDir() + + // File with 100 'a' characters β€” no 'b' anywhere. + name := strings.Repeat("a", 100) + f, err := os.Create(filepath.Join(dir, name)) + if err != nil { + t.Fatal(err) + } + f.Close() + + // Pattern with 15 star-separated 'a' segments ending in 'b'. + // This is the classic ReDoS-like pattern for glob matchers. + segments := make([]string, 16) + for i := 0; i < 15; i++ { + segments[i] = "a" + } + segments[15] = "b" + pattern := "echo " + strings.Join(segments, "*") + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + stderr, exitCode := testutil.RunScriptDiscardCtx(ctx, t, pattern, dir, interp.AllowedPaths([]string{dir})) + if ctx.Err() != nil { + t.Fatal("glob backtracking worst case timed out β€” possible exponential complexity") + } + _ = stderr + _ = exitCode +} + +// TestGlobManyConsecutiveStarsMemoryBounded checks that 200 consecutive stars +// don't cause memory blowup. +func TestGlobManyConsecutiveStarsMemoryBounded(t *testing.T) { + dir := createGlobDir(t, 10) + + pattern := "echo " + strings.Repeat("*", 200) + ".txt" + result := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + runGlob(b, pattern, dir) + } + }) + + const maxBytesPerOp = 10 << 20 // 10 MB ceiling + if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp { + t.Errorf("glob with 200 consecutive stars allocated %d bytes/op; want < %d", bpo, maxBytesPerOp) + } +} + +// TestGlobReadDirLimitEnforced verifies that a script triggering more than +// MaxGlobReadDirCalls directory reads is stopped with an error. +func TestGlobReadDirLimitEnforced(t *testing.T) { + dir := createGlobDir(t, 5) + + // Generate a script with 10,001 glob words β€” each "*" triggers one + // ReadDirForGlob call, exceeding MaxGlobReadDirCalls (10,000). + args := make([]string, interp.MaxGlobReadDirCalls+1) + for i := range args { + args[i] = "*" + } + script := "echo " + strings.Join(args, " ") + + stdout, stderr, exitCode := testutil.RunScript(t, script, dir, interp.AllowedPaths([]string{dir})) + _ = stdout + + if exitCode == 0 { + t.Fatal("expected non-zero exit code when glob ReadDir limit is exceeded") + } + if !strings.Contains(stderr, "exceeded maximum number of directory reads") { + t.Errorf("expected glob limit error in stderr, got: %s", stderr) + } +} + +// TestGlobReadDirLimitNotTriggeredBelowCap verifies that scripts just under +// the limit work fine. +func TestGlobReadDirLimitNotTriggeredBelowCap(t *testing.T) { + dir := createGlobDir(t, 5) + + // Exactly MaxGlobReadDirCalls glob words β€” should succeed. + args := make([]string, interp.MaxGlobReadDirCalls) + for i := range args { + args[i] = "*" + } + script := "echo " + strings.Join(args, " ") + + _, stderr, exitCode := testutil.RunScript(t, script, dir, interp.AllowedPaths([]string{dir})) + + if exitCode != 0 { + t.Errorf("expected exit code 0 at exactly MaxGlobReadDirCalls, got %d; stderr: %s", exitCode, stderr) + } +} + +// TestGlobReadDirLimitSharedAcrossSubshells verifies that the counter is +// shared between the parent shell and subshells (e.g. pipes). +func TestGlobReadDirLimitSharedAcrossSubshells(t *testing.T) { + dir := createGlobDir(t, 5) + + // Use a for-in loop that expands globs many times. + // Each "echo *" triggers 1 ReadDir call. We need >10K iterations. + // Generate a sequence as for-in values and glob inside the body. + vals := make([]string, interp.MaxGlobReadDirCalls+1) + for i := range vals { + vals[i] = "x" + } + script := "for i in " + strings.Join(vals, " ") + "; do echo *; done" + + _, stderr, exitCode := testutil.RunScript(t, script, dir, interp.AllowedPaths([]string{dir})) + + if exitCode == 0 { + t.Fatal("expected non-zero exit code when glob ReadDir limit is exceeded via loop") + } + if !strings.Contains(stderr, "exceeded maximum number of directory reads") { + t.Errorf("expected glob limit error in stderr, got: %s", stderr) + } +} + +// createNestedTree creates a directory tree with the given depth and branching +// factor. At each level, it creates 'branching' subdirectories. At the leaf +// level, it creates a single file. +func createNestedTree(tb testing.TB, dir string, depth, branching int) { + tb.Helper() + if depth == 0 { + f, err := os.Create(filepath.Join(dir, "leaf.txt")) + if err != nil { + tb.Fatal(err) + } + f.Close() + return + } + for i := range branching { + sub := filepath.Join(dir, fmt.Sprintf("d%d", i)) + if err := os.Mkdir(sub, 0755); err != nil { + tb.Fatal(err) + } + createNestedTree(tb, sub, depth-1, branching) + } +} diff --git a/interp/glob_readdir_limit_test.go b/interp/glob_readdir_limit_test.go new file mode 100644 index 00000000..b6b4f7cf --- /dev/null +++ b/interp/glob_readdir_limit_test.go @@ -0,0 +1,136 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package interp + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestGlobReadDirCountInitializedByRun verifies that Run() initializes the +// globReadDirCount counter (it must be non-nil for the limit check to work). +func TestGlobReadDirCountInitializedByRun(t *testing.T) { + r, err := New(allowAllCommandsOpt()) + require.NoError(t, err) + t.Cleanup(func() { _ = r.Close() }) + + assert.Nil(t, r.globReadDirCount, "counter should be nil before Run") + + err = r.Run(context.Background(), parseScript(t, "true")) + require.NoError(t, err) + + assert.NotNil(t, r.globReadDirCount, "counter should be initialized after Run") +} + +// TestGlobReadDirCountResetBetweenRuns verifies that each Run() call creates +// a fresh counter, so a script that used many ReadDir calls in the first run +// does not affect the budget of the second run. +func TestGlobReadDirCountResetBetweenRuns(t *testing.T) { + dir := t.TempDir() + for i := range 3 { + f, err := os.Create(filepath.Join(dir, "f"+string(rune('a'+i)))) + require.NoError(t, err) + f.Close() + } + + r, err := New(allowAllCommandsOpt(), AllowedPaths([]string{dir})) + require.NoError(t, err) + t.Cleanup(func() { _ = r.Close() }) + r.Dir = dir + + // First run: consume some ReadDir budget. + prog := parseScript(t, "echo * * * * *") + err = r.Run(context.Background(), prog) + require.NoError(t, err) + firstCount := r.globReadDirCount.Load() + assert.Greater(t, firstCount, int64(0), "first run should have consumed ReadDir calls") + + // Second run: counter should be fresh (not accumulated). + err = r.Run(context.Background(), prog) + require.NoError(t, err) + secondCount := r.globReadDirCount.Load() + assert.Equal(t, firstCount, secondCount, + "second run should have the same count as first (fresh counter, same script)") +} + +// TestGlobReadDirCountSharedWithSubshell verifies that subshell() shares the +// same atomic counter pointer with the parent, so the limit is enforced +// across the entire Run() tree. +func TestGlobReadDirCountSharedWithSubshell(t *testing.T) { + r, err := New(allowAllCommandsOpt()) + require.NoError(t, err) + t.Cleanup(func() { _ = r.Close() }) + + err = r.Run(context.Background(), parseScript(t, "true")) + require.NoError(t, err) + require.NotNil(t, r.globReadDirCount) + + sub := r.subshell(false) + assert.Same(t, r.globReadDirCount, sub.globReadDirCount, + "subshell must share the parent's globReadDirCount pointer") +} + +// TestGlobReadDirCountSharedWithBackgroundSubshell verifies that background +// subshells (used for pipes) also share the same counter. +func TestGlobReadDirCountSharedWithBackgroundSubshell(t *testing.T) { + r, err := New(allowAllCommandsOpt()) + require.NoError(t, err) + t.Cleanup(func() { _ = r.Close() }) + + err = r.Run(context.Background(), parseScript(t, "true")) + require.NoError(t, err) + require.NotNil(t, r.globReadDirCount) + + sub := r.subshell(true) + assert.Same(t, r.globReadDirCount, sub.globReadDirCount, + "background subshell must share the parent's globReadDirCount pointer") +} + +// TestGlobReadDirCountNotIncrementedForQuotedStrings verifies that quoted +// strings (which don't trigger glob expansion) do not increment the counter. +func TestGlobReadDirCountNotIncrementedForQuotedStrings(t *testing.T) { + dir := t.TempDir() + f, err := os.Create(filepath.Join(dir, "file.txt")) + require.NoError(t, err) + f.Close() + + r, err := New(allowAllCommandsOpt(), AllowedPaths([]string{dir})) + require.NoError(t, err) + t.Cleanup(func() { _ = r.Close() }) + r.Dir = dir + + // Quoted "*" should not trigger ReadDir. + err = r.Run(context.Background(), parseScript(t, `echo "*" '*' "hello"`)) + require.NoError(t, err) + + assert.Equal(t, int64(0), r.globReadDirCount.Load(), + "quoted strings should not trigger ReadDir calls") +} + +// TestGlobReadDirCountIncrementsForUnquotedGlob verifies that unquoted globs +// do increment the counter. +func TestGlobReadDirCountIncrementsForUnquotedGlob(t *testing.T) { + dir := t.TempDir() + f, err := os.Create(filepath.Join(dir, "file.txt")) + require.NoError(t, err) + f.Close() + + r, err := New(allowAllCommandsOpt(), AllowedPaths([]string{dir})) + require.NoError(t, err) + t.Cleanup(func() { _ = r.Close() }) + r.Dir = dir + + err = r.Run(context.Background(), parseScript(t, "echo * *.txt")) + require.NoError(t, err) + + assert.Equal(t, int64(2), r.globReadDirCount.Load(), + "two unquoted glob patterns should trigger 2 ReadDir calls") +} diff --git a/interp/runner_expand.go b/interp/runner_expand.go index e637ef34..43ed8787 100644 --- a/interp/runner_expand.go +++ b/interp/runner_expand.go @@ -30,6 +30,11 @@ func (r *Runner) fillExpandConfig(ctx context.Context) { func (r *Runner) updateExpandOpts() { r.ecfg.ReadDir2 = func(s string) ([]fs.DirEntry, error) { + if r.globReadDirCount != nil { + if r.globReadDirCount.Add(1) > MaxGlobReadDirCalls { + return nil, fmt.Errorf("glob expansion exceeded maximum number of directory reads (%d)", MaxGlobReadDirCalls) + } + } ctx := r.handlerCtx(r.ectx, todoPos) if r.readDirHandler != nil { return r.readDirHandler(ctx, s) @@ -44,6 +49,12 @@ func (r *Runner) updateExpandOpts() { // commands that produce unbounded output. const maxCmdSubstOutput = 1 << 20 // 1 MiB +// MaxGlobReadDirCalls is the maximum number of ReadDirForGlob invocations +// allowed per Run() call. This prevents memory exhaustion from scripts that +// trigger an excessive number of glob expansions (e.g. millions of unquoted +// * tokens, or deeply nested glob patterns in loops). +const MaxGlobReadDirCalls = 10_000 + // cmdSubst handles command substitution ($(...) and `...`). // It runs the commands in a subshell and writes their stdout to w. func (r *Runner) cmdSubst(w io.Writer, cs *syntax.CmdSubst) error { diff --git a/tests/scenarios/shell/globbing/star/many_repeated_star_args.yaml b/tests/scenarios/shell/globbing/star/many_repeated_star_args.yaml new file mode 100644 index 00000000..ac014a46 --- /dev/null +++ b/tests/scenarios/shell/globbing/star/many_repeated_star_args.yaml @@ -0,0 +1,16 @@ +description: Many repeated star arguments each expand independently. +setup: + files: + - path: x + content: "" + - path: y + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + echo * * * * * * * * * * +expect: + stdout: |+ + x y x y x y x y x y x y x y x y x y x y + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/shell/globbing/star/many_star_segments.yaml b/tests/scenarios/shell/globbing/star/many_star_segments.yaml new file mode 100644 index 00000000..62f68190 --- /dev/null +++ b/tests/scenarios/shell/globbing/star/many_star_segments.yaml @@ -0,0 +1,22 @@ +description: Pattern with many star-separated segments matches files with all required substrings. +setup: + files: + - path: a-b-c-d-e-f-g.txt + content: "" + - path: a1b2c3d4e5f6g.txt + content: "" + - path: abcdefg.txt + content: "" + - path: a-b-c.txt + content: "" + - path: unrelated.txt + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + echo a*b*c*d*e*f*g* +expect: + stdout: |+ + a-b-c-d-e-f-g.txt a1b2c3d4e5f6g.txt abcdefg.txt + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/shell/globbing/star/many_stars_alone.yaml b/tests/scenarios/shell/globbing/star/many_stars_alone.yaml new file mode 100644 index 00000000..8bbf1428 --- /dev/null +++ b/tests/scenarios/shell/globbing/star/many_stars_alone.yaml @@ -0,0 +1,16 @@ +description: Many consecutive stars without other characters behave like a single star. +setup: + files: + - path: foo + content: "" + - path: bar + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + echo ********** +expect: + stdout: |+ + bar foo + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/shell/globbing/star/many_stars_in_pattern.yaml b/tests/scenarios/shell/globbing/star/many_stars_in_pattern.yaml new file mode 100644 index 00000000..9d81c6d3 --- /dev/null +++ b/tests/scenarios/shell/globbing/star/many_stars_in_pattern.yaml @@ -0,0 +1,16 @@ +description: Pattern with many consecutive stars behaves like a single star. +setup: + files: + - path: a.txt + content: "" + - path: b.txt + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + echo *****.txt +expect: + stdout: |+ + a.txt b.txt + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/shell/globbing/star/stars_interspersed_with_text.yaml b/tests/scenarios/shell/globbing/star/stars_interspersed_with_text.yaml new file mode 100644 index 00000000..7887d74b --- /dev/null +++ b/tests/scenarios/shell/globbing/star/stars_interspersed_with_text.yaml @@ -0,0 +1,20 @@ +description: Stars interspersed with literal text match correctly. +setup: + files: + - path: abc.txt + content: "" + - path: axbxc.txt + content: "" + - path: aXXbXXc.txt + content: "" + - path: other.txt + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + echo a*b*c* +expect: + stdout: |+ + aXXbXXc.txt abc.txt axbxc.txt + stderr: "" + exit_code: 0