Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions allowedsymbols/symbols_interp.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ var interpAllowedSymbols = []string{
"sync.Mutex", // 🟢 mutual exclusion lock; concurrency primitive, no I/O.
"sync.Once", // 🟢 ensures a function runs exactly once; concurrency primitive, no I/O.
"sync.WaitGroup", // 🟢 waits for goroutines to finish; concurrency primitive, no I/O.
"sync/atomic.Int64", // 🟢 atomic int64 counter; concurrency primitive, no I/O.
"time.Duration", // 🟢 numeric duration type; pure type, no side effects.
"time.Now", // 🟠 returns current time; read-only, no mutation.
"time.Time", // 🟢 time value type; pure data, no side effects.
Expand Down
27 changes: 18 additions & 9 deletions interp/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"io"
"os"
"strings"
"sync/atomic"
"time"

"mvdan.cc/sh/v3/expand"
Expand Down Expand Up @@ -131,6 +132,12 @@ type runnerState struct {
// startTime is captured once at the beginning of Run() and passed to
// all builtin invocations so they share a consistent time reference.
startTime time.Time

// globReadDirCount tracks the total number of ReadDirForGlob calls
// across the entire Run() invocation. It is shared with subshells
// (including concurrent pipe subshells) via pointer, and must be
// accessed atomically.
globReadDirCount *atomic.Int64
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't actually matter, but surprised it didn't use a uint for this

Copy link
Copy Markdown
Member Author

@AlexandreYang AlexandreYang Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, not sure why, maybe the LLM thinks it doesn't matter too :p

}

// A Runner interprets shell programs. It can be reused, but it is not safe for
Expand Down Expand Up @@ -409,6 +416,7 @@ func (r *Runner) Run(ctx context.Context, node syntax.Node) (retErr error) {
}
}
r.startTime = time.Now()
r.globReadDirCount = &atomic.Int64{}
r.fillExpandConfig(ctx)
if err := validateNode(node); err != nil {
fmt.Fprintln(r.stderr, err)
Expand Down Expand Up @@ -536,15 +544,16 @@ func (r *Runner) subshell(background bool) *Runner {
r2 := &Runner{
runnerConfig: r.runnerConfig,
runnerState: runnerState{
Dir: r.Dir,
Params: r.Params,
stdin: r.stdin,
stdout: r.stdout,
stderr: r.stderr,
filename: r.filename,
exit: r.exit,
lastExit: r.lastExit,
startTime: r.startTime,
Dir: r.Dir,
Params: r.Params,
stdin: r.stdin,
stdout: r.stdout,
stderr: r.stderr,
filename: r.filename,
exit: r.exit,
lastExit: r.lastExit,
startTime: r.startTime,
globReadDirCount: r.globReadDirCount,
},
}
r2.writeEnv = newOverlayEnviron(r.writeEnv, background)
Expand Down
251 changes: 251 additions & 0 deletions interp/glob_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@
package interp_test

import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"testing"
"time"

"github.com/DataDog/rshell/builtins/testutil"
"github.com/DataDog/rshell/interp"
Expand Down Expand Up @@ -239,3 +242,251 @@ func TestGlobNestedMemoryBounded(t *testing.T) {
t.Errorf("glob echo */* allocated %d bytes/op on 20×50 nested dir; want < %d", bpo, maxBytesPerOp)
}
}

// ---------------------------------------------------------------------------
// Safety edge-case tests: pathological glob patterns that could cause
// crashes, excessive memory usage, or exponential blowup.
// ---------------------------------------------------------------------------

// TestGlobManyConsecutiveStars verifies that a pattern with many consecutive
// stars (e.g. "echo ****...****") does not cause exponential blowup.
// In a naïve implementation, N consecutive stars could cause O(2^N) matching.
func TestGlobManyConsecutiveStars(t *testing.T) {
	dir := createGlobDir(t, 100)

	// 50 consecutive stars — a sane matcher collapses a run of '*' into one.
	script := "echo " + strings.Repeat("*", 50)

	bench := testing.Benchmark(func(b *testing.B) {
		b.ReportAllocs()
		for b.Loop() {
			runGlob(b, script, dir)
		}
	})

	// Allocation ceiling: 10 MB per operation.
	const ceiling = 10 << 20
	if got := bench.AllocedBytesPerOp(); got > ceiling {
		t.Errorf("glob with 50 consecutive stars allocated %d bytes/op; want < %d", got, ceiling)
	}
}

// TestGlobManyStarSegments verifies that a pattern like "a*b*c*d*...*z" with
// many star-separated single-char segments doesn't cause exponential
// backtracking when matching against filenames.
func TestGlobManyStarSegments(t *testing.T) {
	dir := t.TempDir()
	// A long all-'a' filename: every segment can match at many positions,
	// which is what forces a naïve matcher into heavy backtracking.
	name := strings.Repeat("a", 200) + ".txt"
	file, err := os.Create(filepath.Join(dir, name))
	if err != nil {
		t.Fatal(err)
	}
	file.Close()

	// Build "echo a*a*...*a*b": 20 'a' segments and a trailing 'b' that never
	// matches, so every partial match must be abandoned and retried.
	var sb strings.Builder
	sb.WriteString("echo ")
	for i := 0; i < 20; i++ {
		sb.WriteString("a*")
	}
	sb.WriteString("b")
	pattern := sb.String()

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	stderr, exitCode := testutil.RunScriptDiscardCtx(ctx, t, pattern, dir, interp.AllowedPaths([]string{dir}))
	if ctx.Err() != nil {
		t.Fatal("glob pattern with many star segments timed out (possible exponential backtracking)")
	}
	// Output and exit status are irrelevant; only run time is guarded here.
	_ = stderr
	_ = exitCode
}

// TestGlobHugeNumberOfStarArgs verifies that many independent star arguments
// in a single command don't cause excessive resource consumption.
func TestGlobHugeNumberOfStarArgs(t *testing.T) {
	dir := createGlobDir(t, 50)

	// Build "echo * * ... *" with 100 independent star words; each word
	// expands to the full directory listing.
	var sb strings.Builder
	sb.WriteString("echo")
	for i := 0; i < 100; i++ {
		sb.WriteString(" *")
	}
	script := sb.String()

	bench := testing.Benchmark(func(b *testing.B) {
		b.ReportAllocs()
		for b.Loop() {
			runGlob(b, script, dir)
		}
	})

	const ceiling = 50 << 20 // 50 MB ceiling
	if got := bench.AllocedBytesPerOp(); got > ceiling {
		t.Errorf("glob with 100 star args allocated %d bytes/op; want < %d", got, ceiling)
	}
}

// TestGlobDeepNestedStarSlash verifies that deeply nested "*/" patterns
// (e.g. "*/*/*/*/*") don't cause resource exhaustion.
func TestGlobDeepNestedStarSlash(t *testing.T) {
	// Five levels deep with branching factor 3: 3^5 = 243 leaf directories.
	root := t.TempDir()
	createNestedTree(t, root, 5, 3)

	const script = "echo */*/*/*/*"

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	stderr, exitCode := testutil.RunScriptDiscardCtx(ctx, t, script, root, interp.AllowedPaths([]string{root}))
	if ctx.Err() != nil {
		t.Fatal("deep nested glob timed out")
	}
	// Only run time matters; output and exit status are ignored.
	_ = stderr
	_ = exitCode
}

// TestGlobBacktrackingWorstCase constructs the worst-case scenario for glob
// backtracking: a pattern like "*a*a*a*a*a*b" against a file named "aaa...aaa"
// (all a's, no b). A naïve backtracking matcher would try O(n^k) combinations
// where n is the filename length and k is the number of star segments.
func TestGlobBacktrackingWorstCase(t *testing.T) {
	dir := t.TempDir()

	// The target file is 100 'a' characters — no 'b' anywhere.
	name := strings.Repeat("a", 100)
	file, err := os.Create(filepath.Join(dir, name))
	if err != nil {
		t.Fatal(err)
	}
	file.Close()

	// Build "echo a*a*...*a*b": 15 star-separated 'a' segments ending in
	// 'b' — the classic ReDoS-like input for backtracking glob matchers.
	var sb strings.Builder
	sb.WriteString("echo ")
	for i := 0; i < 15; i++ {
		sb.WriteString("a*")
	}
	sb.WriteString("b")
	pattern := sb.String()

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	stderr, exitCode := testutil.RunScriptDiscardCtx(ctx, t, pattern, dir, interp.AllowedPaths([]string{dir}))
	if ctx.Err() != nil {
		t.Fatal("glob backtracking worst case timed out — possible exponential complexity")
	}
	// Only run time matters; output and exit status are ignored.
	_ = stderr
	_ = exitCode
}

// TestGlobManyConsecutiveStarsMemoryBounded checks that 200 consecutive stars
// don't cause memory blowup.
func TestGlobManyConsecutiveStarsMemoryBounded(t *testing.T) {
	dir := createGlobDir(t, 10)

	// 200 stars followed by a literal suffix; runs of '*' should collapse.
	script := "echo " + strings.Repeat("*", 200) + ".txt"

	bench := testing.Benchmark(func(b *testing.B) {
		b.ReportAllocs()
		for b.Loop() {
			runGlob(b, script, dir)
		}
	})

	const ceiling = 10 << 20 // 10 MB ceiling
	if got := bench.AllocedBytesPerOp(); got > ceiling {
		t.Errorf("glob with 200 consecutive stars allocated %d bytes/op; want < %d", got, ceiling)
	}
}

// TestGlobReadDirLimitEnforced verifies that a script triggering more than
// MaxGlobReadDirCalls directory reads is stopped with an error.
func TestGlobReadDirLimitEnforced(t *testing.T) {
	dir := createGlobDir(t, 5)

	// One past the cap: every "*" word costs a single ReadDirForGlob call,
	// so MaxGlobReadDirCalls+1 words must trip the limit.
	var sb strings.Builder
	sb.WriteString("echo")
	for i := 0; i < interp.MaxGlobReadDirCalls+1; i++ {
		sb.WriteString(" *")
	}
	script := sb.String()

	stdout, stderr, exitCode := testutil.RunScript(t, script, dir, interp.AllowedPaths([]string{dir}))
	_ = stdout // stdout content is irrelevant to the limit check

	if exitCode == 0 {
		t.Fatal("expected non-zero exit code when glob ReadDir limit is exceeded")
	}
	if !strings.Contains(stderr, "exceeded maximum number of directory reads") {
		t.Errorf("expected glob limit error in stderr, got: %s", stderr)
	}
}

// TestGlobReadDirLimitNotTriggeredBelowCap verifies that scripts just under
// the limit work fine.
func TestGlobReadDirLimitNotTriggeredBelowCap(t *testing.T) {
	dir := createGlobDir(t, 5)

	// Exactly MaxGlobReadDirCalls star words: the count reaches the cap but
	// never exceeds it, so the script must succeed.
	var sb strings.Builder
	sb.WriteString("echo")
	for i := 0; i < interp.MaxGlobReadDirCalls; i++ {
		sb.WriteString(" *")
	}
	script := sb.String()

	_, stderr, exitCode := testutil.RunScript(t, script, dir, interp.AllowedPaths([]string{dir}))

	if exitCode != 0 {
		t.Errorf("expected exit code 0 at exactly MaxGlobReadDirCalls, got %d; stderr: %s", exitCode, stderr)
	}
}

// TestGlobReadDirLimitSharedAcrossSubshells verifies that the counter is
// shared between the parent shell and subshells (e.g. pipes).
func TestGlobReadDirLimitSharedAcrossSubshells(t *testing.T) {
	dir := createGlobDir(t, 5)

	// Run each glob expansion inside a parenthesized subshell so the
	// ReadDirForGlob calls happen in child Runners rather than the parent.
	// If the counter were copied per subshell instead of shared by pointer,
	// no single subshell would ever exceed the cap and the script would
	// (incorrectly) succeed. MaxGlobReadDirCalls+1 iterations, one glob
	// each, must trip the shared limit.
	vals := make([]string, interp.MaxGlobReadDirCalls+1)
	for i := range vals {
		vals[i] = "x"
	}
	script := "for i in " + strings.Join(vals, " ") + "; do (echo *); done"

	_, stderr, exitCode := testutil.RunScript(t, script, dir, interp.AllowedPaths([]string{dir}))

	if exitCode == 0 {
		t.Fatal("expected non-zero exit code when glob ReadDir limit is exceeded via loop")
	}
	if !strings.Contains(stderr, "exceeded maximum number of directory reads") {
		t.Errorf("expected glob limit error in stderr, got: %s", stderr)
	}
}

// createNestedTree creates a directory tree with the given depth and branching
// factor. At each level, it creates 'branching' subdirectories. At the leaf
// level, it creates a single file.
func createNestedTree(tb testing.TB, dir string, depth, branching int) {
tb.Helper()
if depth == 0 {
f, err := os.Create(filepath.Join(dir, "leaf.txt"))
if err != nil {
tb.Fatal(err)
}
f.Close()
return
}
for i := range branching {
sub := filepath.Join(dir, fmt.Sprintf("d%d", i))
if err := os.Mkdir(sub, 0755); err != nil {
tb.Fatal(err)
}
createNestedTree(tb, sub, depth-1, branching)
}
}
Loading
Loading