Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions allowedsymbols/symbols_interp.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ var interpAllowedSymbols = []string{
"strconv.Itoa", // 🟢 int-to-string conversion; pure function, no I/O.
"strings.Builder", // 🟢 efficient string concatenation; pure in-memory buffer, no I/O.
"strings.ContainsRune", // 🟢 checks if a rune is in a string; pure function, no I/O.
"strings.NewReader", // 🟢 wraps a string as an io.Reader; pure function, no I/O; used by ParseScript.
"strings.Index", // 🟢 finds substring index; pure function, no I/O.
"strings.HasPrefix", // 🟢 pure function for prefix matching; no I/O.
"strings.HasSuffix", // 🟢 pure function for suffix matching; no I/O.
Expand Down Expand Up @@ -127,6 +128,7 @@ var interpAllowedSymbols = []string{
"mvdan.cc/sh/v3/syntax.TestClause", // 🟢 AST node for [[ ]] test command; pure type.
"mvdan.cc/sh/v3/syntax.TestDecl", // 🟢 AST node for test declaration; pure type.
"mvdan.cc/sh/v3/syntax.TimeClause", // 🟢 AST node for time command; pure type.
"mvdan.cc/sh/v3/syntax.NewParser", // 🟢 creates a new shell parser; used by ParseScript to parse scripts into AST nodes.
"mvdan.cc/sh/v3/syntax.Walk", // 🟢 traverses the AST; pure function, no I/O.
"mvdan.cc/sh/v3/syntax.WhileClause", // 🟢 AST node for while/until loop; pure type.
"mvdan.cc/sh/v3/syntax.Word", // 🟢 AST node for a shell word; pure type.
Expand Down
53 changes: 53 additions & 0 deletions builtins/grep/builtin_grep_pentest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,59 @@ func TestGrepPentestQuietWithMatch(t *testing.T) {
assert.Equal(t, "", stderr)
}

// --- Context byte cap ---

// TestGrepBeforeContextByteCapEvictsOldLines verifies that MaxContextBytes is
// enforced as an aggregate byte cap on the before-context sliding window.
//
// The input is 1000 non-matching lines of exactly 1 KiB each (~1 MiB total)
// followed by one matching line. With -B 1000 an uncapped window would retain
// and print all ~1 MiB of before-context; the cap must instead evict the
// oldest lines so the printed before-context never exceeds MaxContextBytes
// (512 KiB).
func TestGrepBeforeContextByteCapEvictsOldLines(t *testing.T) {
	dir := t.TempDir()
	// 1023 payload bytes + newline = exactly 1024 bytes per line.
	line := strings.Repeat("x", 1023) + "\n"
	content := strings.Repeat(line, 1000) + "MATCH\n"
	pentestWriteFile(t, dir, "ctx.txt", content)

	stdout, _, code := grepRun(t, "grep -B 1000 MATCH ctx.txt", dir)
	require.Equal(t, 0, code)
	// Everything printed before "MATCH\n" is before-context; it must fit
	// within the aggregate cap.
	contextBytes := len(stdout) - len("MATCH\n")
	assert.LessOrEqual(t, contextBytes, grep.MaxContextBytes,
		"before-context output exceeded MaxContextBytes cap")
}

// TestGrepAfterContextByteCapTruncatesOutput verifies that MaxContextBytes is
// enforced as an aggregate byte cap on after-context output per match group.
//
// The input is one matching line followed by 1000 non-matching lines of
// exactly 1 KiB each (~1 MiB of potential after-context). With -A 1000 an
// uncapped stream would emit all of it; the cap must stop emission once
// MaxContextBytes (512 KiB) have been written for the group.
func TestGrepAfterContextByteCapTruncatesOutput(t *testing.T) {
	dir := t.TempDir()
	// 1023 payload bytes + newline = exactly 1024 bytes per line.
	line := strings.Repeat("x", 1023) + "\n"
	content := "MATCH\n" + strings.Repeat(line, 1000)
	pentestWriteFile(t, dir, "ctx.txt", content)

	stdout, _, code := grepRun(t, "grep -A 1000 MATCH ctx.txt", dir)
	require.Equal(t, 0, code)
	// Everything printed after "MATCH\n" is after-context; it must fit
	// within the aggregate cap.
	contextBytes := len(stdout) - len("MATCH\n")
	assert.LessOrEqual(t, contextBytes, grep.MaxContextBytes,
		"after-context output exceeded MaxContextBytes cap")
}

// --- GTFOBins validation ---

// TestGrepGTFOBinsFileReadSandboxEscape verifies that the GTFOBins file-read
Expand Down
26 changes: 23 additions & 3 deletions builtins/grep/grep.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,13 @@ const MaxLineBytes = 1 << 20 // 1 MiB
// MaxContextLines caps -A/-B/-C to prevent excessive memory use.
const MaxContextLines = 1_000 // 1k lines

// MaxContextBytes is the aggregate byte cap applied per match group to both
// the before-context sliding window and the after-context output stream.
// A single match group may emit at most this many bytes of context lines
// (before and after counted separately). The global executor output limit
// acts as the ceiling across all groups combined.
const MaxContextBytes = 512 * 1024 // 512 KiB

const scanBufInit = 4096 // initial scanner buffer

// containsNUL reports whether p contains a NUL byte, which is the
Expand Down Expand Up @@ -593,7 +600,9 @@ func grepFile(ctx context.Context, callCtx *builtins.CallContext, file string, o
// used, even with value 0. This controls the "--" group separator.
contextRequested := opts.afterContext > 0 || opts.beforeContext > 0 || opts.contextRequested
var beforeBuf []contextLine // ring buffer for before-context
beforeBufBytes := 0 // aggregate byte count of lines in beforeBuf
afterRemaining := 0 // lines of after-context still to print
afterGroupBytes := 0 // bytes of after-context emitted in current match group
lastPrintedLine := 0 // last line number we printed (for separator)
printedSeparator := false // have we ever printed a match group?

Expand Down Expand Up @@ -652,6 +661,9 @@ func grepFile(ctx context.Context, callCtx *builtins.CallContext, file string, o
}
}

// Reset per-group counters.
afterGroupBytes = 0

// Print the match.
if opts.onlyMatching && opts.invertMatch {
// -o -v: line was selected by inversion (doesn't contain
Expand All @@ -673,22 +685,30 @@ func grepFile(ctx context.Context, callCtx *builtins.CallContext, file string, o

// Clear before buffer since we've consumed it.
beforeBuf = beforeBuf[:0]
beforeBufBytes = 0
} else {
// Non-matching line: might be after-context or before-context.
if !isBinary && afterRemaining > 0 && !opts.quiet && !opts.count && !opts.filesWithMatches && !opts.filesWithoutMatch {
printContextLine(callCtx, displayName, lineNum, lineBytes, opts, '-')
lastPrintedLine = lineNum
if afterGroupBytes+len(lineBytes) <= MaxContextBytes {
printContextLine(callCtx, displayName, lineNum, lineBytes, opts, '-')
lastPrintedLine = lineNum
afterGroupBytes += len(lineBytes)
}
afterRemaining--
}

// Add to before-context ring buffer.
if !isBinary && opts.beforeContext > 0 {
if len(beforeBuf) >= opts.beforeContext {
// Evict oldest lines until both the line-count and aggregate
// byte limits are satisfied.
for len(beforeBuf) > 0 && (len(beforeBuf) >= opts.beforeContext || beforeBufBytes+len(lineBytes) > MaxContextBytes) {
beforeBufBytes -= len(beforeBuf[0].text)
beforeBuf = beforeBuf[1:]
}
cp := make([]byte, len(lineBytes))
copy(cp, lineBytes)
beforeBuf = append(beforeBuf, contextLine{num: lineNum, text: cp})
beforeBufBytes += len(lineBytes)
}
}
}
Expand Down
34 changes: 34 additions & 0 deletions builtins/grep/grep_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"io"
"os"
"path/filepath"
"strings"
"testing"

"github.com/DataDog/rshell/builtins/testutil"
Expand Down Expand Up @@ -101,6 +102,39 @@ func TestGrepMemoryBounded(t *testing.T) {
}
}

// TestGrepBeforeContextMemoryBounded asserts that grep -B N with large lines
// stays within the MaxContextBytes sliding-window cap. Lines are 8 KiB each;
// -B 1000 without the cap would keep 1000 × 8 KiB ≈ 8 MiB live, while the cap
// bounds the live window to MaxContextBytes (512 KiB).
//
// Note that AllocedBytesPerOp reports total (not peak live) allocations: the
// before-context path copies each line before eviction decisions, so total
// allocation scales with file size. The budget below validates that
// allocations stay O(file_size) with no additional unbounded accumulation.
func TestGrepBeforeContextMemoryBounded(t *testing.T) {
	dir := t.TempDir()
	// 8 KiB lines in a 10 MiB file ≈ 1280 lines; an uncapped -B 1000 window
	// would hold nearly the whole file live. The cap bounds it to 512 KiB.
	const lineSize = 8 * 1024
	line := strings.Repeat("x", lineSize-1) + "\n"
	createLargeFileGrep(t, dir, "input.txt", line, 10<<20)

	res := testing.Benchmark(func(b *testing.B) {
		b.ReportAllocs()
		for b.Loop() {
			testutil.RunScriptDiscard(b, "grep -B 1000 NOMATCH input.txt", dir, interp.AllowedPaths([]string{dir}))
		}
	})

	// Total allocation budget: ~10 MiB of per-line copies plus shell/runner
	// overhead, capped at 24 MiB to surface unexpected accumulation.
	const maxBytesPerOp = 24 << 20
	bpo := res.AllocedBytesPerOp()
	if bpo > maxBytesPerOp {
		t.Errorf("grep -B 1000 allocated %d bytes/op on 10MB input with 8KiB lines; want < %d", bpo, maxBytesPerOp)
	}
}

func BenchmarkGrepMatchDiscard(b *testing.B) {
dir := b.TempDir()
createLargeFileGrep(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20)
Expand Down
47 changes: 47 additions & 0 deletions builtins/sort/builtin_sort_pentest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/DataDog/rshell/builtins/sort"
"github.com/DataDog/rshell/interp"
)

Expand Down Expand Up @@ -204,6 +205,52 @@ func TestCmdPentestUnknownShortFlag(t *testing.T) {
assert.Contains(t, stderr, "sort:")
}

// --- Input size cap ---

// TestSortInputExceedsMaxTotalBytes verifies that sort rejects input larger
// than MaxTotalBytes (5 MiB) with a descriptive error, so callers know which
// limit was hit and can pre-filter their data.
//
// sort counts content bytes via sc.Text(), which strips the newline, so each
// line contributes 1000 counted bytes (1001 on disk). Using long lines keeps
// the content-to-file-size ratio high and reliably crosses the limit.
func TestSortInputExceedsMaxTotalBytes(t *testing.T) {
	dir := t.TempDir()
	// Two extra lines beyond MaxTotalBytes/1000 guarantee the cap is crossed.
	lineCount := sort.MaxTotalBytes/1000 + 2
	data := bytes.Repeat([]byte(strings.Repeat("a", 1000)+"\n"), lineCount)
	require.NoError(t, os.WriteFile(filepath.Join(dir, "big.txt"), data, 0644))

	mustNotHang(t, func() {
		_, stderr, code := sortRun(t, "sort big.txt", dir)
		assert.Equal(t, 1, code)
		assert.Contains(t, stderr, "sort:")
		assert.Contains(t, stderr, "exceeds maximum")
		assert.Contains(t, stderr, "5 MiB")
	})
}

// TestSortInputBelowMaxTotalBytes verifies that sort succeeds when input is
// just below the MaxTotalBytes cap.
func TestSortInputBelowMaxTotalBytes(t *testing.T) {
	dir := t.TempDir()
	// 1000 content bytes per line; lineCount × 1000 stays under MaxTotalBytes
	// and is far below MaxLines (1,000,000), so neither cap triggers.
	lineCount := sort.MaxTotalBytes/1000 - 2
	data := bytes.Repeat([]byte(strings.Repeat("a", 1000)+"\n"), lineCount)
	require.NoError(t, os.WriteFile(filepath.Join(dir, "ok.txt"), data, 0644))

	mustNotHang(t, func() {
		_, stderr, code := sortRun(t, "sort ok.txt", dir)
		assert.Equal(t, 0, code)
		assert.Empty(t, stderr)
	})
}

// --- Double dash ---

func TestCmdPentestFlagLikeName(t *testing.T) {
Expand Down
7 changes: 4 additions & 3 deletions builtins/sort/sort.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,9 @@ const MaxLineBytes = 1 << 20 // 1 MiB

// MaxTotalBytes is the cumulative byte cap across all input lines. This
// prevents OOM when many lines are each below MaxLineBytes but collectively
// consume excessive memory. 256 MiB is generous for agent workloads.
const MaxTotalBytes = 256 * 1024 * 1024 // 256 MiB
// consume excessive memory — especially in sort chains where N concurrent
// sort instances each hold their full input buffer simultaneously.
const MaxTotalBytes = 5 * 1024 * 1024 // 5 MiB

// registerFlags registers all sort flags and returns the bound handler.
func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc {
Expand Down Expand Up @@ -391,7 +392,7 @@ func readFile(ctx context.Context, callCtx *builtins.CallContext, file string, t
line := sc.Text()
*totalBytes += int64(len(line))
if *totalBytes > MaxTotalBytes {
return nil, errors.New("input exceeds maximum total size")
return nil, fmt.Errorf("input exceeds maximum of %d MiB; pre-filter or split the input before sorting", MaxTotalBytes/(1024*1024))
}
lines = append(lines, line)
if len(lines) > MaxLines {
Expand Down
32 changes: 28 additions & 4 deletions builtins/tests/tail/tail_fuzz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ func FuzzTailLines(f *testing.F) {
f.Add(bytes.Repeat([]byte("\n"), 1000), int64(5))

f.Fuzz(func(t *testing.T, input []byte, n int64) {
if t.Context().Err() != nil {
return
}
if len(input) > 1<<20 {
return
}
Expand All @@ -66,10 +69,13 @@ func FuzzTailLines(f *testing.F) {
t.Fatal(err)
}

ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second)
defer cancel() // safety net if t.Fatal fires before explicit cancel
stdout, _, code := cmdRunCtx(ctx, t, fmt.Sprintf("tail -n %d input.txt", n), dir)
cancel()
if t.Context().Err() != nil {
return
}
if code != 0 && code != 1 {
t.Errorf("tail -n %d unexpected exit code %d", n, code)
}
Expand Down Expand Up @@ -107,6 +113,9 @@ func FuzzTailBytes(f *testing.F) {
f.Add(bytes.Repeat([]byte("z"), 32*1024+1), int64(1))

f.Fuzz(func(t *testing.T, input []byte, n int64) {
if t.Context().Err() != nil {
return
}
if len(input) > 1<<20 {
return
}
Expand All @@ -123,10 +132,13 @@ func FuzzTailBytes(f *testing.F) {
t.Fatal(err)
}

ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second)
defer cancel() // safety net if t.Fatal fires before explicit cancel
stdout, _, code := cmdRunCtx(ctx, t, fmt.Sprintf("tail -c %d input.txt", n), dir)
cancel()
if t.Context().Err() != nil {
return
}
if code != 0 && code != 1 {
t.Errorf("tail -c %d unexpected exit code %d", n, code)
}
Expand Down Expand Up @@ -154,6 +166,9 @@ func FuzzTailStdin(f *testing.F) {
f.Add([]byte("line1\r\nline2\r\n"), int64(1))

f.Fuzz(func(t *testing.T, input []byte, n int64) {
if t.Context().Err() != nil {
return
}
if len(input) > 1<<20 {
return
}
Expand All @@ -170,10 +185,13 @@ func FuzzTailStdin(f *testing.F) {
t.Fatal(err)
}

ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second)
defer cancel() // safety net if t.Fatal fires before explicit cancel
_, _, code := cmdRunCtx(ctx, t, fmt.Sprintf("tail -n %d < stdin.txt", n), dir)
cancel()
if t.Context().Err() != nil {
return
}
if code != 0 && code != 1 {
t.Errorf("tail stdin unexpected exit code %d", code)
}
Expand Down Expand Up @@ -251,6 +269,9 @@ func FuzzTailBytesOffset(f *testing.F) {
f.Add([]byte{0x00, 0x01, 0x02, 0xff, 0xfe}, int64(2))

f.Fuzz(func(t *testing.T, input []byte, n int64) {
if t.Context().Err() != nil {
return
}
if len(input) > 1<<20 {
return
}
Expand All @@ -267,10 +288,13 @@ func FuzzTailBytesOffset(f *testing.F) {
t.Fatal(err)
}

ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second)
defer cancel() // safety net if t.Fatal fires before explicit cancel
_, _, code := cmdRunCtx(ctx, t, fmt.Sprintf("tail -c +%d input.txt", n), dir)
cancel()
if t.Context().Err() != nil {
return
}
if code != 0 && code != 1 {
t.Errorf("tail -c +%d unexpected exit code %d", n, code)
}
Expand Down
5 changes: 2 additions & 3 deletions cmd/rshell/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"github.com/DataDog/rshell/internal/interpoption"
"github.com/DataDog/rshell/interp"
"github.com/spf13/cobra"
"mvdan.cc/sh/v3/syntax"
)

const exitCodeTimeout = 124
Expand Down Expand Up @@ -204,8 +203,8 @@ type executeOpts struct {
}

func execute(ctx context.Context, script, name string, opts executeOpts, stdin io.Reader, stdout, stderr io.Writer) error {
// Parse.
prog, err := syntax.NewParser().Parse(strings.NewReader(script), name)
// Parse (also enforces the MaxScriptBytes limit).
prog, err := interp.ParseScript(script, name)
if err != nil {
// Bash returns exit code 2 for syntax/parse errors.
fmt.Fprintf(stderr, "%v\n", err)
Expand Down
Loading
Loading