Skip to content
Draft
3 changes: 3 additions & 0 deletions .github/workflows/fuzz.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ jobs:
- pkg: ./builtins/tests/testcmd/
name: testcmd
corpus_path: builtins/tests/testcmd
- pkg: ./builtins/tests/du/
name: du
corpus_path: builtins/tests/du
- pkg: ./builtins/tests/ls/
name: ls
corpus_path: builtins/tests/ls
Expand Down
1 change: 1 addition & 0 deletions SHELL_FEATURES.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Blocked features are rejected before execution with exit code 2.
- ✅ `cat [-AbeEnstTuv] [FILE]...` — concatenate files to stdout; supports line numbering, blank squeezing, and non-printing character display
- ✅ `continue` — skip to the next iteration of the innermost `for` loop
- ✅ `cut [-b LIST|-c LIST|-f LIST] [-d DELIM] [-s] [-n] [--complement] [--output-delimiter=STRING] [FILE]...` — remove sections from each line of files
- ✅ `du [-asScLP0bhkm] [-d N] [--apparent-size|--si] [FILE]...` — estimate file space usage; recursion capped at depth 256 and hardlink-dedup tracking capped at 2²⁰ entries; `--files0-from`, `--exclude-from`/`-X`, `--exclude` are rejected (data-exfiltration / file-driven control); `-B`/`--block-size`, `-t`/`--threshold`, `-x`/`--one-file-system`, `--inodes`, `--time`, `-l`/`--count-links` are not implemented
- ✅ `echo [-neE] [ARG]...` — write arguments to stdout; `-n` suppresses trailing newline, `-e` enables backslash escapes, `-E` disables them (default)
- ✅ `exit [N]` — exit the shell with status N (default 0)
- ✅ `false` — return exit code 1
Expand Down
14 changes: 14 additions & 0 deletions analysis/symbols_builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,19 @@ var builtinPerCommandSymbols = map[string][]string{
"false": {
"context.Context", // 🟢 deadline/cancellation plumbing; pure interface, no side effects.
},
"du": {
"context.Context", // 🟢 deadline/cancellation plumbing; pure interface, no side effects.
"errors.Is", // 🟢 error comparison; pure function, no I/O.
"errors.New", // 🟢 creates a simple error value; pure function, no I/O.
"fmt.Sprintf", // 🟢 string formatting; pure function, no I/O.
"io.EOF", // 🟢 sentinel error value; pure constant.
"io/fs.FileInfo", // 🟢 interface type for file information; no side effects.
"math.Ceil", // 🟢 pure arithmetic; rounds float up to nearest integer.
"math.MaxInt64", // 🟢 integer constant; used for overflow clamping.
"strconv.FormatBool", // 🟢 bool→string conversion for pflag.Value; pure function.
"strconv.ParseBool", // 🟢 string→bool conversion for pflag.Value; pure function.
"syscall.Stat_t", // 🟢 Unix file stat struct for extracting Blocks/Nlink; read-only type, no I/O.
},
"find": {
"context.Context", // 🟢 deadline/cancellation plumbing; pure interface, no side effects.
"errors.As", // 🟢 error type assertion; pure function, no I/O.
Expand Down Expand Up @@ -484,6 +497,7 @@ var builtinAllowedSymbols = []string{
"slices.SortStableFunc", // 🟢 stable sort with a comparison function; pure function, no I/O.
"strconv.Atoi", // 🟢 string-to-int conversion; pure function, no I/O.
"strconv.ErrRange", // 🟢 sentinel error value for overflow; pure constant.
"strconv.FormatBool", // 🟢 bool-to-string conversion; pure function, no I/O.
"strconv.FormatInt", // 🟢 int-to-string conversion; pure function, no I/O.
"strconv.FormatUint", // 🟢 uint-to-string conversion; pure function, no I/O.
"strconv.IntSize", // 🟢 platform int size constant (32 or 64); pure constant, no I/O.
Expand Down
286 changes: 286 additions & 0 deletions builtins/du/builtin_du_pentest_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2026-present Datadog, Inc.

package du_test

import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// --- Integer edge cases ---

// TestDuPentestMaxDepthZero runs du with -d 0 on a directory holding a single
// file and expects exit code 0 with exactly one line of output.
func TestDuPentestMaxDepthZero(t *testing.T) {
	root := t.TempDir()
	target := filepath.Join(root, "f")
	require.NoError(t, os.WriteFile(target, []byte("x"), 0o644))

	out, _, exitCode := cmdRun(t, "du -d 0 -b .", root)
	assert.Equal(t, 0, exitCode)

	// At depth 0 only the operand itself should be listed.
	lineCount := strings.Count(out, "\n")
	assert.Equal(t, 1, lineCount)
}

// TestDuPentestMaxDepthHuge passes 2147483647 (MaxInt32) as the -d value and
// expects du to accept it and exit 0.
func TestDuPentestMaxDepthHuge(t *testing.T) {
	root := t.TempDir()
	target := filepath.Join(root, "f")
	require.NoError(t, os.WriteFile(target, []byte("x"), 0o644))

	// The bound is far deeper than the one-file tree, so the run should look
	// the same as an unlimited one.
	_, _, exitCode := cmdRun(t, "du -d 2147483647 -a -b .", root)
	assert.Equal(t, 0, exitCode)
}

// TestDuPentestMaxDepthOverflow passes 9223372036854775808 (MaxInt64+1) as the
// -d value. The number does not fit in a 64-bit signed integer, so du is
// expected to fail with exit code 1 and a "du:"-prefixed diagnostic.
func TestDuPentestMaxDepthOverflow(t *testing.T) {
	root := t.TempDir()
	const script = "du -d 9223372036854775808 ."

	_, errOut, exitCode := cmdRun(t, script, root)
	assert.Equal(t, 1, exitCode)
	assert.Contains(t, errOut, "du:")
}

// TestDuPentestMaxDepthLargeNegative passes a large negative -d value and
// expects du to fail with exit code 1 and a "du:"-prefixed diagnostic.
func TestDuPentestMaxDepthLargeNegative(t *testing.T) {
	root := t.TempDir()
	const script = "du -d -9999999999 ."

	_, errOut, exitCode := cmdRun(t, script, root)
	assert.Equal(t, 1, exitCode)
	assert.Contains(t, errOut, "du:")
}

// --- Long paths ---

func TestDuPentestLongPathName(t *testing.T) {
dir := t.TempDir()
// Build a 200-char-deep path. POSIX path length limit is 1024+; this is
// well under the cap but exercises path joining at scale.
deep := dir
for range 80 {
next := filepath.Join(deep, "x")
if err := os.Mkdir(next, 0o755); err != nil {
t.Fatalf("mkdir %s: %v", next, err)
}
deep = next
}
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
_, _, code := cmdRunCtx(ctx, t, "du -s -b .", dir)
assert.Equal(t, 0, code)
}

func TestDuPentestExceedsRecursionLimit(t *testing.T) {
dir := t.TempDir()
// 270 levels deep — exceeds maxRecursionDepth (256). Small enough to
// stay snappy under -race + parallel CI load.
deep := dir
for range 270 {
next := filepath.Join(deep, "x")
require.NoError(t, os.Mkdir(next, 0o755))
deep = next
}
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
_, stderr, code := cmdRunCtx(ctx, t, "du .", dir)
assert.Equal(t, 1, code)
assert.Contains(t, stderr, "recursion depth limit")
}

// --- Wide directories ---

func TestDuPentestWideDirectoryNoFDLeak(t *testing.T) {
dir := t.TempDir()
for i := range 1000 {
require.NoError(t, os.WriteFile(filepath.Join(dir, fmt.Sprintf("f%04d", i)), []byte("x"), 0o644))
}
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
stdout, _, code := cmdRunCtx(ctx, t, "du -s -b .", dir)
assert.Equal(t, 0, code)
// 1000 files × 1 byte each = 1000 bytes apparent total.
// Our dir-entry size is in there too via blocks but in bytes mode we
// want apparent size. With -b, files are 1000 bytes total. Dir size
// (Stat_t.Blocks*512) varies; just sanity-check that it's >= 1000.
assert.Contains(t, stdout, "\t.\n")
}

// --- Path edge cases ---

// TestDuPentestEmptyDirOperand runs du --apparent-size on an empty directory
// and expects exit code 0 with output ending in the operand's name.
func TestDuPentestEmptyDirOperand(t *testing.T) {
	root := t.TempDir()
	emptyDir := filepath.Join(root, "empty")
	require.NoError(t, os.MkdirAll(emptyDir, 0o755))

	out, _, exitCode := cmdRun(t, "du --apparent-size empty", root)
	assert.Equal(t, 0, exitCode)

	endsWithOperand := strings.HasSuffix(out, "\tempty\n")
	assert.True(t, endsWithOperand)
}

// TestDuPentestDoubleSlashesInPath passes an operand containing "//" and
// expects du to succeed and echo the operand exactly as given.
func TestDuPentestDoubleSlashesInPath(t *testing.T) {
	root := t.TempDir()
	subDir := filepath.Join(root, "sub")
	require.NoError(t, os.MkdirAll(subDir, 0o755))
	require.NoError(t, os.WriteFile(filepath.Join(subDir, "f"), []byte("ab"), 0o644))

	out, _, exitCode := cmdRun(t, "du -b sub//f", root)
	assert.Equal(t, 0, exitCode)

	// The doubled separator is expected to survive into the output
	// uncollapsed.
	const want = "2\tsub//f\n"
	assert.Equal(t, want, out)
}

// TestDuPentestDotPath runs du -a on "." and expects the contained file to be
// listed with a "./" prefix.
func TestDuPentestDotPath(t *testing.T) {
	root := t.TempDir()
	target := filepath.Join(root, "f")
	require.NoError(t, os.WriteFile(target, []byte("ab"), 0o644))

	out, _, exitCode := cmdRun(t, "du -a -b .", root)
	assert.Equal(t, 0, exitCode)
	assert.Contains(t, out, "./f\n")
}

// TestDuPentestNonExistentFile runs du on a missing operand and expects exit
// code 1 with a "cannot access" diagnostic naming the operand.
func TestDuPentestNonExistentFile(t *testing.T) {
	root := t.TempDir()

	_, errOut, exitCode := cmdRun(t, "du nope", root)
	assert.Equal(t, 1, exitCode)
	assert.Contains(t, errOut, "du: cannot access 'nope'")
}

// TestDuPentestPathWithLeadingDash covers a file whose name starts with "-".
// Passed bare it is expected to be parsed as flags and rejected; passed after
// "--" it is expected to be treated as an operand.
func TestDuPentestPathWithLeadingDash(t *testing.T) {
	root := t.TempDir()
	dashFile := filepath.Join(root, "-foo")
	require.NoError(t, os.WriteFile(dashFile, []byte("hi"), 0o644))

	// No separator: "-foo" goes through flag parsing, so the run must fail.
	_, bareErr, bareCode := cmdRun(t, "du -b -foo", root)
	assert.Equal(t, 1, bareCode)
	assert.Contains(t, bareErr, "du:")

	// "--" ends flag parsing, so the same name is now measured as a file.
	sepOut, _, sepCode := cmdRun(t, "du -b -- -foo", root)
	assert.Equal(t, 0, sepCode)
	assert.Equal(t, "2\t-foo\n", sepOut)
}

// --- Flag and argument injection ---

// TestDuPentestRejectsKnownDangerousFlags runs du once per flag in the list
// below, each in its own subtest, and expects every invocation to exit 1 with
// a "du:"-prefixed diagnostic.
func TestDuPentestRejectsKnownDangerousFlags(t *testing.T) {
	workDir := t.TempDir()

	rejected := []string{
		"--files0-from=anything",
		"--exclude-from=anything",
		"--exclude=*.o",
		"-X",
		"--block-size=1K",
		"-B",
		"--threshold=1024",
		"-t",
		"--inodes",
		"--time",
		"--time-style=iso",
		"--exclude-from",
		"-l",
		"--count-links",
	}

	for _, flag := range rejected {
		// Subtest names must not contain "/", which t.Run treats as a
		// hierarchy separator.
		subtestName := strings.ReplaceAll(flag, "/", "_")
		t.Run(subtestName, func(t *testing.T) {
			script := fmt.Sprintf("du %s .", flag)
			_, errOut, exitCode := cmdRun(t, script, workDir)
			assert.Equal(t, 1, exitCode, "%s should be rejected", flag)
			assert.Contains(t, errOut, "du:")
		})
	}
}

// --- Many operands (FD usage) ---

func TestDuPentest100Operands(t *testing.T) {
dir := t.TempDir()
var operands []string
for i := range 100 {
name := fmt.Sprintf("file%03d", i)
require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte("a"), 0o644))
operands = append(operands, name)
}
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
stdout, _, code := cmdRunCtx(ctx, t, "du -b "+strings.Join(operands, " "), dir)
assert.Equal(t, 0, code)
// 100 lines, one per file.
assert.Equal(t, 100, strings.Count(stdout, "\n"))
}

// --- Output consistency ---

// TestDuPentestOutputIsDeterministic runs the same du command six times over
// an unchanged directory and expects identical stdout on every run.
func TestDuPentestOutputIsDeterministic(t *testing.T) {
	const script = "du -a -b ."

	root := t.TempDir()
	require.NoError(t, os.WriteFile(filepath.Join(root, "a"), []byte("xy"), 0o644))
	require.NoError(t, os.WriteFile(filepath.Join(root, "b"), []byte("xyz"), 0o644))

	// The first run is the baseline that later runs are compared against.
	baseline, _, baselineCode := cmdRun(t, script, root)
	assert.Equal(t, 0, baselineCode)

	for range 5 {
		repeat, _, repeatCode := cmdRun(t, script, root)
		assert.Equal(t, 0, repeatCode)
		assert.Equal(t, baseline, repeat)
	}
}

// --- Help to stdout, not stderr ---

// TestDuPentestHelpIsNotError runs du --help and expects exit code 0 with the
// help text on stdout and nothing on stderr.
func TestDuPentestHelpIsNotError(t *testing.T) {
	root := t.TempDir()

	out, errOut, exitCode := cmdRun(t, "du --help", root)
	assert.Equal(t, 0, exitCode)
	assert.NotEmpty(t, out)
	assert.Empty(t, errOut)
}

// --- Symlink behaviour with -P (default) and -L ---

// TestDuPentestBrokenSymlinkP runs du without -L on a dangling symlink and
// expects exit code 0. It is skipped when symlinks cannot be created.
func TestDuPentestBrokenSymlinkP(t *testing.T) {
	if !canSymlink() {
		t.Skip("symlinks unavailable")
	}

	root := t.TempDir()
	linkPath := filepath.Join(root, "dangling")
	require.NoError(t, os.Symlink("nonexistent-target", linkPath))

	// Under the default -P behaviour the link is not followed, so the
	// missing target should not matter and the link is reported as a leaf.
	_, _, exitCode := cmdRun(t, "du dangling", root)
	assert.Equal(t, 0, exitCode)
}

// TestDuPentestBrokenSymlinkL runs du -L on a dangling symlink and expects
// exit code 1 with a "cannot access" diagnostic. It is skipped when symlinks
// cannot be created.
func TestDuPentestBrokenSymlinkL(t *testing.T) {
	if !canSymlink() {
		t.Skip("symlinks unavailable")
	}

	root := t.TempDir()
	linkPath := filepath.Join(root, "dangling")
	require.NoError(t, os.Symlink("nonexistent-target", linkPath))

	// -L follows the link, and its target does not exist.
	_, errOut, exitCode := cmdRun(t, "du -L dangling", root)
	assert.Equal(t, 1, exitCode)
	assert.Contains(t, errOut, "du: cannot access 'dangling'")
}

// --- -c with all errors still emits "total" row ---

// TestDuPentestTotalRowOnAllErrors runs du -c with only missing operands and
// expects exit code 1 while stdout still carries a zero "total" row.
func TestDuPentestTotalRowOnAllErrors(t *testing.T) {
	root := t.TempDir()

	out, _, exitCode := cmdRun(t, "du -c -b nope1 nope2", root)
	assert.Equal(t, 1, exitCode)

	// Every operand failed, yet the grand-total line must still be printed.
	const totalRow = "0\ttotal\n"
	assert.Contains(t, out, totalRow)
}

// --- Boundary behaviour at the rounding cliff (round-up matches GNU) ---

// TestDuPentestHumanRoundingCliff pins the -h output for two apparent sizes
// where rounding up (rather than to nearest) changes the rendered string.
// Both invocations are also required to exit 0.
func TestDuPentestHumanRoundingCliff(t *testing.T) {
	dir := t.TempDir()

	// 10178 bytes ≈ 9.94 KiB. Rounded up at one decimal this becomes 10.0,
	// which reaches the 10-unit threshold where the decimal is dropped, so
	// the expected rendering is "10K".
	require.NoError(t, os.WriteFile(filepath.Join(dir, "ten"), make([]byte, 10178), 0o644))
	stdoutTen, _, codeTen := cmdRun(t, "du -h --apparent-size ten", dir)
	assert.Equal(t, 0, codeTen)
	assert.Equal(t, "10K\tten\n", stdoutTen)

	// 1025 bytes ≈ 1.0009 KiB. Rounding up to one decimal gives "1.1K";
	// round-to-nearest would have given "1.0K".
	require.NoError(t, os.WriteFile(filepath.Join(dir, "ten25"), make([]byte, 1025), 0o644))
	stdoutTen25, _, codeTen25 := cmdRun(t, "du -h --apparent-size ten25", dir)
	assert.Equal(t, 0, codeTen25)
	assert.Equal(t, "1.1K\tten25\n", stdoutTen25)
}
Loading
Loading