From ec0373cdba9200124b94da885848822c55f6be6c Mon Sep 17 00:00:00 2001 From: Alexandre Yang Date: Thu, 12 Mar 2026 16:43:34 +0100 Subject: [PATCH 01/10] Remove wc builtin implementation Co-Authored-By: Claude Opus 4.6 --- interp/builtin_wc_pentest_test.go | 237 --------- interp/builtins/wc/builtin_wc_pentest_test.go | 64 --- interp/builtins/wc/wc.go | 407 ---------------- interp/builtins/wc/wc_gnu_compat_test.go | 172 ------- interp/builtins/wc/wc_test.go | 454 ------------------ interp/builtins/wc/wc_unix_test.go | 35 -- 6 files changed, 1369 deletions(-) delete mode 100644 interp/builtin_wc_pentest_test.go delete mode 100644 interp/builtins/wc/builtin_wc_pentest_test.go delete mode 100644 interp/builtins/wc/wc.go delete mode 100644 interp/builtins/wc/wc_gnu_compat_test.go delete mode 100644 interp/builtins/wc/wc_test.go delete mode 100644 interp/builtins/wc/wc_unix_test.go diff --git a/interp/builtin_wc_pentest_test.go b/interp/builtin_wc_pentest_test.go deleted file mode 100644 index a5c88636..00000000 --- a/interp/builtin_wc_pentest_test.go +++ /dev/null @@ -1,237 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2026-present Datadog, Inc. 
- -package interp_test - -import ( - "bytes" - "context" - "errors" - "os" - "path/filepath" - "strings" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "mvdan.cc/sh/v3/syntax" - - "github.com/DataDog/rshell/interp" -) - -func wcRun(t *testing.T, script, dir string) (string, string, int) { - t.Helper() - return wcRunCtx(context.Background(), t, script, dir) -} - -func wcRunCtx(ctx context.Context, t *testing.T, script, dir string) (string, string, int) { - t.Helper() - parser := syntax.NewParser() - prog, err := parser.Parse(strings.NewReader(script), "") - require.NoError(t, err) - - var outBuf, errBuf bytes.Buffer - opts := []interp.RunnerOption{ - interp.StdIO(nil, &outBuf, &errBuf), - interp.AllowedPaths([]string{dir}), - } - - runner, err := interp.New(opts...) - require.NoError(t, err) - defer runner.Close() - - if dir != "" { - runner.Dir = dir - } - - err = runner.Run(ctx, prog) - exitCode := 0 - if err != nil { - var es interp.ExitStatus - if errors.As(err, &es) { - exitCode = int(es) - } else if ctx.Err() == nil { - t.Fatalf("unexpected error: %v", err) - } - } - return outBuf.String(), errBuf.String(), exitCode -} - -func wcWriteFile(t *testing.T, dir, name, content string) { - t.Helper() - require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte(content), 0644)) -} - -// --- Flag and argument injection --- - -func TestWcPentestUnknownFlags(t *testing.T) { - dir := t.TempDir() - for _, flag := range []string{"-f", "--follow", "--no-such-flag", "--files0-from=foo"} { - _, stderr, code := wcRun(t, "wc "+flag, dir) - assert.Equal(t, 1, code, "flag: %s", flag) - assert.Contains(t, stderr, "wc:", "flag: %s", flag) - } -} - -func TestWcPentestDoubleDashFlagLikeFile(t *testing.T) { - dir := t.TempDir() - wcWriteFile(t, dir, "-v", "hello\n") - stdout, _, code := wcRun(t, "wc -- -v", dir) - assert.Equal(t, 0, code) - assert.Contains(t, stdout, "-v") -} - -func TestWcPentestMultipleStdin(t *testing.T) 
{ - dir := t.TempDir() - wcWriteFile(t, dir, "file.txt", "hello\n") - stdout, _, code := wcRun(t, "cat file.txt | wc - -", dir) - assert.Equal(t, 0, code) - assert.Contains(t, stdout, "total") -} - -// --- Path edge cases --- - -func TestWcPentestNonexistentFile(t *testing.T) { - dir := t.TempDir() - stdout, stderr, code := wcRun(t, "wc nonexistent.txt", dir) - assert.Equal(t, 1, code) - assert.Equal(t, "", stdout) - assert.Contains(t, stderr, "wc:") -} - -func TestWcPentestEmptyFilename(t *testing.T) { - dir := t.TempDir() - stdout, stderr, code := wcRun(t, "wc ''", dir) - assert.Equal(t, 1, code) - assert.Equal(t, "", stdout) - assert.Contains(t, stderr, "wc:") -} - -// --- Special files --- - -func TestWcPentestDevNull(t *testing.T) { - dir := t.TempDir() - wcWriteFile(t, dir, "empty.txt", "") - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - stdout, _, code := wcRunCtx(ctx, t, "wc empty.txt", dir) - assert.Equal(t, 0, code) - assert.Contains(t, stdout, "0") -} - -// --- Context cancellation --- - -func TestWcPentestContextCancelled(t *testing.T) { - dir := t.TempDir() - ctx, cancel := context.WithCancel(context.Background()) - cancel() - _, _, _ = wcRunCtx(ctx, t, "wc", dir) -} - -func TestWcPentestContextTimeout(t *testing.T) { - dir := t.TempDir() - wcWriteFile(t, dir, "file.txt", strings.Repeat("hello\n", 10000)) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - stdout, _, code := wcRunCtx(ctx, t, "wc file.txt", dir) - assert.Equal(t, 0, code) - assert.Contains(t, stdout, "10000") -} - -// --- Large input --- - -func TestWcPentestLargeFile(t *testing.T) { - dir := t.TempDir() - content := strings.Repeat("word word word word word\n", 40000) - wcWriteFile(t, dir, "large.txt", content) - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - stdout, _, code := wcRunCtx(ctx, t, "wc -l large.txt", dir) - assert.Equal(t, 0, code) - 
assert.Contains(t, stdout, "40000") -} - -// --- Many files (FD leak check) --- - -func TestWcPentestManyFiles(t *testing.T) { - dir := t.TempDir() - var args []string - for i := 0; i < 50; i++ { - name := filepath.Join(dir, strings.ReplaceAll(filepath.Base(t.Name()), "/", "_")+"_"+string(rune('a'+i%26))+string(rune('0'+i/26))+".txt") - require.NoError(t, os.WriteFile(name, []byte("x\n"), 0644)) - args = append(args, filepath.Base(name)) - } - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - stdout, _, code := wcRunCtx(ctx, t, "wc "+strings.Join(args, " "), dir) - assert.Equal(t, 0, code) - assert.Contains(t, stdout, "total") -} - -// --- Edge case: file with only newlines --- - -func TestWcPentestOnlyNewlines(t *testing.T) { - dir := t.TempDir() - wcWriteFile(t, dir, "file.txt", strings.Repeat("\n", 100)) - stdout, _, code := wcRun(t, "wc file.txt", dir) - assert.Equal(t, 0, code) - assert.Contains(t, stdout, "100") - assert.Contains(t, stdout, " 0") -} - -// --- Edge case: long line --- - -func TestWcPentestLongLine(t *testing.T) { - dir := t.TempDir() - longLine := strings.Repeat("x", 1024*1024) + "\n" - wcWriteFile(t, dir, "file.txt", longLine) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - stdout, _, code := wcRunCtx(ctx, t, "wc -L file.txt", dir) - assert.Equal(t, 0, code) - assert.Contains(t, stdout, "1048576") -} - -// --- Invalid UTF-8 at chunk boundary --- - -func TestWcPentestInvalidUTF8AtChunkBoundary(t *testing.T) { - dir := t.TempDir() - // Build content so that invalid UTF-8 bytes (0xC0 0x80) land at the - // exact 32 KiB read boundary. This exercises the carry buffer edge - // case where invalid bytes must be handled in-place (not carried). - const chunkSize = 32 * 1024 - padding := strings.Repeat("A", chunkSize-1) // fills up to byte 32767 - // Place 0xC0 at offset 32767 (last byte of first chunk) and 0x80 at - // offset 32768 (first byte of second chunk). 
- content := []byte(padding) - content = append(content, 0xC0, 0x80) - content = append(content, '\n') - - require.NoError(t, os.WriteFile(filepath.Join(dir, "invalid_utf8.txt"), content, 0644)) - - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - // -c should report exact byte count - stdout, _, code := wcRunCtx(ctx, t, "wc -c invalid_utf8.txt", dir) - assert.Equal(t, 0, code) - assert.Contains(t, stdout, "32770") // chunkSize - 1 + 2 invalid bytes + newline - - // -l should count the newline - stdout, _, code = wcRunCtx(ctx, t, "wc -l invalid_utf8.txt", dir) - assert.Equal(t, 0, code) - assert.Contains(t, stdout, "1") -} - -// --- Flag expansion in loop --- - -func TestWcPentestFlagExpansion(t *testing.T) { - dir := t.TempDir() - wcWriteFile(t, dir, "file.txt", "hello\n") - _, stderr, code := wcRun(t, "for flag in --follow; do wc $flag file.txt; done", dir) - assert.Equal(t, 1, code) - assert.Contains(t, stderr, "wc:") -} diff --git a/interp/builtins/wc/builtin_wc_pentest_test.go b/interp/builtins/wc/builtin_wc_pentest_test.go deleted file mode 100644 index f7a0e53e..00000000 --- a/interp/builtins/wc/builtin_wc_pentest_test.go +++ /dev/null @@ -1,64 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2026-present Datadog, Inc. - -// Exploratory pentest for the wc builtin — GTFOBins validation. -// -// GTFOBins documents wc as capable of reading file contents via -// "wc --files0-from /path/to/file". This flag is intentionally not -// implemented in rshell, so pflag rejects it as an unknown flag. 
-// See: https://gtfobins.org/gtfobins/wc/ - -package wc_test - -import ( - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/DataDog/rshell/interp" -) - -// TestWcGTFOBinsFiles0FromRejected verifies that the GTFOBins file-read -// technique "wc --files0-from" is blocked because the flag is not implemented. -// -// GTFOBins: https://gtfobins.org/gtfobins/wc/ -// Technique: wc --files0-from /path/to/input-file -func TestWcGTFOBinsFiles0FromRejected(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "target.txt", "secret data\n") - _, stderr, code := cmdRun(t, "wc --files0-from target.txt", dir) - assert.Equal(t, 1, code) - assert.Contains(t, stderr, "wc:") -} - -// TestWcGTFOBinsFileReadSandboxEscape verifies that wc cannot read files -// outside the AllowedPaths sandbox. -// -// GTFOBins: https://gtfobins.org/gtfobins/wc/ -// Technique: wc /path/to/input-file (reads file to count lines/words/bytes) -func TestWcGTFOBinsFileReadSandboxEscape(t *testing.T) { - allowed := t.TempDir() - secret := t.TempDir() - require.NoError(t, os.WriteFile(filepath.Join(secret, "secret.txt"), []byte("secret data"), 0644)) - secretPath := filepath.ToSlash(filepath.Join(secret, "secret.txt")) - _, stderr, code := runScript(t, "wc "+secretPath, allowed, interp.AllowedPaths([]string{allowed})) - assert.Equal(t, 1, code) - assert.Contains(t, stderr, "wc:") -} - -// TestWcGTFOBinsFileReadTraversal verifies that path traversal cannot -// escape the sandbox when using wc. 
-// -// GTFOBins: https://gtfobins.org/gtfobins/wc/ -// Technique: wc ../../etc/passwd -func TestWcGTFOBinsFileReadTraversal(t *testing.T) { - dir := t.TempDir() - _, stderr, code := cmdRun(t, "wc ../../etc/passwd", dir) - assert.Equal(t, 1, code) - assert.Contains(t, stderr, "wc:") -} diff --git a/interp/builtins/wc/wc.go b/interp/builtins/wc/wc.go deleted file mode 100644 index 1b71418b..00000000 --- a/interp/builtins/wc/wc.go +++ /dev/null @@ -1,407 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2026-present Datadog, Inc. - -// Package wc implements the wc builtin command. -// -// wc — print newline, word, and byte counts for each file -// -// Usage: wc [OPTION]... [FILE]... -// -// Print newline, word, and byte counts for each FILE, and a total line -// if more than one FILE is specified. A word is a non-zero-length sequence -// of characters delimited by white space. With no FILE, or when FILE is -, -// read standard input. -// -// When no flags are given, -l, -w, and -c are assumed (lines, words, bytes). -// -// Accepted flags: -// -// -l, --lines -// Print the newline count. -// -// -w, --words -// Print the word count. -// -// -c, --bytes -// Print the byte count. -// -// -m, --chars -// Print the character count. In a multibyte locale, the number of -// characters may differ from the number of bytes. -// -// -L, --max-line-length -// Print the length of the longest line. -// -// -h, --help -// Print this usage message to stdout and exit 0. -// -// Output columns always appear in a fixed order: lines, words, chars, -// bytes, max-line-length. Only the requested columns are shown. Column -// widths are right-justified to the width of the largest count across -// all files (including the total line, if any). -// -// Exit codes: -// -// 0 All files processed successfully. 
-// 1 At least one error occurred (missing file, invalid argument, etc.). -// -// Memory safety: -// -// Input is read in fixed-size chunks (32 KiB). Lines longer than -// MaxLineBytes (1 MiB) are split across chunks for counting purposes -// but never fully buffered. All loops check ctx.Err() at each -// iteration to honour the shell's execution timeout. -package wc - -import ( - "context" - "io" - "os" - "strconv" - "unicode" - "unicode/utf8" - - "github.com/DataDog/rshell/interp/builtins" -) - -// Cmd is the wc builtin command descriptor. -var Cmd = builtins.Command{Name: "wc", MakeFlags: registerFlags} - -const chunkSize = 32 * 1024 // 32 KiB read buffer -const stdinMinWidth = 7 // GNU wc minimum column width for stdin - -type counts struct { - lines int64 - words int64 - chars int64 - bytes int64 - maxLineLen int64 -} - -type options struct { - showLines bool - showWords bool - showBytes bool - showChars bool - showMaxLineLen bool -} - -func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { - help := fs.BoolP("help", "h", false, "print usage and exit") - lines := fs.BoolP("lines", "l", false, "print the newline counts") - words := fs.BoolP("words", "w", false, "print the word counts") - bytesFlag := fs.BoolP("bytes", "c", false, "print the byte counts") - chars := fs.BoolP("chars", "m", false, "print the character counts") - maxLineLen := fs.BoolP("max-line-length", "L", false, "print the maximum display width") - - // Security: --files0-from is intentionally NOT implemented. - // GTFOBins: this flag reads filenames from a file, enabling - // data exfiltration in sandboxed environments. - - return func(ctx context.Context, callCtx *builtins.CallContext, files []string) builtins.Result { - if *help { - callCtx.Out("Usage: wc [OPTION]... 
[FILE]...\n") - callCtx.Out("Print newline, word, and byte counts for each FILE.\n") - callCtx.Out("With no FILE, or when FILE is -, read standard input.\n\n") - fs.SetOutput(callCtx.Stdout) - fs.PrintDefaults() - return builtins.Result{} - } - - opts := options{ - showLines: *lines, - showWords: *words, - showBytes: *bytesFlag, - showChars: *chars, - showMaxLineLen: *maxLineLen, - } - - if !opts.showLines && !opts.showWords && !opts.showBytes && !opts.showChars && !opts.showMaxLineLen { - opts.showLines = true - opts.showWords = true - opts.showBytes = true - } - - stdinImplicit := len(files) == 0 - if stdinImplicit { - files = []string{"-"} - } - - hasStdin := stdinImplicit - if !hasStdin { - for _, f := range files { - if f == "-" { - hasStdin = true - break - } - } - } - - var total counts - var failed bool - - type fileResult struct { - name string - c counts - } - results := make([]fileResult, 0, len(files)) - - for _, file := range files { - if ctx.Err() != nil { - break - } - c, err := countFile(ctx, callCtx, file) - if err != nil { - name := file - if file == "-" { - name = "standard input" - } - callCtx.Errf("wc: %s: %s\n", name, callCtx.PortableErr(err)) - failed = true - if c == (counts{}) { - continue - } - } - results = append(results, fileResult{name: file, c: c}) - total.lines += c.lines - total.words += c.words - total.chars += c.chars - total.bytes += c.bytes - if c.maxLineLen > total.maxLineLen { - total.maxLineLen = c.maxLineLen - } - } - - width := fieldWidth(total, opts) - if hasStdin && width < stdinMinWidth { - width = stdinMinWidth - } - - for _, fr := range results { - name := fr.name - if name == "-" && stdinImplicit { - name = "" - } - printCounts(callCtx, fr.c, opts, width, name) - } - - if len(files) > 1 { - printCounts(callCtx, total, opts, width, "total") - } - - if failed { - return builtins.Result{Code: 1} - } - return builtins.Result{} - } -} - -func countFile(ctx context.Context, callCtx *builtins.CallContext, path string) 
(counts, error) { - var rc io.ReadCloser - if path == "-" { - if callCtx.Stdin == nil { - return counts{}, nil - } - rc = io.NopCloser(callCtx.Stdin) - } else { - f, err := callCtx.OpenFile(ctx, path, os.O_RDONLY, 0) - if err != nil { - return counts{}, err - } - rc = f - } - defer rc.Close() - return countReader(ctx, rc) -} - -func countReader(ctx context.Context, r io.Reader) (counts, error) { - buf := make([]byte, chunkSize) - var c counts - var inWord bool - var lineLen int64 - var carry [utf8.UTFMax - 1]byte - var carryN int - - for { - if ctx.Err() != nil { - return c, ctx.Err() - } - n, err := r.Read(buf[carryN:]) - if carryN > 0 { - copy(buf, carry[:carryN]) - n += carryN - carryN = 0 - } - if n > 0 { - chunk := buf[:n] - c.bytes += int64(n) - - // Handle incomplete UTF-8 at end of chunk. - // When tail >= n (e.g., n == 1 with a single invalid byte), the - // condition below is false, so the byte stays in chunk and - // DecodeRune processes it as a replacement character — this is - // correct and matches utf8.DecodeRune semantics. - tail := 0 - if !utf8.Valid(chunk) { - for tail = 1; tail <= 3 && tail < n; tail++ { - if utf8.Valid(chunk[:n-tail]) { - break - } - } - if tail > 0 && tail <= 3 && tail < n { - carryN = copy(carry[:], chunk[n-tail:]) - chunk = chunk[:n-tail] - } else { - tail = 0 - } - } - c.chars += int64(utf8.RuneCount(chunk)) - // carryN bytes are subtracted here and will be re-added via - // n += carryN at the top of the next iteration. 
- c.bytes -= int64(carryN) - - for i := 0; i < len(chunk); { - r, size := utf8.DecodeRune(chunk[i:]) - i += size - if r == '\n' { - c.lines++ - if lineLen > c.maxLineLen { - c.maxLineLen = lineLen - } - lineLen = 0 - inWord = false - } else if r == '\r' { - lineLen = 0 - inWord = false - } else if r == '\t' { - lineLen = (lineLen/8 + 1) * 8 - inWord = false - } else if r == ' ' || r == '\v' || r == '\f' { - lineLen++ - inWord = false - } else { - if !inWord { - c.words++ - inWord = true - } - lineLen += int64(runeWidth(r)) - } - } - } - if err == io.EOF { - if carryN > 0 { - c.chars += int64(utf8.RuneCount(carry[:carryN])) - c.bytes += int64(carryN) - carryN = 0 - } - break - } - if err != nil { - return c, err - } - } - if lineLen > c.maxLineLen { - c.maxLineLen = lineLen - } - return c, nil -} - -func fieldWidth(total counts, opts options) int { - max := int64(0) - if opts.showLines && total.lines > max { - max = total.lines - } - if opts.showWords && total.words > max { - max = total.words - } - if opts.showChars && total.chars > max { - max = total.chars - } - if opts.showBytes && total.bytes > max { - max = total.bytes - } - if opts.showMaxLineLen && total.maxLineLen > max { - max = total.maxLineLen - } - w := len(strconv.FormatInt(max, 10)) - return w -} - -// runeWidth returns the display width of a rune following wcwidth(3) rules: -// 0 for controls, combining marks, and format chars; 2 for East Asian -// Wide/Fullwidth; 1 for everything else. -func runeWidth(r rune) int { - if unicode.Is(unicode.Cc, r) { - return 0 - } - if unicode.Is(unicode.Mn, r) || unicode.Is(unicode.Me, r) || unicode.Is(unicode.Cf, r) { - return 0 - } - // Hangul Jamo medial vowels and final consonants (zero-width in syllable composition). 
- if r >= 0x1160 && r <= 0x11FF { - return 0 - } - if unicode.Is(eastAsianWide, r) { - return 2 - } - return 1 -} - -// eastAsianWide is a RangeTable covering East Asian Wide and Fullwidth -// codepoints per UAX #11, matching the ranges used by wcwidth(3). -var eastAsianWide = &unicode.RangeTable{ - R16: []unicode.Range16{ - {0x1100, 0x115F, 1}, // Hangul Jamo initials - {0x2329, 0x232A, 1}, // CJK angle brackets - {0x2E80, 0x303E, 1}, // CJK Radicals Supplement .. CJK Symbols - {0x3040, 0x33BF, 1}, // Hiragana .. CJK Compatibility - {0x33C0, 0x33FF, 1}, // CJK Compatibility (cont.) - {0x3400, 0x4DBF, 1}, // CJK Unified Ideographs Extension A - {0x4E00, 0xA4CF, 1}, // CJK Unified Ideographs .. Yi - {0xAC00, 0xD7A3, 1}, // Hangul Syllables - {0xF900, 0xFAFF, 1}, // CJK Compatibility Ideographs - {0xFE10, 0xFE19, 1}, // Vertical Forms - {0xFE30, 0xFE6F, 1}, // CJK Compatibility Forms + Small Form Variants - {0xFF01, 0xFF60, 1}, // Fullwidth Forms - {0xFFE0, 0xFFE6, 1}, // Fullwidth Signs - }, - R32: []unicode.Range32{ - {0x1F300, 0x1F64F, 1}, // Misc Symbols/Pictographs + Emoticons - {0x1F900, 0x1F9FF, 1}, // Supplemental Symbols and Pictographs - {0x20000, 0x2FFFD, 1}, // CJK Extension B..F - {0x30000, 0x3FFFD, 1}, // CJK Extension G+ - }, -} - -func printCounts(callCtx *builtins.CallContext, c counts, opts options, width int, name string) { - first := true - printField := func(val int64) { - if first { - callCtx.Outf("%*d", width, val) - first = false - } else { - callCtx.Outf(" %*d", width, val) - } - } - if opts.showLines { - printField(c.lines) - } - if opts.showWords { - printField(c.words) - } - if opts.showChars { - printField(c.chars) - } - if opts.showBytes { - printField(c.bytes) - } - if opts.showMaxLineLen { - printField(c.maxLineLen) - } - if name != "" { - callCtx.Outf(" %s", name) - } - callCtx.Out("\n") -} diff --git a/interp/builtins/wc/wc_gnu_compat_test.go b/interp/builtins/wc/wc_gnu_compat_test.go deleted file mode 100644 index 90966364..00000000 
--- a/interp/builtins/wc/wc_gnu_compat_test.go +++ /dev/null @@ -1,172 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2026-present Datadog, Inc. - -package wc_test - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -// TestGNUCompatDefaultEmpty — no flags on empty input. -// -// GNU command: printf ” | gwc -// Expected: " 0 0 0\n" -func TestGNUCompatDefaultEmpty(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "empty.txt", "") - stdout, _, code := cmdRun(t, "wc empty.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "0 0 0 empty.txt\n", stdout) -} - -// TestGNUCompatDefaultBasic — default counts on "a b\nc\n". -// -// GNU command: printf 'a b\nc\n' | gwc -// Expected: " 2 3 6\n" -func TestGNUCompatDefaultBasic(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "a b\nc\n") - stdout, _, code := cmdRun(t, "wc file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "2 3 6 file.txt\n", stdout) -} - -// TestGNUCompatLinesCount — -l on input with 2 newlines. -// -// GNU command: printf 'x\ny\n' | gwc -l -// Expected: "2\n" -func TestGNUCompatLinesCount(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "x\ny\n") - stdout, _, code := cmdRun(t, "wc -l file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "2 file.txt\n", stdout) -} - -// TestGNUCompatLinesNoNewline — -l on input with no newline. -// -// GNU command: printf 'x y' | gwc -l -// Expected: "0\n" -func TestGNUCompatLinesNoNewline(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "x y") - stdout, _, code := cmdRun(t, "wc -l file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "0 file.txt\n", stdout) -} - -// TestGNUCompatWordsEmpty — -w on empty. 
-// -// GNU command: printf ” | gwc -w -// Expected: "0\n" -func TestGNUCompatWordsEmpty(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "") - stdout, _, code := cmdRun(t, "wc -w file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "0 file.txt\n", stdout) -} - -// TestGNUCompatWordsMulti — -w on "x y\nz". -// -// GNU command: printf 'x y\nz' | gwc -w -// Expected: "3\n" -func TestGNUCompatWordsMulti(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "x y\nz") - stdout, _, code := cmdRun(t, "wc -w file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "3 file.txt\n", stdout) -} - -// TestGNUCompatBytesCount — -c on "x". -// -// GNU command: printf 'x' | gwc -c -// Expected: "1\n" -func TestGNUCompatBytesCount(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "x") - stdout, _, code := cmdRun(t, "wc -c file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "1 file.txt\n", stdout) -} - -// TestGNUCompatMaxLineLen — -L on "1\n12\n". -// -// GNU command: printf '1\n12\n' | gwc -L -// Expected: "2\n" -func TestGNUCompatMaxLineLen(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "1\n12\n") - stdout, _, code := cmdRun(t, "wc -L file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "2 file.txt\n", stdout) -} - -// TestGNUCompatMaxLineLenLastLine — -L on "\n123456" (no trailing newline). -// -// GNU command: printf '\n123456' | gwc -L -// Expected: "6\n" -func TestGNUCompatMaxLineLenLastLine(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "\n123456") - stdout, _, code := cmdRun(t, "wc -L file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "6 file.txt\n", stdout) -} - -// TestGNUCompatMultipleFiles — two files with total line. 
-// -// GNU command: gwc a.txt b.txt -// a.txt = "hello\n" (1 line, 1 word, 6 bytes) -// b.txt = "world foo\n" (1 line, 2 words, 10 bytes) -// Expected: -// -// " 1 1 6 a.txt\n 1 2 10 b.txt\n 2 3 16 total\n" -func TestGNUCompatMultipleFiles(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "a.txt", "hello\n") - writeFile(t, dir, "b.txt", "world foo\n") - stdout, _, code := cmdRun(t, "wc a.txt b.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, " 1 1 6 a.txt\n 1 2 10 b.txt\n 2 3 16 total\n", stdout) -} - -// TestGNUCompatCharsMultibyte — -m on "café\n". -// -// GNU command: printf 'café\n' | gwc -m -// Expected: "5\n" (5 chars: c, a, f, é, \n) -func TestGNUCompatCharsMultibyte(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "café\n") - stdout, _, code := cmdRun(t, "wc -m file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "5 file.txt\n", stdout) -} - -// TestGNUCompatControlCharIsWord — control byte \x01 counts as a word. -// -// GNU command: printf '\x01\n' | gwc -w -// Expected: "1\n" -func TestGNUCompatControlCharIsWord(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "\x01\n") - stdout, _, code := cmdRun(t, "wc -w file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "1 file.txt\n", stdout) -} - -// TestGNUCompatRejectedFlag — unknown flag exits 1. -// -// GNU command: gwc --follow -// Expected: exit 1, stderr contains "wc:" -func TestGNUCompatRejectedFlag(t *testing.T) { - dir := t.TempDir() - _, stderr, code := cmdRun(t, "wc --follow", dir) - assert.Equal(t, 1, code) - assert.Contains(t, stderr, "wc:") -} diff --git a/interp/builtins/wc/wc_test.go b/interp/builtins/wc/wc_test.go deleted file mode 100644 index 4707b0dd..00000000 --- a/interp/builtins/wc/wc_test.go +++ /dev/null @@ -1,454 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. 
-// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2026-present Datadog, Inc. - -package wc_test - -import ( - "context" - "os" - "path/filepath" - "strings" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/DataDog/rshell/interp" - "github.com/DataDog/rshell/interp/builtins/testutil" -) - -func runScript(t *testing.T, script, dir string, opts ...interp.RunnerOption) (string, string, int) { - t.Helper() - return testutil.RunScript(t, script, dir, opts...) -} - -func runScriptCtx(ctx context.Context, t *testing.T, script, dir string, opts ...interp.RunnerOption) (string, string, int) { - t.Helper() - return testutil.RunScriptCtx(ctx, t, script, dir, opts...) -} - -func cmdRun(t *testing.T, script, dir string) (string, string, int) { - t.Helper() - return runScript(t, script, dir, interp.AllowedPaths([]string{dir})) -} - -func writeFile(t *testing.T, dir, name, content string) string { - t.Helper() - require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte(content), 0644)) - return name -} - -// --- Default mode (lines, words, bytes) --- - -func TestWcDefaultEmptyStdin(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "empty.txt", "") - stdout, _, code := cmdRun(t, "wc empty.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "0 0 0 empty.txt\n", stdout) -} - -func TestWcDefaultBasic(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "a b\nc\n") - stdout, _, code := cmdRun(t, "wc file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "2 3 6 file.txt\n", stdout) -} - -func TestWcDefaultNoTrailingNewline(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "hello world") - stdout, _, code := cmdRun(t, "wc file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, " 0 2 11 file.txt\n", stdout) -} - -// --- Lines --- - -func TestWcLinesEmpty(t *testing.T) { - dir := t.TempDir() - writeFile(t, 
dir, "file.txt", "") - stdout, _, code := cmdRun(t, "wc -l file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "0 file.txt\n", stdout) -} - -func TestWcLinesNoNewline(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "x y") - stdout, _, code := cmdRun(t, "wc -l file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "0 file.txt\n", stdout) -} - -func TestWcLinesOneNewline(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "x y\n") - stdout, _, code := cmdRun(t, "wc -l file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "1 file.txt\n", stdout) -} - -func TestWcLinesTwoNewlines(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "x\ny\n") - stdout, _, code := cmdRun(t, "wc -l file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "2 file.txt\n", stdout) -} - -func TestWcLinesLongForm(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "a\nb\nc\n") - stdout, _, code := cmdRun(t, "wc --lines file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "3 file.txt\n", stdout) -} - -// --- Words --- - -func TestWcWordsEmpty(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "") - stdout, _, code := cmdRun(t, "wc -w file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "0 file.txt\n", stdout) -} - -func TestWcWordsSingle(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "x") - stdout, _, code := cmdRun(t, "wc -w file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "1 file.txt\n", stdout) -} - -func TestWcWordsMultiple(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "x y\nz") - stdout, _, code := cmdRun(t, "wc -w file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "3 file.txt\n", stdout) -} - -func TestWcWordsControlChar(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "\x01\n") - stdout, _, code := cmdRun(t, "wc -w file.txt", dir) - assert.Equal(t, 0, code) - 
assert.Equal(t, "1 file.txt\n", stdout) -} - -// --- Bytes --- - -func TestWcBytesEmpty(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "") - stdout, _, code := cmdRun(t, "wc -c file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "0 file.txt\n", stdout) -} - -func TestWcBytesSingle(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "x") - stdout, _, code := cmdRun(t, "wc -c file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "1 file.txt\n", stdout) -} - -func TestWcBytesMulti(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "hello\n") - stdout, _, code := cmdRun(t, "wc -c file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "6 file.txt\n", stdout) -} - -// --- Chars --- - -func TestWcCharsASCII(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "hello\n") - stdout, _, code := cmdRun(t, "wc -m file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "6 file.txt\n", stdout) -} - -func TestWcCharsMultibyte(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "café\n") - stdout, _, code := cmdRun(t, "wc -m file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "5 file.txt\n", stdout) -} - -func TestWcBytesMultibyte(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "café\n") - stdout, _, code := cmdRun(t, "wc -c file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "6 file.txt\n", stdout) -} - -func TestWcCharsAndBytes(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "café\n") - stdout, _, code := cmdRun(t, "wc -cm file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "5 6 file.txt\n", stdout) -} - -// --- Max line length --- - -func TestWcMaxLineLenBasic(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "1\n12\n") - stdout, _, code := cmdRun(t, "wc -L file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "2 file.txt\n", stdout) -} - -func 
TestWcMaxLineLenThreeLines(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "1\n123\n1\n") - stdout, _, code := cmdRun(t, "wc -L file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "3 file.txt\n", stdout) -} - -func TestWcMaxLineLenNoTrailingNewline(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "\n123456") - stdout, _, code := cmdRun(t, "wc -L file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "6 file.txt\n", stdout) -} - -func TestWcMaxLineLenEmpty(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "") - stdout, _, code := cmdRun(t, "wc -L file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "0 file.txt\n", stdout) -} - -// --- Multiple files --- - -func TestWcMultipleFiles(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "a.txt", "hello\n") - writeFile(t, dir, "b.txt", "world foo\n") - stdout, _, code := cmdRun(t, "wc a.txt b.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, " 1 1 6 a.txt\n 1 2 10 b.txt\n 2 3 16 total\n", stdout) -} - -func TestWcMultipleFilesPartialFailure(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "a.txt", "hello\n") - stdout, stderr, code := cmdRun(t, "wc a.txt missing.txt", dir) - assert.Equal(t, 1, code) - assert.Contains(t, stdout, "a.txt") - assert.Contains(t, stdout, "total") - assert.Contains(t, stderr, "wc:") -} - -// --- Stdin --- - -func TestWcStdinImplicit(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "a b\nc\n") - stdout, _, code := cmdRun(t, "cat file.txt | wc", dir) - assert.Equal(t, 0, code) - assert.Equal(t, " 2 3 6\n", stdout) -} - -func TestWcStdinDash(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "a b\nc\n") - stdout, _, code := cmdRun(t, "cat file.txt | wc -", dir) - assert.Equal(t, 0, code) - assert.Equal(t, " 2 3 6 -\n", stdout) -} - -func TestWcNilStdin(t *testing.T) { - dir := t.TempDir() - stdout, _, code := runScript(t, "wc", dir) - assert.Equal(t, 0, code) - 
assert.Equal(t, " 0 0 0\n", stdout) -} - -// --- Help --- - -func TestWcHelp(t *testing.T) { - dir := t.TempDir() - stdout, _, code := cmdRun(t, "wc --help", dir) - assert.Equal(t, 0, code) - assert.Contains(t, stdout, "Usage:") -} - -func TestWcHelpShort(t *testing.T) { - dir := t.TempDir() - stdout, _, code := cmdRun(t, "wc -h", dir) - assert.Equal(t, 0, code) - assert.Contains(t, stdout, "Usage:") -} - -// --- Error cases --- - -func TestWcMissingFile(t *testing.T) { - dir := t.TempDir() - stdout, stderr, code := cmdRun(t, "wc nonexistent.txt", dir) - assert.Equal(t, 1, code) - assert.Equal(t, "", stdout) - assert.Contains(t, stderr, "wc:") -} - -func TestWcUnknownFlag(t *testing.T) { - dir := t.TempDir() - _, stderr, code := cmdRun(t, "wc --definitely-invalid", dir) - assert.Equal(t, 1, code) - assert.Contains(t, stderr, "wc:") -} - -func TestWcFiles0FromRejected(t *testing.T) { - dir := t.TempDir() - _, stderr, code := cmdRun(t, "wc --files0-from=foo", dir) - assert.Equal(t, 1, code) - assert.Contains(t, stderr, "wc:") -} - -func TestWcDirectory(t *testing.T) { - dir := t.TempDir() - _, stderr, code := cmdRun(t, "wc .", dir) - assert.Equal(t, 1, code) - assert.Contains(t, stderr, "wc:") -} - -// --- Hardening --- - -func TestWcDoubleDash(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "hello\n") - stdout, _, code := cmdRun(t, "wc -- file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "1 1 6 file.txt\n", stdout) -} - -func TestWcContextCancellation(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", strings.Repeat("x\n", 100)) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - _, _, code := runScriptCtx(ctx, t, "wc file.txt", dir, interp.AllowedPaths([]string{dir})) - assert.Equal(t, 0, code) -} - -func TestWcPipeInput(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "alpha\nbeta\ngamma\n") - stdout, _, code := cmdRun(t, "cat file.txt | wc -l", dir) - 
assert.Equal(t, 0, code) - assert.Equal(t, " 3\n", stdout) -} - -// --- Combined flags --- - -func TestWcAllFlags(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "a b\nc\n") - stdout, _, code := cmdRun(t, "wc -lwmcL file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "2 3 6 6 3 file.txt\n", stdout) -} - -func TestWcLinesAndWords(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "a b\nc\n") - stdout, _, code := cmdRun(t, "wc -lw file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "2 3 file.txt\n", stdout) -} - -// --- Width formatting --- - -func TestWcWidthDeterminedByTotal(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "a.txt", strings.Repeat("word ", 20)+"\n") - writeFile(t, dir, "b.txt", "x\n") - stdout, _, code := cmdRun(t, "wc -w a.txt b.txt", dir) - assert.Equal(t, 0, code) - assert.Contains(t, stdout, "total\n") - lines := strings.Split(strings.TrimSpace(stdout), "\n") - assert.Equal(t, 3, len(lines)) -} - -// --- Max line length: tab and CR --- - -func TestWcMaxLineLenTab(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "a\tb\n") - stdout, _, code := cmdRun(t, "wc -L file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "9 file.txt\n", stdout) -} - -func TestWcMaxLineLenCR(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "hello\rworld\n") - stdout, _, code := cmdRun(t, "wc -L file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "5 file.txt\n", stdout) -} - -func TestWcCRLFLineCount(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "a\r\nb\r\n") - stdout, _, code := cmdRun(t, "wc -l file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "2 file.txt\n", stdout) -} - -// --- Binary / non-UTF8 input --- - -func TestWcBinaryInput(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.bin", string([]byte{0x00, 0xFF, 0xFE, 0x0A, 0x41})) - stdout, _, code := cmdRun(t, "wc file.bin", dir) - assert.Equal(t, 0, 
code) - assert.Contains(t, stdout, "file.bin") - assert.Equal(t, 0, code) -} - -// --- Multibyte chars --- - -func TestWcCharsMultibyteEmoji(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "file.txt", "hi 💐\n") - stdout, _, code := cmdRun(t, "wc -m file.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "5 file.txt\n", stdout) -} - -// TestWcChunkBoundaryMultibyte verifies that a multibyte character straddling -// the 32 KiB read-buffer boundary is not double-counted. This requires -// programmatic file generation so it lives as a Go test rather than a scenario. -func TestWcChunkBoundaryMultibyte(t *testing.T) { - dir := t.TempDir() - // 💐 is 4 bytes; placing it at offset 32766 means it spans bytes 32766-32769, - // straddling the 32768-byte chunk boundary and exercising the carry logic. - prefix := strings.Repeat("a", 32*1024-2) - content := prefix + "💐\n" - writeFile(t, dir, "file.txt", content) - stdout, _, code := cmdRun(t, "wc -mL file.txt", dir) - assert.Equal(t, 0, code) - // chars: 32766 'a' + 1 emoji + 1 newline = 32768 - // max line length: 32766 + 2 (emoji display width) = 32768 - assert.Equal(t, "32768 32768 file.txt\n", stdout) -} - diff --git a/interp/builtins/wc/wc_unix_test.go b/interp/builtins/wc/wc_unix_test.go deleted file mode 100644 index 7882ae13..00000000 --- a/interp/builtins/wc/wc_unix_test.go +++ /dev/null @@ -1,35 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2026-present Datadog, Inc. 
- -//go:build unix - -package wc_test - -import ( - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestWcSymlinkToFile(t *testing.T) { - dir := t.TempDir() - writeFile(t, dir, "target.txt", "hello\n") - require.NoError(t, os.Symlink("target.txt", filepath.Join(dir, "link.txt"))) - stdout, _, code := cmdRun(t, "wc link.txt", dir) - assert.Equal(t, 0, code) - assert.Equal(t, "1 1 6 link.txt\n", stdout) -} - -func TestWcDanglingSymlink(t *testing.T) { - dir := t.TempDir() - require.NoError(t, os.Symlink("nonexistent", filepath.Join(dir, "dangle.txt"))) - stdout, stderr, code := cmdRun(t, "wc dangle.txt", dir) - assert.Equal(t, 1, code) - assert.Equal(t, "", stdout) - assert.Contains(t, stderr, "wc:") -} From 4b0ac18a45b22ed2a8a04226fb786701b3942ced Mon Sep 17 00:00:00 2001 From: Alexandre Yang Date: Thu, 12 Mar 2026 16:58:11 +0100 Subject: [PATCH 02/10] empty From 9d41e52a08e52c946d49e895531737726cb02072 Mon Sep 17 00:00:00 2001 From: Alexandre Yang Date: Thu, 12 Mar 2026 17:00:16 +0100 Subject: [PATCH 03/10] Revert "Remove wc builtin implementation" This reverts commit ec0373cdba9200124b94da885848822c55f6be6c. 
--- interp/builtin_wc_pentest_test.go | 237 +++++++++ interp/builtins/wc/builtin_wc_pentest_test.go | 64 +++ interp/builtins/wc/wc.go | 407 ++++++++++++++++ interp/builtins/wc/wc_gnu_compat_test.go | 172 +++++++ interp/builtins/wc/wc_test.go | 454 ++++++++++++++++++ interp/builtins/wc/wc_unix_test.go | 35 ++ 6 files changed, 1369 insertions(+) create mode 100644 interp/builtin_wc_pentest_test.go create mode 100644 interp/builtins/wc/builtin_wc_pentest_test.go create mode 100644 interp/builtins/wc/wc.go create mode 100644 interp/builtins/wc/wc_gnu_compat_test.go create mode 100644 interp/builtins/wc/wc_test.go create mode 100644 interp/builtins/wc/wc_unix_test.go diff --git a/interp/builtin_wc_pentest_test.go b/interp/builtin_wc_pentest_test.go new file mode 100644 index 00000000..a5c88636 --- /dev/null +++ b/interp/builtin_wc_pentest_test.go @@ -0,0 +1,237 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package interp_test + +import ( + "bytes" + "context" + "errors" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "mvdan.cc/sh/v3/syntax" + + "github.com/DataDog/rshell/interp" +) + +func wcRun(t *testing.T, script, dir string) (string, string, int) { + t.Helper() + return wcRunCtx(context.Background(), t, script, dir) +} + +func wcRunCtx(ctx context.Context, t *testing.T, script, dir string) (string, string, int) { + t.Helper() + parser := syntax.NewParser() + prog, err := parser.Parse(strings.NewReader(script), "") + require.NoError(t, err) + + var outBuf, errBuf bytes.Buffer + opts := []interp.RunnerOption{ + interp.StdIO(nil, &outBuf, &errBuf), + interp.AllowedPaths([]string{dir}), + } + + runner, err := interp.New(opts...) 
+ require.NoError(t, err) + defer runner.Close() + + if dir != "" { + runner.Dir = dir + } + + err = runner.Run(ctx, prog) + exitCode := 0 + if err != nil { + var es interp.ExitStatus + if errors.As(err, &es) { + exitCode = int(es) + } else if ctx.Err() == nil { + t.Fatalf("unexpected error: %v", err) + } + } + return outBuf.String(), errBuf.String(), exitCode +} + +func wcWriteFile(t *testing.T, dir, name, content string) { + t.Helper() + require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte(content), 0644)) +} + +// --- Flag and argument injection --- + +func TestWcPentestUnknownFlags(t *testing.T) { + dir := t.TempDir() + for _, flag := range []string{"-f", "--follow", "--no-such-flag", "--files0-from=foo"} { + _, stderr, code := wcRun(t, "wc "+flag, dir) + assert.Equal(t, 1, code, "flag: %s", flag) + assert.Contains(t, stderr, "wc:", "flag: %s", flag) + } +} + +func TestWcPentestDoubleDashFlagLikeFile(t *testing.T) { + dir := t.TempDir() + wcWriteFile(t, dir, "-v", "hello\n") + stdout, _, code := wcRun(t, "wc -- -v", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "-v") +} + +func TestWcPentestMultipleStdin(t *testing.T) { + dir := t.TempDir() + wcWriteFile(t, dir, "file.txt", "hello\n") + stdout, _, code := wcRun(t, "cat file.txt | wc - -", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "total") +} + +// --- Path edge cases --- + +func TestWcPentestNonexistentFile(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := wcRun(t, "wc nonexistent.txt", dir) + assert.Equal(t, 1, code) + assert.Equal(t, "", stdout) + assert.Contains(t, stderr, "wc:") +} + +func TestWcPentestEmptyFilename(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := wcRun(t, "wc ''", dir) + assert.Equal(t, 1, code) + assert.Equal(t, "", stdout) + assert.Contains(t, stderr, "wc:") +} + +// --- Special files --- + +func TestWcPentestDevNull(t *testing.T) { + dir := t.TempDir() + wcWriteFile(t, dir, "empty.txt", "") + ctx, cancel := 
context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + stdout, _, code := wcRunCtx(ctx, t, "wc empty.txt", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "0") +} + +// --- Context cancellation --- + +func TestWcPentestContextCancelled(t *testing.T) { + dir := t.TempDir() + ctx, cancel := context.WithCancel(context.Background()) + cancel() + _, _, _ = wcRunCtx(ctx, t, "wc", dir) +} + +func TestWcPentestContextTimeout(t *testing.T) { + dir := t.TempDir() + wcWriteFile(t, dir, "file.txt", strings.Repeat("hello\n", 10000)) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + stdout, _, code := wcRunCtx(ctx, t, "wc file.txt", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "10000") +} + +// --- Large input --- + +func TestWcPentestLargeFile(t *testing.T) { + dir := t.TempDir() + content := strings.Repeat("word word word word word\n", 40000) + wcWriteFile(t, dir, "large.txt", content) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + stdout, _, code := wcRunCtx(ctx, t, "wc -l large.txt", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "40000") +} + +// --- Many files (FD leak check) --- + +func TestWcPentestManyFiles(t *testing.T) { + dir := t.TempDir() + var args []string + for i := 0; i < 50; i++ { + name := filepath.Join(dir, strings.ReplaceAll(filepath.Base(t.Name()), "/", "_")+"_"+string(rune('a'+i%26))+string(rune('0'+i/26))+".txt") + require.NoError(t, os.WriteFile(name, []byte("x\n"), 0644)) + args = append(args, filepath.Base(name)) + } + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + stdout, _, code := wcRunCtx(ctx, t, "wc "+strings.Join(args, " "), dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "total") +} + +// --- Edge case: file with only newlines --- + +func TestWcPentestOnlyNewlines(t *testing.T) { + dir := t.TempDir() + wcWriteFile(t, dir, "file.txt", 
strings.Repeat("\n", 100)) + stdout, _, code := wcRun(t, "wc file.txt", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "100") + assert.Contains(t, stdout, " 0") +} + +// --- Edge case: long line --- + +func TestWcPentestLongLine(t *testing.T) { + dir := t.TempDir() + longLine := strings.Repeat("x", 1024*1024) + "\n" + wcWriteFile(t, dir, "file.txt", longLine) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + stdout, _, code := wcRunCtx(ctx, t, "wc -L file.txt", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "1048576") +} + +// --- Invalid UTF-8 at chunk boundary --- + +func TestWcPentestInvalidUTF8AtChunkBoundary(t *testing.T) { + dir := t.TempDir() + // Build content so that invalid UTF-8 bytes (0xC0 0x80) land at the + // exact 32 KiB read boundary. This exercises the carry buffer edge + // case where invalid bytes must be handled in-place (not carried). + const chunkSize = 32 * 1024 + padding := strings.Repeat("A", chunkSize-1) // fills up to byte 32767 + // Place 0xC0 at offset 32767 (last byte of first chunk) and 0x80 at + // offset 32768 (first byte of second chunk). 
+ content := []byte(padding) + content = append(content, 0xC0, 0x80) + content = append(content, '\n') + + require.NoError(t, os.WriteFile(filepath.Join(dir, "invalid_utf8.txt"), content, 0644)) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + // -c should report exact byte count + stdout, _, code := wcRunCtx(ctx, t, "wc -c invalid_utf8.txt", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "32770") // chunkSize - 1 + 2 invalid bytes + newline + + // -l should count the newline + stdout, _, code = wcRunCtx(ctx, t, "wc -l invalid_utf8.txt", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "1") +} + +// --- Flag expansion in loop --- + +func TestWcPentestFlagExpansion(t *testing.T) { + dir := t.TempDir() + wcWriteFile(t, dir, "file.txt", "hello\n") + _, stderr, code := wcRun(t, "for flag in --follow; do wc $flag file.txt; done", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "wc:") +} diff --git a/interp/builtins/wc/builtin_wc_pentest_test.go b/interp/builtins/wc/builtin_wc_pentest_test.go new file mode 100644 index 00000000..f7a0e53e --- /dev/null +++ b/interp/builtins/wc/builtin_wc_pentest_test.go @@ -0,0 +1,64 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +// Exploratory pentest for the wc builtin — GTFOBins validation. +// +// GTFOBins documents wc as capable of reading file contents via +// "wc --files0-from /path/to/file". This flag is intentionally not +// implemented in rshell, so pflag rejects it as an unknown flag. 
+// See: https://gtfobins.org/gtfobins/wc/ + +package wc_test + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/DataDog/rshell/interp" +) + +// TestWcGTFOBinsFiles0FromRejected verifies that the GTFOBins file-read +// technique "wc --files0-from" is blocked because the flag is not implemented. +// +// GTFOBins: https://gtfobins.org/gtfobins/wc/ +// Technique: wc --files0-from /path/to/input-file +func TestWcGTFOBinsFiles0FromRejected(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "target.txt", "secret data\n") + _, stderr, code := cmdRun(t, "wc --files0-from target.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "wc:") +} + +// TestWcGTFOBinsFileReadSandboxEscape verifies that wc cannot read files +// outside the AllowedPaths sandbox. +// +// GTFOBins: https://gtfobins.org/gtfobins/wc/ +// Technique: wc /path/to/input-file (reads file to count lines/words/bytes) +func TestWcGTFOBinsFileReadSandboxEscape(t *testing.T) { + allowed := t.TempDir() + secret := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(secret, "secret.txt"), []byte("secret data"), 0644)) + secretPath := filepath.ToSlash(filepath.Join(secret, "secret.txt")) + _, stderr, code := runScript(t, "wc "+secretPath, allowed, interp.AllowedPaths([]string{allowed})) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "wc:") +} + +// TestWcGTFOBinsFileReadTraversal verifies that path traversal cannot +// escape the sandbox when using wc. 
+// +// GTFOBins: https://gtfobins.org/gtfobins/wc/ +// Technique: wc ../../etc/passwd +func TestWcGTFOBinsFileReadTraversal(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "wc ../../etc/passwd", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "wc:") +} diff --git a/interp/builtins/wc/wc.go b/interp/builtins/wc/wc.go new file mode 100644 index 00000000..1b71418b --- /dev/null +++ b/interp/builtins/wc/wc.go @@ -0,0 +1,407 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +// Package wc implements the wc builtin command. +// +// wc — print newline, word, and byte counts for each file +// +// Usage: wc [OPTION]... [FILE]... +// +// Print newline, word, and byte counts for each FILE, and a total line +// if more than one FILE is specified. A word is a non-zero-length sequence +// of characters delimited by white space. With no FILE, or when FILE is -, +// read standard input. +// +// When no flags are given, -l, -w, and -c are assumed (lines, words, bytes). +// +// Accepted flags: +// +// -l, --lines +// Print the newline count. +// +// -w, --words +// Print the word count. +// +// -c, --bytes +// Print the byte count. +// +// -m, --chars +// Print the character count. In a multibyte locale, the number of +// characters may differ from the number of bytes. +// +// -L, --max-line-length +// Print the length of the longest line. +// +// -h, --help +// Print this usage message to stdout and exit 0. +// +// Output columns always appear in a fixed order: lines, words, chars, +// bytes, max-line-length. Only the requested columns are shown. Column +// widths are right-justified to the width of the largest count across +// all files (including the total line, if any). +// +// Exit codes: +// +// 0 All files processed successfully. 
+// 1 At least one error occurred (missing file, invalid argument, etc.). +// +// Memory safety: +// +// Input is read in fixed-size chunks (32 KiB). Lines longer than +// MaxLineBytes (1 MiB) are split across chunks for counting purposes +// but never fully buffered. All loops check ctx.Err() at each +// iteration to honour the shell's execution timeout. +package wc + +import ( + "context" + "io" + "os" + "strconv" + "unicode" + "unicode/utf8" + + "github.com/DataDog/rshell/interp/builtins" +) + +// Cmd is the wc builtin command descriptor. +var Cmd = builtins.Command{Name: "wc", MakeFlags: registerFlags} + +const chunkSize = 32 * 1024 // 32 KiB read buffer +const stdinMinWidth = 7 // GNU wc minimum column width for stdin + +type counts struct { + lines int64 + words int64 + chars int64 + bytes int64 + maxLineLen int64 +} + +type options struct { + showLines bool + showWords bool + showBytes bool + showChars bool + showMaxLineLen bool +} + +func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { + help := fs.BoolP("help", "h", false, "print usage and exit") + lines := fs.BoolP("lines", "l", false, "print the newline counts") + words := fs.BoolP("words", "w", false, "print the word counts") + bytesFlag := fs.BoolP("bytes", "c", false, "print the byte counts") + chars := fs.BoolP("chars", "m", false, "print the character counts") + maxLineLen := fs.BoolP("max-line-length", "L", false, "print the maximum display width") + + // Security: --files0-from is intentionally NOT implemented. + // GTFOBins: this flag reads filenames from a file, enabling + // data exfiltration in sandboxed environments. + + return func(ctx context.Context, callCtx *builtins.CallContext, files []string) builtins.Result { + if *help { + callCtx.Out("Usage: wc [OPTION]... 
[FILE]...\n") + callCtx.Out("Print newline, word, and byte counts for each FILE.\n") + callCtx.Out("With no FILE, or when FILE is -, read standard input.\n\n") + fs.SetOutput(callCtx.Stdout) + fs.PrintDefaults() + return builtins.Result{} + } + + opts := options{ + showLines: *lines, + showWords: *words, + showBytes: *bytesFlag, + showChars: *chars, + showMaxLineLen: *maxLineLen, + } + + if !opts.showLines && !opts.showWords && !opts.showBytes && !opts.showChars && !opts.showMaxLineLen { + opts.showLines = true + opts.showWords = true + opts.showBytes = true + } + + stdinImplicit := len(files) == 0 + if stdinImplicit { + files = []string{"-"} + } + + hasStdin := stdinImplicit + if !hasStdin { + for _, f := range files { + if f == "-" { + hasStdin = true + break + } + } + } + + var total counts + var failed bool + + type fileResult struct { + name string + c counts + } + results := make([]fileResult, 0, len(files)) + + for _, file := range files { + if ctx.Err() != nil { + break + } + c, err := countFile(ctx, callCtx, file) + if err != nil { + name := file + if file == "-" { + name = "standard input" + } + callCtx.Errf("wc: %s: %s\n", name, callCtx.PortableErr(err)) + failed = true + if c == (counts{}) { + continue + } + } + results = append(results, fileResult{name: file, c: c}) + total.lines += c.lines + total.words += c.words + total.chars += c.chars + total.bytes += c.bytes + if c.maxLineLen > total.maxLineLen { + total.maxLineLen = c.maxLineLen + } + } + + width := fieldWidth(total, opts) + if hasStdin && width < stdinMinWidth { + width = stdinMinWidth + } + + for _, fr := range results { + name := fr.name + if name == "-" && stdinImplicit { + name = "" + } + printCounts(callCtx, fr.c, opts, width, name) + } + + if len(files) > 1 { + printCounts(callCtx, total, opts, width, "total") + } + + if failed { + return builtins.Result{Code: 1} + } + return builtins.Result{} + } +} + +func countFile(ctx context.Context, callCtx *builtins.CallContext, path string) 
(counts, error) { + var rc io.ReadCloser + if path == "-" { + if callCtx.Stdin == nil { + return counts{}, nil + } + rc = io.NopCloser(callCtx.Stdin) + } else { + f, err := callCtx.OpenFile(ctx, path, os.O_RDONLY, 0) + if err != nil { + return counts{}, err + } + rc = f + } + defer rc.Close() + return countReader(ctx, rc) +} + +func countReader(ctx context.Context, r io.Reader) (counts, error) { + buf := make([]byte, chunkSize) + var c counts + var inWord bool + var lineLen int64 + var carry [utf8.UTFMax - 1]byte + var carryN int + + for { + if ctx.Err() != nil { + return c, ctx.Err() + } + n, err := r.Read(buf[carryN:]) + if carryN > 0 { + copy(buf, carry[:carryN]) + n += carryN + carryN = 0 + } + if n > 0 { + chunk := buf[:n] + c.bytes += int64(n) + + // Handle incomplete UTF-8 at end of chunk. + // When tail >= n (e.g., n == 1 with a single invalid byte), the + // condition below is false, so the byte stays in chunk and + // DecodeRune processes it as a replacement character — this is + // correct and matches utf8.DecodeRune semantics. + tail := 0 + if !utf8.Valid(chunk) { + for tail = 1; tail <= 3 && tail < n; tail++ { + if utf8.Valid(chunk[:n-tail]) { + break + } + } + if tail > 0 && tail <= 3 && tail < n { + carryN = copy(carry[:], chunk[n-tail:]) + chunk = chunk[:n-tail] + } else { + tail = 0 + } + } + c.chars += int64(utf8.RuneCount(chunk)) + // carryN bytes are subtracted here and will be re-added via + // n += carryN at the top of the next iteration. 
+ c.bytes -= int64(carryN) + + for i := 0; i < len(chunk); { + r, size := utf8.DecodeRune(chunk[i:]) + i += size + if r == '\n' { + c.lines++ + if lineLen > c.maxLineLen { + c.maxLineLen = lineLen + } + lineLen = 0 + inWord = false + } else if r == '\r' { + lineLen = 0 + inWord = false + } else if r == '\t' { + lineLen = (lineLen/8 + 1) * 8 + inWord = false + } else if r == ' ' || r == '\v' || r == '\f' { + lineLen++ + inWord = false + } else { + if !inWord { + c.words++ + inWord = true + } + lineLen += int64(runeWidth(r)) + } + } + } + if err == io.EOF { + if carryN > 0 { + c.chars += int64(utf8.RuneCount(carry[:carryN])) + c.bytes += int64(carryN) + carryN = 0 + } + break + } + if err != nil { + return c, err + } + } + if lineLen > c.maxLineLen { + c.maxLineLen = lineLen + } + return c, nil +} + +func fieldWidth(total counts, opts options) int { + max := int64(0) + if opts.showLines && total.lines > max { + max = total.lines + } + if opts.showWords && total.words > max { + max = total.words + } + if opts.showChars && total.chars > max { + max = total.chars + } + if opts.showBytes && total.bytes > max { + max = total.bytes + } + if opts.showMaxLineLen && total.maxLineLen > max { + max = total.maxLineLen + } + w := len(strconv.FormatInt(max, 10)) + return w +} + +// runeWidth returns the display width of a rune following wcwidth(3) rules: +// 0 for controls, combining marks, and format chars; 2 for East Asian +// Wide/Fullwidth; 1 for everything else. +func runeWidth(r rune) int { + if unicode.Is(unicode.Cc, r) { + return 0 + } + if unicode.Is(unicode.Mn, r) || unicode.Is(unicode.Me, r) || unicode.Is(unicode.Cf, r) { + return 0 + } + // Hangul Jamo medial vowels and final consonants (zero-width in syllable composition). 
+ if r >= 0x1160 && r <= 0x11FF { + return 0 + } + if unicode.Is(eastAsianWide, r) { + return 2 + } + return 1 +} + +// eastAsianWide is a RangeTable covering East Asian Wide and Fullwidth +// codepoints per UAX #11, matching the ranges used by wcwidth(3). +var eastAsianWide = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x1100, 0x115F, 1}, // Hangul Jamo initials + {0x2329, 0x232A, 1}, // CJK angle brackets + {0x2E80, 0x303E, 1}, // CJK Radicals Supplement .. CJK Symbols + {0x3040, 0x33BF, 1}, // Hiragana .. CJK Compatibility + {0x33C0, 0x33FF, 1}, // CJK Compatibility (cont.) + {0x3400, 0x4DBF, 1}, // CJK Unified Ideographs Extension A + {0x4E00, 0xA4CF, 1}, // CJK Unified Ideographs .. Yi + {0xAC00, 0xD7A3, 1}, // Hangul Syllables + {0xF900, 0xFAFF, 1}, // CJK Compatibility Ideographs + {0xFE10, 0xFE19, 1}, // Vertical Forms + {0xFE30, 0xFE6F, 1}, // CJK Compatibility Forms + Small Form Variants + {0xFF01, 0xFF60, 1}, // Fullwidth Forms + {0xFFE0, 0xFFE6, 1}, // Fullwidth Signs + }, + R32: []unicode.Range32{ + {0x1F300, 0x1F64F, 1}, // Misc Symbols/Pictographs + Emoticons + {0x1F900, 0x1F9FF, 1}, // Supplemental Symbols and Pictographs + {0x20000, 0x2FFFD, 1}, // CJK Extension B..F + {0x30000, 0x3FFFD, 1}, // CJK Extension G+ + }, +} + +func printCounts(callCtx *builtins.CallContext, c counts, opts options, width int, name string) { + first := true + printField := func(val int64) { + if first { + callCtx.Outf("%*d", width, val) + first = false + } else { + callCtx.Outf(" %*d", width, val) + } + } + if opts.showLines { + printField(c.lines) + } + if opts.showWords { + printField(c.words) + } + if opts.showChars { + printField(c.chars) + } + if opts.showBytes { + printField(c.bytes) + } + if opts.showMaxLineLen { + printField(c.maxLineLen) + } + if name != "" { + callCtx.Outf(" %s", name) + } + callCtx.Out("\n") +} diff --git a/interp/builtins/wc/wc_gnu_compat_test.go b/interp/builtins/wc/wc_gnu_compat_test.go new file mode 100644 index 00000000..90966364 --- 
/dev/null +++ b/interp/builtins/wc/wc_gnu_compat_test.go @@ -0,0 +1,172 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package wc_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// TestGNUCompatDefaultEmpty — no flags on empty input. +// +// GNU command: printf ” | gwc +// Expected: " 0 0 0\n" +func TestGNUCompatDefaultEmpty(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "empty.txt", "") + stdout, _, code := cmdRun(t, "wc empty.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "0 0 0 empty.txt\n", stdout) +} + +// TestGNUCompatDefaultBasic — default counts on "a b\nc\n". +// +// GNU command: printf 'a b\nc\n' | gwc +// Expected: " 2 3 6\n" +func TestGNUCompatDefaultBasic(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "a b\nc\n") + stdout, _, code := cmdRun(t, "wc file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "2 3 6 file.txt\n", stdout) +} + +// TestGNUCompatLinesCount — -l on input with 2 newlines. +// +// GNU command: printf 'x\ny\n' | gwc -l +// Expected: "2\n" +func TestGNUCompatLinesCount(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "x\ny\n") + stdout, _, code := cmdRun(t, "wc -l file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "2 file.txt\n", stdout) +} + +// TestGNUCompatLinesNoNewline — -l on input with no newline. +// +// GNU command: printf 'x y' | gwc -l +// Expected: "0\n" +func TestGNUCompatLinesNoNewline(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "x y") + stdout, _, code := cmdRun(t, "wc -l file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "0 file.txt\n", stdout) +} + +// TestGNUCompatWordsEmpty — -w on empty. 
+// +// GNU command: printf ” | gwc -w +// Expected: "0\n" +func TestGNUCompatWordsEmpty(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "") + stdout, _, code := cmdRun(t, "wc -w file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "0 file.txt\n", stdout) +} + +// TestGNUCompatWordsMulti — -w on "x y\nz". +// +// GNU command: printf 'x y\nz' | gwc -w +// Expected: "3\n" +func TestGNUCompatWordsMulti(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "x y\nz") + stdout, _, code := cmdRun(t, "wc -w file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "3 file.txt\n", stdout) +} + +// TestGNUCompatBytesCount — -c on "x". +// +// GNU command: printf 'x' | gwc -c +// Expected: "1\n" +func TestGNUCompatBytesCount(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "x") + stdout, _, code := cmdRun(t, "wc -c file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "1 file.txt\n", stdout) +} + +// TestGNUCompatMaxLineLen — -L on "1\n12\n". +// +// GNU command: printf '1\n12\n' | gwc -L +// Expected: "2\n" +func TestGNUCompatMaxLineLen(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "1\n12\n") + stdout, _, code := cmdRun(t, "wc -L file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "2 file.txt\n", stdout) +} + +// TestGNUCompatMaxLineLenLastLine — -L on "\n123456" (no trailing newline). +// +// GNU command: printf '\n123456' | gwc -L +// Expected: "6\n" +func TestGNUCompatMaxLineLenLastLine(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "\n123456") + stdout, _, code := cmdRun(t, "wc -L file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "6 file.txt\n", stdout) +} + +// TestGNUCompatMultipleFiles — two files with total line. 
+// +// GNU command: gwc a.txt b.txt +// a.txt = "hello\n" (1 line, 1 word, 6 bytes) +// b.txt = "world foo\n" (1 line, 2 words, 10 bytes) +// Expected: +// +// " 1 1 6 a.txt\n 1 2 10 b.txt\n 2 3 16 total\n" +func TestGNUCompatMultipleFiles(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "hello\n") + writeFile(t, dir, "b.txt", "world foo\n") + stdout, _, code := cmdRun(t, "wc a.txt b.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, " 1 1 6 a.txt\n 1 2 10 b.txt\n 2 3 16 total\n", stdout) +} + +// TestGNUCompatCharsMultibyte — -m on "café\n". +// +// GNU command: printf 'café\n' | gwc -m +// Expected: "5\n" (5 chars: c, a, f, é, \n) +func TestGNUCompatCharsMultibyte(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "café\n") + stdout, _, code := cmdRun(t, "wc -m file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "5 file.txt\n", stdout) +} + +// TestGNUCompatControlCharIsWord — control byte \x01 counts as a word. +// +// GNU command: printf '\x01\n' | gwc -w +// Expected: "1\n" +func TestGNUCompatControlCharIsWord(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "\x01\n") + stdout, _, code := cmdRun(t, "wc -w file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "1 file.txt\n", stdout) +} + +// TestGNUCompatRejectedFlag — unknown flag exits 1. +// +// GNU command: gwc --follow +// Expected: exit 1, stderr contains "wc:" +func TestGNUCompatRejectedFlag(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "wc --follow", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "wc:") +} diff --git a/interp/builtins/wc/wc_test.go b/interp/builtins/wc/wc_test.go new file mode 100644 index 00000000..4707b0dd --- /dev/null +++ b/interp/builtins/wc/wc_test.go @@ -0,0 +1,454 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. 
+// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package wc_test + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/DataDog/rshell/interp" + "github.com/DataDog/rshell/interp/builtins/testutil" +) + +func runScript(t *testing.T, script, dir string, opts ...interp.RunnerOption) (string, string, int) { + t.Helper() + return testutil.RunScript(t, script, dir, opts...) +} + +func runScriptCtx(ctx context.Context, t *testing.T, script, dir string, opts ...interp.RunnerOption) (string, string, int) { + t.Helper() + return testutil.RunScriptCtx(ctx, t, script, dir, opts...) +} + +func cmdRun(t *testing.T, script, dir string) (string, string, int) { + t.Helper() + return runScript(t, script, dir, interp.AllowedPaths([]string{dir})) +} + +func writeFile(t *testing.T, dir, name, content string) string { + t.Helper() + require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte(content), 0644)) + return name +} + +// --- Default mode (lines, words, bytes) --- + +func TestWcDefaultEmptyStdin(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "empty.txt", "") + stdout, _, code := cmdRun(t, "wc empty.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "0 0 0 empty.txt\n", stdout) +} + +func TestWcDefaultBasic(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "a b\nc\n") + stdout, _, code := cmdRun(t, "wc file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "2 3 6 file.txt\n", stdout) +} + +func TestWcDefaultNoTrailingNewline(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "hello world") + stdout, _, code := cmdRun(t, "wc file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, " 0 2 11 file.txt\n", stdout) +} + +// --- Lines --- + +func TestWcLinesEmpty(t *testing.T) { + dir := t.TempDir() + writeFile(t, 
dir, "file.txt", "") + stdout, _, code := cmdRun(t, "wc -l file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "0 file.txt\n", stdout) +} + +func TestWcLinesNoNewline(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "x y") + stdout, _, code := cmdRun(t, "wc -l file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "0 file.txt\n", stdout) +} + +func TestWcLinesOneNewline(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "x y\n") + stdout, _, code := cmdRun(t, "wc -l file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "1 file.txt\n", stdout) +} + +func TestWcLinesTwoNewlines(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "x\ny\n") + stdout, _, code := cmdRun(t, "wc -l file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "2 file.txt\n", stdout) +} + +func TestWcLinesLongForm(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "a\nb\nc\n") + stdout, _, code := cmdRun(t, "wc --lines file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "3 file.txt\n", stdout) +} + +// --- Words --- + +func TestWcWordsEmpty(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "") + stdout, _, code := cmdRun(t, "wc -w file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "0 file.txt\n", stdout) +} + +func TestWcWordsSingle(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "x") + stdout, _, code := cmdRun(t, "wc -w file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "1 file.txt\n", stdout) +} + +func TestWcWordsMultiple(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "x y\nz") + stdout, _, code := cmdRun(t, "wc -w file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "3 file.txt\n", stdout) +} + +func TestWcWordsControlChar(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "\x01\n") + stdout, _, code := cmdRun(t, "wc -w file.txt", dir) + assert.Equal(t, 0, code) + 
assert.Equal(t, "1 file.txt\n", stdout) +} + +// --- Bytes --- + +func TestWcBytesEmpty(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "") + stdout, _, code := cmdRun(t, "wc -c file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "0 file.txt\n", stdout) +} + +func TestWcBytesSingle(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "x") + stdout, _, code := cmdRun(t, "wc -c file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "1 file.txt\n", stdout) +} + +func TestWcBytesMulti(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "hello\n") + stdout, _, code := cmdRun(t, "wc -c file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "6 file.txt\n", stdout) +} + +// --- Chars --- + +func TestWcCharsASCII(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "hello\n") + stdout, _, code := cmdRun(t, "wc -m file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "6 file.txt\n", stdout) +} + +func TestWcCharsMultibyte(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "café\n") + stdout, _, code := cmdRun(t, "wc -m file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "5 file.txt\n", stdout) +} + +func TestWcBytesMultibyte(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "café\n") + stdout, _, code := cmdRun(t, "wc -c file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "6 file.txt\n", stdout) +} + +func TestWcCharsAndBytes(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "café\n") + stdout, _, code := cmdRun(t, "wc -cm file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "5 6 file.txt\n", stdout) +} + +// --- Max line length --- + +func TestWcMaxLineLenBasic(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "1\n12\n") + stdout, _, code := cmdRun(t, "wc -L file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "2 file.txt\n", stdout) +} + +func 
TestWcMaxLineLenThreeLines(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "1\n123\n1\n") + stdout, _, code := cmdRun(t, "wc -L file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "3 file.txt\n", stdout) +} + +func TestWcMaxLineLenNoTrailingNewline(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "\n123456") + stdout, _, code := cmdRun(t, "wc -L file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "6 file.txt\n", stdout) +} + +func TestWcMaxLineLenEmpty(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "") + stdout, _, code := cmdRun(t, "wc -L file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "0 file.txt\n", stdout) +} + +// --- Multiple files --- + +func TestWcMultipleFiles(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "hello\n") + writeFile(t, dir, "b.txt", "world foo\n") + stdout, _, code := cmdRun(t, "wc a.txt b.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, " 1 1 6 a.txt\n 1 2 10 b.txt\n 2 3 16 total\n", stdout) +} + +func TestWcMultipleFilesPartialFailure(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "hello\n") + stdout, stderr, code := cmdRun(t, "wc a.txt missing.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stdout, "a.txt") + assert.Contains(t, stdout, "total") + assert.Contains(t, stderr, "wc:") +} + +// --- Stdin --- + +func TestWcStdinImplicit(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "a b\nc\n") + stdout, _, code := cmdRun(t, "cat file.txt | wc", dir) + assert.Equal(t, 0, code) + assert.Equal(t, " 2 3 6\n", stdout) +} + +func TestWcStdinDash(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "a b\nc\n") + stdout, _, code := cmdRun(t, "cat file.txt | wc -", dir) + assert.Equal(t, 0, code) + assert.Equal(t, " 2 3 6 -\n", stdout) +} + +func TestWcNilStdin(t *testing.T) { + dir := t.TempDir() + stdout, _, code := runScript(t, "wc", dir) + assert.Equal(t, 0, code) + 
assert.Equal(t, " 0 0 0\n", stdout) +} + +// --- Help --- + +func TestWcHelp(t *testing.T) { + dir := t.TempDir() + stdout, _, code := cmdRun(t, "wc --help", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "Usage:") +} + +func TestWcHelpShort(t *testing.T) { + dir := t.TempDir() + stdout, _, code := cmdRun(t, "wc -h", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "Usage:") +} + +// --- Error cases --- + +func TestWcMissingFile(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := cmdRun(t, "wc nonexistent.txt", dir) + assert.Equal(t, 1, code) + assert.Equal(t, "", stdout) + assert.Contains(t, stderr, "wc:") +} + +func TestWcUnknownFlag(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "wc --definitely-invalid", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "wc:") +} + +func TestWcFiles0FromRejected(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "wc --files0-from=foo", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "wc:") +} + +func TestWcDirectory(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "wc .", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "wc:") +} + +// --- Hardening --- + +func TestWcDoubleDash(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "hello\n") + stdout, _, code := cmdRun(t, "wc -- file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "1 1 6 file.txt\n", stdout) +} + +func TestWcContextCancellation(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", strings.Repeat("x\n", 100)) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + _, _, code := runScriptCtx(ctx, t, "wc file.txt", dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 0, code) +} + +func TestWcPipeInput(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "alpha\nbeta\ngamma\n") + stdout, _, code := cmdRun(t, "cat file.txt | wc -l", dir) + 
assert.Equal(t, 0, code) + assert.Equal(t, " 3\n", stdout) +} + +// --- Combined flags --- + +func TestWcAllFlags(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "a b\nc\n") + stdout, _, code := cmdRun(t, "wc -lwmcL file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "2 3 6 6 3 file.txt\n", stdout) +} + +func TestWcLinesAndWords(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "a b\nc\n") + stdout, _, code := cmdRun(t, "wc -lw file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "2 3 file.txt\n", stdout) +} + +// --- Width formatting --- + +func TestWcWidthDeterminedByTotal(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", strings.Repeat("word ", 20)+"\n") + writeFile(t, dir, "b.txt", "x\n") + stdout, _, code := cmdRun(t, "wc -w a.txt b.txt", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "total\n") + lines := strings.Split(strings.TrimSpace(stdout), "\n") + assert.Equal(t, 3, len(lines)) +} + +// --- Max line length: tab and CR --- + +func TestWcMaxLineLenTab(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "a\tb\n") + stdout, _, code := cmdRun(t, "wc -L file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "9 file.txt\n", stdout) +} + +func TestWcMaxLineLenCR(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "hello\rworld\n") + stdout, _, code := cmdRun(t, "wc -L file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "5 file.txt\n", stdout) +} + +func TestWcCRLFLineCount(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "a\r\nb\r\n") + stdout, _, code := cmdRun(t, "wc -l file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "2 file.txt\n", stdout) +} + +// --- Binary / non-UTF8 input --- + +func TestWcBinaryInput(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.bin", string([]byte{0x00, 0xFF, 0xFE, 0x0A, 0x41})) + stdout, _, code := cmdRun(t, "wc file.bin", dir) + assert.Equal(t, 0, 
code) + assert.Contains(t, stdout, "file.bin") + assert.Equal(t, 0, code) +} + +// --- Multibyte chars --- + +func TestWcCharsMultibyteEmoji(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "hi 💐\n") + stdout, _, code := cmdRun(t, "wc -m file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "5 file.txt\n", stdout) +} + +// TestWcChunkBoundaryMultibyte verifies that a multibyte character straddling +// the 32 KiB read-buffer boundary is not double-counted. This requires +// programmatic file generation so it lives as a Go test rather than a scenario. +func TestWcChunkBoundaryMultibyte(t *testing.T) { + dir := t.TempDir() + // 💐 is 4 bytes; placing it at offset 32766 means it spans bytes 32766-32769, + // straddling the 32768-byte chunk boundary and exercising the carry logic. + prefix := strings.Repeat("a", 32*1024-2) + content := prefix + "💐\n" + writeFile(t, dir, "file.txt", content) + stdout, _, code := cmdRun(t, "wc -mL file.txt", dir) + assert.Equal(t, 0, code) + // chars: 32766 'a' + 1 emoji + 1 newline = 32768 + // max line length: 32766 + 2 (emoji display width) = 32768 + assert.Equal(t, "32768 32768 file.txt\n", stdout) +} + diff --git a/interp/builtins/wc/wc_unix_test.go b/interp/builtins/wc/wc_unix_test.go new file mode 100644 index 00000000..7882ae13 --- /dev/null +++ b/interp/builtins/wc/wc_unix_test.go @@ -0,0 +1,35 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. 
+ +//go:build unix + +package wc_test + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestWcSymlinkToFile(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "target.txt", "hello\n") + require.NoError(t, os.Symlink("target.txt", filepath.Join(dir, "link.txt"))) + stdout, _, code := cmdRun(t, "wc link.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "1 1 6 link.txt\n", stdout) +} + +func TestWcDanglingSymlink(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.Symlink("nonexistent", filepath.Join(dir, "dangle.txt"))) + stdout, stderr, code := cmdRun(t, "wc dangle.txt", dir) + assert.Equal(t, 1, code) + assert.Equal(t, "", stdout) + assert.Contains(t, stderr, "wc:") +} From 3c9cb2cd0a0cc4a4db36e9ba781a61ff72c53eca Mon Sep 17 00:00:00 2001 From: Alexandre Yang Date: Thu, 12 Mar 2026 17:02:07 +0100 Subject: [PATCH 04/10] Remove wc tests, registration, and documentation references Co-Authored-By: Claude Opus 4.6 --- README.md | 2 +- SHELL_FEATURES.md | 1 - interp/register_builtins.go | 2 -- tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml | 22 ------------------- tests/scenarios/cmd/wc/bytes/empty_stdin.yaml | 14 ------------ tests/scenarios/cmd/wc/bytes/single_byte.yaml | 14 ------------ tests/scenarios/cmd/wc/chars/basic.yaml | 14 ------------ tests/scenarios/cmd/wc/default/basic.yaml | 14 ------------ .../scenarios/cmd/wc/default/empty_file.yaml | 14 ------------ .../scenarios/cmd/wc/default/empty_stdin.yaml | 14 ------------ .../scenarios/cmd/wc/default/single_file.yaml | 14 ------------ .../cmd/wc/errors/files0_from_rejected.yaml | 10 --------- .../scenarios/cmd/wc/errors/missing_file.yaml | 10 --------- .../scenarios/cmd/wc/errors/unknown_flag.yaml | 10 --------- .../wc/hardening/double_dash_separator.yaml | 14 ------------ tests/scenarios/cmd/wc/lines/empty_stdin.yaml | 14 ------------ .../cmd/wc/lines/no_trailing_newline.yaml | 14 ------------ 
tests/scenarios/cmd/wc/lines/one_newline.yaml | 14 ------------ .../scenarios/cmd/wc/lines/two_newlines.yaml | 14 ------------ .../cmd/wc/max_line_length/basic.yaml | 14 ------------ .../cmd/wc/max_line_length/fullwidth_cjk.yaml | 14 ------------ .../wc/max_line_length/fullwidth_emoji.yaml | 14 ------------ .../max_line_length/no_trailing_newline.yaml | 14 ------------ .../cmd/wc/max_line_length/three_lines.yaml | 14 ------------ .../cmd/wc/multiple_files/total_line.yaml | 16 -------------- .../scenarios/cmd/wc/stdin/dash_explicit.yaml | 14 ------------ tests/scenarios/cmd/wc/stdin/implicit.yaml | 14 ------------ .../scenarios/cmd/wc/words/across_lines.yaml | 14 ------------ tests/scenarios/cmd/wc/words/empty_stdin.yaml | 14 ------------ tests/scenarios/cmd/wc/words/single_word.yaml | 14 ------------ tests/scenarios/cmd/wc/words/two_words.yaml | 14 ------------ 31 files changed, 1 insertion(+), 394 deletions(-) delete mode 100644 tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml delete mode 100644 tests/scenarios/cmd/wc/bytes/empty_stdin.yaml delete mode 100644 tests/scenarios/cmd/wc/bytes/single_byte.yaml delete mode 100644 tests/scenarios/cmd/wc/chars/basic.yaml delete mode 100644 tests/scenarios/cmd/wc/default/basic.yaml delete mode 100644 tests/scenarios/cmd/wc/default/empty_file.yaml delete mode 100644 tests/scenarios/cmd/wc/default/empty_stdin.yaml delete mode 100644 tests/scenarios/cmd/wc/default/single_file.yaml delete mode 100644 tests/scenarios/cmd/wc/errors/files0_from_rejected.yaml delete mode 100644 tests/scenarios/cmd/wc/errors/missing_file.yaml delete mode 100644 tests/scenarios/cmd/wc/errors/unknown_flag.yaml delete mode 100644 tests/scenarios/cmd/wc/hardening/double_dash_separator.yaml delete mode 100644 tests/scenarios/cmd/wc/lines/empty_stdin.yaml delete mode 100644 tests/scenarios/cmd/wc/lines/no_trailing_newline.yaml delete mode 100644 tests/scenarios/cmd/wc/lines/one_newline.yaml delete mode 100644 tests/scenarios/cmd/wc/lines/two_newlines.yaml 
delete mode 100644 tests/scenarios/cmd/wc/max_line_length/basic.yaml delete mode 100644 tests/scenarios/cmd/wc/max_line_length/fullwidth_cjk.yaml delete mode 100644 tests/scenarios/cmd/wc/max_line_length/fullwidth_emoji.yaml delete mode 100644 tests/scenarios/cmd/wc/max_line_length/no_trailing_newline.yaml delete mode 100644 tests/scenarios/cmd/wc/max_line_length/three_lines.yaml delete mode 100644 tests/scenarios/cmd/wc/multiple_files/total_line.yaml delete mode 100644 tests/scenarios/cmd/wc/stdin/dash_explicit.yaml delete mode 100644 tests/scenarios/cmd/wc/stdin/implicit.yaml delete mode 100644 tests/scenarios/cmd/wc/words/across_lines.yaml delete mode 100644 tests/scenarios/cmd/wc/words/empty_stdin.yaml delete mode 100644 tests/scenarios/cmd/wc/words/single_word.yaml delete mode 100644 tests/scenarios/cmd/wc/words/two_words.yaml diff --git a/README.md b/README.md index 8d2ab8e3..e1476146 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ Linux, macOS, and Windows. ``` tests/scenarios/ -├── cmd/ # builtin command tests (echo, cat, grep, head, tail, uniq, wc, ...) +├── cmd/ # builtin command tests (echo, cat, grep, head, tail, uniq, ...) └── shell/ # shell feature tests (pipes, variables, control flow, ...) ``` diff --git a/SHELL_FEATURES.md b/SHELL_FEATURES.md index 2eb5d070..b8810462 100644 --- a/SHELL_FEATURES.md +++ b/SHELL_FEATURES.md @@ -21,7 +21,6 @@ Blocked features are rejected before execution with exit code 2. - ✅ `tr [-cdsCt] SET1 [SET2]` — translate, squeeze, and/or delete characters from stdin - ✅ `true` — return exit code 0 - ✅ `uniq [OPTION]... 
[INPUT]` — report or omit repeated lines -- ✅ `wc [-l] [-w] [-c] [-m] [FILE]...` — count lines, words, bytes, or characters in files - ❌ All other commands — return exit code 127 with `: not found` unless an ExecHandler is configured ## Variables diff --git a/interp/register_builtins.go b/interp/register_builtins.go index a86488a6..0d6fd97b 100644 --- a/interp/register_builtins.go +++ b/interp/register_builtins.go @@ -26,7 +26,6 @@ import ( "github.com/DataDog/rshell/interp/builtins/tr" truecmd "github.com/DataDog/rshell/interp/builtins/true" "github.com/DataDog/rshell/interp/builtins/uniq" - "github.com/DataDog/rshell/interp/builtins/wc" ) var registerOnce sync.Once @@ -52,7 +51,6 @@ func registerBuiltins() { tr.Cmd, truecmd.Cmd, uniq.Cmd, - wc.Cmd, } { cmd.Register() } diff --git a/tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml b/tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml deleted file mode 100644 index 9c687821..00000000 --- a/tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml +++ /dev/null @@ -1,22 +0,0 @@ -description: ls piped to wc -l counts the number of entries. -skip_assert_against_bash: true -setup: - files: - - path: a.txt - content: "a" - chmod: 0644 - - path: b.txt - content: "b" - chmod: 0644 - - path: c.txt - content: "c" - chmod: 0644 -input: - allowed_paths: ["$DIR"] - script: |+ - ls | wc -l -expect: - stdout: |2+ - 3 - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/bytes/empty_stdin.yaml b/tests/scenarios/cmd/wc/bytes/empty_stdin.yaml deleted file mode 100644 index f1a19458..00000000 --- a/tests/scenarios/cmd/wc/bytes/empty_stdin.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from GNU coreutils wc.pl test a0 -description: wc -c on empty file outputs 0. 
-setup: - files: - - path: empty.txt - content: "" -input: - allowed_paths: ["$DIR"] - script: |+ - wc -c empty.txt -expect: - stdout: "0 empty.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/bytes/single_byte.yaml b/tests/scenarios/cmd/wc/bytes/single_byte.yaml deleted file mode 100644 index a4f69956..00000000 --- a/tests/scenarios/cmd/wc/bytes/single_byte.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from GNU coreutils wc.pl test a3 -description: wc -c on single byte input outputs 1. -setup: - files: - - path: file.txt - content: "x" -input: - allowed_paths: ["$DIR"] - script: |+ - wc -c file.txt -expect: - stdout: "1 file.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/chars/basic.yaml b/tests/scenarios/cmd/wc/chars/basic.yaml deleted file mode 100644 index 25aaceb6..00000000 --- a/tests/scenarios/cmd/wc/chars/basic.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from standard POSIX wc -m behavior -description: wc -m counts characters (bytes for ASCII). -setup: - files: - - path: file.txt - content: "hello\n" -input: - allowed_paths: ["$DIR"] - script: |+ - wc -m file.txt -expect: - stdout: "6 file.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/default/basic.yaml b/tests/scenarios/cmd/wc/default/basic.yaml deleted file mode 100644 index a52874db..00000000 --- a/tests/scenarios/cmd/wc/default/basic.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from GNU coreutils wc.pl test b1 -description: wc default counts lines, words, bytes. 
-setup: - files: - - path: file.txt - content: "a b\nc\n" -input: - allowed_paths: ["$DIR"] - script: |+ - wc file.txt -expect: - stdout: "2 3 6 file.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/default/empty_file.yaml b/tests/scenarios/cmd/wc/default/empty_file.yaml deleted file mode 100644 index 5b00343c..00000000 --- a/tests/scenarios/cmd/wc/default/empty_file.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from uutils test_file_empty -description: wc on an empty file shows all zeros. -setup: - files: - - path: empty.txt - content: "" -input: - allowed_paths: ["$DIR"] - script: |+ - wc empty.txt -expect: - stdout: "0 0 0 empty.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/default/empty_stdin.yaml b/tests/scenarios/cmd/wc/default/empty_stdin.yaml deleted file mode 100644 index 5249634e..00000000 --- a/tests/scenarios/cmd/wc/default/empty_stdin.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from GNU coreutils wc.pl test b0 -description: wc with no args and empty stdin outputs all zeros. -setup: - files: - - path: empty.txt - content: "" -input: - allowed_paths: ["$DIR"] - script: |+ - wc empty.txt -expect: - stdout: "0 0 0 empty.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/default/single_file.yaml b/tests/scenarios/cmd/wc/default/single_file.yaml deleted file mode 100644 index f6c1f873..00000000 --- a/tests/scenarios/cmd/wc/default/single_file.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from uutils test_single_default -description: wc with a single file shows lines words bytes and filename. 
-setup: - files: - - path: file.txt - content: "alpha\nbeta\n" -input: - allowed_paths: ["$DIR"] - script: |+ - wc file.txt -expect: - stdout: " 2 2 11 file.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/errors/files0_from_rejected.yaml b/tests/scenarios/cmd/wc/errors/files0_from_rejected.yaml deleted file mode 100644 index d74a82d9..00000000 --- a/tests/scenarios/cmd/wc/errors/files0_from_rejected.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# Derived from GTFOBins safety requirement -description: wc rejects --files0-from flag (security risk). -input: - allowed_paths: ["$DIR"] - script: |+ - wc --files0-from=foo -expect: - stdout: "" - stderr_contains: ["wc:"] - exit_code: 1 diff --git a/tests/scenarios/cmd/wc/errors/missing_file.yaml b/tests/scenarios/cmd/wc/errors/missing_file.yaml deleted file mode 100644 index 5cc5aeb4..00000000 --- a/tests/scenarios/cmd/wc/errors/missing_file.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# Derived from uutils test_read_from_nonexistent_file -description: wc exits 1 and prints error for nonexistent file. -input: - allowed_paths: ["$DIR"] - script: |+ - wc bogusfile -expect: - stdout: "" - stderr_contains: ["wc: bogusfile:"] - exit_code: 1 diff --git a/tests/scenarios/cmd/wc/errors/unknown_flag.yaml b/tests/scenarios/cmd/wc/errors/unknown_flag.yaml deleted file mode 100644 index f14f0ba4..00000000 --- a/tests/scenarios/cmd/wc/errors/unknown_flag.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# Derived from uutils test_invalid_arg -description: wc rejects unknown flags with exit code 1. 
-input: - allowed_paths: ["$DIR"] - script: |+ - wc --definitely-invalid -expect: - stdout: "" - stderr_contains: ["wc:"] - exit_code: 1 diff --git a/tests/scenarios/cmd/wc/hardening/double_dash_separator.yaml b/tests/scenarios/cmd/wc/hardening/double_dash_separator.yaml deleted file mode 100644 index 50b91f43..00000000 --- a/tests/scenarios/cmd/wc/hardening/double_dash_separator.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from standard POSIX -- convention -description: wc accepts -- to end flag parsing. -setup: - files: - - path: file.txt - content: "hello\n" -input: - allowed_paths: ["$DIR"] - script: |+ - wc -- file.txt -expect: - stdout: "1 1 6 file.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/lines/empty_stdin.yaml b/tests/scenarios/cmd/wc/lines/empty_stdin.yaml deleted file mode 100644 index 33775902..00000000 --- a/tests/scenarios/cmd/wc/lines/empty_stdin.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from GNU coreutils wc.pl test a1 -description: wc -l on empty file outputs 0. -setup: - files: - - path: empty.txt - content: "" -input: - allowed_paths: ["$DIR"] - script: |+ - wc -l empty.txt -expect: - stdout: "0 empty.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/lines/no_trailing_newline.yaml b/tests/scenarios/cmd/wc/lines/no_trailing_newline.yaml deleted file mode 100644 index ecb45502..00000000 --- a/tests/scenarios/cmd/wc/lines/no_trailing_newline.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from GNU coreutils wc.pl test a7 -description: wc -l counts newline bytes; text with no newline counts as 0 lines. 
-setup: - files: - - path: file.txt - content: "x y" -input: - allowed_paths: ["$DIR"] - script: |+ - wc -l file.txt -expect: - stdout: "0 file.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/lines/one_newline.yaml b/tests/scenarios/cmd/wc/lines/one_newline.yaml deleted file mode 100644 index 817ea07f..00000000 --- a/tests/scenarios/cmd/wc/lines/one_newline.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from GNU coreutils wc.pl test a8 -description: wc -l counts 1 for a single newline-terminated line. -setup: - files: - - path: file.txt - content: "x y\n" -input: - allowed_paths: ["$DIR"] - script: |+ - wc -l file.txt -expect: - stdout: "1 file.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/lines/two_newlines.yaml b/tests/scenarios/cmd/wc/lines/two_newlines.yaml deleted file mode 100644 index e9109634..00000000 --- a/tests/scenarios/cmd/wc/lines/two_newlines.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from GNU coreutils wc.pl test a9 -description: wc -l counts 2 for two newline-terminated lines. -setup: - files: - - path: file.txt - content: "x\ny\n" -input: - allowed_paths: ["$DIR"] - script: |+ - wc -l file.txt -expect: - stdout: "2 file.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/max_line_length/basic.yaml b/tests/scenarios/cmd/wc/max_line_length/basic.yaml deleted file mode 100644 index e7461fde..00000000 --- a/tests/scenarios/cmd/wc/max_line_length/basic.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from GNU coreutils wc.pl test c0 -description: wc -L reports the length of the longest line. 
-setup: - files: - - path: file.txt - content: "1\n12\n" -input: - allowed_paths: ["$DIR"] - script: |+ - wc -L file.txt -expect: - stdout: "2 file.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/max_line_length/fullwidth_cjk.yaml b/tests/scenarios/cmd/wc/max_line_length/fullwidth_cjk.yaml deleted file mode 100644 index b8b50009..00000000 --- a/tests/scenarios/cmd/wc/max_line_length/fullwidth_cjk.yaml +++ /dev/null @@ -1,14 +0,0 @@ -description: wc -L counts display columns, CJK characters are width 2. -skip_assert_against_bash: true # display width depends on locale; we always use Unicode width -setup: - files: - - path: file.txt - content: "你好\n" -input: - allowed_paths: ["$DIR"] - script: |+ - wc -L file.txt -expect: - stdout: "4 file.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/max_line_length/fullwidth_emoji.yaml b/tests/scenarios/cmd/wc/max_line_length/fullwidth_emoji.yaml deleted file mode 100644 index 88329252..00000000 --- a/tests/scenarios/cmd/wc/max_line_length/fullwidth_emoji.yaml +++ /dev/null @@ -1,14 +0,0 @@ -description: wc -L counts display columns, emoji characters are width 2. -skip_assert_against_bash: true # display width depends on locale; we always use Unicode width -setup: - files: - - path: file.txt - content: "ab💐\n" -input: - allowed_paths: ["$DIR"] - script: |+ - wc -L file.txt -expect: - stdout: "4 file.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/max_line_length/no_trailing_newline.yaml b/tests/scenarios/cmd/wc/max_line_length/no_trailing_newline.yaml deleted file mode 100644 index c417d540..00000000 --- a/tests/scenarios/cmd/wc/max_line_length/no_trailing_newline.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from GNU coreutils wc.pl test c2 -description: wc -L counts a final line with no trailing newline. 
-setup: - files: - - path: file.txt - content: "\n123456" -input: - allowed_paths: ["$DIR"] - script: |+ - wc -L file.txt -expect: - stdout: "6 file.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/max_line_length/three_lines.yaml b/tests/scenarios/cmd/wc/max_line_length/three_lines.yaml deleted file mode 100644 index d70b6a20..00000000 --- a/tests/scenarios/cmd/wc/max_line_length/three_lines.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from GNU coreutils wc.pl test c1 -description: wc -L picks the max among multiple lines. -setup: - files: - - path: file.txt - content: "1\n123\n1\n" -input: - allowed_paths: ["$DIR"] - script: |+ - wc -L file.txt -expect: - stdout: "3 file.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/multiple_files/total_line.yaml b/tests/scenarios/cmd/wc/multiple_files/total_line.yaml deleted file mode 100644 index b374e75f..00000000 --- a/tests/scenarios/cmd/wc/multiple_files/total_line.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# Derived from GNU coreutils wc-total.sh -description: wc prints a total line when given multiple files. -setup: - files: - - path: a.txt - content: "hello\n" - - path: b.txt - content: "world foo\n" -input: - allowed_paths: ["$DIR"] - script: |+ - wc a.txt b.txt -expect: - stdout: " 1 1 6 a.txt\n 1 2 10 b.txt\n 2 3 16 total\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/stdin/dash_explicit.yaml b/tests/scenarios/cmd/wc/stdin/dash_explicit.yaml deleted file mode 100644 index 1804c0ad..00000000 --- a/tests/scenarios/cmd/wc/stdin/dash_explicit.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from uutils test_stdin_explicit -description: wc with explicit - reads stdin and shows filename -. 
-setup: - files: - - path: file.txt - content: "a b\nc\n" -input: - allowed_paths: ["$DIR"] - script: |+ - cat file.txt | wc - -expect: - stdout: " 2 3 6 -\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/stdin/implicit.yaml b/tests/scenarios/cmd/wc/stdin/implicit.yaml deleted file mode 100644 index ed40861c..00000000 --- a/tests/scenarios/cmd/wc/stdin/implicit.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from standard POSIX wc behavior -description: wc reads stdin implicitly when no files are given. -setup: - files: - - path: file.txt - content: "a b\nc\n" -input: - allowed_paths: ["$DIR"] - script: |+ - cat file.txt | wc -expect: - stdout: " 2 3 6\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/words/across_lines.yaml b/tests/scenarios/cmd/wc/words/across_lines.yaml deleted file mode 100644 index b57e7ff8..00000000 --- a/tests/scenarios/cmd/wc/words/across_lines.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from GNU coreutils wc.pl test a6 -description: wc -w counts words across lines including a line without trailing newline. -setup: - files: - - path: file.txt - content: "x y\nz" -input: - allowed_paths: ["$DIR"] - script: |+ - wc -w file.txt -expect: - stdout: "3 file.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/words/empty_stdin.yaml b/tests/scenarios/cmd/wc/words/empty_stdin.yaml deleted file mode 100644 index 2f77334b..00000000 --- a/tests/scenarios/cmd/wc/words/empty_stdin.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from GNU coreutils wc.pl test a2 -description: wc -w on empty file outputs 0. 
-setup: - files: - - path: empty.txt - content: "" -input: - allowed_paths: ["$DIR"] - script: |+ - wc -w empty.txt -expect: - stdout: "0 empty.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/words/single_word.yaml b/tests/scenarios/cmd/wc/words/single_word.yaml deleted file mode 100644 index c5fb92da..00000000 --- a/tests/scenarios/cmd/wc/words/single_word.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from GNU coreutils wc.pl test a4 -description: wc -w counts 1 word for a single non-whitespace token. -setup: - files: - - path: file.txt - content: "x" -input: - allowed_paths: ["$DIR"] - script: |+ - wc -w file.txt -expect: - stdout: "1 file.txt\n" - stderr: "" - exit_code: 0 diff --git a/tests/scenarios/cmd/wc/words/two_words.yaml b/tests/scenarios/cmd/wc/words/two_words.yaml deleted file mode 100644 index 9981335d..00000000 --- a/tests/scenarios/cmd/wc/words/two_words.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# Derived from GNU coreutils wc.pl test a5 -description: wc -w counts 2 words on a line with two tokens. -setup: - files: - - path: file.txt - content: "x y\n" -input: - allowed_paths: ["$DIR"] - script: |+ - wc -w file.txt -expect: - stdout: "2 file.txt\n" - stderr: "" - exit_code: 0 From 7adbe8078e50a02bdf11163ef7ad0c765c4a29e3 Mon Sep 17 00:00:00 2001 From: Alexandre Yang Date: Thu, 12 Mar 2026 17:02:07 +0100 Subject: [PATCH 05/10] Revert "Remove wc tests, registration, and documentation references" This reverts commit 3c9cb2cd0a0cc4a4db36e9ba781a61ff72c53eca. 
--- README.md | 2 +- SHELL_FEATURES.md | 1 + interp/register_builtins.go | 2 ++ tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml | 22 +++++++++++++++++++ tests/scenarios/cmd/wc/bytes/empty_stdin.yaml | 14 ++++++++++++ tests/scenarios/cmd/wc/bytes/single_byte.yaml | 14 ++++++++++++ tests/scenarios/cmd/wc/chars/basic.yaml | 14 ++++++++++++ tests/scenarios/cmd/wc/default/basic.yaml | 14 ++++++++++++ .../scenarios/cmd/wc/default/empty_file.yaml | 14 ++++++++++++ .../scenarios/cmd/wc/default/empty_stdin.yaml | 14 ++++++++++++ .../scenarios/cmd/wc/default/single_file.yaml | 14 ++++++++++++ .../cmd/wc/errors/files0_from_rejected.yaml | 10 +++++++++ .../scenarios/cmd/wc/errors/missing_file.yaml | 10 +++++++++ .../scenarios/cmd/wc/errors/unknown_flag.yaml | 10 +++++++++ .../wc/hardening/double_dash_separator.yaml | 14 ++++++++++++ tests/scenarios/cmd/wc/lines/empty_stdin.yaml | 14 ++++++++++++ .../cmd/wc/lines/no_trailing_newline.yaml | 14 ++++++++++++ tests/scenarios/cmd/wc/lines/one_newline.yaml | 14 ++++++++++++ .../scenarios/cmd/wc/lines/two_newlines.yaml | 14 ++++++++++++ .../cmd/wc/max_line_length/basic.yaml | 14 ++++++++++++ .../cmd/wc/max_line_length/fullwidth_cjk.yaml | 14 ++++++++++++ .../wc/max_line_length/fullwidth_emoji.yaml | 14 ++++++++++++ .../max_line_length/no_trailing_newline.yaml | 14 ++++++++++++ .../cmd/wc/max_line_length/three_lines.yaml | 14 ++++++++++++ .../cmd/wc/multiple_files/total_line.yaml | 16 ++++++++++++++ .../scenarios/cmd/wc/stdin/dash_explicit.yaml | 14 ++++++++++++ tests/scenarios/cmd/wc/stdin/implicit.yaml | 14 ++++++++++++ .../scenarios/cmd/wc/words/across_lines.yaml | 14 ++++++++++++ tests/scenarios/cmd/wc/words/empty_stdin.yaml | 14 ++++++++++++ tests/scenarios/cmd/wc/words/single_word.yaml | 14 ++++++++++++ tests/scenarios/cmd/wc/words/two_words.yaml | 14 ++++++++++++ 31 files changed, 394 insertions(+), 1 deletion(-) create mode 100644 tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml create mode 100644 
tests/scenarios/cmd/wc/bytes/empty_stdin.yaml create mode 100644 tests/scenarios/cmd/wc/bytes/single_byte.yaml create mode 100644 tests/scenarios/cmd/wc/chars/basic.yaml create mode 100644 tests/scenarios/cmd/wc/default/basic.yaml create mode 100644 tests/scenarios/cmd/wc/default/empty_file.yaml create mode 100644 tests/scenarios/cmd/wc/default/empty_stdin.yaml create mode 100644 tests/scenarios/cmd/wc/default/single_file.yaml create mode 100644 tests/scenarios/cmd/wc/errors/files0_from_rejected.yaml create mode 100644 tests/scenarios/cmd/wc/errors/missing_file.yaml create mode 100644 tests/scenarios/cmd/wc/errors/unknown_flag.yaml create mode 100644 tests/scenarios/cmd/wc/hardening/double_dash_separator.yaml create mode 100644 tests/scenarios/cmd/wc/lines/empty_stdin.yaml create mode 100644 tests/scenarios/cmd/wc/lines/no_trailing_newline.yaml create mode 100644 tests/scenarios/cmd/wc/lines/one_newline.yaml create mode 100644 tests/scenarios/cmd/wc/lines/two_newlines.yaml create mode 100644 tests/scenarios/cmd/wc/max_line_length/basic.yaml create mode 100644 tests/scenarios/cmd/wc/max_line_length/fullwidth_cjk.yaml create mode 100644 tests/scenarios/cmd/wc/max_line_length/fullwidth_emoji.yaml create mode 100644 tests/scenarios/cmd/wc/max_line_length/no_trailing_newline.yaml create mode 100644 tests/scenarios/cmd/wc/max_line_length/three_lines.yaml create mode 100644 tests/scenarios/cmd/wc/multiple_files/total_line.yaml create mode 100644 tests/scenarios/cmd/wc/stdin/dash_explicit.yaml create mode 100644 tests/scenarios/cmd/wc/stdin/implicit.yaml create mode 100644 tests/scenarios/cmd/wc/words/across_lines.yaml create mode 100644 tests/scenarios/cmd/wc/words/empty_stdin.yaml create mode 100644 tests/scenarios/cmd/wc/words/single_word.yaml create mode 100644 tests/scenarios/cmd/wc/words/two_words.yaml diff --git a/README.md b/README.md index e1476146..8d2ab8e3 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ Linux, macOS, and Windows. 
``` tests/scenarios/ -├── cmd/ # builtin command tests (echo, cat, grep, head, tail, uniq, ...) +├── cmd/ # builtin command tests (echo, cat, grep, head, tail, uniq, wc, ...) └── shell/ # shell feature tests (pipes, variables, control flow, ...) ``` diff --git a/SHELL_FEATURES.md b/SHELL_FEATURES.md index b8810462..2eb5d070 100644 --- a/SHELL_FEATURES.md +++ b/SHELL_FEATURES.md @@ -21,6 +21,7 @@ Blocked features are rejected before execution with exit code 2. - ✅ `tr [-cdsCt] SET1 [SET2]` — translate, squeeze, and/or delete characters from stdin - ✅ `true` — return exit code 0 - ✅ `uniq [OPTION]... [INPUT]` — report or omit repeated lines +- ✅ `wc [-l] [-w] [-c] [-m] [FILE]...` — count lines, words, bytes, or characters in files - ❌ All other commands — return exit code 127 with `: not found` unless an ExecHandler is configured ## Variables diff --git a/interp/register_builtins.go b/interp/register_builtins.go index 0d6fd97b..a86488a6 100644 --- a/interp/register_builtins.go +++ b/interp/register_builtins.go @@ -26,6 +26,7 @@ import ( "github.com/DataDog/rshell/interp/builtins/tr" truecmd "github.com/DataDog/rshell/interp/builtins/true" "github.com/DataDog/rshell/interp/builtins/uniq" + "github.com/DataDog/rshell/interp/builtins/wc" ) var registerOnce sync.Once @@ -51,6 +52,7 @@ func registerBuiltins() { tr.Cmd, truecmd.Cmd, uniq.Cmd, + wc.Cmd, } { cmd.Register() } diff --git a/tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml b/tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml new file mode 100644 index 00000000..9c687821 --- /dev/null +++ b/tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml @@ -0,0 +1,22 @@ +description: ls piped to wc -l counts the number of entries. 
+skip_assert_against_bash: true +setup: + files: + - path: a.txt + content: "a" + chmod: 0644 + - path: b.txt + content: "b" + chmod: 0644 + - path: c.txt + content: "c" + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + ls | wc -l +expect: + stdout: |2+ + 3 + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/bytes/empty_stdin.yaml b/tests/scenarios/cmd/wc/bytes/empty_stdin.yaml new file mode 100644 index 00000000..f1a19458 --- /dev/null +++ b/tests/scenarios/cmd/wc/bytes/empty_stdin.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils wc.pl test a0 +description: wc -c on empty file outputs 0. +setup: + files: + - path: empty.txt + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -c empty.txt +expect: + stdout: "0 empty.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/bytes/single_byte.yaml b/tests/scenarios/cmd/wc/bytes/single_byte.yaml new file mode 100644 index 00000000..a4f69956 --- /dev/null +++ b/tests/scenarios/cmd/wc/bytes/single_byte.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils wc.pl test a3 +description: wc -c on single byte input outputs 1. +setup: + files: + - path: file.txt + content: "x" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -c file.txt +expect: + stdout: "1 file.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/chars/basic.yaml b/tests/scenarios/cmd/wc/chars/basic.yaml new file mode 100644 index 00000000..25aaceb6 --- /dev/null +++ b/tests/scenarios/cmd/wc/chars/basic.yaml @@ -0,0 +1,14 @@ +# Derived from standard POSIX wc -m behavior +description: wc -m counts characters (bytes for ASCII). 
+setup: + files: + - path: file.txt + content: "hello\n" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -m file.txt +expect: + stdout: "6 file.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/default/basic.yaml b/tests/scenarios/cmd/wc/default/basic.yaml new file mode 100644 index 00000000..a52874db --- /dev/null +++ b/tests/scenarios/cmd/wc/default/basic.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils wc.pl test b1 +description: wc default counts lines, words, bytes. +setup: + files: + - path: file.txt + content: "a b\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + wc file.txt +expect: + stdout: "2 3 6 file.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/default/empty_file.yaml b/tests/scenarios/cmd/wc/default/empty_file.yaml new file mode 100644 index 00000000..5b00343c --- /dev/null +++ b/tests/scenarios/cmd/wc/default/empty_file.yaml @@ -0,0 +1,14 @@ +# Derived from uutils test_file_empty +description: wc on an empty file shows all zeros. +setup: + files: + - path: empty.txt + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + wc empty.txt +expect: + stdout: "0 0 0 empty.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/default/empty_stdin.yaml b/tests/scenarios/cmd/wc/default/empty_stdin.yaml new file mode 100644 index 00000000..5249634e --- /dev/null +++ b/tests/scenarios/cmd/wc/default/empty_stdin.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils wc.pl test b0 +description: wc with no args and empty stdin outputs all zeros. 
+setup: + files: + - path: empty.txt + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + wc empty.txt +expect: + stdout: "0 0 0 empty.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/default/single_file.yaml b/tests/scenarios/cmd/wc/default/single_file.yaml new file mode 100644 index 00000000..f6c1f873 --- /dev/null +++ b/tests/scenarios/cmd/wc/default/single_file.yaml @@ -0,0 +1,14 @@ +# Derived from uutils test_single_default +description: wc with a single file shows lines words bytes and filename. +setup: + files: + - path: file.txt + content: "alpha\nbeta\n" +input: + allowed_paths: ["$DIR"] + script: |+ + wc file.txt +expect: + stdout: " 2 2 11 file.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/errors/files0_from_rejected.yaml b/tests/scenarios/cmd/wc/errors/files0_from_rejected.yaml new file mode 100644 index 00000000..d74a82d9 --- /dev/null +++ b/tests/scenarios/cmd/wc/errors/files0_from_rejected.yaml @@ -0,0 +1,10 @@ +# Derived from GTFOBins safety requirement +description: wc rejects --files0-from flag (security risk). +input: + allowed_paths: ["$DIR"] + script: |+ + wc --files0-from=foo +expect: + stdout: "" + stderr_contains: ["wc:"] + exit_code: 1 diff --git a/tests/scenarios/cmd/wc/errors/missing_file.yaml b/tests/scenarios/cmd/wc/errors/missing_file.yaml new file mode 100644 index 00000000..5cc5aeb4 --- /dev/null +++ b/tests/scenarios/cmd/wc/errors/missing_file.yaml @@ -0,0 +1,10 @@ +# Derived from uutils test_read_from_nonexistent_file +description: wc exits 1 and prints error for nonexistent file. 
+input: + allowed_paths: ["$DIR"] + script: |+ + wc bogusfile +expect: + stdout: "" + stderr_contains: ["wc: bogusfile:"] + exit_code: 1 diff --git a/tests/scenarios/cmd/wc/errors/unknown_flag.yaml b/tests/scenarios/cmd/wc/errors/unknown_flag.yaml new file mode 100644 index 00000000..f14f0ba4 --- /dev/null +++ b/tests/scenarios/cmd/wc/errors/unknown_flag.yaml @@ -0,0 +1,10 @@ +# Derived from uutils test_invalid_arg +description: wc rejects unknown flags with exit code 1. +input: + allowed_paths: ["$DIR"] + script: |+ + wc --definitely-invalid +expect: + stdout: "" + stderr_contains: ["wc:"] + exit_code: 1 diff --git a/tests/scenarios/cmd/wc/hardening/double_dash_separator.yaml b/tests/scenarios/cmd/wc/hardening/double_dash_separator.yaml new file mode 100644 index 00000000..50b91f43 --- /dev/null +++ b/tests/scenarios/cmd/wc/hardening/double_dash_separator.yaml @@ -0,0 +1,14 @@ +# Derived from standard POSIX -- convention +description: wc accepts -- to end flag parsing. +setup: + files: + - path: file.txt + content: "hello\n" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -- file.txt +expect: + stdout: "1 1 6 file.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/lines/empty_stdin.yaml b/tests/scenarios/cmd/wc/lines/empty_stdin.yaml new file mode 100644 index 00000000..33775902 --- /dev/null +++ b/tests/scenarios/cmd/wc/lines/empty_stdin.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils wc.pl test a1 +description: wc -l on empty file outputs 0. 
+setup: + files: + - path: empty.txt + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -l empty.txt +expect: + stdout: "0 empty.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/lines/no_trailing_newline.yaml b/tests/scenarios/cmd/wc/lines/no_trailing_newline.yaml new file mode 100644 index 00000000..ecb45502 --- /dev/null +++ b/tests/scenarios/cmd/wc/lines/no_trailing_newline.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils wc.pl test a7 +description: wc -l counts newline bytes; text with no newline counts as 0 lines. +setup: + files: + - path: file.txt + content: "x y" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -l file.txt +expect: + stdout: "0 file.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/lines/one_newline.yaml b/tests/scenarios/cmd/wc/lines/one_newline.yaml new file mode 100644 index 00000000..817ea07f --- /dev/null +++ b/tests/scenarios/cmd/wc/lines/one_newline.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils wc.pl test a8 +description: wc -l counts 1 for a single newline-terminated line. +setup: + files: + - path: file.txt + content: "x y\n" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -l file.txt +expect: + stdout: "1 file.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/lines/two_newlines.yaml b/tests/scenarios/cmd/wc/lines/two_newlines.yaml new file mode 100644 index 00000000..e9109634 --- /dev/null +++ b/tests/scenarios/cmd/wc/lines/two_newlines.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils wc.pl test a9 +description: wc -l counts 2 for two newline-terminated lines. 
+setup: + files: + - path: file.txt + content: "x\ny\n" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -l file.txt +expect: + stdout: "2 file.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/max_line_length/basic.yaml b/tests/scenarios/cmd/wc/max_line_length/basic.yaml new file mode 100644 index 00000000..e7461fde --- /dev/null +++ b/tests/scenarios/cmd/wc/max_line_length/basic.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils wc.pl test c0 +description: wc -L reports the length of the longest line. +setup: + files: + - path: file.txt + content: "1\n12\n" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -L file.txt +expect: + stdout: "2 file.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/max_line_length/fullwidth_cjk.yaml b/tests/scenarios/cmd/wc/max_line_length/fullwidth_cjk.yaml new file mode 100644 index 00000000..b8b50009 --- /dev/null +++ b/tests/scenarios/cmd/wc/max_line_length/fullwidth_cjk.yaml @@ -0,0 +1,14 @@ +description: wc -L counts display columns, CJK characters are width 2. +skip_assert_against_bash: true # display width depends on locale; we always use Unicode width +setup: + files: + - path: file.txt + content: "你好\n" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -L file.txt +expect: + stdout: "4 file.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/max_line_length/fullwidth_emoji.yaml b/tests/scenarios/cmd/wc/max_line_length/fullwidth_emoji.yaml new file mode 100644 index 00000000..88329252 --- /dev/null +++ b/tests/scenarios/cmd/wc/max_line_length/fullwidth_emoji.yaml @@ -0,0 +1,14 @@ +description: wc -L counts display columns, emoji characters are width 2. 
+skip_assert_against_bash: true # display width depends on locale; we always use Unicode width +setup: + files: + - path: file.txt + content: "ab💐\n" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -L file.txt +expect: + stdout: "4 file.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/max_line_length/no_trailing_newline.yaml b/tests/scenarios/cmd/wc/max_line_length/no_trailing_newline.yaml new file mode 100644 index 00000000..c417d540 --- /dev/null +++ b/tests/scenarios/cmd/wc/max_line_length/no_trailing_newline.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils wc.pl test c2 +description: wc -L counts a final line with no trailing newline. +setup: + files: + - path: file.txt + content: "\n123456" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -L file.txt +expect: + stdout: "6 file.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/max_line_length/three_lines.yaml b/tests/scenarios/cmd/wc/max_line_length/three_lines.yaml new file mode 100644 index 00000000..d70b6a20 --- /dev/null +++ b/tests/scenarios/cmd/wc/max_line_length/three_lines.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils wc.pl test c1 +description: wc -L picks the max among multiple lines. +setup: + files: + - path: file.txt + content: "1\n123\n1\n" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -L file.txt +expect: + stdout: "3 file.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/multiple_files/total_line.yaml b/tests/scenarios/cmd/wc/multiple_files/total_line.yaml new file mode 100644 index 00000000..b374e75f --- /dev/null +++ b/tests/scenarios/cmd/wc/multiple_files/total_line.yaml @@ -0,0 +1,16 @@ +# Derived from GNU coreutils wc-total.sh +description: wc prints a total line when given multiple files. 
+setup: + files: + - path: a.txt + content: "hello\n" + - path: b.txt + content: "world foo\n" +input: + allowed_paths: ["$DIR"] + script: |+ + wc a.txt b.txt +expect: + stdout: " 1 1 6 a.txt\n 1 2 10 b.txt\n 2 3 16 total\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/stdin/dash_explicit.yaml b/tests/scenarios/cmd/wc/stdin/dash_explicit.yaml new file mode 100644 index 00000000..1804c0ad --- /dev/null +++ b/tests/scenarios/cmd/wc/stdin/dash_explicit.yaml @@ -0,0 +1,14 @@ +# Derived from uutils test_stdin_explicit +description: wc with explicit - reads stdin and shows filename -. +setup: + files: + - path: file.txt + content: "a b\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + cat file.txt | wc - +expect: + stdout: " 2 3 6 -\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/stdin/implicit.yaml b/tests/scenarios/cmd/wc/stdin/implicit.yaml new file mode 100644 index 00000000..ed40861c --- /dev/null +++ b/tests/scenarios/cmd/wc/stdin/implicit.yaml @@ -0,0 +1,14 @@ +# Derived from standard POSIX wc behavior +description: wc reads stdin implicitly when no files are given. +setup: + files: + - path: file.txt + content: "a b\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + cat file.txt | wc +expect: + stdout: " 2 3 6\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/words/across_lines.yaml b/tests/scenarios/cmd/wc/words/across_lines.yaml new file mode 100644 index 00000000..b57e7ff8 --- /dev/null +++ b/tests/scenarios/cmd/wc/words/across_lines.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils wc.pl test a6 +description: wc -w counts words across lines including a line without trailing newline. 
+setup: + files: + - path: file.txt + content: "x y\nz" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -w file.txt +expect: + stdout: "3 file.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/words/empty_stdin.yaml b/tests/scenarios/cmd/wc/words/empty_stdin.yaml new file mode 100644 index 00000000..2f77334b --- /dev/null +++ b/tests/scenarios/cmd/wc/words/empty_stdin.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils wc.pl test a2 +description: wc -w on empty file outputs 0. +setup: + files: + - path: empty.txt + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -w empty.txt +expect: + stdout: "0 empty.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/words/single_word.yaml b/tests/scenarios/cmd/wc/words/single_word.yaml new file mode 100644 index 00000000..c5fb92da --- /dev/null +++ b/tests/scenarios/cmd/wc/words/single_word.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils wc.pl test a4 +description: wc -w counts 1 word for a single non-whitespace token. +setup: + files: + - path: file.txt + content: "x" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -w file.txt +expect: + stdout: "1 file.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/words/two_words.yaml b/tests/scenarios/cmd/wc/words/two_words.yaml new file mode 100644 index 00000000..9981335d --- /dev/null +++ b/tests/scenarios/cmd/wc/words/two_words.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils wc.pl test a5 +description: wc -w counts 2 words on a line with two tokens. 
+setup: + files: + - path: file.txt + content: "x y\n" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -w file.txt +expect: + stdout: "2 file.txt\n" + stderr: "" + exit_code: 0 From b9219b44acb5d00763d14e1356a801efd48d7983 Mon Sep 17 00:00:00 2001 From: Alexandre Yang Date: Thu, 12 Mar 2026 17:15:24 +0100 Subject: [PATCH 06/10] [iter 1] Fix wc bash compatibility: \v/\f display width, control char word counting, indentation - \f now resets lineLen (saves max first), matching GNU wc -L behavior - \v now has zero display width, matching GNU wc -L behavior - Control characters (unicode.Cc) no longer start new words, matching GNU wc -w - Fix indentation inconsistency in countReader carry handling - Update tests to expect correct GNU-compatible output (0 words for \x01) Co-Authored-By: Claude Opus 4.6 --- interp/builtins/wc/wc.go | 18 ++++++++++++++---- interp/builtins/wc/wc_gnu_compat_test.go | 8 ++++---- interp/builtins/wc/wc_test.go | 2 +- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/interp/builtins/wc/wc.go b/interp/builtins/wc/wc.go index 1b71418b..f154f7ce 100644 --- a/interp/builtins/wc/wc.go +++ b/interp/builtins/wc/wc.go @@ -259,8 +259,8 @@ func countReader(ctx context.Context, r io.Reader) (counts, error) { } c.chars += int64(utf8.RuneCount(chunk)) // carryN bytes are subtracted here and will be re-added via - // n += carryN at the top of the next iteration. - c.bytes -= int64(carryN) + // n += carryN at the top of the next iteration. 
+ c.bytes -= int64(carryN) for i := 0; i < len(chunk); { r, size := utf8.DecodeRune(chunk[i:]) @@ -278,11 +278,21 @@ func countReader(ctx context.Context, r io.Reader) (counts, error) { } else if r == '\t' { lineLen = (lineLen/8 + 1) * 8 inWord = false - } else if r == ' ' || r == '\v' || r == '\f' { + } else if r == '\f' { + if lineLen > c.maxLineLen { + c.maxLineLen = lineLen + } + lineLen = 0 + inWord = false + } else if r == ' ' { lineLen++ inWord = false + } else if r == '\v' { + // vertical tab: zero display width, breaks words + inWord = false } else { - if !inWord { + isControl := unicode.Is(unicode.Cc, r) + if !inWord && !isControl { c.words++ inWord = true } diff --git a/interp/builtins/wc/wc_gnu_compat_test.go b/interp/builtins/wc/wc_gnu_compat_test.go index 90966364..74e29322 100644 --- a/interp/builtins/wc/wc_gnu_compat_test.go +++ b/interp/builtins/wc/wc_gnu_compat_test.go @@ -86,7 +86,7 @@ func TestGNUCompatWordsMulti(t *testing.T) { // TestGNUCompatBytesCount — -c on "x". // // GNU command: printf 'x' | gwc -c -// Expected: "1\n" +// Expected: "1\n" func TestGNUCompatBytesCount(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "file.txt", "x") @@ -148,16 +148,16 @@ func TestGNUCompatCharsMultibyte(t *testing.T) { assert.Equal(t, "5 file.txt\n", stdout) } -// TestGNUCompatControlCharIsWord — control byte \x01 counts as a word. +// TestGNUCompatControlCharIsWord — control byte \x01 does NOT count as a word. // // GNU command: printf '\x01\n' | gwc -w -// Expected: "1\n" +// Expected: "0\n" func TestGNUCompatControlCharIsWord(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "file.txt", "\x01\n") stdout, _, code := cmdRun(t, "wc -w file.txt", dir) assert.Equal(t, 0, code) - assert.Equal(t, "1 file.txt\n", stdout) + assert.Equal(t, "0 file.txt\n", stdout) } // TestGNUCompatRejectedFlag — unknown flag exits 1.
diff --git a/interp/builtins/wc/wc_test.go b/interp/builtins/wc/wc_test.go index 4707b0dd..090a1cca 100644 --- a/interp/builtins/wc/wc_test.go +++ b/interp/builtins/wc/wc_test.go @@ -140,7 +140,7 @@ func TestWcWordsControlChar(t *testing.T) { writeFile(t, dir, "file.txt", "\x01\n") stdout, _, code := cmdRun(t, "wc -w file.txt", dir) assert.Equal(t, 0, code) - assert.Equal(t, "1 file.txt\n", stdout) + assert.Equal(t, "0 file.txt\n", stdout) } // --- Bytes --- From 40f2efc2ea1948755fbc9820edd6f988b00fed12 Mon Sep 17 00:00:00 2001 From: Alexandre Yang Date: Thu, 12 Mar 2026 17:23:44 +0100 Subject: [PATCH 07/10] [iter 2] Add skip_assert_against_bash for files0_from_rejected scenario This scenario tests intentional security divergence: rshell rejects --files0-from (GTFOBins risk) while GNU wc accepts it. Co-Authored-By: Claude Opus 4.6 --- tests/scenarios/cmd/wc/errors/files0_from_rejected.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/scenarios/cmd/wc/errors/files0_from_rejected.yaml b/tests/scenarios/cmd/wc/errors/files0_from_rejected.yaml index d74a82d9..cda15c61 100644 --- a/tests/scenarios/cmd/wc/errors/files0_from_rejected.yaml +++ b/tests/scenarios/cmd/wc/errors/files0_from_rejected.yaml @@ -4,6 +4,7 @@ input: allowed_paths: ["$DIR"] script: |+ wc --files0-from=foo +skip_assert_against_bash: true expect: stdout: "" stderr_contains: ["wc:"] From f0741a5e8b1448bf1388b1e747dc1df9b19c9b02 Mon Sep 17 00:00:00 2001 From: Alexandre Yang Date: Thu, 12 Mar 2026 17:56:24 +0100 Subject: [PATCH 08/10] [iter 5] Fix stdin min-width padding to only apply with 2+ output columns GNU wc applies the 7-character minimum field width for stdin only when there are 2+ output columns (e.g. default mode, -lw). With a single flag like -l, no padding is applied. Fix the condition to check column count before applying stdinMinWidth. 
Co-Authored-By: Claude Opus 4.6 --- interp/builtins/wc/wc.go | 23 ++++++++++++++++++++++- interp/builtins/wc/wc_test.go | 2 +- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/interp/builtins/wc/wc.go b/interp/builtins/wc/wc.go index f154f7ce..f9850f39 100644 --- a/interp/builtins/wc/wc.go +++ b/interp/builtins/wc/wc.go @@ -88,6 +88,26 @@ type options struct { showMaxLineLen bool } +func (o options) columnCount() int { + n := 0 + if o.showLines { + n++ + } + if o.showWords { + n++ + } + if o.showChars { + n++ + } + if o.showBytes { + n++ + } + if o.showMaxLineLen { + n++ + } + return n +} + func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { help := fs.BoolP("help", "h", false, "print usage and exit") lines := fs.BoolP("lines", "l", false, "print the newline counts") @@ -175,7 +195,8 @@ func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { } width := fieldWidth(total, opts) - if hasStdin && width < stdinMinWidth { + nCols := opts.columnCount() + if hasStdin && nCols >= 2 && width < stdinMinWidth { width = stdinMinWidth } diff --git a/interp/builtins/wc/wc_test.go b/interp/builtins/wc/wc_test.go index 090a1cca..7714aa5d 100644 --- a/interp/builtins/wc/wc_test.go +++ b/interp/builtins/wc/wc_test.go @@ -354,7 +354,7 @@ func TestWcPipeInput(t *testing.T) { writeFile(t, dir, "file.txt", "alpha\nbeta\ngamma\n") stdout, _, code := cmdRun(t, "cat file.txt | wc -l", dir) assert.Equal(t, 0, code) - assert.Equal(t, " 3\n", stdout) + assert.Equal(t, "3\n", stdout) } // --- Combined flags --- From f0f68725cc932591a00cacf55df90111fb790d6f Mon Sep 17 00:00:00 2001 From: Alexandre Yang Date: Thu, 12 Mar 2026 18:03:22 +0100 Subject: [PATCH 09/10] [iter 6] Fix \r to save maxLineLen before resetting, add regression test The \r handler was resetting lineLen to 0 without first checking if it was the longest line seen, causing incorrect wc -L output when pre-CR text is longer than post-CR text. 
Added maxLineLen save before reset, matching the \f handler pattern. Added TestWcMaxLineLenCRLongerPrefix to catch regressions (previous test passed by coincidence with equal-length segments). Co-Authored-By: Claude Opus 4.6 --- interp/builtins/wc/wc.go | 3 +++ interp/builtins/wc/wc_test.go | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/interp/builtins/wc/wc.go b/interp/builtins/wc/wc.go index f9850f39..89ce4062 100644 --- a/interp/builtins/wc/wc.go +++ b/interp/builtins/wc/wc.go @@ -294,6 +294,9 @@ func countReader(ctx context.Context, r io.Reader) (counts, error) { lineLen = 0 inWord = false } else if r == '\r' { + if lineLen > c.maxLineLen { + c.maxLineLen = lineLen + } lineLen = 0 inWord = false } else if r == '\t' { diff --git a/interp/builtins/wc/wc_test.go b/interp/builtins/wc/wc_test.go index 7714aa5d..1cc02994 100644 --- a/interp/builtins/wc/wc_test.go +++ b/interp/builtins/wc/wc_test.go @@ -406,6 +406,14 @@ func TestWcMaxLineLenCR(t *testing.T) { assert.Equal(t, "5 file.txt\n", stdout) } +func TestWcMaxLineLenCRLongerPrefix(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "abcdef\rxy\n") + stdout, _, code := cmdRun(t, "wc -L file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "6 file.txt\n", stdout) +} + func TestWcCRLFLineCount(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "file.txt", "a\r\nb\r\n") From bca4408307cf1c133005cef44e434cb9e955e054 Mon Sep 17 00:00:00 2001 From: Alexandre Yang Date: Thu, 12 Mar 2026 18:14:17 +0100 Subject: [PATCH 10/10] [iter 1] Fix pipe_to_wc.yaml expected stdout to match unpadded wc -l output The scenario expected padded output (stdinMinWidth=7) but wc -l from stdin with a single output column correctly produces unpadded "3\n", matching bash behavior. 
Co-Authored-By: Claude Opus 4.6 --- tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml b/tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml index 9c687821..eab32a95 100644 --- a/tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml +++ b/tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml @@ -16,7 +16,6 @@ input: script: |+ ls | wc -l expect: - stdout: |2+ - 3 + stdout: "3\n" stderr: "" exit_code: 0