diff --git a/SHELL_FEATURES.md b/SHELL_FEATURES.md index eabc114c..f63ae606 100644 --- a/SHELL_FEATURES.md +++ b/SHELL_FEATURES.md @@ -24,7 +24,7 @@ Blocked features are rejected before execution with exit code 2. - ✅ `tr [-cdsCt] SET1 [SET2]` — translate, squeeze, and/or delete characters from stdin - ✅ `true` — return exit code 0 - ✅ `uniq [OPTION]... [INPUT]` — report or omit repeated lines -- ✅ `wc [-l] [-w] [-c] [-m] [FILE]...` — count lines, words, bytes, or characters in files +- ✅ `wc [-l] [-w] [-c] [-m] [-L] [FILE]...` — count lines, words, bytes, characters, or max line length - ❌ All other commands — return exit code 127 with `: not found` unless an ExecHandler is configured ## Variables diff --git a/interp/builtins/tests/wc/testdata/fuzz/FuzzWcDifferentialWords/cd24dde99d3a6e0f b/interp/builtins/tests/wc/testdata/fuzz/FuzzWcDifferentialWords/cd24dde99d3a6e0f new file mode 100644 index 00000000..ff2def96 --- /dev/null +++ b/interp/builtins/tests/wc/testdata/fuzz/FuzzWcDifferentialWords/cd24dde99d3a6e0f @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("\xe2\xbf\xbf") diff --git a/interp/builtins/wc/wc.go b/interp/builtins/wc/wc.go index d2752c5f..bb37215f 100644 --- a/interp/builtins/wc/wc.go +++ b/interp/builtins/wc/wc.go @@ -34,7 +34,7 @@ // -L, --max-line-length // Print the length of the longest line. // -// -h, --help +// --help // Print this usage message to stdout and exit 0. // // Output columns always appear in a fixed order: lines, words, chars, @@ -57,6 +57,7 @@ package wc import ( "context" + "errors" "io" "os" "strconv" @@ -69,8 +70,8 @@ import ( // Cmd is the wc builtin command descriptor. var Cmd = builtins.Command{Name: "wc", MakeFlags: registerFlags} -const chunkSize = 32 * 1024 // 32 KiB read buffer -const stdinMinWidth = 7 // GNU wc minimum column width for stdin +const chunkSize = 32 * 1024 // 32 KiB read buffer +const nonRegularMinWidth = 7 // GNU wc minimum column width for non-regular files type counts struct { lines int64 @@ -88,8 +89,29 @@ type options struct { showMaxLineLen bool } +// numCols returns the number of output columns that will be printed. +func (o options) numCols() int { + n := 0 + if o.showLines { + n++ + } + if o.showWords { + n++ + } + if o.showChars { + n++ + } + if o.showBytes { + n++ + } + if o.showMaxLineLen { + n++ + } + return n +} + func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { - help := fs.BoolP("help", "h", false, "print usage and exit") + help := fs.Bool("help", false, "print usage and exit") lines := fs.BoolP("lines", "l", false, "print the newline counts") words := fs.BoolP("words", "w", false, "print the word counts") bytesFlag := fs.BoolP("bytes", "c", false, "print the byte counts") @@ -147,6 +169,7 @@ func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { c counts } results := make([]fileResult, 0, len(files)) + hasNonRegular := hasStdin // stdin (pipe) is non-regular for _, file := range files { if ctx.Err() != nil { @@ -158,11 +181,18 @@ func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { if file == "-" { name = "standard input" } - callCtx.Errf("wc: %s: %s\n", name, callCtx.PortableErr(err)) + if name == "" { + callCtx.Errf("wc: %s\n", callCtx.PortableErr(err)) + } else { + callCtx.Errf("wc: %s: %s\n", name, callCtx.PortableErr(err)) + } failed = true - if c == (counts{}) { + // GNU wc prints a zero count line for directories but not + // for missing files or other open errors. + if !isErrIsDir(err) { continue } + hasNonRegular = true } results = append(results, fileResult{name: file, c: c}) total.lines += c.lines @@ -175,8 +205,16 @@ func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { } width := fieldWidth(total, opts) - if hasStdin && width < stdinMinWidth { - width = stdinMinWidth + // GNU wc uses a minimum column width of 7 for non-regular files + // (stdin pipes, directories, devices, etc.) when two or more + // columns are printed — whether in default mode or with explicit + // multi-column flags (e.g. wc -lw). GNU also applies this minimum + // when multiple files are processed (a total line is printed), even + // with a single column (e.g. wc -l dir file). When only a single + // column is active with a single file, the width is determined + // solely by the count values. + if hasNonRegular && (opts.numCols() >= 2 || len(files) > 1) && width < nonRegularMinWidth { + width = nonRegularMinWidth } for _, fr := range results { @@ -199,6 +237,9 @@ func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { } func countFile(ctx context.Context, callCtx *builtins.CallContext, path string) (counts, error) { + if path == "" { + return counts{}, errors.New("invalid zero-length file name") + } var rc io.ReadCloser if path == "-" { if callCtx.Stdin == nil { @@ -262,47 +303,74 @@ func countReader(ctx context.Context, r io.Reader) (counts, error) { c.bytes -= int64(carryN) for i := 0; i < len(chunk); { - r, size := utf8.DecodeRune(chunk[i:]) + ch, size := utf8.DecodeRune(chunk[i:]) i += size // Invalid UTF-8 byte: not a character in C.UTF-8 locale. // Skip entirely — no char count, no word effect. - if r == utf8.RuneError && size == 1 { + if ch == utf8.RuneError && size == 1 { continue } c.chars++ - if r == '\n' { + if ch == '\n' { c.lines++ if lineLen > c.maxLineLen { c.maxLineLen = lineLen } lineLen = 0 inWord = false - } else if r == '\r' { + } else if ch == '\r' { + if lineLen > c.maxLineLen { + c.maxLineLen = lineLen + } lineLen = 0 inWord = false - } else if r == '\t' { + } else if ch == '\t' { lineLen = (lineLen/8 + 1) * 8 inWord = false - } else if r == ' ' || r == '\v' || r == '\f' { + } else if ch == ' ' { lineLen++ inWord = false - } else if unicode.IsControl(r) { - // Non-whitespace control chars (C0, DEL, C1) are transparent: - // they do not start or end words, matching GNU wc in POSIX locale. - } else if unicode.Is(unicode.Zs, r) { + } else if ch == '\f' { + if lineLen > c.maxLineLen { + c.maxLineLen = lineLen + } + lineLen = 0 + inWord = false + } else if ch == '\v' { + // vertical tab: zero display width, but breaks words + inWord = false + } else if unicode.Is(unicode.Cc, ch) { + // Control characters are transparent to word counting: + // they don't start or end words, matching GNU wc. + lineLen += int64(runeWidth(ch)) + } else if unicode.Is(unicode.Zs, ch) { // Unicode space separators (NBSP, thin space, etc.) end words, // matching GNU wc behaviour under C.UTF-8 locale. lineLen++ inWord = false - } else if !unicode.IsGraphic(r) && !unicode.Is(unicode.Cf, r) && !unicode.Is(unicode.Co, r) { - // Cn (unassigned codepoints): transparent like control chars -- - // they do not start or end words, matching GNU wc under C.UTF-8. - } else { + } else if unicode.IsGraphic(ch) || unicode.Is(unicode.Co, ch) || unicode.Is(unicode.Cf, ch) || unicode.Is(unicode151Print, ch) { + // Printable characters start or continue a word, + // matching GNU wc which gates word counting on + // iswprint() in C.UTF-8 locale. IsGraphic covers + // letters, marks, numbers, punctuation, and + // symbols; Co adds private-use characters; Cf adds + // format characters (e.g. U+06DD ARABIC END OF + // AYAH, U+200B ZERO WIDTH SPACE) which glibc's + // iswprint considers printable; unicode151Print + // adds characters assigned in Unicode 15.1 that + // Go's tables don't yet include (Go ships + // Unicode 15.0). if !inWord { c.words++ inWord = true } - lineLen += int64(runeWidth(r)) + lineLen += int64(runeWidth(ch)) + } else { + // Non-printable, non-whitespace, non-control chars + // (e.g. unassigned Cn codepoints) are transparent + // to both word counting and line length — they + // neither start nor end words, and GNU wc treats + // them as non-printable (wcwidth=-1, width 0). } } } @@ -345,6 +413,25 @@ func fieldWidth(total counts, opts options) int { return w } +// unicode151Print covers characters assigned in Unicode 15.1 that are +// printable (graphic) but absent from Go's unicode package (Unicode 15.0). +// CI runs GNU wc linked against glibc ≥ 2.39 (Ubuntu 24.04) which uses +// Unicode 15.1+ character data, so these codepoints must be treated as +// word characters to match GNU wc output. +// +// This table can be removed once Go's unicode package is updated to +// Unicode 15.1 or later (tracked in https://github.com/golang/go/issues/65141, +// expected in Go 1.27). +var unicode151Print = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x2FFC, 0x2FFF, 1}, // Ideographic Description Characters (4 new IDCs) + {0x31EF, 0x31EF, 1}, // Ideographic Description Character OVERLAID + }, + R32: []unicode.Range32{ + {0x2EBF0, 0x2EE5D, 1}, // CJK Unified Ideographs Extension I + }, +} + // runeWidth returns the display width of a rune following wcwidth(3) rules: // 0 for controls, combining marks, and format chars; 2 for East Asian // Wide/Fullwidth; 1 for everything else. diff --git a/interp/builtins/wc/wc_gnu_compat_test.go b/interp/builtins/wc/wc_gnu_compat_test.go index 4d2255e4..b0062bfd 100644 --- a/interp/builtins/wc/wc_gnu_compat_test.go +++ b/interp/builtins/wc/wc_gnu_compat_test.go @@ -148,14 +148,14 @@ func TestGNUCompatCharsMultibyte(t *testing.T) { assert.Equal(t, "5 file.txt\n", stdout) } -// TestGNUCompatControlCharIsWord — control byte \x01 does not count as a word. +// TestGNUCompatControlCharIsNotWord — control byte \x01 is transparent to word counting. // // GNU wc in POSIX locale treats C0 control characters as transparent: // they neither start nor end words. Only printable chars form words. // -// GNU command (Debian/Ubuntu POSIX locale): printf '\x01\n' | wc -w +// GNU command: printf '\x01\n' | gwc -w // Expected: "0\n" -func TestGNUCompatControlCharIsWord(t *testing.T) { +func TestGNUCompatControlCharIsNotWord(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "file.txt", "\x01\n") stdout, _, code := cmdRun(t, "wc -w file.txt", dir) @@ -163,6 +163,102 @@ func TestGNUCompatControlCharIsWord(t *testing.T) { assert.Equal(t, "0 file.txt\n", stdout) } +// TestGNUCompatMaxLineLenVerticalTab — -L with \v (zero display width). +// +// GNU command: printf 'a\vb\n' | wc -L +// Expected: "2\n" — \v has zero width, so a(1) + b(1) = 2. +func TestGNUCompatMaxLineLenVerticalTab(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "a\vb\n") + stdout, _, code := cmdRun(t, "wc -L file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "2 file.txt\n", stdout) +} + +// TestGNUCompatMaxLineLenFormFeed — -L with \f (resets line position). +// +// GNU command: printf 'abc\fdef\n' | wc -L +// Expected: "3\n" — \f resets position, so def = 3. +func TestGNUCompatMaxLineLenFormFeed(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "abc\fdef\n") + stdout, _, code := cmdRun(t, "wc -L file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "3 file.txt\n", stdout) +} + +// TestGNUCompatMaxLineLenCRAsymmetric — -L with \r where text before \r is longer. +// +// GNU command: printf 'abcdef\rxy\n' | wc -L +// Expected: "6\n" — max(6, 2) = 6; \r resets position but preserves max. +func TestGNUCompatMaxLineLenCRAsymmetric(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "abcdef\rxy\n") + stdout, _, code := cmdRun(t, "wc -L file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "6 file.txt\n", stdout) +} + +// TestGNUCompatMaxLineLenFFAsymmetric — -L with \f where text before \f is longer. +// +// GNU command: printf 'abcdef\fxy\n' | wc -L +// Expected: "6\n" — max(6, 2) = 6; \f resets position but preserves max. +func TestGNUCompatMaxLineLenFFAsymmetric(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "abcdef\fxy\n") + stdout, _, code := cmdRun(t, "wc -L file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "6 file.txt\n", stdout) +} + +// TestGNUCompatDirectoryDefaultWidth — directory gets width-7 padding in default mode. +// +// GNU command: mkdir /tmp/d && wc /tmp/d +// Expected: " 0 0 0 .\n" (width 7, non-regular file) +func TestGNUCompatDirectoryDefaultWidth(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := cmdRun(t, "wc .", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "wc:") + assert.Equal(t, " 0 0 0 .\n", stdout) +} + +// TestGNUCompatDirectoryExplicitFlag — directory with explicit flag uses width 1. +// +// GNU command: mkdir /tmp/d && wc -l /tmp/d +// Expected: "0 .\n" (width 1, explicit flag) +func TestGNUCompatDirectoryExplicitFlag(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := cmdRun(t, "wc -l .", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "wc:") + assert.Equal(t, "0 .\n", stdout) +} + +// TestGNUCompatVerticalTabWordsBreak — \v breaks words for wc -w. +// +// GNU command: printf 'a\vb\n' | wc -w +// Expected: "2\n" — \v is a word delimiter. +func TestGNUCompatVerticalTabWordsBreak(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "a\vb\n") + stdout, _, code := cmdRun(t, "wc -w file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "2 file.txt\n", stdout) +} + +// TestGNUCompatVerticalTabThreeWords — \v separates three words. +// +// GNU command: printf 'a\vb\vc\n' | wc -w +// Expected: "3\n" +func TestGNUCompatVerticalTabThreeWords(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "a\vb\vc\n") + stdout, _, code := cmdRun(t, "wc -w file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "3 file.txt\n", stdout) +} + // TestGNUCompatRejectedFlag — unknown flag exits 1. // // GNU command: gwc --follow diff --git a/interp/builtins/wc/wc_isdir_unix.go b/interp/builtins/wc/wc_isdir_unix.go new file mode 100644 index 00000000..cf40c3a4 --- /dev/null +++ b/interp/builtins/wc/wc_isdir_unix.go @@ -0,0 +1,18 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +//go:build !windows + +package wc + +import ( + "errors" + "syscall" +) + +// isErrIsDir reports whether err wraps a "is a directory" error. +func isErrIsDir(err error) bool { + return errors.Is(err, syscall.EISDIR) +} diff --git a/interp/builtins/wc/wc_isdir_windows.go b/interp/builtins/wc/wc_isdir_windows.go new file mode 100644 index 00000000..fa0cd3fe --- /dev/null +++ b/interp/builtins/wc/wc_isdir_windows.go @@ -0,0 +1,27 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +//go:build windows + +package wc + +import ( + "errors" + "syscall" +) + +// errnoERROR_INVALID_FUNCTION is the Windows errno for ERROR_INVALID_FUNCTION. +// Go's syscall package does not export this constant, so we define it here. +const errnoERROR_INVALID_FUNCTION = syscall.Errno(1) + +// isErrIsDir reports whether err wraps the Windows equivalent of EISDIR. +// On Windows, reading a directory handle returns ERROR_INVALID_FUNCTION (errno 1). +func isErrIsDir(err error) bool { + var errno syscall.Errno + if errors.As(err, &errno) { + return errno == errnoERROR_INVALID_FUNCTION + } + return false +} diff --git a/interp/builtins/wc/wc_test.go b/interp/builtins/wc/wc_test.go index 9b0b74f2..1e6d303e 100644 --- a/interp/builtins/wc/wc_test.go +++ b/interp/builtins/wc/wc_test.go @@ -135,7 +135,7 @@ func TestWcWordsMultiple(t *testing.T) { assert.Equal(t, "3 file.txt\n", stdout) } -func TestWcWordsControlChar(t *testing.T) { +func TestWcWordsControlCharNotWord(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "file.txt", "\x01\n") stdout, _, code := cmdRun(t, "wc -w file.txt", dir) @@ -292,11 +292,12 @@ func TestWcHelp(t *testing.T) { assert.Contains(t, stdout, "Usage:") } -func TestWcHelpShort(t *testing.T) { +func TestWcHelpShortRejected(t *testing.T) { + // GNU wc does not support -h; it's an invalid option. dir := t.TempDir() - stdout, _, code := cmdRun(t, "wc -h", dir) - assert.Equal(t, 0, code) - assert.Contains(t, stdout, "Usage:") + _, stderr, code := cmdRun(t, "wc -h", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "wc:") } // --- Error cases --- @@ -325,9 +326,11 @@ func TestWcFiles0FromRejected(t *testing.T) { func TestWcDirectory(t *testing.T) { dir := t.TempDir() - _, stderr, code := cmdRun(t, "wc .", dir) + stdout, stderr, code := cmdRun(t, "wc .", dir) assert.Equal(t, 1, code) assert.Contains(t, stderr, "wc:") + // GNU wc prints a zero count line with width-7 padding (non-regular file) + assert.Equal(t, " 0 0 0 .\n", stdout) } // --- Hardening --- @@ -354,7 +357,7 @@ func TestWcPipeInput(t *testing.T) { writeFile(t, dir, "file.txt", "alpha\nbeta\ngamma\n") stdout, _, code := cmdRun(t, "cat file.txt | wc -l", dir) assert.Equal(t, 0, code) - assert.Equal(t, " 3\n", stdout) + assert.Equal(t, "3\n", stdout) } // --- Combined flags --- diff --git a/tests/allowed_symbols_test.go b/tests/allowed_symbols_test.go index 7a8adffb..819abfc0 100644 --- a/tests/allowed_symbols_test.go +++ b/tests/allowed_symbols_test.go @@ -120,20 +120,20 @@ var builtinAllowedSymbols = []string{ "strings.ReplaceAll", // strings.ToLower — converts string to lowercase; pure function, no I/O. "strings.ToLower", - // strconv.IntSize — platform int size constant (32 or 64); pure constant, no I/O. - "strconv.IntSize", // strings.Split — splits a string by separator into a slice; pure function, no I/O. "strings.Split", // strconv.Atoi — string-to-int conversion; pure function, no I/O. "strconv.Atoi", - // strconv.ParseBool — string-to-bool conversion; pure function, no I/O. - "strconv.ParseBool", - // strconv.Itoa — int-to-string conversion; pure function, no I/O. - "strconv.Itoa", // strconv.ErrRange — sentinel error value for overflow; pure constant. "strconv.ErrRange", + // strconv.IntSize — platform int size constant (32 or 64); pure constant, no I/O. + "strconv.IntSize", + // strconv.Itoa — int-to-string conversion; pure function, no I/O. + "strconv.Itoa", // strconv.NumError — error type for numeric conversion failures; pure type. "strconv.NumError", + // strconv.ParseBool — string-to-bool conversion; pure function, no I/O. + "strconv.ParseBool", // strconv.ParseFloat — string-to-float conversion; pure function, no I/O. "strconv.ParseFloat", // strconv.ParseInt — string-to-int conversion with base/bit-size; pure function, no I/O. @@ -142,6 +142,10 @@ var builtinAllowedSymbols = []string{ "strconv.ParseUint", // strconv.FormatInt — int-to-string conversion; pure function, no I/O. "strconv.FormatInt", + // syscall.EISDIR — errno constant for "is a directory"; pure constant, no I/O. + "syscall.EISDIR", + // syscall.Errno — error type wrapping an OS errno value; pure type, no I/O. + "syscall.Errno", // strings.HasPrefix — pure function for prefix matching; no I/O. "strings.HasPrefix", // strings.IndexByte — finds byte in string; pure function, no I/O. @@ -156,13 +160,11 @@ var builtinAllowedSymbols = []string{ "unicode.Cc", // unicode.Cf — format character category range table; pure data, no I/O. "unicode.Cf", - // unicode.Co — private-use character category range table; pure data, no I/O. + // unicode.Co — private use area category range table; pure data, no I/O. "unicode.Co", // unicode.Is — checks if rune belongs to a range table; pure function, no I/O. "unicode.Is", - // unicode.IsControl — reports whether rune is a control character; pure function, no I/O. - "unicode.IsControl", - // unicode.IsGraphic — reports whether rune is defined as a graphic character; pure function, no I/O. + // unicode.IsGraphic — checks if rune is a graphic character; pure function, no I/O. "unicode.IsGraphic", // unicode.Zs — Unicode space separator category range table; pure data, no I/O. "unicode.Zs", diff --git a/tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml b/tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml index 9c687821..875dd7e5 100644 --- a/tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml +++ b/tests/scenarios/cmd/ls/pipes/pipe_to_wc.yaml @@ -1,5 +1,4 @@ description: ls piped to wc -l counts the number of entries. -skip_assert_against_bash: true setup: files: - path: a.txt @@ -16,7 +15,6 @@ input: script: |+ ls | wc -l expect: - stdout: |2+ - 3 + stdout: "3\n" stderr: "" exit_code: 0 diff --git a/tests/scenarios/cmd/wc/errors/dir_single_col_width7.yaml b/tests/scenarios/cmd/wc/errors/dir_single_col_width7.yaml new file mode 100644 index 00000000..4ed13f46 --- /dev/null +++ b/tests/scenarios/cmd/wc/errors/dir_single_col_width7.yaml @@ -0,0 +1,16 @@ +# GNU wc applies width-7 padding for non-regular files (directories) even +# with a single column flag, when multiple files produce a total line. +description: wc -l with directory and file uses width-7 padding. +skip_assert_against_bash: true # stderr format differs from GNU wc (PortableErr normalization) +setup: + files: + - path: f.txt + content: "a\n" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -l . f.txt +expect: + stdout: " 0 .\n 1 f.txt\n 1 total\n" + stderr: "wc: .: is a directory\n" + exit_code: 1 diff --git a/tests/scenarios/cmd/wc/errors/empty_filename.yaml b/tests/scenarios/cmd/wc/errors/empty_filename.yaml new file mode 100644 index 00000000..a00ce0ca --- /dev/null +++ b/tests/scenarios/cmd/wc/errors/empty_filename.yaml @@ -0,0 +1,9 @@ +description: wc exits 1 and prints error for empty filename argument. +skip_assert_against_bash: true # rshell returns a clearer error than GNU wc for empty filenames +input: + script: |+ + wc "" +expect: + stdout: "" + stderr: "wc: invalid zero-length file name\n" + exit_code: 1 diff --git a/tests/scenarios/cmd/wc/errors/files0_from_rejected.yaml b/tests/scenarios/cmd/wc/errors/files0_from_rejected.yaml index d74a82d9..a4ce305e 100644 --- a/tests/scenarios/cmd/wc/errors/files0_from_rejected.yaml +++ b/tests/scenarios/cmd/wc/errors/files0_from_rejected.yaml @@ -1,5 +1,6 @@ # Derived from GTFOBins safety requirement description: wc rejects --files0-from flag (security risk). +skip_assert_against_bash: true # intentionally rejects --files0-from (GTFOBins mitigation) input: allowed_paths: ["$DIR"] script: |+ diff --git a/tests/scenarios/cmd/wc/errors/h_short_flag_rejected.yaml b/tests/scenarios/cmd/wc/errors/h_short_flag_rejected.yaml new file mode 100644 index 00000000..d42b2cfa --- /dev/null +++ b/tests/scenarios/cmd/wc/errors/h_short_flag_rejected.yaml @@ -0,0 +1,9 @@ +description: wc rejects -h flag (GNU wc does not support -h). +input: + allowed_paths: ["$DIR"] + script: |+ + wc -h +expect: + stdout: "" + stderr_contains: ["wc:"] + exit_code: 1 diff --git a/tests/scenarios/cmd/wc/max_line_length/cr_asymmetric.yaml b/tests/scenarios/cmd/wc/max_line_length/cr_asymmetric.yaml new file mode 100644 index 00000000..14839363 --- /dev/null +++ b/tests/scenarios/cmd/wc/max_line_length/cr_asymmetric.yaml @@ -0,0 +1,13 @@ +description: wc -L preserves max line length across carriage return reset. +setup: + files: + - path: file.txt + content: "abcdef\rxy\n" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -L file.txt +expect: + stdout: "6 file.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/max_line_length/ff_asymmetric.yaml b/tests/scenarios/cmd/wc/max_line_length/ff_asymmetric.yaml new file mode 100644 index 00000000..39664e0d --- /dev/null +++ b/tests/scenarios/cmd/wc/max_line_length/ff_asymmetric.yaml @@ -0,0 +1,13 @@ +description: wc -L preserves max line length across form feed reset. +setup: + files: + - path: file.txt + content: "abcdef\fxy\n" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -L file.txt +expect: + stdout: "6 file.txt\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/stdin/multi_col_explicit_flags_padding.yaml b/tests/scenarios/cmd/wc/stdin/multi_col_explicit_flags_padding.yaml new file mode 100644 index 00000000..ba345696 --- /dev/null +++ b/tests/scenarios/cmd/wc/stdin/multi_col_explicit_flags_padding.yaml @@ -0,0 +1,15 @@ +# Verify that GNU wc minimum width-7 padding applies to non-regular +# (stdin) input when two or more columns are printed with explicit flags. +description: wc pads non-regular input to width 7 with explicit multi-column flags. +setup: + files: + - path: file.txt + content: "a\n" +input: + allowed_paths: ["$DIR"] + script: |+ + cat file.txt | wc -lw +expect: + stdout: " 1 1\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/stdin/single_col_explicit_flag_no_padding.yaml b/tests/scenarios/cmd/wc/stdin/single_col_explicit_flag_no_padding.yaml new file mode 100644 index 00000000..3420fa62 --- /dev/null +++ b/tests/scenarios/cmd/wc/stdin/single_col_explicit_flag_no_padding.yaml @@ -0,0 +1,15 @@ +# Verify that GNU wc does NOT apply width-7 padding for non-regular +# (stdin) input when only a single column is printed. +description: wc does not pad non-regular input when only one flag is given. +setup: + files: + - path: file.txt + content: "a\n" +input: + allowed_paths: ["$DIR"] + script: |+ + cat file.txt | wc -l +expect: + stdout: "1\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/wc/words/vertical_tab_separator.yaml b/tests/scenarios/cmd/wc/words/vertical_tab_separator.yaml new file mode 100644 index 00000000..66fab7a7 --- /dev/null +++ b/tests/scenarios/cmd/wc/words/vertical_tab_separator.yaml @@ -0,0 +1,13 @@ +description: wc -w treats vertical tab (\v) as a word separator. +setup: + files: + - path: file.txt + content: "a\vb\vc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + wc -w file.txt +expect: + stdout: "3 file.txt\n" + stderr: "" + exit_code: 0