diff --git a/SHELL_COMMANDS.md b/SHELL_COMMANDS.md index 618f420f..8615516f 100644 --- a/SHELL_COMMANDS.md +++ b/SHELL_COMMANDS.md @@ -9,6 +9,7 @@ Short reference for builtin commands | `echo [ARG ...]` | none | Print arguments separated by spaces, then newline. | | `cat [FILE ...]` | `-` (read stdin) | Print files; with no args, read stdin. | | `head [FILE ...]` | `-n N` (lines), `-c N` (bytes), `-q`/`--quiet`/`--silent` (no headers), `-v` (force headers) | Print first 10 lines of each FILE; with no FILE or `-`, read stdin. | +| `uniq [INPUT]` | `-c` (count), `-d` (repeated only), `-u` (unique only), `-i` (ignore case), `-f N` (skip fields), `-s N` (skip chars), `-w N` (check chars), `-z` (NUL-delimited), `-D` (all repeated), `--group` (group lines) | Filter adjacent matching lines from INPUT (or stdin), writing to stdout. | | `exit [N]` | `N` (status code) | Exit the shell with `N` (default: last status). | | `break [N]` | `N` (loop levels) | Break current loop, or `N` enclosing loops. | | `continue [N]` | `N` (loop levels) | Continue current loop, or `N` enclosing loops. | diff --git a/interp/builtin_uniq_gnu_compat_test.go b/interp/builtin_uniq_gnu_compat_test.go new file mode 100644 index 00000000..cebc0281 --- /dev/null +++ b/interp/builtin_uniq_gnu_compat_test.go @@ -0,0 +1,284 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. 
+ +package interp_test + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/DataDog/rshell/interp" +) + +func setupUniqDir(t *testing.T, files map[string]string) string { + t.Helper() + dir := t.TempDir() + for name, content := range files { + require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte(content), 0644)) + } + return dir +} + +func uniqCmdRun(t *testing.T, script, dir string) (string, string, int) { + t.Helper() + return runScript(t, script, dir, interp.AllowedPaths([]string{dir})) +} + +// TestGNUCompatUniqEmptyInput — empty input produces empty output. +// +// GNU command: printf ” | guniq +// Expected: "" +func TestGNUCompatUniqEmptyInput(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": ""}) + stdout, _, code := uniqCmdRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +// TestGNUCompatUniqBasicDedupe — adjacent duplicates are merged. +// +// GNU command: printf 'a\na\n' | guniq +// Expected: "a\n" +func TestGNUCompatUniqBasicDedupe(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "a\na\n"}) + stdout, _, code := uniqCmdRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +// TestGNUCompatUniqNoTrailingNewline — input without trailing newline adds one. +// +// GNU command: printf 'a\na' | guniq +// Expected: "a\n" +func TestGNUCompatUniqNoTrailingNewline(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "a\na"}) + stdout, _, code := uniqCmdRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +// TestGNUCompatUniqTwoDifferent — two different lines both emitted. 
+// +// GNU command: printf 'a\nb' | guniq +// Expected: "a\nb\n" +func TestGNUCompatUniqTwoDifferent(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "a\nb"}) + stdout, _, code := uniqCmdRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +// TestGNUCompatUniqThreeLinesMixed — duplicates then unique. +// +// GNU command: printf 'a\na\nb' | guniq +// Expected: "a\nb\n" +func TestGNUCompatUniqThreeLinesMixed(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "a\na\nb"}) + stdout, _, code := uniqCmdRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +// TestGNUCompatUniqAllUnique — three unique lines all emitted. +// +// GNU command: printf 'a\nb\nc\n' | guniq +// Expected: "a\nb\nc\n" +func TestGNUCompatUniqAllUnique(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "a\nb\nc\n"}) + stdout, _, code := uniqCmdRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\nc\n", stdout) +} + +// TestGNUCompatUniqCountTwoUnique — -c with all unique lines. +// +// GNU command: printf 'a\nb\n' | guniq -c +// Expected: " 1 a\n 1 b\n" +func TestGNUCompatUniqCountTwoUnique(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "a\nb\n"}) + stdout, _, code := uniqCmdRun(t, "uniq -c f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, " 1 a\n 1 b\n", stdout) +} + +// TestGNUCompatUniqCountDuplicates — -c with duplicates. +// +// GNU command: printf 'a\na\n' | guniq -c +// Expected: " 2 a\n" +func TestGNUCompatUniqCountDuplicates(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "a\na\n"}) + stdout, _, code := uniqCmdRun(t, "uniq -c f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, " 2 a\n", stdout) +} + +// TestGNUCompatUniqIgnoreCase — -i ignores case. 
+// +// GNU command: printf 'A\na\n' | guniq -i +// Expected: "A\n" +func TestGNUCompatUniqIgnoreCase(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "A\na\n"}) + stdout, _, code := uniqCmdRun(t, "uniq -i f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "A\n", stdout) +} + +// TestGNUCompatUniqCaseSensitive — default is case-sensitive. +// +// GNU command: printf 'A\na\n' | guniq +// Expected: "A\na\n" +func TestGNUCompatUniqCaseSensitive(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "A\na\n"}) + stdout, _, code := uniqCmdRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "A\na\n", stdout) +} + +// TestGNUCompatUniqRepeated — -d only prints duplicated lines. +// +// GNU command: printf 'a\na\nb\n' | guniq -d +// Expected: "a\n" +func TestGNUCompatUniqRepeated(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "a\na\nb\n"}) + stdout, _, code := uniqCmdRun(t, "uniq -d f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +// TestGNUCompatUniqUnique — -u only prints unique lines. +// +// GNU command: printf 'a\na\nb\n' | guniq -u +// Expected: "b\n" +func TestGNUCompatUniqUnique(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "a\na\nb\n"}) + stdout, _, code := uniqCmdRun(t, "uniq -u f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "b\n", stdout) +} + +// TestGNUCompatUniqDAndU — -d -u together produce no output. +// +// GNU command: printf 'a\na\n\b' | guniq -d -u +// Expected: "" +func TestGNUCompatUniqDAndU(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "a\na\n\b"}) + stdout, _, code := uniqCmdRun(t, "uniq -d -u f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +// TestGNUCompatUniqSkipField — -f 1 skips the first field. 
+// +// GNU command: printf 'a a\nb a\n' | guniq -f 1 +// Expected: "a a\n" +func TestGNUCompatUniqSkipField(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "a a\nb a\n"}) + stdout, _, code := uniqCmdRun(t, "uniq -f 1 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a\n", stdout) +} + +// TestGNUCompatUniqSkipChars — -s 2 skips two characters. +// +// GNU command: printf 'baa\naaa\n' | guniq -s 2 +// Expected: "baa\n" +func TestGNUCompatUniqSkipChars(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "baa\naaa\n"}) + stdout, _, code := uniqCmdRun(t, "uniq -s 2 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "baa\n", stdout) +} + +// TestGNUCompatUniqCheckCharsZero — -w 0 treats all lines as equal. +// +// GNU command: printf 'abc\nabcd\n' | guniq -w 0 +// Expected: "abc\n" +func TestGNUCompatUniqCheckCharsZero(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "abc\nabcd\n"}) + stdout, _, code := uniqCmdRun(t, "uniq -w 0 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc\n", stdout) +} + +// TestGNUCompatUniqGroupSeparate — --group=separate inserts blank lines. +// +// GNU command: printf 'a\na\nb\n' | guniq --group=separate +// Expected: "a\na\n\nb\n" +func TestGNUCompatUniqGroupSeparate(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "a\na\nb\n"}) + stdout, _, code := uniqCmdRun(t, "uniq --group=separate f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nb\n", stdout) +} + +// TestGNUCompatUniqGroupPrepend — --group=prepend prepends blank lines. 
+// +// GNU command: printf 'a\na\nb\n' | guniq --group=prepend +// Expected: "\na\na\n\nb\n" +func TestGNUCompatUniqGroupPrepend(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "a\na\nb\n"}) + stdout, _, code := uniqCmdRun(t, "uniq --group=prepend f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nb\n", stdout) +} + +// TestGNUCompatUniqGroupAppend — --group=append appends blank lines. +// +// GNU command: printf 'a\na\nb\n' | guniq --group=append +// Expected: "a\na\n\nb\n\n" +func TestGNUCompatUniqGroupAppend(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "a\na\nb\n"}) + stdout, _, code := uniqCmdRun(t, "uniq --group=append f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nb\n\n", stdout) +} + +// TestGNUCompatUniqGroupBoth — --group=both prepends and appends blank lines. +// +// GNU command: printf 'a\na\nb\n' | guniq --group=both +// Expected: "\na\na\n\nb\n\n" +func TestGNUCompatUniqGroupBoth(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "a\na\nb\n"}) + stdout, _, code := uniqCmdRun(t, "uniq --group=both f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nb\n\n", stdout) +} + +// TestGNUCompatUniqAllRepeatedSeparate — --all-repeated=separate. +// +// GNU command: printf 'a\na\nb\nc\nc\n' | guniq --all-repeated=separate +// Expected: "a\na\n\nc\nc\n" +func TestGNUCompatUniqAllRepeatedSeparate(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "a\na\nb\nc\nc\n"}) + stdout, _, code := uniqCmdRun(t, "uniq --all-repeated=separate f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nc\nc\n", stdout) +} + +// TestGNUCompatUniqAllRepeatedPrepend — --all-repeated=prepend. 
+// +// GNU command: printf 'a\na\nb\nc\nc\n' | guniq --all-repeated=prepend +// Expected: "\na\na\n\nc\nc\n" +func TestGNUCompatUniqAllRepeatedPrepend(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"f.txt": "a\na\nb\nc\nc\n"}) + stdout, _, code := uniqCmdRun(t, "uniq --all-repeated=prepend f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nc\nc\n", stdout) +} + +// TestGNUCompatUniqRejectedExtraOperand — extra operand is rejected. +// +// GNU command: guniq a.txt b.txt (would use b.txt as output) +// Our behavior: reject with exit 1 (no filesystem writes) +func TestGNUCompatUniqRejectedExtraOperand(t *testing.T) { + dir := setupUniqDir(t, map[string]string{"a.txt": "a\n", "b.txt": "b\n"}) + _, stderr, code := uniqCmdRun(t, "uniq a.txt b.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} diff --git a/interp/builtin_uniq_pentest_test.go b/interp/builtin_uniq_pentest_test.go new file mode 100644 index 00000000..9620977c --- /dev/null +++ b/interp/builtin_uniq_pentest_test.go @@ -0,0 +1,276 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. 
+ +package interp_test + +import ( + "bytes" + "context" + "errors" + "math" + "os" + "path/filepath" + "strconv" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "mvdan.cc/sh/v3/syntax" + + "github.com/DataDog/rshell/interp" +) + +func pentestUniqDir(t *testing.T, files map[string]string) string { + t.Helper() + dir := t.TempDir() + for name, content := range files { + require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte(content), 0644)) + } + return dir +} + +func pentestUniqRun(t *testing.T, script, dir string) (string, string, int) { + t.Helper() + return runScript(t, script, dir, interp.AllowedPaths([]string{dir})) +} + +func pentestUniqRunCtx(ctx context.Context, t *testing.T, script, dir string) (string, string, int) { + t.Helper() + parser := syntax.NewParser() + prog, err := parser.Parse(strings.NewReader(script), "") + require.NoError(t, err) + + var outBuf, errBuf bytes.Buffer + allOpts := []interp.RunnerOption{interp.StdIO(nil, &outBuf, &errBuf), interp.AllowedPaths([]string{dir})} + runner, err := interp.New(allOpts...) 
+ require.NoError(t, err) + defer runner.Close() + runner.Dir = dir + + err = runner.Run(ctx, prog) + exitCode := 0 + if err != nil { + var es interp.ExitStatus + if errors.As(err, &es) { + exitCode = int(es) + } else if ctx.Err() == nil { + t.Fatalf("unexpected error: %v", err) + } + } + return outBuf.String(), errBuf.String(), exitCode +} + +// --- Integer edge cases --- + +func TestUniqPentestFieldsZero(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"f.txt": "a b\na b\n"}) + stdout, _, code := pentestUniqRun(t, "uniq -f 0 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a b\n", stdout) +} + +func TestUniqPentestSkipCharsZero(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"f.txt": "abc\nabc\n"}) + stdout, _, code := pentestUniqRun(t, "uniq -s 0 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc\n", stdout) +} + +func TestUniqPentestCheckCharsZero(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"f.txt": "abc\nxyz\n"}) + stdout, _, code := pentestUniqRun(t, "uniq -w 0 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc\n", stdout) +} + +func TestUniqPentestCheckCharsOne(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"f.txt": "abc\nabc\n"}) + stdout, _, code := pentestUniqRun(t, "uniq -w 1 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc\n", stdout) +} + +func TestUniqPentestLargeFieldCount(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"f.txt": "a\nb\n"}) + stdout, _, code := pentestUniqRun(t, "uniq -f "+strconv.Itoa(math.MaxInt32-1)+" f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestUniqPentestLargeCheckChars(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"f.txt": "abc\nabc\n"}) + stdout, _, code := pentestUniqRun(t, "uniq -w "+strconv.Itoa(math.MaxInt32-1)+" f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc\n", stdout) +} + +func TestUniqPentestNegativeField(t *testing.T) { + dir := 
pentestUniqDir(t, map[string]string{"f.txt": "a\n"}) + _, stderr, code := pentestUniqRun(t, "uniq -f -1 f.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqPentestNegativeSkipChars(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"f.txt": "a\n"}) + _, stderr, code := pentestUniqRun(t, "uniq -s -1 f.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqPentestNegativeCheckChars(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"f.txt": "a\n"}) + _, stderr, code := pentestUniqRun(t, "uniq -w -1 f.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqPentestOverflowFieldCount(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"f.txt": "a\n"}) + _, stderr, code := pentestUniqRun(t, "uniq -f 99999999999999999999 f.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +// --- Path and filename edge cases --- + +func TestUniqPentestNonexistentFile(t *testing.T) { + dir := pentestUniqDir(t, nil) + _, stderr, code := pentestUniqRun(t, "uniq nonexistent", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqPentestEmptyFilename(t *testing.T) { + dir := pentestUniqDir(t, nil) + _, stderr, code := pentestUniqRun(t, `uniq ""`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqPentestDoubleDashFlagLikeFile(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"-d": "hello\nhello\n"}) + stdout, _, code := pentestUniqRun(t, "uniq -- -d", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "hello\n", stdout) +} + +// --- Flag and argument injection --- + +func TestUniqPentestUnknownLongFlag(t *testing.T) { + dir := pentestUniqDir(t, nil) + _, stderr, code := pentestUniqRun(t, "uniq --follow", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqPentestUnknownShortFlag(t *testing.T) { 
+ dir := pentestUniqDir(t, nil) + _, stderr, code := pentestUniqRun(t, "uniq -X", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqPentestMultipleDashStdin(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"src.txt": "a\na\n"}) + _, stderr, code := pentestUniqRun(t, "uniq - - < src.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "extra operand") +} + +func TestUniqPentestFlagViaWordExpansion(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"f.txt": "a\na\n"}) + _, stderr, code := pentestUniqRun(t, `for flag in --no-such; do uniq $flag f.txt; done`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +// --- Behaviour verification --- + +func TestUniqPentestBadAllRepeatedOption(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"f.txt": "a\n"}) + _, stderr, code := pentestUniqRun(t, "uniq --all-repeated=badoption f.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqPentestBadGroupOption(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"f.txt": "a\n"}) + _, stderr, code := pentestUniqRun(t, "uniq --group=badoption f.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +// --- Context cancellation / timeout --- + +func TestUniqPentestPreCancelledContext(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"f.txt": "a\na\nb\n"}) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + _, _, _ = pentestUniqRunCtx(ctx, t, "uniq f.txt", dir) +} + +func TestUniqPentestTimeout(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"f.txt": "a\na\nb\n"}) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + stdout, _, code := pentestUniqRunCtx(ctx, t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +// --- Large file --- + +func TestUniqPentestLargeFile(t *testing.T) { + dir 
:= t.TempDir() + var b strings.Builder + for i := 0; i < 100000; i++ { + b.WriteString("line\n") + } + require.NoError(t, os.WriteFile(filepath.Join(dir, "big.txt"), []byte(b.String()), 0644)) + stdout, _, code := pentestUniqRun(t, "uniq big.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "line\n", stdout) +} + +func TestUniqPentestLargeFileWithCount(t *testing.T) { + dir := t.TempDir() + var b strings.Builder + for i := 0; i < 100000; i++ { + b.WriteString("line\n") + } + require.NoError(t, os.WriteFile(filepath.Join(dir, "big.txt"), []byte(b.String()), 0644)) + stdout, _, code := pentestUniqRun(t, "uniq -c big.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, " 100000 line\n", stdout) +} + +// --- Binary / NUL content --- + +func TestUniqPentestBinaryContent(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"f.txt": "\x00\x01\x02\n\x00\x01\x02\n"}) + stdout, _, code := pentestUniqRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\x00\x01\x02\n", stdout) +} + +func TestUniqPentestCRLFHandling(t *testing.T) { + dir := pentestUniqDir(t, map[string]string{"f.txt": "a\r\na\r\nb\r\n"}) + stdout, _, code := pentestUniqRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\r\nb\r\n", stdout) +} + +// --- Help exits 0 --- + +func TestUniqPentestHelpExitsZero(t *testing.T) { + dir := pentestUniqDir(t, nil) + stdout, stderr, code := pentestUniqRun(t, "uniq --help", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stderr) + assert.Contains(t, stdout, "Usage:") +} diff --git a/interp/builtins/uniq/uniq.go b/interp/builtins/uniq/uniq.go new file mode 100644 index 00000000..8c1817cc --- /dev/null +++ b/interp/builtins/uniq/uniq.go @@ -0,0 +1,545 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. 
+ +// Package uniq implements the uniq builtin command. +// +// uniq — report or omit repeated lines +// +// Usage: uniq [OPTION]... [INPUT] +// +// Filter adjacent matching lines from INPUT (or standard input), +// writing to standard output. +// +// Accepted flags: +// +// -c, --count +// Prefix lines by the number of occurrences. +// +// -d, --repeated +// Only print duplicate lines, one for each group. +// +// -D, --all-repeated[=METHOD] +// Print all duplicate lines. METHOD is one of: none (default), +// prepend, separate. Delimit groups with empty lines per METHOD. +// +// -f N, --skip-fields=N +// Avoid comparing the first N fields. A field is a run of blanks +// (space or tab) followed by non-blank characters. +// +// -i, --ignore-case +// Ignore differences in case when comparing lines. +// +// -s N, --skip-chars=N +// Avoid comparing the first N characters. +// +// -u, --unique +// Only print unique lines (lines that are not repeated). +// +// -w N, --check-chars=N +// Compare no more than N characters in lines. +// +// -z, --zero-terminated +// Line delimiter is NUL (\0), not newline. +// +// --group[=METHOD] +// Show all items, separating groups with an empty line. METHOD +// is one of: separate (default), prepend, append, both. +// Cannot be used with -c, -d, -D, or -u. +// +// -h, --help +// Print this usage message to stdout and exit 0. +// +// The OUTPUT positional argument accepted by GNU uniq is rejected because +// this shell does not permit filesystem writes. +// +// Exit codes: +// +// 0 Success. +// 1 At least one error occurred (missing file, invalid argument, etc.). +// +// Memory safety: +// +// Processing is streaming: only the current and previous lines are kept +// in memory. A per-line cap of MaxLineBytes (1 MiB) prevents unbounded +// allocation on very long lines. All loops check ctx.Err() at each +// iteration to honour the shell's execution timeout. 
+package uniq + +import ( + "bufio" + "context" + "io" + "os" + "strconv" + "strings" + + "github.com/spf13/pflag" + + "github.com/DataDog/rshell/interp/builtins" +) + +func init() { + builtins.Register("uniq", run) +} + +// MaxLineBytes is the per-line buffer cap for the line scanner. +const MaxLineBytes = 1 << 20 // 1 MiB + +// MaxFieldOrChar is the maximum accepted value for -f, -s, -w flags. +const MaxFieldOrChar = 1<<31 - 1 + +// countFieldWidth is the width of the right-justified count field +// used by the -c flag, matching GNU uniq's format. +const countFieldWidth = 7 + +// countFieldPad is a string of spaces used for right-justifying the count. +const countFieldPad = " " // must be countFieldWidth spaces + +// maxGroupLines caps the number of lines buffered per group in +// --all-repeated mode to prevent unbounded memory growth. +const maxGroupLines = 100000 + +type groupMethod int + +const ( + groupNone groupMethod = iota + groupSeparate + groupPrepend + groupAppend + groupBoth +) + +type allRepeatedMethod int + +const ( + allRepeatedNone allRepeatedMethod = iota + allRepeatedPrepend + allRepeatedSeparate +) + +func run(ctx context.Context, callCtx *builtins.CallContext, args []string) builtins.Result { + fs := pflag.NewFlagSet("uniq", pflag.ContinueOnError) + fs.SetOutput(io.Discard) + + help := fs.BoolP("help", "h", false, "print usage and exit") + count := fs.BoolP("count", "c", false, "prefix lines by the number of occurrences") + repeated := fs.BoolP("repeated", "d", false, "only print duplicate lines") + unique := fs.BoolP("unique", "u", false, "only print unique lines") + ignoreCase := fs.BoolP("ignore-case", "i", false, "ignore case when comparing") + zeroTerm := fs.BoolP("zero-terminated", "z", false, "line delimiter is NUL, not newline") + + skipFields := fs.IntP("skip-fields", "f", 0, "avoid comparing the first N fields") + skipChars := fs.IntP("skip-chars", "s", 0, "avoid comparing the first N characters") + checkChars := 
fs.IntP("check-chars", "w", 0, "compare no more than N characters") + + allRepeatedStr := fs.StringP("all-repeated", "D", "", "print all duplicate lines; optionally delimited by METHOD") + fs.Lookup("all-repeated").NoOptDefVal = "none" + + groupStr := fs.String("group", "", "show all items, separated by METHOD") + fs.Lookup("group").NoOptDefVal = "separate" + + if err := fs.Parse(args); err != nil { + callCtx.Errf("uniq: %v\n", err) + return builtins.Result{Code: 1} + } + + if *help { + callCtx.Out("Usage: uniq [OPTION]... [INPUT]\n") + callCtx.Out("Filter adjacent matching lines from INPUT (or stdin), writing to stdout.\n\n") + fs.SetOutput(callCtx.Stdout) + fs.PrintDefaults() + return builtins.Result{} + } + + positional := fs.Args() + if len(positional) > 1 { + callCtx.Errf("uniq: extra operand %q\n", positional[1]) + return builtins.Result{Code: 1} + } + + if *skipFields < 0 || *skipFields > MaxFieldOrChar { + callCtx.Errf("uniq: invalid number of fields to skip: %q\n", strconv.Itoa(*skipFields)) + return builtins.Result{Code: 1} + } + if *skipChars < 0 || *skipChars > MaxFieldOrChar { + callCtx.Errf("uniq: invalid number of bytes to skip: %q\n", strconv.Itoa(*skipChars)) + return builtins.Result{Code: 1} + } + if *checkChars < 0 || *checkChars > MaxFieldOrChar { + callCtx.Errf("uniq: invalid number of bytes to compare: %q\n", strconv.Itoa(*checkChars)) + return builtins.Result{Code: 1} + } + + useCheckChars := fs.Changed("check-chars") + + var arMethod allRepeatedMethod + useAllRepeated := fs.Changed("all-repeated") + if useAllRepeated { + switch { + case hasPrefix("none", *allRepeatedStr): + arMethod = allRepeatedNone + case hasPrefix("prepend", *allRepeatedStr): + arMethod = allRepeatedPrepend + case hasPrefix("separate", *allRepeatedStr): + arMethod = allRepeatedSeparate + default: + callCtx.Errf("uniq: invalid argument %q for '--all-repeated'\n", *allRepeatedStr) + return builtins.Result{Code: 1} + } + } + + var grpMethod groupMethod + useGroup := 
fs.Changed("group") + if useGroup { + switch { + case hasPrefix("separate", *groupStr): + grpMethod = groupSeparate + case hasPrefix("prepend", *groupStr): + grpMethod = groupPrepend + case hasPrefix("append", *groupStr): + grpMethod = groupAppend + case hasPrefix("both", *groupStr): + grpMethod = groupBoth + default: + callCtx.Errf("uniq: invalid argument %q for '--group'\n", *groupStr) + return builtins.Result{Code: 1} + } + } + + if useGroup && (*count || *repeated || useAllRepeated || *unique) { + callCtx.Errf("uniq: --group is mutually exclusive with -c/-d/-D/-u\n") + return builtins.Result{Code: 1} + } + + if useAllRepeated && *count { + callCtx.Errf("uniq: printing all duplicated lines and repeat counts is meaningless\n") + return builtins.Result{Code: 1} + } + + file := "-" + if len(positional) == 1 { + file = positional[0] + } + + delim := byte('\n') + if *zeroTerm { + delim = 0 + } + + cfg := &config{ + count: *count, + repeated: *repeated, + unique: *unique, + ignoreCase: *ignoreCase, + skipFields: *skipFields, + skipChars: *skipChars, + checkChars: *checkChars, + useCheckChars: useCheckChars, + allRepeated: useAllRepeated, + arMethod: arMethod, + grpMethod: grpMethod, + delim: delim, + } + + if err := process(ctx, callCtx, file, cfg); err != nil { + name := file + if file == "-" { + name = "standard input" + } + callCtx.Errf("uniq: %s: %s\n", name, callCtx.PortableErr(err)) + return builtins.Result{Code: 1} + } + return builtins.Result{} +} + +type config struct { + count bool + repeated bool + unique bool + ignoreCase bool + skipFields int + skipChars int + checkChars int + useCheckChars bool + allRepeated bool + arMethod allRepeatedMethod + grpMethod groupMethod + delim byte +} + +func process(ctx context.Context, callCtx *builtins.CallContext, file string, cfg *config) error { + var rc io.ReadCloser + if file == "-" { + if callCtx.Stdin == nil { + return nil + } + rc = io.NopCloser(callCtx.Stdin) + } else { + f, err := callCtx.OpenFile(ctx, file, 
os.O_RDONLY, 0) + if err != nil { + return err + } + defer f.Close() + rc = f + } + + sc := bufio.NewScanner(rc) + buf := make([]byte, 4096) + sc.Buffer(buf, MaxLineBytes) + sc.Split(makeSplitFunc(cfg.delim)) + + w := callCtx.Stdout + delimStr := string(cfg.delim) + + if cfg.grpMethod != groupNone { + return processGroup(ctx, w, sc, cfg, delimStr) + } + if cfg.allRepeated { + return processAllRepeated(ctx, w, sc, cfg, delimStr) + } + return processDefault(ctx, w, sc, cfg, delimStr) +} + +func processDefault(ctx context.Context, w io.Writer, sc *bufio.Scanner, cfg *config, delimStr string) error { + var prev string + var prevCount int + first := true + + for sc.Scan() { + if ctx.Err() != nil { + return ctx.Err() + } + line := sc.Text() + if first { + prev = line + prevCount = 1 + first = false + continue + } + if linesEqual(prev, line, cfg) { + prevCount++ + continue + } + if err := emitLine(w, prev, prevCount, cfg, delimStr); err != nil { + return err + } + prev = line + prevCount = 1 + } + if err := sc.Err(); err != nil { + return err + } + if !first { + return emitLine(w, prev, prevCount, cfg, delimStr) + } + return nil +} + +func emitLine(w io.Writer, line string, n int, cfg *config, delimStr string) error { + if cfg.repeated && n < 2 { + return nil + } + if cfg.unique && n >= 2 { + return nil + } + if cfg.count { + s := strconv.Itoa(n) + pad := max(0, countFieldWidth-len(s)) + _, err := io.WriteString(w, countFieldPad[:pad]+s+" "+line+delimStr) + return err + } + _, err := io.WriteString(w, line+delimStr) + return err +} + +func processAllRepeated(ctx context.Context, w io.Writer, sc *bufio.Scanner, cfg *config, delimStr string) error { + var group []string + var prev string + first := true + firstGroup := true + + for sc.Scan() { + if ctx.Err() != nil { + return ctx.Err() + } + line := sc.Text() + if first { + prev = line + group = append(group, line) + first = false + continue + } + if linesEqual(prev, line, cfg) { + if len(group) < maxGroupLines { + group = 
append(group, line) + } + continue + } + if err := emitAllRepeatedGroup(w, group, cfg, delimStr, &firstGroup); err != nil { + return err + } + group = group[:0] + group = append(group, line) + prev = line + } + if err := sc.Err(); err != nil { + return err + } + if !first { + return emitAllRepeatedGroup(w, group, cfg, delimStr, &firstGroup) + } + return nil +} + +func emitAllRepeatedGroup(w io.Writer, group []string, cfg *config, delimStr string, firstGroup *bool) error { + if len(group) < 2 { + return nil + } + switch cfg.arMethod { + case allRepeatedPrepend: + if _, err := io.WriteString(w, delimStr); err != nil { + return err + } + case allRepeatedSeparate: + if !*firstGroup { + if _, err := io.WriteString(w, delimStr); err != nil { + return err + } + } + } + *firstGroup = false + for _, line := range group { + if _, err := io.WriteString(w, line+delimStr); err != nil { + return err + } + } + return nil +} + +func processGroup(ctx context.Context, w io.Writer, sc *bufio.Scanner, cfg *config, delimStr string) error { + var prev string + first := true + firstGroup := true + + for sc.Scan() { + if ctx.Err() != nil { + return ctx.Err() + } + line := sc.Text() + if first { + prev = line + if err := emitGroupStart(w, cfg, delimStr, &firstGroup); err != nil { + return err + } + if _, err := io.WriteString(w, line+delimStr); err != nil { + return err + } + first = false + continue + } + if linesEqual(prev, line, cfg) { + if _, err := io.WriteString(w, line+delimStr); err != nil { + return err + } + continue + } + if cfg.grpMethod == groupAppend { + if _, err := io.WriteString(w, delimStr); err != nil { + return err + } + } + if err := emitGroupStart(w, cfg, delimStr, &firstGroup); err != nil { + return err + } + if _, err := io.WriteString(w, line+delimStr); err != nil { + return err + } + prev = line + } + if err := sc.Err(); err != nil { + return err + } + if !first && (cfg.grpMethod == groupAppend || cfg.grpMethod == groupBoth) { + if _, err := io.WriteString(w, 
delimStr); err != nil { + return err + } + } + return nil +} + +func emitGroupStart(w io.Writer, cfg *config, delimStr string, firstGroup *bool) error { + if cfg.grpMethod == groupPrepend || cfg.grpMethod == groupBoth { + if _, err := io.WriteString(w, delimStr); err != nil { + return err + } + } else if cfg.grpMethod == groupSeparate && !*firstGroup { + if _, err := io.WriteString(w, delimStr); err != nil { + return err + } + } + *firstGroup = false + return nil +} + +func linesEqual(a, b string, cfg *config) bool { + a = extractCompareKey(a, cfg) + b = extractCompareKey(b, cfg) + if cfg.ignoreCase { + return strings.EqualFold(a, b) + } + return a == b +} + +func extractCompareKey(line string, cfg *config) string { + s := line + if cfg.skipFields > 0 { + s = skipFieldsN(s, cfg.skipFields) + } + if cfg.skipChars > 0 { + if cfg.skipChars >= len(s) { + s = "" + } else { + s = s[cfg.skipChars:] + } + } + if cfg.useCheckChars && cfg.checkChars < len(s) { + s = s[:cfg.checkChars] + } + return s +} + +func skipFieldsN(s string, n int) string { + i := 0 + for field := 0; field < n && i < len(s); field++ { + for i < len(s) && (s[i] == ' ' || s[i] == '\t') { + i++ + } + for i < len(s) && s[i] != ' ' && s[i] != '\t' { + i++ + } + } + return s[i:] +} + +func hasPrefix(full, abbrev string) bool { + return len(abbrev) > 0 && len(abbrev) <= len(full) && full[:len(abbrev)] == abbrev +} + +func makeSplitFunc(delim byte) bufio.SplitFunc { + return func(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + for i, b := range data { + if b == delim { + return i + 1, data[:i], nil + } + } + if atEOF { + return len(data), data, nil + } + return 0, nil, nil + } +} diff --git a/interp/builtins/uniq/uniq_test.go b/interp/builtins/uniq/uniq_test.go new file mode 100644 index 00000000..67fb6109 --- /dev/null +++ b/interp/builtins/uniq/uniq_test.go @@ -0,0 +1,675 @@ +// Unless explicitly stated otherwise all files in this 
repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package uniq_test + +import ( + "bytes" + "context" + "errors" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "mvdan.cc/sh/v3/syntax" + + "github.com/DataDog/rshell/interp" + _ "github.com/DataDog/rshell/interp/builtins/uniq" +) + +func runScriptCtx(ctx context.Context, t *testing.T, script, dir string, opts ...interp.RunnerOption) (string, string, int) { + t.Helper() + parser := syntax.NewParser() + prog, err := parser.Parse(strings.NewReader(script), "") + require.NoError(t, err) + + var outBuf, errBuf bytes.Buffer + allOpts := append([]interp.RunnerOption{interp.StdIO(nil, &outBuf, &errBuf)}, opts...) + runner, err := interp.New(allOpts...) + require.NoError(t, err) + defer runner.Close() + + if dir != "" { + runner.Dir = dir + } + + err = runner.Run(ctx, prog) + exitCode := 0 + if err != nil { + var es interp.ExitStatus + if errors.As(err, &es) { + exitCode = int(es) + } else if ctx.Err() == nil { + t.Fatalf("unexpected error: %v", err) + } + } + return outBuf.String(), errBuf.String(), exitCode +} + +func runScript(t *testing.T, script, dir string, opts ...interp.RunnerOption) (string, string, int) { + t.Helper() + return runScriptCtx(context.Background(), t, script, dir, opts...) 
+} + +func cmdRun(t *testing.T, script, dir string) (string, string, int) { + t.Helper() + return runScript(t, script, dir, interp.AllowedPaths([]string{dir})) +} + +func writeFile(t *testing.T, dir, name, content string) string { + t.Helper() + p := filepath.Join(dir, name) + require.NoError(t, os.WriteFile(p, []byte(content), 0644)) + return name +} + +// --- Default behaviour --- + +func TestDefaultRemovesDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\nb\nb\nc\n") + stdout, stderr, code := cmdRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stderr) + assert.Equal(t, "a\nb\nc\n", stdout) +} + +func TestDefaultAllUnique(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\nb\nc\n") + stdout, _, code := cmdRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\nc\n", stdout) +} + +func TestDefaultEmptyFile(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "") + stdout, stderr, code := cmdRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stderr) + assert.Equal(t, "", stdout) +} + +func TestDefaultNoTrailingNewline(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na") + stdout, _, code := cmdRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestDefaultSingleLine(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "hello\n") + stdout, _, code := cmdRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "hello\n", stdout) +} + +func TestDefaultNonAdjacentDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\nb\na\n") + stdout, _, code := cmdRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\na\n", stdout) +} + +// --- Count flag --- + +func TestCountFlag(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq -c f.txt", dir) 
+ assert.Equal(t, 0, code) + assert.Equal(t, " 2 a\n 1 b\n", stdout) +} + +func TestCountFlagAllUnique(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\nb\n") + stdout, _, code := cmdRun(t, "uniq -c f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, " 1 a\n 1 b\n", stdout) +} + +// --- Repeated flag --- + +func TestRepeatedFlag(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq -d f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestRepeatedFlagNoDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\nb\n") + stdout, _, code := cmdRun(t, "uniq -d f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +// --- Unique flag --- + +func TestUniqueFlag(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq -u f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "b\n", stdout) +} + +func TestUniqueFlagAllDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -u f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +func TestRepeatedAndUniqueFlags(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq -d -u f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +// --- Ignore case --- + +func TestIgnoreCase(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "A\na\n") + stdout, _, code := cmdRun(t, "uniq -i f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "A\n", stdout) +} + +func TestIgnoreCaseLongForm(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "A\na\n") + stdout, _, code := cmdRun(t, "uniq --ignore-case f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "A\n", stdout) +} + +func TestCaseSensitiveByDefault(t 
*testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "A\na\n") + stdout, _, code := cmdRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "A\na\n", stdout) +} + +// --- Skip fields --- + +func TestSkipFields(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a a\nb a\n") + stdout, _, code := cmdRun(t, "uniq -f1 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a\n", stdout) +} + +func TestSkipFieldsDifferent(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a a\nb b\n") + stdout, _, code := cmdRun(t, "uniq -f 1 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a\nb b\n", stdout) +} + +func TestSkipFieldsTabs(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\ta\na\ta\n") + stdout, _, code := cmdRun(t, "uniq -f 1 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\ta\n", stdout) +} + +func TestSkipTwoFields(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a a c\nb a c\n") + stdout, _, code := cmdRun(t, "uniq -f 2 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a c\n", stdout) +} + +// --- Skip chars --- + +func TestSkipChars(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "aaa\naaa\n") + stdout, _, code := cmdRun(t, "uniq -s 1 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "aaa\n", stdout) +} + +func TestSkipCharsDifferent(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "baa\naaa\n") + stdout, _, code := cmdRun(t, "uniq -s 2 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "baa\n", stdout) +} + +func TestSkipCharsBeyondLength(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "abc\nabcd\n") + stdout, _, code := cmdRun(t, "uniq -s 4 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc\n", stdout) +} + +func TestSkipCharsZero(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "abc\nabcd\n") + stdout, _, code := cmdRun(t, "uniq -s 0 
f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc\nabcd\n", stdout) +} + +// --- Check chars --- + +func TestCheckChars(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "abc\nabcd\n") + stdout, _, code := cmdRun(t, "uniq -w 0 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc\n", stdout) +} + +func TestCheckCharsOne(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a a\nb a\n") + stdout, _, code := cmdRun(t, "uniq -w 1 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a\nb a\n", stdout) +} + +func TestCheckCharsWithSkipFields(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a a a\nb a c\n") + stdout, _, code := cmdRun(t, "uniq -f 1 -w 1 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a a\n", stdout) +} + +// --- Skip fields + skip chars --- + +func TestSkipFieldsAndChars(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a aaa\nb ab\n") + stdout, _, code := cmdRun(t, "uniq -f 1 -s 1 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a aaa\nb ab\n", stdout) +} + +func TestSkipFieldsAndCharsEqual(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a aaa\nb aaa\n") + stdout, _, code := cmdRun(t, "uniq -f 1 -s 1 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a aaa\n", stdout) +} + +// --- All-repeated flag (-D) --- + +func TestAllRepeatedDefault(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -D f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n", stdout) +} + +func TestAllRepeatedSeparate(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\nb\nc\nc\n") + stdout, _, code := cmdRun(t, "uniq --all-repeated=separate f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nc\nc\n", stdout) +} + +func TestAllRepeatedPrepend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\nb\nc\nc\n") + stdout, 
_, code := cmdRun(t, "uniq --all-repeated=prepend f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nc\nc\n", stdout) +} + +func TestAllRepeatedPrependNoRepeats(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\nb\n") + stdout, _, code := cmdRun(t, "uniq --all-repeated=prepend f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +func TestAllRepeatedBadOption(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\n") + _, stderr, code := cmdRun(t, "uniq --all-repeated=badoption f.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestAllRepeatedWithCount(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\n") + _, stderr, code := cmdRun(t, "uniq -D -c f.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "meaningless") +} + +// --- Group flag --- + +func TestGroupDefault(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nb\n", stdout) +} + +func TestGroupPrepend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=prepend f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nb\n", stdout) +} + +func TestGroupAppend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=append f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nb\n\n", stdout) +} + +func TestGroupBoth(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=both f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nb\n\n", stdout) +} + +func TestGroupEmptyInput(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "") + stdout, _, code := cmdRun(t, "uniq 
--group=prepend f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +func TestGroupWithCount(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\n") + _, stderr, code := cmdRun(t, "uniq --group -c f.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "mutually exclusive") +} + +func TestGroupWithRepeated(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\n") + _, stderr, code := cmdRun(t, "uniq --group -d f.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "mutually exclusive") +} + +func TestGroupWithUnique(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\n") + _, stderr, code := cmdRun(t, "uniq --group -u f.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "mutually exclusive") +} + +func TestGroupWithAllRepeated(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\n") + _, stderr, code := cmdRun(t, "uniq --group -D f.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "mutually exclusive") +} + +func TestGroupBadOption(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\n") + _, stderr, code := cmdRun(t, "uniq --group=badoption f.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +// --- Zero-terminated --- + +func TestZeroTerminated(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\x00a\x00b") + stdout, _, code := cmdRun(t, "uniq -z f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\x00b\x00", stdout) +} + +func TestZeroTerminatedNewlinesInContent(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -z f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\x00", stdout) +} + +func TestZeroTerminatedLongForm(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\x00a\x00b") + stdout, _, code := cmdRun(t, "uniq --zero-terminated f.txt", dir) + 
assert.Equal(t, 0, code) + assert.Equal(t, "a\x00b\x00", stdout) +} + +// --- Stdin --- + +func TestStdinImplicit(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "src.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq < src.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +func TestStdinDash(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "src.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq - < src.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +func TestPipeInput(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "cat f.txt | uniq", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +// --- Help --- + +func TestHelp(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := cmdRun(t, "uniq --help", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stderr) + assert.Contains(t, stdout, "Usage:") +} + +func TestHelpShort(t *testing.T) { + dir := t.TempDir() + stdout, _, code := cmdRun(t, "uniq -h", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "Usage:") +} + +// --- Error cases --- + +func TestMissingFile(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq nonexistent.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestExtraOperand(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "a\n") + writeFile(t, dir, "b.txt", "b\n") + _, stderr, code := cmdRun(t, "uniq a.txt b.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "extra operand") +} + +func TestUnknownFlag(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq --no-such-flag", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUnknownShortFlag(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq -X", dir) + assert.Equal(t, 1, code) + assert.Contains(t, 
stderr, "uniq:") +} + +// --- 8-bit chars and NUL in content --- + +func TestEightBitChars(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "ö\nv\n") + stdout, _, code := cmdRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "ö\nv\n", stdout) +} + +func TestNullBytesInContent(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\x00a\na\n") + stdout, _, code := cmdRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\x00a\na\n", stdout) +} + +// --- Context cancellation --- + +func TestContextCancellation(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\nb\n") + ctx, cancel := context.WithCancel(context.Background()) + cancel() + _, _, _ = runScriptCtx(ctx, t, "uniq f.txt", dir, interp.AllowedPaths([]string{dir})) +} + +// --- CRLF --- + +func TestCRLFPreserved(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\r\na\r\nb\r\n") + stdout, _, code := cmdRun(t, "uniq f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\r\nb\r\n", stdout) +} + +// --- Double dash --- + +func TestDoubleDash(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "-d", "hello\n") + stdout, _, code := cmdRun(t, "uniq -- -d", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "hello\n", stdout) +} + +// --- Outside allowed paths --- + +func TestOutsideAllowedPaths(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq /etc/passwd", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +// --- Nil stdin --- + +func TestNilStdin(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := runScript(t, "uniq", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stderr) + assert.Equal(t, "", stdout) +} + +// --- Abbreviation matching for --all-repeated and --group --- + +func TestAllRepeatedAbbrev(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq --all-repeated=s f.txt", 
dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n", stdout) +} + +func TestGroupAbbrev(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=p f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nb\n", stdout) +} + +// --- All-repeated with -w (check-chars) --- + +func TestAllRepeatedWithCheckChars(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a a\na b\n") + stdout, _, code := cmdRun(t, "uniq -D -w1 f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a\na b\n", stdout) +} + +// --- Group single group cases --- + +func TestGroupSingleGroupPrepend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq --group=prepend f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n", stdout) +} + +func TestGroupSingleGroupAppend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq --group=append f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\n", stdout) +} + +func TestGroupSingleGroupSeparate(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "f.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq --group=separate f.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n", stdout) +} diff --git a/interp/runner_exec.go b/interp/runner_exec.go index e767ec5e..f46e51b2 100644 --- a/interp/runner_exec.go +++ b/interp/runner_exec.go @@ -24,6 +24,7 @@ import ( _ "github.com/DataDog/rshell/interp/builtins/false" _ "github.com/DataDog/rshell/interp/builtins/head" _ "github.com/DataDog/rshell/interp/builtins/true" + _ "github.com/DataDog/rshell/interp/builtins/uniq" ) func (r *Runner) stmt(ctx context.Context, st *syntax.Stmt) { diff --git a/tests/import_allowlist_test.go b/tests/import_allowlist_test.go index 4df9afd3..f23753d5 100644 --- a/tests/import_allowlist_test.go +++ 
b/tests/import_allowlist_test.go @@ -29,6 +29,8 @@ import ( // third-party packages and other internal module packages. var builtinAllowedSymbols = []string{ "bufio.NewScanner", + "bufio.Scanner", + "bufio.SplitFunc", "context.Context", "errors.Is", "github.com/spf13/pflag.ContinueOnError", @@ -39,9 +41,13 @@ var builtinAllowedSymbols = []string{ "io.NopCloser", "io.ReadCloser", "io.Reader", + "io.WriteString", + "io.Writer", "os.O_RDONLY", "strconv.Atoi", + "strconv.Itoa", "strconv.ParseInt", + "strings.EqualFold", } // permanentlyBanned lists packages that may never be imported by builtin diff --git a/tests/scenarios/cmd/uniq/all_repeated/prepend.yaml b/tests/scenarios/cmd/uniq/all_repeated/prepend.yaml new file mode 100644 index 00000000..b494fc58 --- /dev/null +++ b/tests/scenarios/cmd/uniq/all_repeated/prepend.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 116 +description: uniq --all-repeated=prepend prepends blank line before each group. +setup: + files: + - path: f.txt + content: "a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --all-repeated=prepend f.txt +expect: + stdout: "\na\na\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/all_repeated/separate.yaml b/tests/scenarios/cmd/uniq/all_repeated/separate.yaml new file mode 100644 index 00000000..4036ef98 --- /dev/null +++ b/tests/scenarios/cmd/uniq/all_repeated/separate.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 113 +description: uniq --all-repeated=separate separates groups with blank lines. 
+setup: + files: + - path: f.txt + content: "a\na\nb\nc\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --all-repeated=separate f.txt +expect: + stdout: "a\na\n\nc\nc\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/adjacent_duplicates.yaml b/tests/scenarios/cmd/uniq/basic/adjacent_duplicates.yaml new file mode 100644 index 00000000..add77e25 --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/adjacent_duplicates.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 2 +description: uniq merges adjacent duplicate lines. +setup: + files: + - path: f.txt + content: "a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq f.txt +expect: + stdout: "a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/all_unique.yaml b/tests/scenarios/cmd/uniq/basic/all_unique.yaml new file mode 100644 index 00000000..cf2ef74e --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/all_unique.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 7 +description: uniq with all unique lines passes them through. +setup: + files: + - path: f.txt + content: "a\nb\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq f.txt +expect: + stdout: "a\nb\nc\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/eight_bit_chars.yaml b/tests/scenarios/cmd/uniq/basic/eight_bit_chars.yaml new file mode 100644 index 00000000..b596baac --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/eight_bit_chars.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 8 +description: uniq preserves 8-bit characters. 
+setup: + files: + - path: f.txt + content: "ö\nv\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq f.txt +expect: + stdout: "ö\nv\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/empty_file.yaml b/tests/scenarios/cmd/uniq/basic/empty_file.yaml new file mode 100644 index 00000000..f7eaf636 --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/empty_file.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 1 +description: uniq on empty file produces empty output. +setup: + files: + - path: f.txt + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq f.txt +expect: + stdout: "" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/no_trailing_newline.yaml b/tests/scenarios/cmd/uniq/basic/no_trailing_newline.yaml new file mode 100644 index 00000000..f123db3d --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/no_trailing_newline.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 3 +description: uniq with no trailing newline still outputs a newline. +setup: + files: + - path: f.txt + content: "a\na" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq f.txt +expect: + stdout: "a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/non_adjacent.yaml b/tests/scenarios/cmd/uniq/basic/non_adjacent.yaml new file mode 100644 index 00000000..944ff143 --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/non_adjacent.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 6 +description: uniq does not merge non-adjacent duplicates. 
+setup: + files: + - path: f.txt + content: "b\na\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq f.txt +expect: + stdout: "b\na\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/check_chars/one.yaml b/tests/scenarios/cmd/uniq/check_chars/one.yaml new file mode 100644 index 00000000..0560b883 --- /dev/null +++ b/tests/scenarios/cmd/uniq/check_chars/one.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 60 +description: uniq -w 1 compares only first character. +setup: + files: + - path: f.txt + content: "a a\nb a\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -w 1 f.txt +expect: + stdout: "a a\nb a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/check_chars/zero.yaml b/tests/scenarios/cmd/uniq/check_chars/zero.yaml new file mode 100644 index 00000000..07897404 --- /dev/null +++ b/tests/scenarios/cmd/uniq/check_chars/zero.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 57 +description: uniq -w 0 treats all lines as equal. +setup: + files: + - path: f.txt + content: "abc\nabcd\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -w 0 f.txt +expect: + stdout: "abc\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/count/basic.yaml b/tests/scenarios/cmd/uniq/count/basic.yaml new file mode 100644 index 00000000..3132964c --- /dev/null +++ b/tests/scenarios/cmd/uniq/count/basic.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 101 +description: uniq -c prefixes lines with count. 
+setup: + files: + - path: f.txt + content: "a\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -c f.txt +expect: + stdout: " 1 a\n 1 b\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/count/duplicates.yaml b/tests/scenarios/cmd/uniq/count/duplicates.yaml new file mode 100644 index 00000000..d2813168 --- /dev/null +++ b/tests/scenarios/cmd/uniq/count/duplicates.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 102 +description: uniq -c counts duplicates. +setup: + files: + - path: f.txt + content: "a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -c f.txt +expect: + stdout: " 2 a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/errors/all_repeated_with_count.yaml b/tests/scenarios/cmd/uniq/errors/all_repeated_with_count.yaml new file mode 100644 index 00000000..2c7ecb91 --- /dev/null +++ b/tests/scenarios/cmd/uniq/errors/all_repeated_with_count.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 112 +description: uniq -D -c is meaningless. +setup: + files: + - path: f.txt + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -D -c f.txt +expect: + stdout: "" + stderr_contains: ["meaningless"] + exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/errors/extra_operand.yaml b/tests/scenarios/cmd/uniq/errors/extra_operand.yaml new file mode 100644 index 00000000..85741ea2 --- /dev/null +++ b/tests/scenarios/cmd/uniq/errors/extra_operand.yaml @@ -0,0 +1,15 @@ +description: uniq rejects extra operand (OUTPUT argument not allowed). 
+setup: + files: + - path: a.txt + content: "a\n" + - path: b.txt + content: "b\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq a.txt b.txt +expect: + stdout: "" + stderr_contains: ["extra operand"] + exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/errors/group_with_count.yaml b/tests/scenarios/cmd/uniq/errors/group_with_count.yaml new file mode 100644 index 00000000..5cce6f03 --- /dev/null +++ b/tests/scenarios/cmd/uniq/errors/group_with_count.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 141 +description: uniq --group -c is mutually exclusive. +setup: + files: + - path: f.txt + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --group -c f.txt +expect: + stdout: "" + stderr_contains: ["mutually exclusive"] + exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/errors/missing_file.yaml b/tests/scenarios/cmd/uniq/errors/missing_file.yaml new file mode 100644 index 00000000..16b09244 --- /dev/null +++ b/tests/scenarios/cmd/uniq/errors/missing_file.yaml @@ -0,0 +1,9 @@ +description: uniq exits 1 and prints an error when a file does not exist. +input: + allowed_paths: ["$DIR"] + script: |+ + uniq nonexistent.txt +expect: + stdout: "" + stderr_contains: ["uniq:"] + exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/errors/unknown_flag.yaml b/tests/scenarios/cmd/uniq/errors/unknown_flag.yaml new file mode 100644 index 00000000..bff788e8 --- /dev/null +++ b/tests/scenarios/cmd/uniq/errors/unknown_flag.yaml @@ -0,0 +1,9 @@ +description: uniq rejects unknown flags. 
+input: + allowed_paths: ["$DIR"] + script: |+ + uniq --no-such-flag +expect: + stdout: "" + stderr_contains: ["uniq:"] + exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/group/append.yaml b/tests/scenarios/cmd/uniq/group/append.yaml new file mode 100644 index 00000000..59197210 --- /dev/null +++ b/tests/scenarios/cmd/uniq/group/append.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 129 +description: uniq --group=append appends blank line after each group. +setup: + files: + - path: f.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --group=append f.txt +expect: + stdout: "a\na\n\nb\n\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/group/both.yaml b/tests/scenarios/cmd/uniq/group/both.yaml new file mode 100644 index 00000000..b51d6f0f --- /dev/null +++ b/tests/scenarios/cmd/uniq/group/both.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 132 +description: uniq --group=both prepends and appends blank lines around each group. +setup: + files: + - path: f.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --group=both f.txt +expect: + stdout: "\na\na\n\nb\n\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/group/prepend.yaml b/tests/scenarios/cmd/uniq/group/prepend.yaml new file mode 100644 index 00000000..62c55a19 --- /dev/null +++ b/tests/scenarios/cmd/uniq/group/prepend.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 128 +description: uniq --group=prepend prepends blank line before each group. 
+setup: + files: + - path: f.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --group=prepend f.txt +expect: + stdout: "\na\na\n\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/group/separate.yaml b/tests/scenarios/cmd/uniq/group/separate.yaml new file mode 100644 index 00000000..af8c6b5c --- /dev/null +++ b/tests/scenarios/cmd/uniq/group/separate.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 130 +description: uniq --group=separate separates groups with blank lines. +setup: + files: + - path: f.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --group=separate f.txt +expect: + stdout: "a\na\n\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/hardening/double_dash.yaml b/tests/scenarios/cmd/uniq/hardening/double_dash.yaml new file mode 100644 index 00000000..4fb782b0 --- /dev/null +++ b/tests/scenarios/cmd/uniq/hardening/double_dash.yaml @@ -0,0 +1,13 @@ +description: uniq -- separates flags from flag-like filenames. +setup: + files: + - path: "-d" + content: "hello\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -- -d +expect: + stdout: "hello\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/hardening/outside_allowed_paths.yaml b/tests/scenarios/cmd/uniq/hardening/outside_allowed_paths.yaml new file mode 100644 index 00000000..3a182e27 --- /dev/null +++ b/tests/scenarios/cmd/uniq/hardening/outside_allowed_paths.yaml @@ -0,0 +1,14 @@ +skip_assert_against_bash: true +description: uniq is blocked from reading files outside the allowed paths sandbox. 
+setup: + files: + - path: local.txt + content: "local\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq /etc/passwd +expect: + stdout: "" + stderr_contains: ["uniq:"] + exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/ignore_case/basic.yaml b/tests/scenarios/cmd/uniq/ignore_case/basic.yaml new file mode 100644 index 00000000..e9dcbcae --- /dev/null +++ b/tests/scenarios/cmd/uniq/ignore_case/basic.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 126 +description: uniq -i ignores case when comparing. +setup: + files: + - path: f.txt + content: "A\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -i f.txt +expect: + stdout: "A\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/repeated/basic.yaml b/tests/scenarios/cmd/uniq/repeated/basic.yaml new file mode 100644 index 00000000..3f84f37d --- /dev/null +++ b/tests/scenarios/cmd/uniq/repeated/basic.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 20 +description: uniq -d only prints duplicate lines. +setup: + files: + - path: f.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -d f.txt +expect: + stdout: "a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/repeated/no_duplicates.yaml b/tests/scenarios/cmd/uniq/repeated/no_duplicates.yaml new file mode 100644 index 00000000..1a350a82 --- /dev/null +++ b/tests/scenarios/cmd/uniq/repeated/no_duplicates.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 21 +description: uniq -d with no duplicates produces no output. 
+setup: + files: + - path: f.txt + content: "a\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -d f.txt +expect: + stdout: "" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/skip/chars_basic.yaml b/tests/scenarios/cmd/uniq/skip/chars_basic.yaml new file mode 100644 index 00000000..ee91c652 --- /dev/null +++ b/tests/scenarios/cmd/uniq/skip/chars_basic.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 42 +description: uniq -s 1 skips the first character when comparing. +setup: + files: + - path: f.txt + content: "aaa\naaa\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -s 1 f.txt +expect: + stdout: "aaa\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/skip/fields_basic.yaml b/tests/scenarios/cmd/uniq/skip/fields_basic.yaml new file mode 100644 index 00000000..2ba8b5b0 --- /dev/null +++ b/tests/scenarios/cmd/uniq/skip/fields_basic.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 31 +description: uniq -f 1 skips the first field when comparing. +setup: + files: + - path: f.txt + content: "a a\nb a\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -f 1 f.txt +expect: + stdout: "a a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/stdin/dash.yaml b/tests/scenarios/cmd/uniq/stdin/dash.yaml new file mode 100644 index 00000000..5e566326 --- /dev/null +++ b/tests/scenarios/cmd/uniq/stdin/dash.yaml @@ -0,0 +1,13 @@ +description: uniq reads from stdin when file is -. +setup: + files: + - path: src.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq - < src.txt +expect: + stdout: "a\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/stdin/implicit.yaml b/tests/scenarios/cmd/uniq/stdin/implicit.yaml new file mode 100644 index 00000000..2100a802 --- /dev/null +++ b/tests/scenarios/cmd/uniq/stdin/implicit.yaml @@ -0,0 +1,13 @@ +description: uniq with no file arguments reads from standard input. 
+setup: + files: + - path: src.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq < src.txt +expect: + stdout: "a\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/stdin/pipe.yaml b/tests/scenarios/cmd/uniq/stdin/pipe.yaml new file mode 100644 index 00000000..7cd0f89b --- /dev/null +++ b/tests/scenarios/cmd/uniq/stdin/pipe.yaml @@ -0,0 +1,13 @@ +description: uniq works with piped input. +setup: + files: + - path: f.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + cat f.txt | uniq +expect: + stdout: "a\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/unique/all_unique.yaml b/tests/scenarios/cmd/uniq/unique/all_unique.yaml new file mode 100644 index 00000000..091255d3 --- /dev/null +++ b/tests/scenarios/cmd/uniq/unique/all_unique.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 10 +description: uniq -u with all unique lines passes them through. +setup: + files: + - path: f.txt + content: "a\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -u f.txt +expect: + stdout: "a\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/unique/basic.yaml b/tests/scenarios/cmd/uniq/unique/basic.yaml new file mode 100644 index 00000000..c97fbf4a --- /dev/null +++ b/tests/scenarios/cmd/uniq/unique/basic.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 9 +description: uniq -u only prints unique lines. +setup: + files: + - path: f.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -u f.txt +expect: + stdout: "b\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/zero_terminated/basic.yaml b/tests/scenarios/cmd/uniq/zero_terminated/basic.yaml new file mode 100644 index 00000000..a094284f --- /dev/null +++ b/tests/scenarios/cmd/uniq/zero_terminated/basic.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 124 +description: uniq -z uses NUL as line delimiter. 
+setup: + files: + - path: f.txt + content: "a\x00a\x00b" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -z f.txt +expect: + stdout: "a\x00b\x00" + stderr: "" + exit_code: 0