From 0145c4e1c10c3caf83a0434f00d3ebdbda17333a Mon Sep 17 00:00:00 2001 From: Alexandre Yang Date: Thu, 12 Mar 2026 17:09:43 +0100 Subject: [PATCH 1/4] Revert "Remove uniq builtin command for re-review" This reverts commit 1ce8b8fdf70c2c8804b9112776b58a5ebbf451ff. --- README.md | 2 +- SHELL_FEATURES.md | 1 + interp/builtins/uniq/uniq.go | 580 ++++++++++++++++ interp/builtins/uniq/uniq_gnu_compat_test.go | 233 +++++++ interp/builtins/uniq/uniq_pentest_test.go | 252 +++++++ interp/builtins/uniq/uniq_test.go | 656 ++++++++++++++++++ interp/register_builtins.go | 2 + .../cmd/uniq/all_repeated/prepend.yaml | 14 + .../cmd/uniq/all_repeated/separate.yaml | 14 + .../cmd/uniq/all_repeated/with_unique.yaml | 13 + .../cmd/uniq/basic/adjacent_duplicates.yaml | 14 + .../scenarios/cmd/uniq/basic/all_unique.yaml | 14 + .../cmd/uniq/basic/d_and_u_suppress.yaml | 14 + .../cmd/uniq/basic/different_lines.yaml | 14 + .../cmd/uniq/basic/eight_bit_chars.yaml | 14 + .../scenarios/cmd/uniq/basic/empty_input.yaml | 14 + .../scenarios/cmd/uniq/basic/ignore_case.yaml | 14 + .../cmd/uniq/basic/ignore_case_unicode.yaml | 13 + .../cmd/uniq/basic/no_trailing_newline.yaml | 14 + .../scenarios/cmd/uniq/check_chars/w_one.yaml | 14 + .../cmd/uniq/check_chars/w_zero.yaml | 14 + .../scenarios/cmd/uniq/count/basic_count.yaml | 14 + .../cmd/uniq/count/count_duplicates.yaml | 14 + .../errors/all_repeated_empty_method.yaml | 9 + .../uniq/errors/all_repeated_with_count.yaml | 9 + .../cmd/uniq/errors/group_empty_method.yaml | 9 + .../cmd/uniq/errors/group_with_count.yaml | 9 + .../cmd/uniq/errors/missing_file.yaml | 10 + .../errors/negative_overflow_skip_fields.yaml | 13 + .../cmd/uniq/errors/unknown_flag.yaml | 9 + tests/scenarios/cmd/uniq/group/append.yaml | 14 + tests/scenarios/cmd/uniq/group/both.yaml | 14 + tests/scenarios/cmd/uniq/group/prepend.yaml | 14 + tests/scenarios/cmd/uniq/group/separate.yaml | 14 + .../uniq/hardening/null_bytes_in_lines.yaml | 14 + .../cmd/uniq/repeated/basic_repeated.yaml | 
14 + .../cmd/uniq/repeated/no_repeated.yaml | 14 + tests/scenarios/cmd/uniq/skip/skip_chars.yaml | 14 + .../scenarios/cmd/uniq/skip/skip_fields.yaml | 14 + .../scenarios/cmd/uniq/stdin/pipe_input.yaml | 14 + .../cmd/uniq/unique/all_duplicated.yaml | 14 + .../scenarios/cmd/uniq/unique/all_unique.yaml | 14 + .../cmd/uniq/zero_terminated/basic_zero.yaml | 14 + 43 files changed, 2197 insertions(+), 1 deletion(-) create mode 100644 interp/builtins/uniq/uniq.go create mode 100644 interp/builtins/uniq/uniq_gnu_compat_test.go create mode 100644 interp/builtins/uniq/uniq_pentest_test.go create mode 100644 interp/builtins/uniq/uniq_test.go create mode 100644 tests/scenarios/cmd/uniq/all_repeated/prepend.yaml create mode 100644 tests/scenarios/cmd/uniq/all_repeated/separate.yaml create mode 100644 tests/scenarios/cmd/uniq/all_repeated/with_unique.yaml create mode 100644 tests/scenarios/cmd/uniq/basic/adjacent_duplicates.yaml create mode 100644 tests/scenarios/cmd/uniq/basic/all_unique.yaml create mode 100644 tests/scenarios/cmd/uniq/basic/d_and_u_suppress.yaml create mode 100644 tests/scenarios/cmd/uniq/basic/different_lines.yaml create mode 100644 tests/scenarios/cmd/uniq/basic/eight_bit_chars.yaml create mode 100644 tests/scenarios/cmd/uniq/basic/empty_input.yaml create mode 100644 tests/scenarios/cmd/uniq/basic/ignore_case.yaml create mode 100644 tests/scenarios/cmd/uniq/basic/ignore_case_unicode.yaml create mode 100644 tests/scenarios/cmd/uniq/basic/no_trailing_newline.yaml create mode 100644 tests/scenarios/cmd/uniq/check_chars/w_one.yaml create mode 100644 tests/scenarios/cmd/uniq/check_chars/w_zero.yaml create mode 100644 tests/scenarios/cmd/uniq/count/basic_count.yaml create mode 100644 tests/scenarios/cmd/uniq/count/count_duplicates.yaml create mode 100644 tests/scenarios/cmd/uniq/errors/all_repeated_empty_method.yaml create mode 100644 tests/scenarios/cmd/uniq/errors/all_repeated_with_count.yaml create mode 100644 
tests/scenarios/cmd/uniq/errors/group_empty_method.yaml create mode 100644 tests/scenarios/cmd/uniq/errors/group_with_count.yaml create mode 100644 tests/scenarios/cmd/uniq/errors/missing_file.yaml create mode 100644 tests/scenarios/cmd/uniq/errors/negative_overflow_skip_fields.yaml create mode 100644 tests/scenarios/cmd/uniq/errors/unknown_flag.yaml create mode 100644 tests/scenarios/cmd/uniq/group/append.yaml create mode 100644 tests/scenarios/cmd/uniq/group/both.yaml create mode 100644 tests/scenarios/cmd/uniq/group/prepend.yaml create mode 100644 tests/scenarios/cmd/uniq/group/separate.yaml create mode 100644 tests/scenarios/cmd/uniq/hardening/null_bytes_in_lines.yaml create mode 100644 tests/scenarios/cmd/uniq/repeated/basic_repeated.yaml create mode 100644 tests/scenarios/cmd/uniq/repeated/no_repeated.yaml create mode 100644 tests/scenarios/cmd/uniq/skip/skip_chars.yaml create mode 100644 tests/scenarios/cmd/uniq/skip/skip_fields.yaml create mode 100644 tests/scenarios/cmd/uniq/stdin/pipe_input.yaml create mode 100644 tests/scenarios/cmd/uniq/unique/all_duplicated.yaml create mode 100644 tests/scenarios/cmd/uniq/unique/all_unique.yaml create mode 100644 tests/scenarios/cmd/uniq/zero_terminated/basic_zero.yaml diff --git a/README.md b/README.md index 533830a4..8d2ab8e3 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ Linux, macOS, and Windows. ``` tests/scenarios/ -├── cmd/ # builtin command tests (echo, cat, grep, head, tail, wc, ...) +├── cmd/ # builtin command tests (echo, cat, grep, head, tail, uniq, wc, ...) └── shell/ # shell feature tests (pipes, variables, control flow, ...) ``` diff --git a/SHELL_FEATURES.md b/SHELL_FEATURES.md index d4dc9fb1..2eb5d070 100644 --- a/SHELL_FEATURES.md +++ b/SHELL_FEATURES.md @@ -20,6 +20,7 @@ Blocked features are rejected before execution with exit code 2. 
- ✅ `tail [-n N|-c N] [-q|-v] [-z] [FILE]...` — output the last part of files (default: last 10 lines); supports `+N` offset mode; `-f`/`--follow` is rejected - ✅ `tr [-cdsCt] SET1 [SET2]` — translate, squeeze, and/or delete characters from stdin - ✅ `true` — return exit code 0 +- ✅ `uniq [OPTION]... [INPUT]` — report or omit repeated lines - ✅ `wc [-l] [-w] [-c] [-m] [FILE]...` — count lines, words, bytes, or characters in files - ❌ All other commands — return exit code 127 with `: not found` unless an ExecHandler is configured diff --git a/interp/builtins/uniq/uniq.go b/interp/builtins/uniq/uniq.go new file mode 100644 index 00000000..4b44598a --- /dev/null +++ b/interp/builtins/uniq/uniq.go @@ -0,0 +1,580 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +// Package uniq implements the uniq builtin command. +// +// uniq — report or omit repeated lines +// +// Usage: uniq [OPTION]... [INPUT_FILE] +// +// Filter adjacent matching lines from INPUT_FILE (or standard input), +// writing to standard output. +// +// With no INPUT_FILE, or when INPUT_FILE is -, read standard input. +// +// Note: the output file argument (second positional arg) supported by +// GNU uniq is intentionally NOT implemented because it writes to the +// filesystem, violating the shell's safety rules. +// +// Accepted flags: +// +// -c, --count +// Prefix lines by the number of occurrences. +// +// -d, --repeated +// Only print duplicate lines, one for each group. +// +// -D, --all-repeated[=METHOD] +// Print all duplicate lines. METHOD={none,prepend,separate} +// (default: none). Mutually exclusive with --group. +// +// -u, --unique +// Only print unique lines (lines that are not repeated). +// +// -i, --ignore-case +// Ignore differences in case when comparing lines. 
+// +// -f N, --skip-fields=N +// Avoid comparing the first N fields. Fields are sequences of +// non-blank characters separated by blanks (spaces and tabs). +// +// -s N, --skip-chars=N +// Avoid comparing the first N characters (applied after field +// skipping). +// +// -w N, --check-chars=N +// Compare no more than N characters in each line. +// +// -z, --zero-terminated +// Line delimiter is NUL (\0), not newline. +// +// --group[=METHOD] +// Show all input lines, separating groups with an empty line. +// METHOD={separate,prepend,append,both} (default: separate). +// Mutually exclusive with -c, -d, -D, -u. +// +// -h, --help +// Print this usage message to stdout and exit 0. +// +// Exit codes: +// +// 0 Success. +// 1 An error occurred (invalid argument, missing file, incompatible flags). +// +// Memory safety: +// +// Lines are processed one at a time via a streaming scanner with a +// per-line cap of MaxLineBytes (1 MiB). Only the current and previous +// lines are kept in memory. All loops check ctx.Err() to honour the +// shell's execution timeout. +package uniq + +import ( + "bufio" + "context" + "io" + "math" + "os" + "strconv" + "strings" + + "github.com/DataDog/rshell/interp/builtins" +) + +// Cmd is the uniq builtin command descriptor. +var Cmd = builtins.Command{Name: "uniq", MakeFlags: registerFlags} + +// MaxLineBytes is the per-line buffer cap for the line scanner. +const MaxLineBytes = 1 << 20 // 1 MiB + +// MaxCount is the maximum accepted value for -f, -s, -w flags. +const MaxCount = 1<<31 - 1 // 2 147 483 647 + +// countFieldWidth is the width of the count prefix produced by -c. +const countFieldWidth = 7 + +// initialBufSize is the starting buffer size for the scanner. +const initialBufSize = 4096 + +// groupMethod controls how --group inserts blank-line separators. 
+type groupMethod int + +const ( + groupSeparate groupMethod = iota + groupPrepend + groupAppend + groupBoth +) + +// allRepeatedMethod controls how -D/--all-repeated delimits groups. +type allRepeatedMethod int + +const ( + allRepeatedNone allRepeatedMethod = iota + allRepeatedPrepend + allRepeatedSeparate +) + +func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { + help := fs.BoolP("help", "h", false, "print usage and exit") + count := fs.BoolP("count", "c", false, "prefix lines by the number of occurrences") + repeated := fs.BoolP("repeated", "d", false, "only print duplicate lines, one for each group") + unique := fs.BoolP("unique", "u", false, "only print unique lines") + ignoreCase := fs.BoolP("ignore-case", "i", false, "ignore differences in case when comparing") + zeroTerminated := fs.BoolP("zero-terminated", "z", false, "line delimiter is NUL, not newline") + + skipFieldsStr := fs.StringP("skip-fields", "f", "0", "avoid comparing the first N fields") + skipCharsStr := fs.StringP("skip-chars", "s", "0", "avoid comparing the first N characters") + checkCharsStr := fs.StringP("check-chars", "w", "", "compare no more than N characters") + + allRepeatedStr := fs.StringP("all-repeated", "D", "", "print all duplicate lines; METHOD={none,prepend,separate}") + groupStr := fs.String("group", "", "show all input lines with group separators; METHOD={separate,prepend,append,both}") + + fs.Lookup("all-repeated").NoOptDefVal = "none" + fs.Lookup("group").NoOptDefVal = "separate" + + return func(ctx context.Context, callCtx *builtins.CallContext, args []string) builtins.Result { + if *help { + callCtx.Out("Usage: uniq [OPTION]... 
[INPUT]\n") + callCtx.Out("Filter adjacent matching lines from INPUT (or stdin),\n") + callCtx.Out("writing to standard output.\n\n") + fs.SetOutput(callCtx.Stdout) + fs.PrintDefaults() + return builtins.Result{} + } + + skipFields, ok := parseNonNegativeInt(*skipFieldsStr) + if !ok { + callCtx.Errf("uniq: %s: invalid number of fields to skip\n", *skipFieldsStr) + return builtins.Result{Code: 1} + } + + skipChars, ok := parseNonNegativeInt(*skipCharsStr) + if !ok { + callCtx.Errf("uniq: %s: invalid number of bytes to skip\n", *skipCharsStr) + return builtins.Result{Code: 1} + } + + checkChars := int64(-1) + if fs.Changed("check-chars") { + checkChars, ok = parseNonNegativeInt(*checkCharsStr) + if !ok { + callCtx.Errf("uniq: %s: invalid number of bytes to compare\n", *checkCharsStr) + return builtins.Result{Code: 1} + } + } + + useAllRepeated := fs.Changed("all-repeated") + arMethod := allRepeatedNone + if useAllRepeated { + var err error + arMethod, err = parseAllRepeatedMethod(*allRepeatedStr) + if err != nil { + callCtx.Errf("uniq: %v\n", err) + return builtins.Result{Code: 1} + } + } + + useGroup := fs.Changed("group") + grpMethod := groupSeparate + if useGroup { + var err error + grpMethod, err = parseGroupMethod(*groupStr) + if err != nil { + callCtx.Errf("uniq: %v\n", err) + return builtins.Result{Code: 1} + } + } + + if useGroup && (*count || *repeated || useAllRepeated || *unique) { + callCtx.Errf("uniq: --group is mutually exclusive with -c/-d/-D/-u\n") + callCtx.Errf("Try 'uniq --help' for more information.\n") + return builtins.Result{Code: 1} + } + if useAllRepeated && *count { + callCtx.Errf("uniq: printing all duplicated lines and repeat counts is meaningless\n") + callCtx.Errf("Try 'uniq --help' for more information.\n") + return builtins.Result{Code: 1} + } + + if len(args) > 1 { + callCtx.Errf("uniq: extra operand %q\n", args[1]) + return builtins.Result{Code: 1} + } + + file := "-" + if len(args) == 1 { + file = args[0] + } + + var rc 
io.ReadCloser + if file == "-" { + if callCtx.Stdin == nil { + return builtins.Result{} + } + rc = io.NopCloser(callCtx.Stdin) + } else { + f, err := callCtx.OpenFile(ctx, file, os.O_RDONLY, 0) + if err != nil { + callCtx.Errf("uniq: %s: %s\n", file, callCtx.PortableErr(err)) + return builtins.Result{Code: 1} + } + defer f.Close() + rc = f + } + + delim := byte('\n') + if *zeroTerminated { + delim = 0 + } + + // GNU uniq: --all-repeated --unique collapses to -d behavior (one per + // duplicate group). Downgrade to the standard repeated path. + if useAllRepeated && *unique { + useAllRepeated = false + *repeated = true + *unique = false + } + + cfg := &uniqConfig{ + count: *count, + repeated: *repeated, + unique: *unique, + ignoreCase: *ignoreCase, + skipFields: skipFields, + skipChars: skipChars, + checkChars: checkChars, + delim: delim, + useAllRepeated: useAllRepeated, + arMethod: arMethod, + useGroup: useGroup, + grpMethod: grpMethod, + } + + if err := processInput(ctx, callCtx, rc, cfg); err != nil { + return builtins.Result{Code: 1} + } + return builtins.Result{} + } +} + +type uniqConfig struct { + count bool + repeated bool + unique bool + ignoreCase bool + skipFields int64 + skipChars int64 + checkChars int64 + delim byte + useAllRepeated bool + arMethod allRepeatedMethod + useGroup bool + grpMethod groupMethod +} + +func processInput(ctx context.Context, callCtx *builtins.CallContext, r io.Reader, cfg *uniqConfig) error { + sc := bufio.NewScanner(r) + buf := make([]byte, initialBufSize) + sc.Buffer(buf, MaxLineBytes) + sc.Split(makeSplitFunc(cfg.delim)) + + w := callCtx.Stdout + delimStr := string([]byte{cfg.delim}) + + reportWrite := func(err error) error { + if err != nil { + callCtx.Errf("uniq: write error\n") + } + return err + } + + var prevLine string + var prevKey string + var lineCount int64 + first := true + groupNum := 0 + + for sc.Scan() { + if ctx.Err() != nil { + return ctx.Err() + } + curLine := sc.Text() + curKey := compareKey(curLine, cfg) + 
+ if first { + prevLine = curLine + prevKey = curKey + lineCount = 1 + first = false + + if cfg.useGroup { + if cfg.grpMethod == groupPrepend || cfg.grpMethod == groupBoth { + if err := reportWrite(writeStr(w, delimStr)); err != nil { + return err + } + } + if err := reportWrite(writeStr(w, curLine+delimStr)); err != nil { + return err + } + } + continue + } + + same := prevKey == curKey + + if same { + if lineCount < math.MaxInt64 { + lineCount++ + } + if cfg.useGroup { + if err := reportWrite(writeStr(w, curLine+delimStr)); err != nil { + return err + } + } else if cfg.useAllRepeated { + if lineCount == 2 { + if groupNum > 0 && cfg.arMethod != allRepeatedNone { + if err := reportWrite(writeStr(w, delimStr)); err != nil { + return err + } + } + if groupNum == 0 && cfg.arMethod == allRepeatedPrepend { + if err := reportWrite(writeStr(w, delimStr)); err != nil { + return err + } + } + if err := reportWrite(writeStr(w, prevLine+delimStr)); err != nil { + return err + } + groupNum++ + } + if err := reportWrite(writeStr(w, curLine+delimStr)); err != nil { + return err + } + } + } else { + if cfg.useGroup { + if err := reportWrite(writeStr(w, delimStr)); err != nil { + return err + } + if err := reportWrite(writeStr(w, curLine+delimStr)); err != nil { + return err + } + groupNum++ + } else if cfg.useAllRepeated { + // Nothing to do — non-repeated last group is simply dropped. + } else { + if err := reportWrite(emitStandard(w, cfg, prevLine, lineCount, delimStr)); err != nil { + return err + } + } + prevLine = curLine + prevKey = curKey + lineCount = 1 + } + } + + if err := sc.Err(); err != nil { + callCtx.Errf("uniq: %s\n", callCtx.PortableErr(err)) + return err + } + + if first { + return nil + } + + // Flush last group. 
+ if cfg.useGroup { + if cfg.grpMethod == groupAppend || cfg.grpMethod == groupBoth { + return reportWrite(writeStr(w, delimStr)) + } + return nil + } + if cfg.useAllRepeated { + return nil + } + return reportWrite(emitStandard(w, cfg, prevLine, lineCount, delimStr)) +} + +func emitStandard(w io.Writer, cfg *uniqConfig, line string, count int64, delimStr string) error { + if cfg.repeated && cfg.unique { + return nil + } + if cfg.repeated && count < 2 { + return nil + } + if cfg.unique && count >= 2 { + return nil + } + if cfg.count { + s := strconv.FormatInt(count, 10) + for len(s) < countFieldWidth { + s = " " + s + } + return writeStr(w, s+" "+line+delimStr) + } + return writeStr(w, line+delimStr) +} + +func writeStr(w io.Writer, s string) error { + _, err := io.WriteString(w, s) + return err +} + +// compareKey extracts the portion of line used for comparison, applying +// field skipping, char skipping, check-chars, and case folding. +func compareKey(line string, cfg *uniqConfig) string { + s := line + if cfg.skipFields > 0 { + s = skipFieldsN(s, cfg.skipFields) + } + if cfg.skipChars > 0 && len(s) > 0 { + skip := cfg.skipChars + if skip > int64(len(s)) { + skip = int64(len(s)) + } + s = s[skip:] + } + if cfg.checkChars >= 0 && cfg.checkChars < int64(len(s)) { + s = s[:cfg.checkChars] + } + if cfg.ignoreCase { + s = asciiToLower(s) + } + return s +} + +// asciiToLower folds only ASCII A-Z to a-z, matching GNU uniq behavior +// in the default C/POSIX locale. Unlike strings.ToLower, this does not +// apply Unicode case folding, so non-ASCII characters are left unchanged. 
+func asciiToLower(s string) string { + for i := 0; i < len(s); i++ { + if s[i] >= 'A' && s[i] <= 'Z' { + b := make([]byte, len(s)) + copy(b, s[:i]) + b[i] = s[i] + ('a' - 'A') + for j := i + 1; j < len(s); j++ { + c := s[j] + if c >= 'A' && c <= 'Z' { + c += 'a' - 'A' + } + b[j] = c + } + return string(b) + } + } + return s +} + +// skipFieldsN skips the first n blank-delimited fields and returns the +// remainder of the string, starting immediately after the last character +// of the n-th field (before any subsequent blanks). +func skipFieldsN(s string, n int64) string { + i := 0 + for field := int64(0); field < n && i < len(s); field++ { + for i < len(s) && (s[i] == ' ' || s[i] == '\t') { + i++ + } + for i < len(s) && s[i] != ' ' && s[i] != '\t' { + i++ + } + } + return s[i:] +} + +func parseNonNegativeInt(s string) (int64, bool) { + if s == "" { + return 0, false + } + n, err := strconv.ParseInt(s, 10, 64) + if err != nil { + if ne, ok := err.(*strconv.NumError); ok && ne.Err == strconv.ErrRange { + // Reject negative overflow (e.g. -999999999999999999999). + if len(s) > 0 && s[0] == '-' { + return 0, false + } + return MaxCount, true + } + return 0, false + } + if n < 0 { + return 0, false + } + if n > MaxCount { + n = MaxCount + } + return n, true +} + +// parseAllRepeatedMethod parses the METHOD for --all-repeated. +// Cases are ordered deliberately: first match wins for prefix abbreviation, +// matching GNU coreutils behavior. If adding new options that share a prefix +// with existing ones, add explicit ambiguity detection. 
+func parseAllRepeatedMethod(s string) (allRepeatedMethod, error) { + switch { + case s == "": + return 0, &invalidArgError{arg: s, flag: "--all-repeated", valid: []string{"none", "prepend", "separate"}} + case strings.HasPrefix("none", s): + return allRepeatedNone, nil + case strings.HasPrefix("prepend", s): + return allRepeatedPrepend, nil + case strings.HasPrefix("separate", s): + return allRepeatedSeparate, nil + } + return 0, &invalidArgError{arg: s, flag: "--all-repeated", valid: []string{"none", "prepend", "separate"}} +} + +// parseGroupMethod parses the METHOD for --group. +// Cases are ordered deliberately: first match wins for prefix abbreviation, +// matching GNU coreutils behavior. If adding new options that share a prefix +// with existing ones, add explicit ambiguity detection. +func parseGroupMethod(s string) (groupMethod, error) { + switch { + case s == "": + return 0, &invalidArgError{arg: s, flag: "--group", valid: []string{"prepend", "append", "separate", "both"}} + case strings.HasPrefix("separate", s): + return groupSeparate, nil + case strings.HasPrefix("prepend", s): + return groupPrepend, nil + case strings.HasPrefix("append", s): + return groupAppend, nil + case strings.HasPrefix("both", s): + return groupBoth, nil + } + return 0, &invalidArgError{arg: s, flag: "--group", valid: []string{"prepend", "append", "separate", "both"}} +} + +type invalidArgError struct { + arg string + flag string + valid []string +} + +func (e *invalidArgError) Error() string { + msg := "invalid argument '" + e.arg + "' for '" + e.flag + "'\nValid arguments are:\n" + for _, v := range e.valid { + msg += " - '" + v + "'\n" + } + return msg +} + +// makeSplitFunc returns a bufio.SplitFunc that splits on the given delimiter. +// The token returned does NOT include the trailing delimiter. 
+func makeSplitFunc(delim byte) bufio.SplitFunc { + return func(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + for i, b := range data { + if b == delim { + return i + 1, data[:i], nil + } + } + if atEOF { + return len(data), data, nil + } + return 0, nil, nil + } +} diff --git a/interp/builtins/uniq/uniq_gnu_compat_test.go b/interp/builtins/uniq/uniq_gnu_compat_test.go new file mode 100644 index 00000000..b35756c4 --- /dev/null +++ b/interp/builtins/uniq/uniq_gnu_compat_test.go @@ -0,0 +1,233 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +// GNU compatibility tests for the uniq builtin. +// +// Expected outputs were captured from GNU coreutils uniq 9.6 +// and are embedded as string literals so the tests run without any GNU +// tooling present on CI. + +package uniq_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// TestGNUCompatEmptyInput — empty input produces empty output. +// +// GNU command: printf ” | guniq +// Expected: "" +func TestGNUCompatEmptyInput(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "empty.txt", "") + stdout, _, code := cmdRun(t, "uniq empty.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +// TestGNUCompatAdjacentDuplicates — adjacent duplicates collapsed. +// +// GNU command: printf 'a\na\n' | guniq +// Expected: "a\n" +func TestGNUCompatAdjacentDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +// TestGNUCompatNoTrailingNewline — last line without newline gets one added. 
+// +// GNU command: printf 'a\na' | guniq +// Expected: "a\n" +func TestGNUCompatNoTrailingNewline(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +// TestGNUCompatDifferentLines — different lines both preserved. +// +// GNU command: printf 'a\nb' | guniq +// Expected: "a\nb\n" +func TestGNUCompatDifferentLines(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +// TestGNUCompatCountBasic — -c formats count with 7-char right-aligned field. +// +// GNU command: printf 'a\nb\n' | guniq -c +// Expected: " 1 a\n 1 b\n" +func TestGNUCompatCountBasic(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\n") + stdout, _, code := cmdRun(t, "uniq -c in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, " 1 a\n 1 b\n", stdout) +} + +// TestGNUCompatCountDuplicates — -c with repeated lines. +// +// GNU command: printf 'a\na\n' | guniq -c +// Expected: " 2 a\n" +func TestGNUCompatCountDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -c in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, " 2 a\n", stdout) +} + +// TestGNUCompatIgnoreCase — -i folds case. +// +// GNU command: printf 'A\na\n' | guniq -i +// Expected: "A\n" +func TestGNUCompatIgnoreCase(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "A\na\n") + stdout, _, code := cmdRun(t, "uniq -i in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "A\n", stdout) +} + +// TestGNUCompatAllRepeatedSeparate — --all-repeated=separate with two groups. 
+// +// GNU command: printf 'a\na\nb\nc\nc\n' | guniq --all-repeated=separate +// Expected: "a\na\n\nc\nc\n" +func TestGNUCompatAllRepeatedSeparate(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\nc\nc\n") + stdout, _, code := cmdRun(t, "uniq --all-repeated=separate in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nc\nc\n", stdout) +} + +// TestGNUCompatAllRepeatedPrepend — --all-repeated=prepend prefixes first group. +// +// GNU command: printf 'a\na\nb\nc\nc\n' | guniq --all-repeated=prepend +// Expected: "\na\na\n\nc\nc\n" +func TestGNUCompatAllRepeatedPrepend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\nc\nc\n") + stdout, _, code := cmdRun(t, "uniq --all-repeated=prepend in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nc\nc\n", stdout) +} + +// TestGNUCompatGroupSeparate — --group=separate with two groups. +// +// GNU command: printf 'a\na\nb\n' | guniq --group=separate +// Expected: "a\na\n\nb\n" +func TestGNUCompatGroupSeparate(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=separate in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nb\n", stdout) +} + +// TestGNUCompatGroupPrepend — --group=prepend with two groups. +// +// GNU command: printf 'a\na\nb\n' | guniq --group=prepend +// Expected: "\na\na\n\nb\n" +func TestGNUCompatGroupPrepend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=prepend in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nb\n", stdout) +} + +// TestGNUCompatGroupAppend — --group=append with two groups. 
+// +// GNU command: printf 'a\na\nb\n' | guniq --group=append +// Expected: "a\na\n\nb\n\n" +func TestGNUCompatGroupAppend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=append in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nb\n\n", stdout) +} + +// TestGNUCompatGroupBoth — --group=both with two groups. +// +// GNU command: printf 'a\na\nb\n' | guniq --group=both +// Expected: "\na\na\n\nb\n\n" +func TestGNUCompatGroupBoth(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=both in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nb\n\n", stdout) +} + +// TestGNUCompatRepeatedOnly — -d only emits repeated lines. +// +// GNU command: printf 'a\na\nb\n' | guniq -d +// Expected: "a\n" +func TestGNUCompatRepeatedOnly(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq -d in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +// TestGNUCompatUniqueOnly — -u only emits unique lines. +// +// GNU command: printf 'a\nb\na\n' | guniq -u +// Expected: "a\nb\na\n" +func TestGNUCompatUniqueOnly(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\na\n") + stdout, _, code := cmdRun(t, "uniq -u in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\na\n", stdout) +} + +// TestGNUCompatRejectedFlag — unknown flag produces exit 1. +// +// GNU command: guniq --no-such-flag → exit 1 +func TestGNUCompatRejectedFlag(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\n") + _, stderr, code := cmdRun(t, "uniq --no-such-flag in.txt", dir) + assert.Equal(t, 1, code) + assert.NotEmpty(t, stderr) +} + +// TestGNUCompatSkipFields — -f 2 skips two fields. 
+// +// GNU command: printf 'a\ta a\na a a\n' | guniq -f 2 +// Expected: "a\ta a\n" +func TestGNUCompatSkipFields(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\ta a\na a a\n") + stdout, _, code := cmdRun(t, "uniq -f 2 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\ta a\n", stdout) +} + +// TestGNUCompatZeroTerminated — -z uses NUL delimiter. +// +// GNU command: printf 'a\0a\0b' | guniq -z +// Expected: "a\0b\0" +func TestGNUCompatZeroTerminated(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\x00a\x00b") + stdout, _, code := cmdRun(t, "uniq -z in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\x00b\x00", stdout) +} diff --git a/interp/builtins/uniq/uniq_pentest_test.go b/interp/builtins/uniq/uniq_pentest_test.go new file mode 100644 index 00000000..86bda6db --- /dev/null +++ b/interp/builtins/uniq/uniq_pentest_test.go @@ -0,0 +1,252 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. 
+ +package uniq_test + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/DataDog/rshell/interp" +) + +// --- Integer edge cases --- + +func TestUniqPentestSkipFieldsZero(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -f 0 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestUniqPentestSkipFieldsOne(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "x a\ny a\n") + stdout, _, code := cmdRun(t, "uniq -f 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "x a\n", stdout) +} + +func TestUniqPentestSkipFieldsMaxInt(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\n") + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + stdout, _, code := runScriptCtx(ctx, t, "uniq -f 2147483647 in.txt", dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestUniqPentestSkipFieldsOverflow(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\n") + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + stdout, _, code := runScriptCtx(ctx, t, "uniq -f 99999999999999999999 in.txt", dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestUniqPentestSkipCharsZero(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "abc\nabcd\n") + stdout, _, code := cmdRun(t, "uniq -s 0 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc\nabcd\n", stdout) +} + +func TestUniqPentestCheckCharsZero(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "abc\ndef\n") + stdout, _, code := cmdRun(t, "uniq -w 0 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc\n", stdout) +} + 
+func TestUniqPentestCheckCharsMaxInt(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -w 2147483647 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestUniqPentestCheckCharsOverflow(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n\x08") + stdout, _, code := cmdRun(t, "uniq -d -u -w340282366920938463463374607431768211456 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +// --- Long lines --- + +func TestUniqPentestLineBelowCap(t *testing.T) { + dir := t.TempDir() + line := strings.Repeat("a", 1<<20-2) + content := line + "\n" + line + "\n" + require.NoError(t, os.WriteFile(filepath.Join(dir, "in.txt"), []byte(content), 0644)) + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, line+"\n", stdout) +} + +func TestUniqPentestLineAtCap(t *testing.T) { + dir := t.TempDir() + line := strings.Repeat("a", 1<<20) + content := line + "\n" + require.NoError(t, os.WriteFile(filepath.Join(dir, "in.txt"), []byte(content), 0644)) + _, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 1, code) +} + +func TestUniqPentestLineBeyondCap(t *testing.T) { + dir := t.TempDir() + line := strings.Repeat("a", 1<<20+1) + require.NoError(t, os.WriteFile(filepath.Join(dir, "in.txt"), []byte(line), 0644)) + _, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 1, code) +} + +// --- Path and filename edge cases --- + +func TestUniqPentestNonExistentFile(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq nonexistent.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqPentestEmptyFilename(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `uniq ""`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqPentestDoubleDashFlagLikeFilename(t *testing.T) { + dir := t.TempDir() 
+ writeFile(t, dir, "-c", "content\n") + stdout, _, code := cmdRun(t, "uniq -- -c", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "content\n", stdout) +} + +func TestUniqPentestOutsideAllowedPaths(t *testing.T) { + allowed := t.TempDir() + secret := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(secret, "secret.txt"), []byte("secret"), 0644)) + + secretPath := strings.ReplaceAll(filepath.Join(secret, "secret.txt"), `\`, `/`) + _, stderr, code := runScript(t, "uniq "+secretPath, allowed, interp.AllowedPaths([]string{allowed})) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +// --- Flag and argument injection --- + +func TestUniqPentestUnknownFlagLong(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq --follow in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqPentestUnknownFlagShort(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq -x in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqPentestMultipleStdinDash(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "src.txt", "a\na\n") + _, stderr, code := cmdRun(t, "uniq - - < src.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "extra operand") +} + +// --- Context cancellation --- + +func TestUniqPentestPreCancelledContext(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + done := make(chan struct{}) + go func() { + runScriptCtx(ctx, t, "uniq in.txt", dir, interp.AllowedPaths([]string{dir})) + close(done) + }() + select { + case <-done: + case <-time.After(5 * time.Second): + t.Fatal("uniq with pre-cancelled context did not return within 5s") + } +} + +// --- Large input --- + +func TestUniqPentestManyLines(t *testing.T) { + dir := t.TempDir() + var sb strings.Builder + for i := 0; i < 10000; i++ { + 
sb.WriteString("line\n") + } + writeFile(t, dir, "in.txt", sb.String()) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + stdout, _, code := runScriptCtx(ctx, t, "uniq in.txt", dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 0, code) + assert.Equal(t, "line\n", stdout) +} + +func TestUniqPentestManyUniqueLines(t *testing.T) { + dir := t.TempDir() + var sb strings.Builder + for i := 0; i < 10000; i++ { + sb.WriteString("line") + sb.WriteString(strings.Repeat("x", i%100)) + sb.WriteByte('\n') + } + content := sb.String() + writeFile(t, dir, "in.txt", content) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + _, _, code := runScriptCtx(ctx, t, "uniq -c in.txt", dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 0, code) +} + +// --- Behavior matching: -D with no duplicates --- + +func TestUniqPentestAllRepeatedNoDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\nc\n") + stdout, _, code := cmdRun(t, "uniq -D in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +// --- Invalid UTF-8 / binary content --- + +func TestUniqPentestBinaryContent(t *testing.T) { + dir := t.TempDir() + content := []byte{0xfc, 0x80, 0x80, '\n', 0xfc, 0x80, 0x80, '\n'} + require.NoError(t, os.WriteFile(filepath.Join(dir, "bad.bin"), content, 0644)) + stdout, _, code := cmdRun(t, "uniq bad.bin", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\xfc\x80\x80\n", stdout) +} diff --git a/interp/builtins/uniq/uniq_test.go b/interp/builtins/uniq/uniq_test.go new file mode 100644 index 00000000..2a02e0a9 --- /dev/null +++ b/interp/builtins/uniq/uniq_test.go @@ -0,0 +1,656 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc.
+ +package uniq_test + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/DataDog/rshell/interp" + "github.com/DataDog/rshell/interp/builtins/testutil" +) + +func runScriptCtx(ctx context.Context, t *testing.T, script, dir string, opts ...interp.RunnerOption) (string, string, int) { + t.Helper() + return testutil.RunScriptCtx(ctx, t, script, dir, opts...) +} + +func runScript(t *testing.T, script, dir string, opts ...interp.RunnerOption) (string, string, int) { + t.Helper() + return testutil.RunScript(t, script, dir, opts...) +} + +func cmdRun(t *testing.T, script, dir string) (string, string, int) { + t.Helper() + return runScript(t, script, dir, interp.AllowedPaths([]string{dir})) +} + +func writeFile(t *testing.T, dir, name, content string) string { + t.Helper() + require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte(content), 0644)) + return name +} + +// --- Default behaviour --- + +func TestUniqEmptyInput(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "empty.txt", "") + stdout, _, code := cmdRun(t, "uniq empty.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +func TestUniqAdjacentDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestUniqNoTrailingNewline(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestUniqDifferentLines(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +func TestUniqMixedDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, 
"in.txt", "a\na\nb") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +func TestUniqAllUnique(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\nc\n") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\nc\n", stdout) +} + +func TestUniqNonAdjacentDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "b\na\na\n") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "b\na\n", stdout) +} + +// --- -c / --count --- + +func TestUniqCountBasic(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\n") + stdout, _, code := cmdRun(t, "uniq -c in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, " 1 a\n 1 b\n", stdout) +} + +func TestUniqCountDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -c in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, " 2 a\n", stdout) +} + +func TestUniqCountLongForm(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --count in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, " 2 a\n 1 b\n", stdout) +} + +// --- -d / --repeated --- + +func TestUniqRepeatedBasic(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -d in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestUniqRepeatedNone(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\n") + stdout, _, code := cmdRun(t, "uniq -d in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +func TestUniqRepeatedNonAdjacent(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\na\n") + stdout, _, code := cmdRun(t, "uniq -d in.txt", dir) + assert.Equal(t, 0, code) + 
assert.Equal(t, "", stdout) +} + +// --- -u / --unique --- + +func TestUniqUniqueBasic(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -u in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +func TestUniqUniqueAllUnique(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\n") + stdout, _, code := cmdRun(t, "uniq -u in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +func TestUniqUniqueMixed(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\na\n") + stdout, _, code := cmdRun(t, "uniq -u in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\na\n", stdout) +} + +// --- -d -u combined --- + +func TestUniqRepeatedAndUniqueSuppressAll(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n\x08") + stdout, _, code := cmdRun(t, "uniq -d -u in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +// --- -i / --ignore-case --- + +func TestUniqIgnoreCase(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "A\na\n") + stdout, _, code := cmdRun(t, "uniq -i in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "A\n", stdout) +} + +func TestUniqIgnoreCaseLongForm(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "A\na\n") + stdout, _, code := cmdRun(t, "uniq --ignore-case in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "A\n", stdout) +} + +func TestUniqCaseSensitiveDefault(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "A\na\n") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "A\na\n", stdout) +} + +// --- -f / --skip-fields --- + +func TestUniqSkipFields1(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a a\nb a\n") + stdout, _, code := cmdRun(t, "uniq -f 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a\n", stdout) +} + 
+func TestUniqSkipFields1DifferentAfterField(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a a\nb b\n") + stdout, _, code := cmdRun(t, "uniq -f 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a\nb b\n", stdout) +} + +func TestUniqSkipFieldsTabVsSpace(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\ta\na a\n") + stdout, _, code := cmdRun(t, "uniq -f 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\ta\na a\n", stdout) +} + +func TestUniqSkipFields2(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a a c\nb a c\n") + stdout, _, code := cmdRun(t, "uniq -f 2 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a c\n", stdout) +} + +// --- -s / --skip-chars --- + +func TestUniqSkipChars1(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "aaa\naaa\n") + stdout, _, code := cmdRun(t, "uniq -s 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "aaa\n", stdout) +} + +func TestUniqSkipChars2(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "baa\naaa\n") + stdout, _, code := cmdRun(t, "uniq -s 2 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "baa\n", stdout) +} + +func TestUniqSkipChars4ShortLine(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "abc\nabcd\n") + stdout, _, code := cmdRun(t, "uniq -s 4 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc\n", stdout) +} + +// --- -w / --check-chars --- + +func TestUniqCheckChars0(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "abc\nabcd\n") + stdout, _, code := cmdRun(t, "uniq -w 0 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc\n", stdout) +} + +func TestUniqCheckChars1(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a a\nb a\n") + stdout, _, code := cmdRun(t, "uniq -w 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a\nb a\n", stdout) +} + +func 
TestUniqCheckCharsWithSkipFields(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a a a\nb a c\n") + stdout, _, code := cmdRun(t, "uniq -f 1 -w 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a a\n", stdout) +} + +// --- -z / --zero-terminated --- + +func TestUniqZeroTerminated(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\x00a\x00b") + stdout, _, code := cmdRun(t, "uniq -z in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\x00b\x00", stdout) +} + +func TestUniqZeroTerminatedNewlinesPreserved(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -z in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\x00", stdout) +} + +// --- -D / --all-repeated --- + +func TestUniqAllRepeatedDefault(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -D in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n", stdout) +} + +func TestUniqAllRepeatedSeparate(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\nc\nc\n") + stdout, _, code := cmdRun(t, "uniq --all-repeated=separate in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nc\nc\n", stdout) +} + +func TestUniqAllRepeatedPrepend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq --all-repeated=prepend in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n", stdout) +} + +func TestUniqAllRepeatedPrependMultiple(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\nc\nc\n") + stdout, _, code := cmdRun(t, "uniq --all-repeated=prepend in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nc\nc\n", stdout) +} + +func TestUniqAllRepeatedNoneOnUniqueInput(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\n") + stdout, _, code := cmdRun(t, "uniq 
--all-repeated=prepend in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +func TestUniqAllRepeatedNoneMultipleGroups(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\nb\n") + stdout, _, code := cmdRun(t, "uniq -D in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\nb\nb\n", stdout) +} + +func TestUniqAllRepeatedSeparateMultipleGroups(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\nb\nc\n") + stdout, _, code := cmdRun(t, "uniq --all-repeated=separate in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nb\nb\n", stdout) +} + +func TestUniqAllRepeatedWithCheckChars(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a a\na b\n") + stdout, _, code := cmdRun(t, "uniq -D -w1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a\na b\n", stdout) +} + +// --- --group --- + +func TestUniqGroupSeparate(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=separate in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nb\n", stdout) +} + +func TestUniqGroupPrepend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=prepend in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nb\n", stdout) +} + +func TestUniqGroupAppend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=append in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nb\n\n", stdout) +} + +func TestUniqGroupBoth(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=both in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nb\n\n", stdout) +} + +func TestUniqGroupDefault(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", 
"a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nb\n", stdout) +} + +func TestUniqGroupEmptyInput(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "") + stdout, _, code := cmdRun(t, "uniq --group in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +func TestUniqGroupSingleGroup(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq --group=prepend in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n", stdout) +} + +func TestUniqGroupSingleGroupAppend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq --group=append in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\n", stdout) +} + +func TestUniqGroupSingleGroupSeparate(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq --group=separate in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n", stdout) +} + +// --- Mutual exclusion errors --- + +func TestUniqGroupWithCount(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq --group -c in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "mutually exclusive") +} + +func TestUniqGroupWithRepeated(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq --group -d in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "mutually exclusive") +} + +func TestUniqGroupWithUnique(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq --group -u in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "mutually exclusive") +} + +func TestUniqGroupWithAllRepeated(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq --group -D in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "mutually 
exclusive") +} + +func TestUniqAllRepeatedWithCount(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq -D -c in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "meaningless") +} + +// --- Help --- + +func TestUniqHelp(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := cmdRun(t, "uniq --help", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "Usage:") + assert.Empty(t, stderr) +} + +func TestUniqHelpShort(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := cmdRun(t, "uniq -h", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "Usage:") + assert.Empty(t, stderr) +} + +// --- Error cases --- + +func TestUniqMissingFile(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq nonexistent.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqUnknownFlag(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq --no-such-flag in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqExtraOperand(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "a\n") + writeFile(t, dir, "b.txt", "b\n") + _, stderr, code := cmdRun(t, "uniq a.txt b.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "extra operand") +} + +func TestUniqInvalidAllRepeatedMethod(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\n") + _, stderr, code := cmdRun(t, "uniq --all-repeated=badoption in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "invalid argument") +} + +func TestUniqInvalidGroupMethod(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\n") + _, stderr, code := cmdRun(t, "uniq --group=badoption in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "invalid argument") +} + +// --- Stdin --- + +func TestUniqStdinPipe(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "src.txt", "a\na\nb\n") 
+ stdout, _, code := cmdRun(t, "uniq < src.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +func TestUniqStdinDash(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "src.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq - < src.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +func TestUniqNilStdin(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := runScript(t, "uniq -", dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) + assert.Equal(t, "", stderr) +} + +// --- Context cancellation --- + +func TestUniqContextCancellation(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + _, _, code := runScriptCtx(ctx, t, "uniq in.txt", dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 0, code) +} + +// --- Null bytes --- + +func TestUniqNullBytesInContent(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\x00a\na\n") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\x00a\na\n", stdout) +} + +// --- Combined skip fields + skip chars --- + +func TestUniqSkipFieldsAndChars(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a aaa\nb ab\n") + stdout, _, code := cmdRun(t, "uniq -f 1 -s 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a aaa\nb ab\n", stdout) +} + +func TestUniqSkipFieldsAndCharsEqual(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a aaa\nb aaa\n") + stdout, _, code := cmdRun(t, "uniq -f 1 -s 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a aaa\n", stdout) +} + +// --- Double dash --- + +func TestUniqDoubleDash(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "-f", "flag-looking-name\n") + stdout, _, code := cmdRun(t, "uniq -- -f", dir) + assert.Equal(t, 0, code) + 
assert.Equal(t, "flag-looking-name\n", stdout) +} + +// --- Eight bit characters --- + +func TestUniqEightBitChars(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "ö\nv\n") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "ö\nv\n", stdout) +} + +// --- Large count clamped --- + +func TestUniqLargeSkipFieldsClamped(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -f 9999999999 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestUniqOverflowCheckCharsClamped(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n\x08") + stdout, _, code := cmdRun(t, "uniq -d -u -w340282366920938463463374607431768211456 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} diff --git a/interp/register_builtins.go b/interp/register_builtins.go index 91a0cb64..a86488a6 100644 --- a/interp/register_builtins.go +++ b/interp/register_builtins.go @@ -25,6 +25,7 @@ import ( "github.com/DataDog/rshell/interp/builtins/testcmd" "github.com/DataDog/rshell/interp/builtins/tr" truecmd "github.com/DataDog/rshell/interp/builtins/true" + "github.com/DataDog/rshell/interp/builtins/uniq" "github.com/DataDog/rshell/interp/builtins/wc" ) @@ -50,6 +51,7 @@ func registerBuiltins() { testcmd.BracketCmd, tr.Cmd, truecmd.Cmd, + uniq.Cmd, wc.Cmd, } { cmd.Register() diff --git a/tests/scenarios/cmd/uniq/all_repeated/prepend.yaml b/tests/scenarios/cmd/uniq/all_repeated/prepend.yaml new file mode 100644 index 00000000..24adad96 --- /dev/null +++ b/tests/scenarios/cmd/uniq/all_repeated/prepend.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 116 +description: uniq --all-repeated=prepend adds blank line before first group. 
+setup: + files: + - path: input.txt + content: "a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --all-repeated=prepend input.txt +expect: + stdout: "\na\na\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/all_repeated/separate.yaml b/tests/scenarios/cmd/uniq/all_repeated/separate.yaml new file mode 100644 index 00000000..f632a039 --- /dev/null +++ b/tests/scenarios/cmd/uniq/all_repeated/separate.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 114 +description: uniq --all-repeated=separate separates groups with blank lines. +setup: + files: + - path: input.txt + content: "a\na\nb\nc\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --all-repeated=separate input.txt +expect: + stdout: "a\na\n\nc\nc\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/all_repeated/with_unique.yaml b/tests/scenarios/cmd/uniq/all_repeated/with_unique.yaml new file mode 100644 index 00000000..9f54e55c --- /dev/null +++ b/tests/scenarios/cmd/uniq/all_repeated/with_unique.yaml @@ -0,0 +1,13 @@ +description: uniq --all-repeated --unique collapses to -d behavior (one per duplicate group). +setup: + files: + - path: input.txt + content: "a\na\nb\nb\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --all-repeated --unique input.txt +expect: + stdout: "a\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/adjacent_duplicates.yaml b/tests/scenarios/cmd/uniq/basic/adjacent_duplicates.yaml new file mode 100644 index 00000000..0bf4193a --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/adjacent_duplicates.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 2 +description: uniq deduplicates adjacent identical lines. 
+setup: + files: + - path: input.txt + content: "a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq input.txt +expect: + stdout: "a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/all_unique.yaml b/tests/scenarios/cmd/uniq/basic/all_unique.yaml new file mode 100644 index 00000000..0b30432c --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/all_unique.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 7 +description: uniq preserves all unique lines. +setup: + files: + - path: input.txt + content: "a\nb\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq input.txt +expect: + stdout: "a\nb\nc\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/d_and_u_suppress.yaml b/tests/scenarios/cmd/uniq/basic/d_and_u_suppress.yaml new file mode 100644 index 00000000..429c5ca4 --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/d_and_u_suppress.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 120 +description: uniq -d -u suppresses all output. +setup: + files: + - path: input.txt + content: "a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -d -u input.txt +expect: + stdout: "" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/different_lines.yaml b/tests/scenarios/cmd/uniq/basic/different_lines.yaml new file mode 100644 index 00000000..9189d064 --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/different_lines.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 4 +description: uniq preserves different adjacent lines. 
+setup: + files: + - path: input.txt + content: "a\nb" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq input.txt +expect: + stdout: "a\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/eight_bit_chars.yaml b/tests/scenarios/cmd/uniq/basic/eight_bit_chars.yaml new file mode 100644 index 00000000..837244d4 --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/eight_bit_chars.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 8 +description: uniq handles eight-bit characters correctly. +setup: + files: + - path: input.txt + content: "ö\nv\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq input.txt +expect: + stdout: "ö\nv\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/empty_input.yaml b/tests/scenarios/cmd/uniq/basic/empty_input.yaml new file mode 100644 index 00000000..67f8c2aa --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/empty_input.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 1 +description: uniq with empty input produces no output. +setup: + files: + - path: empty.txt + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq empty.txt +expect: + stdout: "" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/ignore_case.yaml b/tests/scenarios/cmd/uniq/basic/ignore_case.yaml new file mode 100644 index 00000000..4f5225c6 --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/ignore_case.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 126 +description: uniq -i ignores case when comparing. 
+setup: + files: + - path: input.txt + content: "A\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -i input.txt +expect: + stdout: "A\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/ignore_case_unicode.yaml b/tests/scenarios/cmd/uniq/basic/ignore_case_unicode.yaml new file mode 100644 index 00000000..90697aa4 --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/ignore_case_unicode.yaml @@ -0,0 +1,13 @@ +description: uniq -i uses ASCII-only folding; non-ASCII characters are not case-folded. +setup: + files: + - path: input.txt + content: "Ä\nä\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -i input.txt +expect: + stdout: "Ä\nä\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/no_trailing_newline.yaml b/tests/scenarios/cmd/uniq/basic/no_trailing_newline.yaml new file mode 100644 index 00000000..ec106ac9 --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/no_trailing_newline.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 3 +description: uniq normalizes last line without trailing newline. +setup: + files: + - path: input.txt + content: "a\na" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq input.txt +expect: + stdout: "a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/check_chars/w_one.yaml b/tests/scenarios/cmd/uniq/check_chars/w_one.yaml new file mode 100644 index 00000000..55bcf470 --- /dev/null +++ b/tests/scenarios/cmd/uniq/check_chars/w_one.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 60 +description: uniq -w 1 compares only first character. 
+setup: + files: + - path: input.txt + content: "a a\nb a\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -w 1 input.txt +expect: + stdout: "a a\nb a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/check_chars/w_zero.yaml b/tests/scenarios/cmd/uniq/check_chars/w_zero.yaml new file mode 100644 index 00000000..df5f4f8f --- /dev/null +++ b/tests/scenarios/cmd/uniq/check_chars/w_zero.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 57 +description: uniq -w 0 treats all lines as identical. +setup: + files: + - path: input.txt + content: "abc\nabcd\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -w 0 input.txt +expect: + stdout: "abc\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/count/basic_count.yaml b/tests/scenarios/cmd/uniq/count/basic_count.yaml new file mode 100644 index 00000000..b1639c74 --- /dev/null +++ b/tests/scenarios/cmd/uniq/count/basic_count.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 101 +description: uniq -c prefixes lines with occurrence count. +setup: + files: + - path: input.txt + content: "a\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -c input.txt +expect: + stdout: " 1 a\n 1 b\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/count/count_duplicates.yaml b/tests/scenarios/cmd/uniq/count/count_duplicates.yaml new file mode 100644 index 00000000..818d9d57 --- /dev/null +++ b/tests/scenarios/cmd/uniq/count/count_duplicates.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 102 +description: uniq -c shows count of 2 for duplicate lines. 
+setup: + files: + - path: input.txt + content: "a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -c input.txt +expect: + stdout: " 2 a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/errors/all_repeated_empty_method.yaml b/tests/scenarios/cmd/uniq/errors/all_repeated_empty_method.yaml new file mode 100644 index 00000000..92b97c5c --- /dev/null +++ b/tests/scenarios/cmd/uniq/errors/all_repeated_empty_method.yaml @@ -0,0 +1,9 @@ +description: uniq --all-repeated= with explicit empty method is rejected. +skip_assert_against_bash: true +input: + script: |+ + printf 'a\na\n' | uniq --all-repeated= +expect: + stdout: "" + stderr_contains: ["invalid argument '' for '--all-repeated'"] + exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/errors/all_repeated_with_count.yaml b/tests/scenarios/cmd/uniq/errors/all_repeated_with_count.yaml new file mode 100644 index 00000000..9d9a5143 --- /dev/null +++ b/tests/scenarios/cmd/uniq/errors/all_repeated_with_count.yaml @@ -0,0 +1,9 @@ +# Derived from GNU coreutils uniq.pl test 112 +description: uniq -D -c is mutually exclusive and fails. +input: + script: |+ + uniq -D -c +expect: + stdout: "" + stderr: "uniq: printing all duplicated lines and repeat counts is meaningless\nTry 'uniq --help' for more information.\n" + exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/errors/group_empty_method.yaml b/tests/scenarios/cmd/uniq/errors/group_empty_method.yaml new file mode 100644 index 00000000..db2cd791 --- /dev/null +++ b/tests/scenarios/cmd/uniq/errors/group_empty_method.yaml @@ -0,0 +1,9 @@ +description: uniq --group= with explicit empty method is rejected. 
+skip_assert_against_bash: true +input: + script: |+ + printf 'a\na\n' | uniq --group= +expect: + stdout: "" + stderr_contains: ["invalid argument '' for '--group'"] + exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/errors/group_with_count.yaml b/tests/scenarios/cmd/uniq/errors/group_with_count.yaml new file mode 100644 index 00000000..64d8af26 --- /dev/null +++ b/tests/scenarios/cmd/uniq/errors/group_with_count.yaml @@ -0,0 +1,9 @@ +# Derived from GNU coreutils uniq.pl test 141 +description: uniq --group -c is mutually exclusive and fails. +input: + script: |+ + uniq --group -c +expect: + stdout: "" + stderr: "uniq: --group is mutually exclusive with -c/-d/-D/-u\nTry 'uniq --help' for more information.\n" + exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/errors/missing_file.yaml b/tests/scenarios/cmd/uniq/errors/missing_file.yaml new file mode 100644 index 00000000..4adcec66 --- /dev/null +++ b/tests/scenarios/cmd/uniq/errors/missing_file.yaml @@ -0,0 +1,10 @@ +# Test missing file error +description: uniq reports error for nonexistent file. +input: + allowed_paths: ["$DIR"] + script: |+ + uniq nonexistent.txt +expect: + stdout: "" + stderr_contains: ["uniq:"] + exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/errors/negative_overflow_skip_fields.yaml b/tests/scenarios/cmd/uniq/errors/negative_overflow_skip_fields.yaml new file mode 100644 index 00000000..22aa0df9 --- /dev/null +++ b/tests/scenarios/cmd/uniq/errors/negative_overflow_skip_fields.yaml @@ -0,0 +1,13 @@ +description: uniq rejects negative overflow for --skip-fields. 
+setup: + files: + - path: input.txt + content: "a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --skip-fields=-999999999999999999999 input.txt +expect: + stdout: "" + stderr: "uniq: -999999999999999999999: invalid number of fields to skip\n" + exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/errors/unknown_flag.yaml b/tests/scenarios/cmd/uniq/errors/unknown_flag.yaml new file mode 100644 index 00000000..258ca647 --- /dev/null +++ b/tests/scenarios/cmd/uniq/errors/unknown_flag.yaml @@ -0,0 +1,9 @@ +# Test unknown flag rejection +description: uniq rejects unknown flags with exit code 1. +skip_assert_against_bash: true +input: + script: |+ + uniq --definitely-invalid +expect: + stderr_contains: ["unknown flag"] + exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/group/append.yaml b/tests/scenarios/cmd/uniq/group/append.yaml new file mode 100644 index 00000000..8f94ed27 --- /dev/null +++ b/tests/scenarios/cmd/uniq/group/append.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 129 +description: uniq --group=append adds blank line after last group too. +setup: + files: + - path: input.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --group=append input.txt +expect: + stdout: "a\na\n\nb\n\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/group/both.yaml b/tests/scenarios/cmd/uniq/group/both.yaml new file mode 100644 index 00000000..db9168ff --- /dev/null +++ b/tests/scenarios/cmd/uniq/group/both.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 132 +description: uniq --group=both adds blank lines before and after groups. 
+setup: + files: + - path: input.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --group=both input.txt +expect: + stdout: "\na\na\n\nb\n\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/group/prepend.yaml b/tests/scenarios/cmd/uniq/group/prepend.yaml new file mode 100644 index 00000000..735743eb --- /dev/null +++ b/tests/scenarios/cmd/uniq/group/prepend.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 128 +description: uniq --group=prepend adds blank line before first group too. +setup: + files: + - path: input.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --group=prepend input.txt +expect: + stdout: "\na\na\n\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/group/separate.yaml b/tests/scenarios/cmd/uniq/group/separate.yaml new file mode 100644 index 00000000..d6806c16 --- /dev/null +++ b/tests/scenarios/cmd/uniq/group/separate.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 130 +description: uniq --group=separate inserts blank lines between groups. +setup: + files: + - path: input.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --group=separate input.txt +expect: + stdout: "a\na\n\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/hardening/null_bytes_in_lines.yaml b/tests/scenarios/cmd/uniq/hardening/null_bytes_in_lines.yaml new file mode 100644 index 00000000..0ef6ddbe --- /dev/null +++ b/tests/scenarios/cmd/uniq/hardening/null_bytes_in_lines.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 90 +description: uniq preserves null bytes within lines. 
+setup: + files: + - path: input.txt + content: "a\x00a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq input.txt +expect: + stdout: "a\x00a\na\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/repeated/basic_repeated.yaml b/tests/scenarios/cmd/uniq/repeated/basic_repeated.yaml new file mode 100644 index 00000000..972581f9 --- /dev/null +++ b/tests/scenarios/cmd/uniq/repeated/basic_repeated.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 20 +description: uniq -d prints only repeated lines. +setup: + files: + - path: input.txt + content: "a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -d input.txt +expect: + stdout: "a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/repeated/no_repeated.yaml b/tests/scenarios/cmd/uniq/repeated/no_repeated.yaml new file mode 100644 index 00000000..f234486a --- /dev/null +++ b/tests/scenarios/cmd/uniq/repeated/no_repeated.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 21 +description: uniq -d produces no output when no lines are repeated. +setup: + files: + - path: input.txt + content: "a\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -d input.txt +expect: + stdout: "" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/skip/skip_chars.yaml b/tests/scenarios/cmd/uniq/skip/skip_chars.yaml new file mode 100644 index 00000000..ebfaa4d0 --- /dev/null +++ b/tests/scenarios/cmd/uniq/skip/skip_chars.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 42 +description: uniq -s 1 skips first character when comparing. 
+setup: + files: + - path: input.txt + content: "aaa\naaa\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -s 1 input.txt +expect: + stdout: "aaa\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/skip/skip_fields.yaml b/tests/scenarios/cmd/uniq/skip/skip_fields.yaml new file mode 100644 index 00000000..18c0f0df --- /dev/null +++ b/tests/scenarios/cmd/uniq/skip/skip_fields.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 31 +description: uniq -f 1 skips first field when comparing. +setup: + files: + - path: input.txt + content: "a a\nb a\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -f 1 input.txt +expect: + stdout: "a a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/stdin/pipe_input.yaml b/tests/scenarios/cmd/uniq/stdin/pipe_input.yaml new file mode 100644 index 00000000..848878f2 --- /dev/null +++ b/tests/scenarios/cmd/uniq/stdin/pipe_input.yaml @@ -0,0 +1,14 @@ +# Test stdin input via redirect +description: uniq reads from stdin when no file is given. +setup: + files: + - path: src.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq < src.txt +expect: + stdout: "a\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/unique/all_duplicated.yaml b/tests/scenarios/cmd/uniq/unique/all_duplicated.yaml new file mode 100644 index 00000000..d26c3bd1 --- /dev/null +++ b/tests/scenarios/cmd/uniq/unique/all_duplicated.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 9 +description: uniq -u produces no output when all lines are duplicated. 
+setup: + files: + - path: input.txt + content: "a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -u input.txt +expect: + stdout: "" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/unique/all_unique.yaml b/tests/scenarios/cmd/uniq/unique/all_unique.yaml new file mode 100644 index 00000000..60abe8b6 --- /dev/null +++ b/tests/scenarios/cmd/uniq/unique/all_unique.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 10 +description: uniq -u prints all lines when all are unique. +setup: + files: + - path: input.txt + content: "a\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -u input.txt +expect: + stdout: "a\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/zero_terminated/basic_zero.yaml b/tests/scenarios/cmd/uniq/zero_terminated/basic_zero.yaml new file mode 100644 index 00000000..d4638814 --- /dev/null +++ b/tests/scenarios/cmd/uniq/zero_terminated/basic_zero.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 124 +description: uniq -z uses NUL as line delimiter. +setup: + files: + - path: input.txt + content: "a\0a\0b" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -z input.txt +expect: + stdout: "a\0b\0" + stderr: "" + exit_code: 0 From a4a180738dda2a77b04f8f4c14f3eb5b282ad80c Mon Sep 17 00:00:00 2001 From: Alexandre Yang Date: Thu, 12 Mar 2026 18:19:50 +0100 Subject: [PATCH 2/4] [iter 2] Fix -D -u rewrite logic and address PR review comments Fix P1 bug: the --all-repeated --unique combination was incorrectly rewritten to -d behavior, which only prints one line per duplicate group. GNU uniq -D -u prints all duplicate lines but suppresses one occurrence per group (the "unique" representative). For groups of N, this means N-1 lines are printed. The old code was correct only for groups of exactly 2 members. 
Also addresses P3 suggestions from @AlexandreYang: - Use a more descriptive error message when rejecting output file args - Add inline comment about countFieldWidth matching GNU coreutils Co-Authored-By: Claude Opus 4.6 --- interp/builtins/uniq/uniq.go | 21 ++++++++++--------- interp/builtins/uniq/uniq_pentest_test.go | 2 +- interp/builtins/uniq/uniq_test.go | 2 +- .../cmd/uniq/all_repeated/with_unique.yaml | 2 +- .../with_unique_large_groups.yaml | 13 ++++++++++++ 5 files changed, 27 insertions(+), 13 deletions(-) create mode 100644 tests/scenarios/cmd/uniq/all_repeated/with_unique_large_groups.yaml diff --git a/interp/builtins/uniq/uniq.go b/interp/builtins/uniq/uniq.go index 4b44598a..453b85bc 100644 --- a/interp/builtins/uniq/uniq.go +++ b/interp/builtins/uniq/uniq.go @@ -93,6 +93,7 @@ const MaxLineBytes = 1 << 20 // 1 MiB const MaxCount = 1<<31 - 1 // 2 147 483 647 // countFieldWidth is the width of the count prefix produced by -c. +// Matches GNU coreutils. Counts >= 10,000,000 naturally widen the field. const countFieldWidth = 7 // initialBufSize is the starting buffer size for the scanner. @@ -200,7 +201,7 @@ func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { } if len(args) > 1 { - callCtx.Errf("uniq: extra operand %q\n", args[1]) + callCtx.Errf("uniq: output file argument is not supported: %q\n", args[1]) return builtins.Result{Code: 1} } @@ -230,13 +231,9 @@ func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { delim = 0 } - // GNU uniq: --all-repeated --unique collapses to -d behavior (one per - // duplicate group). Downgrade to the standard repeated path. - if useAllRepeated && *unique { - useAllRepeated = false - *repeated = true - *unique = false - } + // GNU uniq -D -u: print all duplicate lines but suppress one + // occurrence per group (the "unique" representative). Handled in + // the allRepeated output path; keep both flags in the config. 
cfg := &uniqConfig{ count: *count, @@ -345,8 +342,12 @@ func processInput(ctx context.Context, callCtx *builtins.CallContext, r io.Reade return err } } - if err := reportWrite(writeStr(w, prevLine+delimStr)); err != nil { - return err + // With -D -u, suppress one occurrence per group (the + // first/"unique" representative) to match GNU behavior. + if !cfg.unique { + if err := reportWrite(writeStr(w, prevLine+delimStr)); err != nil { + return err + } } groupNum++ } diff --git a/interp/builtins/uniq/uniq_pentest_test.go b/interp/builtins/uniq/uniq_pentest_test.go index 86bda6db..aaa49b89 100644 --- a/interp/builtins/uniq/uniq_pentest_test.go +++ b/interp/builtins/uniq/uniq_pentest_test.go @@ -174,7 +174,7 @@ func TestUniqPentestMultipleStdinDash(t *testing.T) { writeFile(t, dir, "src.txt", "a\na\n") _, stderr, code := cmdRun(t, "uniq - - < src.txt", dir) assert.Equal(t, 1, code) - assert.Contains(t, stderr, "extra operand") + assert.Contains(t, stderr, "output file argument is not supported") } // --- Context cancellation --- diff --git a/interp/builtins/uniq/uniq_test.go b/interp/builtins/uniq/uniq_test.go index 2a02e0a9..e265ba0c 100644 --- a/interp/builtins/uniq/uniq_test.go +++ b/interp/builtins/uniq/uniq_test.go @@ -533,7 +533,7 @@ func TestUniqExtraOperand(t *testing.T) { writeFile(t, dir, "b.txt", "b\n") _, stderr, code := cmdRun(t, "uniq a.txt b.txt", dir) assert.Equal(t, 1, code) - assert.Contains(t, stderr, "extra operand") + assert.Contains(t, stderr, "output file argument is not supported") } func TestUniqInvalidAllRepeatedMethod(t *testing.T) { diff --git a/tests/scenarios/cmd/uniq/all_repeated/with_unique.yaml b/tests/scenarios/cmd/uniq/all_repeated/with_unique.yaml index 9f54e55c..3ab4260b 100644 --- a/tests/scenarios/cmd/uniq/all_repeated/with_unique.yaml +++ b/tests/scenarios/cmd/uniq/all_repeated/with_unique.yaml @@ -1,4 +1,4 @@ -description: uniq --all-repeated --unique collapses to -d behavior (one per duplicate group). 
+description: uniq --all-repeated --unique suppresses one occurrence per duplicate group. setup: files: - path: input.txt diff --git a/tests/scenarios/cmd/uniq/all_repeated/with_unique_large_groups.yaml b/tests/scenarios/cmd/uniq/all_repeated/with_unique_large_groups.yaml new file mode 100644 index 00000000..57f215af --- /dev/null +++ b/tests/scenarios/cmd/uniq/all_repeated/with_unique_large_groups.yaml @@ -0,0 +1,13 @@ +description: uniq --all-repeated --unique with groups larger than 2 prints count-1 lines per group. +setup: + files: + - path: input.txt + content: "a\na\na\nb\nb\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --all-repeated --unique input.txt +expect: + stdout: "a\na\nb\n" + stderr: "" + exit_code: 0 From c90bfc279648c76ee98f9f37b778d31b702bd189 Mon Sep 17 00:00:00 2001 From: Alexandre Yang Date: Thu, 12 Mar 2026 18:31:16 +0100 Subject: [PATCH 3/4] [iter 3] Add scenario tests for --all-repeated=separate/prepend with -u Add test coverage for --all-repeated=separate and --all-repeated=prepend combined with --unique flag. Both verified against GNU coreutils bash. Co-Authored-By: Claude Opus 4.6 --- .../cmd/uniq/all_repeated/prepend_with_unique.yaml | 13 +++++++++++++ .../cmd/uniq/all_repeated/separate_with_unique.yaml | 13 +++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 tests/scenarios/cmd/uniq/all_repeated/prepend_with_unique.yaml create mode 100644 tests/scenarios/cmd/uniq/all_repeated/separate_with_unique.yaml diff --git a/tests/scenarios/cmd/uniq/all_repeated/prepend_with_unique.yaml b/tests/scenarios/cmd/uniq/all_repeated/prepend_with_unique.yaml new file mode 100644 index 00000000..5973ce60 --- /dev/null +++ b/tests/scenarios/cmd/uniq/all_repeated/prepend_with_unique.yaml @@ -0,0 +1,13 @@ +description: uniq --all-repeated=prepend --unique prepends blank line and suppresses one occurrence per group. 
+setup: + files: + - path: input.txt + content: "a\na\na\nb\nb\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --all-repeated=prepend --unique input.txt +expect: + stdout: "\na\na\n\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/all_repeated/separate_with_unique.yaml b/tests/scenarios/cmd/uniq/all_repeated/separate_with_unique.yaml new file mode 100644 index 00000000..e1a2c175 --- /dev/null +++ b/tests/scenarios/cmd/uniq/all_repeated/separate_with_unique.yaml @@ -0,0 +1,13 @@ +description: uniq --all-repeated=separate --unique suppresses one occurrence per group with blank-line separators. +setup: + files: + - path: input.txt + content: "a\na\na\nb\nb\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --all-repeated=separate --unique input.txt +expect: + stdout: "a\na\n\nb\n" + stderr: "" + exit_code: 0 From cf275a6fd60c6ecc3f5cbb60559ac876111a88e5 Mon Sep 17 00:00:00 2001 From: Alexandre Yang Date: Thu, 12 Mar 2026 19:55:29 +0100 Subject: [PATCH 4/4] [iter 6] Fix -D -u with comparison flags and -z -f with embedded newlines Two bash-compatibility fixes for uniq: 1. -D -u with comparison flags (-w, -f, -s, -i): When non-identical lines compare equal via flags, GNU keeps the first N-1 lines and drops the last. Our implementation was dropping the first and keeping the rest. Fixed by buffering curLine and emitting prevLine in the -D -u path, so the last occurrence is suppressed at the group boundary. 2. -z -f with embedded newlines: In NUL-delimited mode, GNU treats '\n' as a blank character for field-skipping purposes. Our skipFieldsN only recognized space and tab. Fixed by adding '\n' to the blank set when zeroTerminated is true. Both fixes verified against GNU coreutils via Docker bash comparison tests. 
Co-Authored-By: Claude Opus 4.6 --- interp/builtins/uniq/uniq.go | 37 +++++++++++++------ .../with_unique_comparison_flags.yaml | 13 +++++++ .../zero_skip_fields_embedded_newline.yaml | 13 +++++++ 3 files changed, 52 insertions(+), 11 deletions(-) create mode 100644 tests/scenarios/cmd/uniq/all_repeated/with_unique_comparison_flags.yaml create mode 100644 tests/scenarios/cmd/uniq/zero_terminated/zero_skip_fields_embedded_newline.yaml diff --git a/interp/builtins/uniq/uniq.go b/interp/builtins/uniq/uniq.go index 453b85bc..1aeef57b 100644 --- a/interp/builtins/uniq/uniq.go +++ b/interp/builtins/uniq/uniq.go @@ -342,17 +342,26 @@ func processInput(ctx context.Context, callCtx *builtins.CallContext, r io.Reade return err } } - // With -D -u, suppress one occurrence per group (the - // first/"unique" representative) to match GNU behavior. - if !cfg.unique { + if err := reportWrite(writeStr(w, prevLine+delimStr)); err != nil { + return err + } + groupNum++ + } + // With -D -u, emit the previous line and buffer the + // current one; the last occurrence in each group is + // suppressed when the group boundary is reached (GNU + // drops the last, not the first). 
+ if cfg.unique { + if lineCount > 2 { if err := reportWrite(writeStr(w, prevLine+delimStr)); err != nil { return err } } - groupNum++ - } - if err := reportWrite(writeStr(w, curLine+delimStr)); err != nil { - return err + prevLine = curLine + } else { + if err := reportWrite(writeStr(w, curLine+delimStr)); err != nil { + return err + } } } } else { @@ -429,7 +438,7 @@ func writeStr(w io.Writer, s string) error { func compareKey(line string, cfg *uniqConfig) string { s := line if cfg.skipFields > 0 { - s = skipFieldsN(s, cfg.skipFields) + s = skipFieldsN(s, cfg.skipFields, cfg.delim == 0) } if cfg.skipChars > 0 && len(s) > 0 { skip := cfg.skipChars @@ -472,13 +481,19 @@ func asciiToLower(s string) string { // skipFieldsN skips the first n blank-delimited fields and returns the // remainder of the string, starting immediately after the last character // of the n-th field (before any subsequent blanks). -func skipFieldsN(s string, n int64) string { +// When zeroTerminated is true, embedded newlines ('\n') are treated as +// blanks in addition to spaces and tabs, matching GNU coreutils behavior +// in NUL-delimited mode. +func skipFieldsN(s string, n int64, zeroTerminated bool) string { + isBlank := func(c byte) bool { + return c == ' ' || c == '\t' || (zeroTerminated && c == '\n') + } i := 0 for field := int64(0); field < n && i < len(s); field++ { - for i < len(s) && (s[i] == ' ' || s[i] == '\t') { + for i < len(s) && isBlank(s[i]) { i++ } - for i < len(s) && s[i] != ' ' && s[i] != '\t' { + for i < len(s) && !isBlank(s[i]) { i++ } } diff --git a/tests/scenarios/cmd/uniq/all_repeated/with_unique_comparison_flags.yaml b/tests/scenarios/cmd/uniq/all_repeated/with_unique_comparison_flags.yaml new file mode 100644 index 00000000..fdc3c672 --- /dev/null +++ b/tests/scenarios/cmd/uniq/all_repeated/with_unique_comparison_flags.yaml @@ -0,0 +1,13 @@ +description: uniq -D -u with comparison flags keeps first N-1 lines, dropping the last. 
+setup: + files: + - path: input.txt + content: "x\ny\nz\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -D -u -w0 input.txt +expect: + stdout: "x\ny\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/zero_terminated/zero_skip_fields_embedded_newline.yaml b/tests/scenarios/cmd/uniq/zero_terminated/zero_skip_fields_embedded_newline.yaml new file mode 100644 index 00000000..51025095 --- /dev/null +++ b/tests/scenarios/cmd/uniq/zero_terminated/zero_skip_fields_embedded_newline.yaml @@ -0,0 +1,13 @@ +description: uniq -z -f1 treats embedded newlines as blanks for field skipping. +setup: + files: + - path: input.txt + content: "1\0c\na\0" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -z -f1 input.txt +expect: + stdout: "1\0c\na\0" + stderr: "" + exit_code: 0