From 6f830c1dbe644890c624b1c8c70b85ff0b7d0492 Mon Sep 17 00:00:00 2001 From: "datadog-datadog-prod-us1-2[bot]" <261164178+datadog-datadog-prod-us1-2[bot]@users.noreply.github.com> Date: Tue, 10 Mar 2026 10:30:37 +0000 Subject: [PATCH 01/19] Implement uniq builtin command Co-authored-by: AlexandreYang <49917914+AlexandreYang@users.noreply.github.com> --- interp/builtins/uniq/uniq.go | 538 ++++++++++++++ interp/builtins/uniq/uniq_gnu_compat_test.go | 233 +++++++ interp/builtins/uniq/uniq_pentest_test.go | 252 +++++++ interp/builtins/uniq/uniq_test.go | 656 ++++++++++++++++++ interp/register_builtins.go | 2 + tests/import_allowlist_test.go | 16 + .../cmd/uniq/all_repeated/prepend.yaml | 14 + .../cmd/uniq/all_repeated/separate.yaml | 14 + .../cmd/uniq/basic/adjacent_duplicates.yaml | 14 + .../scenarios/cmd/uniq/basic/all_unique.yaml | 14 + .../cmd/uniq/basic/d_and_u_suppress.yaml | 14 + .../cmd/uniq/basic/different_lines.yaml | 14 + .../cmd/uniq/basic/eight_bit_chars.yaml | 14 + .../scenarios/cmd/uniq/basic/empty_input.yaml | 14 + .../scenarios/cmd/uniq/basic/ignore_case.yaml | 14 + .../cmd/uniq/basic/no_trailing_newline.yaml | 14 + .../scenarios/cmd/uniq/check_chars/w_one.yaml | 14 + .../cmd/uniq/check_chars/w_zero.yaml | 14 + .../scenarios/cmd/uniq/count/basic_count.yaml | 14 + .../cmd/uniq/count/count_duplicates.yaml | 14 + .../uniq/errors/all_repeated_with_count.yaml | 9 + .../cmd/uniq/errors/group_with_count.yaml | 9 + .../cmd/uniq/errors/missing_file.yaml | 10 + .../cmd/uniq/errors/unknown_flag.yaml | 9 + tests/scenarios/cmd/uniq/group/append.yaml | 14 + tests/scenarios/cmd/uniq/group/both.yaml | 14 + tests/scenarios/cmd/uniq/group/prepend.yaml | 14 + tests/scenarios/cmd/uniq/group/separate.yaml | 14 + .../uniq/hardening/null_bytes_in_lines.yaml | 14 + .../cmd/uniq/repeated/basic_repeated.yaml | 14 + .../cmd/uniq/repeated/no_repeated.yaml | 14 + tests/scenarios/cmd/uniq/skip/skip_chars.yaml | 14 + .../scenarios/cmd/uniq/skip/skip_fields.yaml | 14 + 
.../scenarios/cmd/uniq/stdin/pipe_input.yaml | 14 + .../cmd/uniq/unique/all_duplicated.yaml | 14 + .../scenarios/cmd/uniq/unique/all_unique.yaml | 14 + .../cmd/uniq/zero_terminated/basic_zero.yaml | 14 + 37 files changed, 2112 insertions(+) create mode 100644 interp/builtins/uniq/uniq.go create mode 100644 interp/builtins/uniq/uniq_gnu_compat_test.go create mode 100644 interp/builtins/uniq/uniq_pentest_test.go create mode 100644 interp/builtins/uniq/uniq_test.go create mode 100644 tests/scenarios/cmd/uniq/all_repeated/prepend.yaml create mode 100644 tests/scenarios/cmd/uniq/all_repeated/separate.yaml create mode 100644 tests/scenarios/cmd/uniq/basic/adjacent_duplicates.yaml create mode 100644 tests/scenarios/cmd/uniq/basic/all_unique.yaml create mode 100644 tests/scenarios/cmd/uniq/basic/d_and_u_suppress.yaml create mode 100644 tests/scenarios/cmd/uniq/basic/different_lines.yaml create mode 100644 tests/scenarios/cmd/uniq/basic/eight_bit_chars.yaml create mode 100644 tests/scenarios/cmd/uniq/basic/empty_input.yaml create mode 100644 tests/scenarios/cmd/uniq/basic/ignore_case.yaml create mode 100644 tests/scenarios/cmd/uniq/basic/no_trailing_newline.yaml create mode 100644 tests/scenarios/cmd/uniq/check_chars/w_one.yaml create mode 100644 tests/scenarios/cmd/uniq/check_chars/w_zero.yaml create mode 100644 tests/scenarios/cmd/uniq/count/basic_count.yaml create mode 100644 tests/scenarios/cmd/uniq/count/count_duplicates.yaml create mode 100644 tests/scenarios/cmd/uniq/errors/all_repeated_with_count.yaml create mode 100644 tests/scenarios/cmd/uniq/errors/group_with_count.yaml create mode 100644 tests/scenarios/cmd/uniq/errors/missing_file.yaml create mode 100644 tests/scenarios/cmd/uniq/errors/unknown_flag.yaml create mode 100644 tests/scenarios/cmd/uniq/group/append.yaml create mode 100644 tests/scenarios/cmd/uniq/group/both.yaml create mode 100644 tests/scenarios/cmd/uniq/group/prepend.yaml create mode 100644 tests/scenarios/cmd/uniq/group/separate.yaml create mode 
100644 tests/scenarios/cmd/uniq/hardening/null_bytes_in_lines.yaml create mode 100644 tests/scenarios/cmd/uniq/repeated/basic_repeated.yaml create mode 100644 tests/scenarios/cmd/uniq/repeated/no_repeated.yaml create mode 100644 tests/scenarios/cmd/uniq/skip/skip_chars.yaml create mode 100644 tests/scenarios/cmd/uniq/skip/skip_fields.yaml create mode 100644 tests/scenarios/cmd/uniq/stdin/pipe_input.yaml create mode 100644 tests/scenarios/cmd/uniq/unique/all_duplicated.yaml create mode 100644 tests/scenarios/cmd/uniq/unique/all_unique.yaml create mode 100644 tests/scenarios/cmd/uniq/zero_terminated/basic_zero.yaml diff --git a/interp/builtins/uniq/uniq.go b/interp/builtins/uniq/uniq.go new file mode 100644 index 00000000..5bc13b33 --- /dev/null +++ b/interp/builtins/uniq/uniq.go @@ -0,0 +1,538 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +// Package uniq implements the uniq builtin command. +// +// uniq — report or omit repeated lines +// +// Usage: uniq [OPTION]... [INPUT_FILE] +// +// Filter adjacent matching lines from INPUT_FILE (or standard input), +// writing to standard output. +// +// With no INPUT_FILE, or when INPUT_FILE is -, read standard input. +// +// Note: the output file argument (second positional arg) supported by +// GNU uniq is intentionally NOT implemented because it writes to the +// filesystem, violating the shell's safety rules. +// +// Accepted flags: +// +// -c, --count +// Prefix lines by the number of occurrences. +// +// -d, --repeated +// Only print duplicate lines, one for each group. +// +// -D, --all-repeated[=METHOD] +// Print all duplicate lines. METHOD={none,prepend,separate} +// (default: none). Mutually exclusive with --group. +// +// -u, --unique +// Only print unique lines (lines that are not repeated). 
+// +// -i, --ignore-case +// Ignore differences in case when comparing lines. +// +// -f N, --skip-fields=N +// Avoid comparing the first N fields. Fields are sequences of +// non-blank characters separated by blanks (spaces and tabs). +// +// -s N, --skip-chars=N +// Avoid comparing the first N characters (applied after field +// skipping). +// +// -w N, --check-chars=N +// Compare no more than N characters in each line. +// +// -z, --zero-terminated +// Line delimiter is NUL (\0), not newline. +// +// --group[=METHOD] +// Show all input lines, separating groups with an empty line. +// METHOD={separate,prepend,append,both} (default: separate). +// Mutually exclusive with -c, -d, -D, -u. +// +// -h, --help +// Print this usage message to stdout and exit 0. +// +// Exit codes: +// +// 0 Success. +// 1 An error occurred (invalid argument, missing file, incompatible flags). +// +// Memory safety: +// +// Lines are processed one at a time via a streaming scanner with a +// per-line cap of MaxLineBytes (1 MiB). Only the current and previous +// lines are kept in memory. All loops check ctx.Err() to honour the +// shell's execution timeout. +package uniq + +import ( + "bufio" + "context" + "io" + "os" + "strconv" + "strings" + + "github.com/spf13/pflag" + + "github.com/DataDog/rshell/interp/builtins" +) + +// Cmd is the uniq builtin command descriptor. +var Cmd = builtins.Command{Name: "uniq", Run: run} + +// MaxLineBytes is the per-line buffer cap for the line scanner. +const MaxLineBytes = 1 << 20 // 1 MiB + +// MaxCount is the maximum accepted value for -f, -s, -w flags. +const MaxCount = 1<<31 - 1 // 2 147 483 647 + +// countFieldWidth is the width of the count prefix produced by -c. +const countFieldWidth = 7 + +// initialBufSize is the starting buffer size for the scanner. +const initialBufSize = 4096 + +// groupMethod controls how --group inserts blank-line separators. 
+type groupMethod int + +const ( + groupSeparate groupMethod = iota + groupPrepend + groupAppend + groupBoth +) + +// allRepeatedMethod controls how -D/--all-repeated delimits groups. +type allRepeatedMethod int + +const ( + allRepeatedNone allRepeatedMethod = iota + allRepeatedPrepend + allRepeatedSeparate +) + +func run(ctx context.Context, callCtx *builtins.CallContext, args []string) builtins.Result { + fs := pflag.NewFlagSet("uniq", pflag.ContinueOnError) + fs.SetOutput(io.Discard) + + help := fs.BoolP("help", "h", false, "print usage and exit") + count := fs.BoolP("count", "c", false, "prefix lines by the number of occurrences") + repeated := fs.BoolP("repeated", "d", false, "only print duplicate lines, one for each group") + unique := fs.BoolP("unique", "u", false, "only print unique lines") + ignoreCase := fs.BoolP("ignore-case", "i", false, "ignore differences in case when comparing") + zeroTerminated := fs.BoolP("zero-terminated", "z", false, "line delimiter is NUL, not newline") + + skipFieldsStr := fs.StringP("skip-fields", "f", "0", "avoid comparing the first N fields") + skipCharsStr := fs.StringP("skip-chars", "s", "0", "avoid comparing the first N characters") + checkCharsStr := fs.StringP("check-chars", "w", "", "compare no more than N characters") + + allRepeatedStr := fs.StringP("all-repeated", "D", "", "print all duplicate lines; METHOD={none,prepend,separate}") + groupStr := fs.String("group", "", "show all input lines with group separators; METHOD={separate,prepend,append,both}") + + fs.Lookup("all-repeated").NoOptDefVal = "none" + fs.Lookup("group").NoOptDefVal = "separate" + + if err := fs.Parse(args); err != nil { + callCtx.Errf("uniq: %v\n", err) + return builtins.Result{Code: 1} + } + + if *help { + callCtx.Out("Usage: uniq [OPTION]... 
[INPUT]\n") + callCtx.Out("Filter adjacent matching lines from INPUT (or stdin),\n") + callCtx.Out("writing to standard output.\n\n") + fs.SetOutput(callCtx.Stdout) + fs.PrintDefaults() + return builtins.Result{} + } + + skipFields, ok := parseNonNegativeInt(*skipFieldsStr) + if !ok { + callCtx.Errf("uniq: invalid number of fields to skip: %q\n", *skipFieldsStr) + return builtins.Result{Code: 1} + } + + skipChars, ok := parseNonNegativeInt(*skipCharsStr) + if !ok { + callCtx.Errf("uniq: invalid number of characters to skip: %q\n", *skipCharsStr) + return builtins.Result{Code: 1} + } + + checkChars := int64(-1) + if fs.Changed("check-chars") { + checkChars, ok = parseNonNegativeInt(*checkCharsStr) + if !ok { + callCtx.Errf("uniq: invalid number of characters to check: %q\n", *checkCharsStr) + return builtins.Result{Code: 1} + } + } + + useAllRepeated := fs.Changed("all-repeated") + arMethod := allRepeatedNone + if useAllRepeated { + var err error + arMethod, err = parseAllRepeatedMethod(*allRepeatedStr) + if err != nil { + callCtx.Errf("uniq: %v\n", err) + return builtins.Result{Code: 1} + } + } + + useGroup := fs.Changed("group") + grpMethod := groupSeparate + if useGroup { + var err error + grpMethod, err = parseGroupMethod(*groupStr) + if err != nil { + callCtx.Errf("uniq: %v\n", err) + return builtins.Result{Code: 1} + } + } + + if useGroup && (*count || *repeated || useAllRepeated || *unique) { + callCtx.Errf("uniq: --group is mutually exclusive with -c/-d/-D/-u\n") + return builtins.Result{Code: 1} + } + if useAllRepeated && *count { + callCtx.Errf("uniq: printing all duplicated lines and repeat counts is meaningless\n") + return builtins.Result{Code: 1} + } + + positional := fs.Args() + if len(positional) > 1 { + callCtx.Errf("uniq: extra operand %q\n", positional[1]) + return builtins.Result{Code: 1} + } + + file := "-" + if len(positional) == 1 { + file = positional[0] + } + + var rc io.ReadCloser + if file == "-" { + if callCtx.Stdin == nil { + return 
builtins.Result{} + } + rc = io.NopCloser(callCtx.Stdin) + } else { + f, err := callCtx.OpenFile(ctx, file, os.O_RDONLY, 0) + if err != nil { + callCtx.Errf("uniq: %s: %s\n", file, callCtx.PortableErr(err)) + return builtins.Result{Code: 1} + } + defer f.Close() + rc = f + } + + delim := byte('\n') + if *zeroTerminated { + delim = 0 + } + + cfg := &uniqConfig{ + count: *count, + repeated: *repeated, + unique: *unique, + ignoreCase: *ignoreCase, + skipFields: skipFields, + skipChars: skipChars, + checkChars: checkChars, + delim: delim, + useAllRepeated: useAllRepeated, + arMethod: arMethod, + useGroup: useGroup, + grpMethod: grpMethod, + } + + if err := processInput(ctx, callCtx, rc, cfg); err != nil { + if ctx.Err() != nil { + return builtins.Result{Code: 1} + } + callCtx.Errf("uniq: write error\n") + return builtins.Result{Code: 1} + } + return builtins.Result{} +} + +type uniqConfig struct { + count bool + repeated bool + unique bool + ignoreCase bool + skipFields int64 + skipChars int64 + checkChars int64 + delim byte + useAllRepeated bool + arMethod allRepeatedMethod + useGroup bool + grpMethod groupMethod +} + +func processInput(ctx context.Context, callCtx *builtins.CallContext, r io.Reader, cfg *uniqConfig) error { + sc := bufio.NewScanner(r) + buf := make([]byte, initialBufSize) + sc.Buffer(buf, MaxLineBytes) + sc.Split(makeSplitFunc(cfg.delim)) + + w := callCtx.Stdout + delimStr := string([]byte{cfg.delim}) + + var prevLine string + var prevKey string + var lineCount int64 + first := true + groupNum := 0 + + for sc.Scan() { + if ctx.Err() != nil { + return ctx.Err() + } + curLine := sc.Text() + curKey := compareKey(curLine, cfg) + + if first { + prevLine = curLine + prevKey = curKey + lineCount = 1 + first = false + + if cfg.useGroup { + if cfg.grpMethod == groupPrepend || cfg.grpMethod == groupBoth { + if err := writeStr(w, delimStr); err != nil { + return err + } + } + if err := writeStr(w, curLine+delimStr); err != nil { + return err + } + } + continue 
+ } + + same := prevKey == curKey + + if same { + lineCount++ + if cfg.useGroup { + if err := writeStr(w, curLine+delimStr); err != nil { + return err + } + } else if cfg.useAllRepeated { + if lineCount == 2 { + if groupNum > 0 && cfg.arMethod != allRepeatedNone { + if err := writeStr(w, delimStr); err != nil { + return err + } + } + if groupNum == 0 && cfg.arMethod == allRepeatedPrepend { + if err := writeStr(w, delimStr); err != nil { + return err + } + } + if err := writeStr(w, prevLine+delimStr); err != nil { + return err + } + groupNum++ + } + if err := writeStr(w, curLine+delimStr); err != nil { + return err + } + } + } else { + if cfg.useGroup { + if err := writeStr(w, delimStr); err != nil { + return err + } + if err := writeStr(w, curLine+delimStr); err != nil { + return err + } + groupNum++ + } else if cfg.useAllRepeated { + // Nothing to do — non-repeated last group is simply dropped. + } else { + if err := emitStandard(w, cfg, prevLine, lineCount, delimStr); err != nil { + return err + } + } + prevLine = curLine + prevKey = curKey + lineCount = 1 + } + } + + if err := sc.Err(); err != nil { + callCtx.Errf("uniq: %s\n", err.Error()) + return err + } + + if first { + return nil + } + + // Flush last group. 
+ if cfg.useGroup { + if cfg.grpMethod == groupAppend || cfg.grpMethod == groupBoth { + return writeStr(w, delimStr) + } + return nil + } + if cfg.useAllRepeated { + return nil + } + return emitStandard(w, cfg, prevLine, lineCount, delimStr) +} + +func emitStandard(w io.Writer, cfg *uniqConfig, line string, count int64, delimStr string) error { + if cfg.repeated && cfg.unique { + return nil + } + if cfg.repeated && count < 2 { + return nil + } + if cfg.unique && count >= 2 { + return nil + } + if cfg.count { + s := strconv.FormatInt(count, 10) + for len(s) < countFieldWidth { + s = " " + s + } + return writeStr(w, s+" "+line+delimStr) + } + return writeStr(w, line+delimStr) +} + +func writeStr(w io.Writer, s string) error { + _, err := io.WriteString(w, s) + return err +} + +// compareKey extracts the portion of line used for comparison, applying +// field skipping, char skipping, check-chars, and case folding. +func compareKey(line string, cfg *uniqConfig) string { + s := line + if cfg.skipFields > 0 { + s = skipFieldsN(s, cfg.skipFields) + } + if cfg.skipChars > 0 && len(s) > 0 { + skip := int(cfg.skipChars) + if skip > len(s) { + skip = len(s) + } + s = s[skip:] + } + if cfg.checkChars >= 0 && int(cfg.checkChars) < len(s) { + s = s[:int(cfg.checkChars)] + } + if cfg.ignoreCase { + s = strings.ToLower(s) + } + return s +} + +// skipFieldsN skips the first n blank-delimited fields and returns the +// remainder of the string, starting immediately after the last character +// of the n-th field (before any subsequent blanks). 
+func skipFieldsN(s string, n int64) string { + i := 0 + for field := int64(0); field < n; field++ { + if i >= len(s) { + break + } + for i < len(s) && (s[i] == ' ' || s[i] == '\t') { + i++ + } + for i < len(s) && s[i] != ' ' && s[i] != '\t' { + i++ + } + } + return s[i:] +} + +func parseNonNegativeInt(s string) (int64, bool) { + if s == "" { + return 0, false + } + n, err := strconv.ParseInt(s, 10, 64) + if err != nil { + if ne, ok := err.(*strconv.NumError); ok && ne.Err == strconv.ErrRange { + return MaxCount, true + } + return 0, false + } + if n < 0 { + return 0, false + } + if n > MaxCount { + n = MaxCount + } + return n, true +} + +func parseAllRepeatedMethod(s string) (allRepeatedMethod, error) { + switch { + case s == "" || strings.HasPrefix("none", s): + return allRepeatedNone, nil + case strings.HasPrefix("prepend", s): + return allRepeatedPrepend, nil + case strings.HasPrefix("separate", s): + return allRepeatedSeparate, nil + } + return 0, &invalidArgError{arg: s, flag: "--all-repeated", valid: []string{"none", "prepend", "separate"}} +} + +func parseGroupMethod(s string) (groupMethod, error) { + switch { + case s == "" || strings.HasPrefix("separate", s): + return groupSeparate, nil + case strings.HasPrefix("prepend", s): + return groupPrepend, nil + case strings.HasPrefix("append", s): + return groupAppend, nil + case strings.HasPrefix("both", s): + return groupBoth, nil + } + return 0, &invalidArgError{arg: s, flag: "--group", valid: []string{"prepend", "append", "separate", "both"}} +} + +type invalidArgError struct { + arg string + flag string + valid []string +} + +func (e *invalidArgError) Error() string { + msg := "invalid argument '" + e.arg + "' for '" + e.flag + "'\nValid arguments are:\n" + for _, v := range e.valid { + msg += " - '" + v + "'\n" + } + return msg +} + +// makeSplitFunc returns a bufio.SplitFunc that splits on the given delimiter. +// The token returned does NOT include the trailing delimiter. 
+func makeSplitFunc(delim byte) bufio.SplitFunc { + return func(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + for i, b := range data { + if b == delim { + return i + 1, data[:i], nil + } + } + if atEOF { + return len(data), data, nil + } + return 0, nil, nil + } +} diff --git a/interp/builtins/uniq/uniq_gnu_compat_test.go b/interp/builtins/uniq/uniq_gnu_compat_test.go new file mode 100644 index 00000000..b35756c4 --- /dev/null +++ b/interp/builtins/uniq/uniq_gnu_compat_test.go @@ -0,0 +1,233 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +// GNU compatibility tests for the uniq builtin. +// +// Expected outputs were captured from GNU coreutils uniq 9.6 +// and are embedded as string literals so the tests run without any GNU +// tooling present on CI. + +package uniq_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// TestGNUCompatEmptyInput — empty input produces empty output. +// +// GNU command: printf ” | guniq +// Expected: "" +func TestGNUCompatEmptyInput(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "empty.txt", "") + stdout, _, code := cmdRun(t, "uniq empty.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +// TestGNUCompatAdjacentDuplicates — adjacent duplicates collapsed. +// +// GNU command: printf 'a\na\n' | guniq +// Expected: "a\n" +func TestGNUCompatAdjacentDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +// TestGNUCompatNoTrailingNewline — last line without newline gets one added. 
+// +// GNU command: printf 'a\na' | guniq +// Expected: "a\n" +func TestGNUCompatNoTrailingNewline(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +// TestGNUCompatDifferentLines — different lines both preserved. +// +// GNU command: printf 'a\nb' | guniq +// Expected: "a\nb\n" +func TestGNUCompatDifferentLines(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +// TestGNUCompatCountBasic — -c formats count with 7-char right-aligned field. +// +// GNU command: printf 'a\nb\n' | guniq -c +// Expected: " 1 a\n 1 b\n" +func TestGNUCompatCountBasic(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\n") + stdout, _, code := cmdRun(t, "uniq -c in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, " 1 a\n 1 b\n", stdout) +} + +// TestGNUCompatCountDuplicates — -c with repeated lines. +// +// GNU command: printf 'a\na\n' | guniq -c +// Expected: " 2 a\n" +func TestGNUCompatCountDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -c in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, " 2 a\n", stdout) +} + +// TestGNUCompatIgnoreCase — -i folds case. +// +// GNU command: printf 'A\na\n' | guniq -i +// Expected: "A\n" +func TestGNUCompatIgnoreCase(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "A\na\n") + stdout, _, code := cmdRun(t, "uniq -i in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "A\n", stdout) +} + +// TestGNUCompatAllRepeatedSeparate — --all-repeated=separate with two groups. 
+// +// GNU command: printf 'a\na\nb\nc\nc\n' | guniq --all-repeated=separate +// Expected: "a\na\n\nc\nc\n" +func TestGNUCompatAllRepeatedSeparate(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\nc\nc\n") + stdout, _, code := cmdRun(t, "uniq --all-repeated=separate in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nc\nc\n", stdout) +} + +// TestGNUCompatAllRepeatedPrepend — --all-repeated=prepend prefixes first group. +// +// GNU command: printf 'a\na\nb\nc\nc\n' | guniq --all-repeated=prepend +// Expected: "\na\na\n\nc\nc\n" +func TestGNUCompatAllRepeatedPrepend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\nc\nc\n") + stdout, _, code := cmdRun(t, "uniq --all-repeated=prepend in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nc\nc\n", stdout) +} + +// TestGNUCompatGroupSeparate — --group=separate with two groups. +// +// GNU command: printf 'a\na\nb\n' | guniq --group=separate +// Expected: "a\na\n\nb\n" +func TestGNUCompatGroupSeparate(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=separate in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nb\n", stdout) +} + +// TestGNUCompatGroupPrepend — --group=prepend with two groups. +// +// GNU command: printf 'a\na\nb\n' | guniq --group=prepend +// Expected: "\na\na\n\nb\n" +func TestGNUCompatGroupPrepend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=prepend in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nb\n", stdout) +} + +// TestGNUCompatGroupAppend — --group=append with two groups. 
+// +// GNU command: printf 'a\na\nb\n' | guniq --group=append +// Expected: "a\na\n\nb\n\n" +func TestGNUCompatGroupAppend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=append in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nb\n\n", stdout) +} + +// TestGNUCompatGroupBoth — --group=both with two groups. +// +// GNU command: printf 'a\na\nb\n' | guniq --group=both +// Expected: "\na\na\n\nb\n\n" +func TestGNUCompatGroupBoth(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=both in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nb\n\n", stdout) +} + +// TestGNUCompatRepeatedOnly — -d only emits repeated lines. +// +// GNU command: printf 'a\na\nb\n' | guniq -d +// Expected: "a\n" +func TestGNUCompatRepeatedOnly(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq -d in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +// TestGNUCompatUniqueOnly — -u only emits unique lines. +// +// GNU command: printf 'a\nb\na\n' | guniq -u +// Expected: "a\nb\na\n" +func TestGNUCompatUniqueOnly(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\na\n") + stdout, _, code := cmdRun(t, "uniq -u in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\na\n", stdout) +} + +// TestGNUCompatRejectedFlag — unknown flag produces exit 1. +// +// GNU command: guniq --no-such-flag → exit 1 +func TestGNUCompatRejectedFlag(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\n") + _, stderr, code := cmdRun(t, "uniq --no-such-flag in.txt", dir) + assert.Equal(t, 1, code) + assert.NotEmpty(t, stderr) +} + +// TestGNUCompatSkipFields — -f 2 skips two fields. 
+// +// GNU command: printf 'a\ta a\na a a\n' | guniq -f 2 +// Expected: "a\ta a\n" +func TestGNUCompatSkipFields(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\ta a\na a a\n") + stdout, _, code := cmdRun(t, "uniq -f 2 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\ta a\n", stdout) +} + +// TestGNUCompatZeroTerminated — -z uses NUL delimiter. +// +// GNU command: printf 'a\0a\0b' | guniq -z +// Expected: "a\0b\0" +func TestGNUCompatZeroTerminated(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\x00a\x00b") + stdout, _, code := cmdRun(t, "uniq -z in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\x00b\x00", stdout) +} diff --git a/interp/builtins/uniq/uniq_pentest_test.go b/interp/builtins/uniq/uniq_pentest_test.go new file mode 100644 index 00000000..86bda6db --- /dev/null +++ b/interp/builtins/uniq/uniq_pentest_test.go @@ -0,0 +1,252 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. 
+ +package uniq_test + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/DataDog/rshell/interp" +) + +// --- Integer edge cases --- + +func TestUniqPentestSkipFieldsZero(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -f 0 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestUniqPentestSkipFieldsOne(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "x a\ny a\n") + stdout, _, code := cmdRun(t, "uniq -f 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "x a\n", stdout) +} + +func TestUniqPentestSkipFieldsMaxInt(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\n") + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + stdout, _, code := runScriptCtx(ctx, t, "uniq -f 2147483647 in.txt", dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestUniqPentestSkipFieldsOverflow(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\n") + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + stdout, _, code := runScriptCtx(ctx, t, "uniq -f 99999999999999999999 in.txt", dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestUniqPentestSkipCharsZero(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "abc\nabcd\n") + stdout, _, code := cmdRun(t, "uniq -s 0 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc\nabcd\n", stdout) +} + +func TestUniqPentestCheckCharsZero(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "abc\ndef\n") + stdout, _, code := cmdRun(t, "uniq -w 0 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc\n", stdout) +} + 
+func TestUniqPentestCheckCharsMaxInt(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -w 2147483647 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestUniqPentestCheckCharsOverflow(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n\x08") + stdout, _, code := cmdRun(t, "uniq -d -u -w340282366920938463463374607431768211456 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +// --- Long lines --- + +func TestUniqPentestLineBelowCap(t *testing.T) { + dir := t.TempDir() + line := strings.Repeat("a", 1<<20-2) + content := line + "\n" + line + "\n" + require.NoError(t, os.WriteFile(filepath.Join(dir, "in.txt"), []byte(content), 0644)) + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, line+"\n", stdout) +} + +func TestUniqPentestLineAtCap(t *testing.T) { + dir := t.TempDir() + line := strings.Repeat("a", 1<<20) + content := line + "\n" + require.NoError(t, os.WriteFile(filepath.Join(dir, "in.txt"), []byte(content), 0644)) + _, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 1, code) +} + +func TestUniqPentestLineBeyondCap(t *testing.T) { + dir := t.TempDir() + line := strings.Repeat("a", 1<<20+1) + require.NoError(t, os.WriteFile(filepath.Join(dir, "in.txt"), []byte(line), 0644)) + _, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 1, code) +} + +// --- Path and filename edge cases --- + +func TestUniqPentestNonExistentFile(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq nonexistent.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqPentestEmptyFilename(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `uniq ""`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqPentestDoubleDashFlagLikeFilename(t *testing.T) { + dir := t.TempDir() 
+ writeFile(t, dir, "-c", "content\n") + stdout, _, code := cmdRun(t, "uniq -- -c", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "content\n", stdout) +} + +func TestUniqPentestOutsideAllowedPaths(t *testing.T) { + allowed := t.TempDir() + secret := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(secret, "secret.txt"), []byte("secret"), 0644)) + + secretPath := strings.ReplaceAll(filepath.Join(secret, "secret.txt"), `\`, `/`) + _, stderr, code := runScript(t, "uniq "+secretPath, allowed, interp.AllowedPaths([]string{allowed})) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +// --- Flag and argument injection --- + +func TestUniqPentestUnknownFlagLong(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq --follow in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqPentestUnknownFlagShort(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq -x in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqPentestMultipleStdinDash(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "src.txt", "a\na\n") + _, stderr, code := cmdRun(t, "uniq - - < src.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "extra operand") +} + +// --- Context cancellation --- + +func TestUniqPentestPreCancelledContext(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + done := make(chan struct{}) + go func() { + runScriptCtx(ctx, t, "uniq in.txt", dir, interp.AllowedPaths([]string{dir})) + close(done) + }() + select { + case <-done: + case <-time.After(5 * time.Second): + t.Fatal("uniq with pre-cancelled context did not return within 5s") + } +} + +// --- Large input --- + +func TestUniqPentestManyLines(t *testing.T) { + dir := t.TempDir() + var sb strings.Builder + for i := 0; i < 10000; i++ { + 
sb.WriteString("line\n") + } + writeFile(t, dir, "in.txt", sb.String()) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + stdout, _, code := runScriptCtx(ctx, t, "uniq in.txt", dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 0, code) + assert.Equal(t, "line\n", stdout) +} + +func TestUniqPentestManyUniqueLines(t *testing.T) { + dir := t.TempDir() + var sb strings.Builder + for i := 0; i < 10000; i++ { + sb.WriteString("line") + sb.WriteString(strings.Repeat("x", i%100)) + sb.WriteByte('\n') + } + content := sb.String() + writeFile(t, dir, "in.txt", content) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + _, _, code := runScriptCtx(ctx, t, "uniq -c in.txt", dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 0, code) +} + +// --- Behavior matching: -D with no duplicates --- + +func TestUniqPentestAllRepeatedNoDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\nc\n") + stdout, _, code := cmdRun(t, "uniq -D in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +// --- Binary (invalid UTF-8) content --- + +func TestUniqPentestBinaryContent(t *testing.T) { + dir := t.TempDir() + content := []byte{0xfc, 0x80, 0x80, '\n', 0xfc, 0x80, 0x80, '\n'} + require.NoError(t, os.WriteFile(filepath.Join(dir, "bad.bin"), content, 0644)) + stdout, _, code := cmdRun(t, "uniq bad.bin", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\xfc\x80\x80\n", stdout) +} diff --git a/interp/builtins/uniq/uniq_test.go b/interp/builtins/uniq/uniq_test.go new file mode 100644 index 00000000..2a02e0a9 --- /dev/null +++ b/interp/builtins/uniq/uniq_test.go @@ -0,0 +1,656 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc.
+ +package uniq_test + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/DataDog/rshell/interp" + "github.com/DataDog/rshell/interp/builtins/testutil" +) + +func runScriptCtx(ctx context.Context, t *testing.T, script, dir string, opts ...interp.RunnerOption) (string, string, int) { + t.Helper() + return testutil.RunScriptCtx(ctx, t, script, dir, opts...) +} + +func runScript(t *testing.T, script, dir string, opts ...interp.RunnerOption) (string, string, int) { + t.Helper() + return testutil.RunScript(t, script, dir, opts...) +} + +func cmdRun(t *testing.T, script, dir string) (string, string, int) { + t.Helper() + return runScript(t, script, dir, interp.AllowedPaths([]string{dir})) +} + +func writeFile(t *testing.T, dir, name, content string) string { + t.Helper() + require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte(content), 0644)) + return name +} + +// --- Default behaviour --- + +func TestUniqEmptyInput(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "empty.txt", "") + stdout, _, code := cmdRun(t, "uniq empty.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +func TestUniqAdjacentDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestUniqNoTrailingNewline(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestUniqDifferentLines(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +func TestUniqMixedDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir,
"in.txt", "a\na\nb") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +func TestUniqAllUnique(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\nc\n") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\nc\n", stdout) +} + +func TestUniqNonAdjacentDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "b\na\na\n") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "b\na\n", stdout) +} + +// --- -c / --count --- + +func TestUniqCountBasic(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\n") + stdout, _, code := cmdRun(t, "uniq -c in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "      1 a\n      1 b\n", stdout) +} + +func TestUniqCountDuplicates(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -c in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "      2 a\n", stdout) +} + +func TestUniqCountLongForm(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --count in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "      2 a\n      1 b\n", stdout) +} + +// --- -d / --repeated --- + +func TestUniqRepeatedBasic(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -d in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestUniqRepeatedNone(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\n") + stdout, _, code := cmdRun(t, "uniq -d in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +func TestUniqRepeatedNonAdjacent(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\na\n") + stdout, _, code := cmdRun(t, "uniq -d in.txt", dir) + assert.Equal(t, 0, code) +
assert.Equal(t, "", stdout) +} + +// --- -u / --unique --- + +func TestUniqUniqueBasic(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -u in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +func TestUniqUniqueAllUnique(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\n") + stdout, _, code := cmdRun(t, "uniq -u in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +func TestUniqUniqueMixed(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\na\n") + stdout, _, code := cmdRun(t, "uniq -u in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\na\n", stdout) +} + +// --- -d -u combined --- + +func TestUniqRepeatedAndUniqueSuppressAll(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n\x08") + stdout, _, code := cmdRun(t, "uniq -d -u in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +// --- -i / --ignore-case --- + +func TestUniqIgnoreCase(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "A\na\n") + stdout, _, code := cmdRun(t, "uniq -i in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "A\n", stdout) +} + +func TestUniqIgnoreCaseLongForm(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "A\na\n") + stdout, _, code := cmdRun(t, "uniq --ignore-case in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "A\n", stdout) +} + +func TestUniqCaseSensitiveDefault(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "A\na\n") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "A\na\n", stdout) +} + +// --- -f / --skip-fields --- + +func TestUniqSkipFields1(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a a\nb a\n") + stdout, _, code := cmdRun(t, "uniq -f 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a\n", stdout) +} + 
+func TestUniqSkipFields1DifferentAfterField(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a a\nb b\n") + stdout, _, code := cmdRun(t, "uniq -f 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a\nb b\n", stdout) +} + +func TestUniqSkipFieldsTabVsSpace(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\ta\na a\n") + stdout, _, code := cmdRun(t, "uniq -f 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\ta\na a\n", stdout) +} + +func TestUniqSkipFields2(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a a c\nb a c\n") + stdout, _, code := cmdRun(t, "uniq -f 2 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a c\n", stdout) +} + +// --- -s / --skip-chars --- + +func TestUniqSkipChars1(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "aaa\naaa\n") + stdout, _, code := cmdRun(t, "uniq -s 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "aaa\n", stdout) +} + +func TestUniqSkipChars2(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "baa\naaa\n") + stdout, _, code := cmdRun(t, "uniq -s 2 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "baa\n", stdout) +} + +func TestUniqSkipChars4ShortLine(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "abc\nabcd\n") + stdout, _, code := cmdRun(t, "uniq -s 4 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc\n", stdout) +} + +// --- -w / --check-chars --- + +func TestUniqCheckChars0(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "abc\nabcd\n") + stdout, _, code := cmdRun(t, "uniq -w 0 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc\n", stdout) +} + +func TestUniqCheckChars1(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a a\nb a\n") + stdout, _, code := cmdRun(t, "uniq -w 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a\nb a\n", stdout) +} + +func 
TestUniqCheckCharsWithSkipFields(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a a a\nb a c\n") + stdout, _, code := cmdRun(t, "uniq -f 1 -w 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a a\n", stdout) +} + +// --- -z / --zero-terminated --- + +func TestUniqZeroTerminated(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\x00a\x00b") + stdout, _, code := cmdRun(t, "uniq -z in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\x00b\x00", stdout) +} + +func TestUniqZeroTerminatedNewlinesPreserved(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -z in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\x00", stdout) +} + +// --- -D / --all-repeated --- + +func TestUniqAllRepeatedDefault(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -D in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n", stdout) +} + +func TestUniqAllRepeatedSeparate(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\nc\nc\n") + stdout, _, code := cmdRun(t, "uniq --all-repeated=separate in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nc\nc\n", stdout) +} + +func TestUniqAllRepeatedPrepend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq --all-repeated=prepend in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n", stdout) +} + +func TestUniqAllRepeatedPrependMultiple(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\nc\nc\n") + stdout, _, code := cmdRun(t, "uniq --all-repeated=prepend in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nc\nc\n", stdout) +} + +func TestUniqAllRepeatedNoneOnUniqueInput(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\nb\n") + stdout, _, code := cmdRun(t, "uniq 
--all-repeated=prepend in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +func TestUniqAllRepeatedNoneMultipleGroups(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\nb\n") + stdout, _, code := cmdRun(t, "uniq -D in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\nb\nb\n", stdout) +} + +func TestUniqAllRepeatedSeparateMultipleGroups(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\nb\nc\n") + stdout, _, code := cmdRun(t, "uniq --all-repeated=separate in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nb\nb\n", stdout) +} + +func TestUniqAllRepeatedWithCheckChars(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a a\na b\n") + stdout, _, code := cmdRun(t, "uniq -D -w1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a a\na b\n", stdout) +} + +// --- --group --- + +func TestUniqGroupSeparate(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=separate in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nb\n", stdout) +} + +func TestUniqGroupPrepend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=prepend in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nb\n", stdout) +} + +func TestUniqGroupAppend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=append in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nb\n\n", stdout) +} + +func TestUniqGroupBoth(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group=both in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n\nb\n\n", stdout) +} + +func TestUniqGroupDefault(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", 
"a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq --group in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\nb\n", stdout) +} + +func TestUniqGroupEmptyInput(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "") + stdout, _, code := cmdRun(t, "uniq --group in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +func TestUniqGroupSingleGroup(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq --group=prepend in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\na\n", stdout) +} + +func TestUniqGroupSingleGroupAppend(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq --group=append in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n\n", stdout) +} + +func TestUniqGroupSingleGroupSeparate(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq --group=separate in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\na\n", stdout) +} + +// --- Mutual exclusion errors --- + +func TestUniqGroupWithCount(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq --group -c in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "mutually exclusive") +} + +func TestUniqGroupWithRepeated(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq --group -d in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "mutually exclusive") +} + +func TestUniqGroupWithUnique(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq --group -u in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "mutually exclusive") +} + +func TestUniqGroupWithAllRepeated(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq --group -D in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "mutually 
exclusive") +} + +func TestUniqAllRepeatedWithCount(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq -D -c in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "meaningless") +} + +// --- Help --- + +func TestUniqHelp(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := cmdRun(t, "uniq --help", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "Usage:") + assert.Empty(t, stderr) +} + +func TestUniqHelpShort(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := cmdRun(t, "uniq -h", dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "Usage:") + assert.Empty(t, stderr) +} + +// --- Error cases --- + +func TestUniqMissingFile(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq nonexistent.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqUnknownFlag(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "uniq --no-such-flag in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "uniq:") +} + +func TestUniqExtraOperand(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "a\n") + writeFile(t, dir, "b.txt", "b\n") + _, stderr, code := cmdRun(t, "uniq a.txt b.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "extra operand") +} + +func TestUniqInvalidAllRepeatedMethod(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\n") + _, stderr, code := cmdRun(t, "uniq --all-repeated=badoption in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "invalid argument") +} + +func TestUniqInvalidGroupMethod(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\n") + _, stderr, code := cmdRun(t, "uniq --group=badoption in.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "invalid argument") +} + +// --- Stdin --- + +func TestUniqStdinPipe(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "src.txt", "a\na\nb\n") 
+ stdout, _, code := cmdRun(t, "uniq < src.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +func TestUniqStdinDash(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "src.txt", "a\na\nb\n") + stdout, _, code := cmdRun(t, "uniq - < src.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\n", stdout) +} + +func TestUniqNilStdin(t *testing.T) { + dir := t.TempDir() + stdout, stderr, code := runScript(t, "uniq -", dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) + assert.Equal(t, "", stderr) +} + +// --- Context cancellation --- + +func TestUniqContextCancellation(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\nb\n") + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + _, _, code := runScriptCtx(ctx, t, "uniq in.txt", dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 0, code) +} + +// --- Null bytes --- + +func TestUniqNullBytesInContent(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\x00a\na\n") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\x00a\na\n", stdout) +} + +// --- Combined skip fields + skip chars --- + +func TestUniqSkipFieldsAndChars(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a aaa\nb ab\n") + stdout, _, code := cmdRun(t, "uniq -f 1 -s 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a aaa\nb ab\n", stdout) +} + +func TestUniqSkipFieldsAndCharsEqual(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a aaa\nb aaa\n") + stdout, _, code := cmdRun(t, "uniq -f 1 -s 1 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a aaa\n", stdout) +} + +// --- Double dash --- + +func TestUniqDoubleDash(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "-f", "flag-looking-name\n") + stdout, _, code := cmdRun(t, "uniq -- -f", dir) + assert.Equal(t, 0, code) + 
assert.Equal(t, "flag-looking-name\n", stdout) +} + +// --- Eight bit characters --- + +func TestUniqEightBitChars(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "ö\nv\n") + stdout, _, code := cmdRun(t, "uniq in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "ö\nv\n", stdout) +} + +// --- Large count clamped --- + +func TestUniqLargeSkipFieldsClamped(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n") + stdout, _, code := cmdRun(t, "uniq -f 9999999999 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\n", stdout) +} + +func TestUniqOverflowCheckCharsClamped(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "in.txt", "a\na\n\x08") + stdout, _, code := cmdRun(t, "uniq -d -u -w340282366920938463463374607431768211456 in.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} diff --git a/interp/register_builtins.go b/interp/register_builtins.go index f6ff973d..4729ee71 100644 --- a/interp/register_builtins.go +++ b/interp/register_builtins.go @@ -17,6 +17,7 @@ import ( falsecmd "github.com/DataDog/rshell/interp/builtins/false" "github.com/DataDog/rshell/interp/builtins/head" truecmd "github.com/DataDog/rshell/interp/builtins/true" + "github.com/DataDog/rshell/interp/builtins/uniq" ) var registerOnce sync.Once @@ -32,6 +33,7 @@ func registerBuiltins() { falsecmd.Cmd, head.Cmd, truecmd.Cmd, + uniq.Cmd, } { builtins.Register(cmd.Name, cmd.Run) } diff --git a/tests/import_allowlist_test.go b/tests/import_allowlist_test.go index 3ba7053a..f418c401 100644 --- a/tests/import_allowlist_test.go +++ b/tests/import_allowlist_test.go @@ -34,6 +34,8 @@ import ( var builtinAllowedSymbols = []string{ // bufio.NewScanner — line-by-line input reading (e.g. head, cat); no write or exec capability. "bufio.NewScanner", + // bufio.SplitFunc — type for custom scanner split functions; pure type, no I/O. 
+ "bufio.SplitFunc", // context.Context — deadline/cancellation plumbing; pure interface, no side effects. "context.Context", // errors.Is — error comparison; pure function, no I/O. @@ -60,6 +62,20 @@ var builtinAllowedSymbols = []string{ "strconv.Atoi", // strconv.ParseInt — string-to-int conversion with base/bit-size; pure function, no I/O. "strconv.ParseInt", + // strconv.FormatInt — int-to-string conversion; pure function, no I/O. + "strconv.FormatInt", + // strconv.ErrRange — sentinel error value for overflow detection; pure constant. + "strconv.ErrRange", + // strconv.NumError — error type from strconv parse functions; pure type, no I/O. + "strconv.NumError", + // strings.HasPrefix — pure function for prefix matching; no I/O. + "strings.HasPrefix", + // strings.ToLower — pure function for case folding; no I/O. + "strings.ToLower", + // io.WriteString — writes a string to a writer; no filesystem access, delegates to Write. + "io.WriteString", + // io.Writer — interface type for writing; no side effects. + "io.Writer", } // permanentlyBanned lists packages that may never be imported by builtin diff --git a/tests/scenarios/cmd/uniq/all_repeated/prepend.yaml b/tests/scenarios/cmd/uniq/all_repeated/prepend.yaml new file mode 100644 index 00000000..24adad96 --- /dev/null +++ b/tests/scenarios/cmd/uniq/all_repeated/prepend.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 116 +description: uniq --all-repeated=prepend adds blank line before first group. 
+setup: + files: + - path: input.txt + content: "a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --all-repeated=prepend input.txt +expect: + stdout: "\na\na\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/all_repeated/separate.yaml b/tests/scenarios/cmd/uniq/all_repeated/separate.yaml new file mode 100644 index 00000000..f632a039 --- /dev/null +++ b/tests/scenarios/cmd/uniq/all_repeated/separate.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 114 +description: uniq --all-repeated=separate separates groups with blank lines. +setup: + files: + - path: input.txt + content: "a\na\nb\nc\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --all-repeated=separate input.txt +expect: + stdout: "a\na\n\nc\nc\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/adjacent_duplicates.yaml b/tests/scenarios/cmd/uniq/basic/adjacent_duplicates.yaml new file mode 100644 index 00000000..0bf4193a --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/adjacent_duplicates.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 2 +description: uniq deduplicates adjacent identical lines. +setup: + files: + - path: input.txt + content: "a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq input.txt +expect: + stdout: "a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/all_unique.yaml b/tests/scenarios/cmd/uniq/basic/all_unique.yaml new file mode 100644 index 00000000..0b30432c --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/all_unique.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 7 +description: uniq preserves all unique lines. 
+setup: + files: + - path: input.txt + content: "a\nb\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq input.txt +expect: + stdout: "a\nb\nc\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/d_and_u_suppress.yaml b/tests/scenarios/cmd/uniq/basic/d_and_u_suppress.yaml new file mode 100644 index 00000000..429c5ca4 --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/d_and_u_suppress.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 120 +description: uniq -d -u suppresses all output. +setup: + files: + - path: input.txt + content: "a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -d -u input.txt +expect: + stdout: "" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/different_lines.yaml b/tests/scenarios/cmd/uniq/basic/different_lines.yaml new file mode 100644 index 00000000..9189d064 --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/different_lines.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 4 +description: uniq preserves different adjacent lines. +setup: + files: + - path: input.txt + content: "a\nb" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq input.txt +expect: + stdout: "a\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/eight_bit_chars.yaml b/tests/scenarios/cmd/uniq/basic/eight_bit_chars.yaml new file mode 100644 index 00000000..837244d4 --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/eight_bit_chars.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 8 +description: uniq handles eight-bit characters correctly. 
+setup: + files: + - path: input.txt + content: "ö\nv\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq input.txt +expect: + stdout: "ö\nv\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/empty_input.yaml b/tests/scenarios/cmd/uniq/basic/empty_input.yaml new file mode 100644 index 00000000..67f8c2aa --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/empty_input.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 1 +description: uniq with empty input produces no output. +setup: + files: + - path: empty.txt + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq empty.txt +expect: + stdout: "" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/ignore_case.yaml b/tests/scenarios/cmd/uniq/basic/ignore_case.yaml new file mode 100644 index 00000000..4f5225c6 --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/ignore_case.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 126 +description: uniq -i ignores case when comparing. +setup: + files: + - path: input.txt + content: "A\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -i input.txt +expect: + stdout: "A\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/basic/no_trailing_newline.yaml b/tests/scenarios/cmd/uniq/basic/no_trailing_newline.yaml new file mode 100644 index 00000000..ec106ac9 --- /dev/null +++ b/tests/scenarios/cmd/uniq/basic/no_trailing_newline.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 3 +description: uniq normalizes last line without trailing newline. 
+setup: + files: + - path: input.txt + content: "a\na" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq input.txt +expect: + stdout: "a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/check_chars/w_one.yaml b/tests/scenarios/cmd/uniq/check_chars/w_one.yaml new file mode 100644 index 00000000..55bcf470 --- /dev/null +++ b/tests/scenarios/cmd/uniq/check_chars/w_one.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 60 +description: uniq -w 1 compares only first character. +setup: + files: + - path: input.txt + content: "a a\nb a\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -w 1 input.txt +expect: + stdout: "a a\nb a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/check_chars/w_zero.yaml b/tests/scenarios/cmd/uniq/check_chars/w_zero.yaml new file mode 100644 index 00000000..df5f4f8f --- /dev/null +++ b/tests/scenarios/cmd/uniq/check_chars/w_zero.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 57 +description: uniq -w 0 treats all lines as identical. +setup: + files: + - path: input.txt + content: "abc\nabcd\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -w 0 input.txt +expect: + stdout: "abc\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/count/basic_count.yaml b/tests/scenarios/cmd/uniq/count/basic_count.yaml new file mode 100644 index 00000000..b1639c74 --- /dev/null +++ b/tests/scenarios/cmd/uniq/count/basic_count.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 101 +description: uniq -c prefixes lines with occurrence count. 
+setup: + files: + - path: input.txt + content: "a\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -c input.txt +expect: + stdout: "      1 a\n      1 b\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/count/count_duplicates.yaml b/tests/scenarios/cmd/uniq/count/count_duplicates.yaml new file mode 100644 index 00000000..818d9d57 --- /dev/null +++ b/tests/scenarios/cmd/uniq/count/count_duplicates.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 102 +description: uniq -c shows count of 2 for duplicate lines. +setup: + files: + - path: input.txt + content: "a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -c input.txt +expect: + stdout: "      2 a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/errors/all_repeated_with_count.yaml b/tests/scenarios/cmd/uniq/errors/all_repeated_with_count.yaml new file mode 100644 index 00000000..09db2042 --- /dev/null +++ b/tests/scenarios/cmd/uniq/errors/all_repeated_with_count.yaml @@ -0,0 +1,9 @@ +# Derived from GNU coreutils uniq.pl test 112 +description: uniq -D -c is mutually exclusive and fails. +input: + script: |+ + uniq -D -c +expect: + stdout: "" + stderr: "uniq: printing all duplicated lines and repeat counts is meaningless\n" + exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/errors/group_with_count.yaml b/tests/scenarios/cmd/uniq/errors/group_with_count.yaml new file mode 100644 index 00000000..73b2161b --- /dev/null +++ b/tests/scenarios/cmd/uniq/errors/group_with_count.yaml @@ -0,0 +1,9 @@ +# Derived from GNU coreutils uniq.pl test 141 +description: uniq --group -c is mutually exclusive and fails.
+input: + script: |+ + uniq --group -c +expect: + stdout: "" + stderr: "uniq: --group is mutually exclusive with -c/-d/-D/-u\n" + exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/errors/missing_file.yaml b/tests/scenarios/cmd/uniq/errors/missing_file.yaml new file mode 100644 index 00000000..4adcec66 --- /dev/null +++ b/tests/scenarios/cmd/uniq/errors/missing_file.yaml @@ -0,0 +1,10 @@ +# Test missing file error +description: uniq reports error for nonexistent file. +input: + allowed_paths: ["$DIR"] + script: |+ + uniq nonexistent.txt +expect: + stdout: "" + stderr_contains: ["uniq:"] + exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/errors/unknown_flag.yaml b/tests/scenarios/cmd/uniq/errors/unknown_flag.yaml new file mode 100644 index 00000000..258ca647 --- /dev/null +++ b/tests/scenarios/cmd/uniq/errors/unknown_flag.yaml @@ -0,0 +1,9 @@ +# Test unknown flag rejection +description: uniq rejects unknown flags with exit code 1. +skip_assert_against_bash: true +input: + script: |+ + uniq --definitely-invalid +expect: + stderr_contains: ["unknown flag"] + exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/group/append.yaml b/tests/scenarios/cmd/uniq/group/append.yaml new file mode 100644 index 00000000..8f94ed27 --- /dev/null +++ b/tests/scenarios/cmd/uniq/group/append.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 129 +description: uniq --group=append adds blank line after last group too. +setup: + files: + - path: input.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --group=append input.txt +expect: + stdout: "a\na\n\nb\n\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/group/both.yaml b/tests/scenarios/cmd/uniq/group/both.yaml new file mode 100644 index 00000000..db9168ff --- /dev/null +++ b/tests/scenarios/cmd/uniq/group/both.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 132 +description: uniq --group=both adds blank lines before and after groups. 
+setup: + files: + - path: input.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --group=both input.txt +expect: + stdout: "\na\na\n\nb\n\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/group/prepend.yaml b/tests/scenarios/cmd/uniq/group/prepend.yaml new file mode 100644 index 00000000..735743eb --- /dev/null +++ b/tests/scenarios/cmd/uniq/group/prepend.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 128 +description: uniq --group=prepend adds blank line before first group too. +setup: + files: + - path: input.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --group=prepend input.txt +expect: + stdout: "\na\na\n\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/group/separate.yaml b/tests/scenarios/cmd/uniq/group/separate.yaml new file mode 100644 index 00000000..d6806c16 --- /dev/null +++ b/tests/scenarios/cmd/uniq/group/separate.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 130 +description: uniq --group=separate inserts blank lines between groups. +setup: + files: + - path: input.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq --group=separate input.txt +expect: + stdout: "a\na\n\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/hardening/null_bytes_in_lines.yaml b/tests/scenarios/cmd/uniq/hardening/null_bytes_in_lines.yaml new file mode 100644 index 00000000..0ef6ddbe --- /dev/null +++ b/tests/scenarios/cmd/uniq/hardening/null_bytes_in_lines.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 90 +description: uniq preserves null bytes within lines. 
+setup: + files: + - path: input.txt + content: "a\x00a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq input.txt +expect: + stdout: "a\x00a\na\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/repeated/basic_repeated.yaml b/tests/scenarios/cmd/uniq/repeated/basic_repeated.yaml new file mode 100644 index 00000000..972581f9 --- /dev/null +++ b/tests/scenarios/cmd/uniq/repeated/basic_repeated.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 20 +description: uniq -d prints only repeated lines. +setup: + files: + - path: input.txt + content: "a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -d input.txt +expect: + stdout: "a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/repeated/no_repeated.yaml b/tests/scenarios/cmd/uniq/repeated/no_repeated.yaml new file mode 100644 index 00000000..f234486a --- /dev/null +++ b/tests/scenarios/cmd/uniq/repeated/no_repeated.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 21 +description: uniq -d produces no output when no lines are repeated. +setup: + files: + - path: input.txt + content: "a\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -d input.txt +expect: + stdout: "" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/skip/skip_chars.yaml b/tests/scenarios/cmd/uniq/skip/skip_chars.yaml new file mode 100644 index 00000000..ebfaa4d0 --- /dev/null +++ b/tests/scenarios/cmd/uniq/skip/skip_chars.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 42 +description: uniq -s 1 skips first character when comparing. 
+setup: + files: + - path: input.txt + content: "aaa\naaa\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -s 1 input.txt +expect: + stdout: "aaa\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/skip/skip_fields.yaml b/tests/scenarios/cmd/uniq/skip/skip_fields.yaml new file mode 100644 index 00000000..18c0f0df --- /dev/null +++ b/tests/scenarios/cmd/uniq/skip/skip_fields.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 31 +description: uniq -f 1 skips first field when comparing. +setup: + files: + - path: input.txt + content: "a a\nb a\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -f 1 input.txt +expect: + stdout: "a a\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/stdin/pipe_input.yaml b/tests/scenarios/cmd/uniq/stdin/pipe_input.yaml new file mode 100644 index 00000000..848878f2 --- /dev/null +++ b/tests/scenarios/cmd/uniq/stdin/pipe_input.yaml @@ -0,0 +1,14 @@ +# Test stdin input via redirect +description: uniq reads from stdin when no file is given. +setup: + files: + - path: src.txt + content: "a\na\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq < src.txt +expect: + stdout: "a\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/unique/all_duplicated.yaml b/tests/scenarios/cmd/uniq/unique/all_duplicated.yaml new file mode 100644 index 00000000..d26c3bd1 --- /dev/null +++ b/tests/scenarios/cmd/uniq/unique/all_duplicated.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 9 +description: uniq -u produces no output when all lines are duplicated. 
+setup: + files: + - path: input.txt + content: "a\na\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -u input.txt +expect: + stdout: "" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/unique/all_unique.yaml b/tests/scenarios/cmd/uniq/unique/all_unique.yaml new file mode 100644 index 00000000..60abe8b6 --- /dev/null +++ b/tests/scenarios/cmd/uniq/unique/all_unique.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 10 +description: uniq -u prints all lines when all are unique. +setup: + files: + - path: input.txt + content: "a\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -u input.txt +expect: + stdout: "a\nb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/uniq/zero_terminated/basic_zero.yaml b/tests/scenarios/cmd/uniq/zero_terminated/basic_zero.yaml new file mode 100644 index 00000000..d4638814 --- /dev/null +++ b/tests/scenarios/cmd/uniq/zero_terminated/basic_zero.yaml @@ -0,0 +1,14 @@ +# Derived from GNU coreutils uniq.pl test 124 +description: uniq -z uses NUL as line delimiter. +setup: + files: + - path: input.txt + content: "a\0a\0b" +input: + allowed_paths: ["$DIR"] + script: |+ + uniq -z input.txt +expect: + stdout: "a\0b\0" + stderr: "" + exit_code: 0 From cd62e37096dac39244032f4fdc23b777113897d5 Mon Sep 17 00:00:00 2001 From: Alexandre Yang Date: Tue, 10 Mar 2026 11:50:39 +0100 Subject: [PATCH 02/19] Fix uniq error messages to match bash by adding Try --help hint GNU coreutils uniq appends "Try 'uniq --help' for more information." after mutually exclusive flag errors. Match that behavior. 
Co-Authored-By: Claude Opus 4.6 --- interp/builtins/uniq/uniq.go | 2 ++ tests/scenarios/cmd/uniq/errors/all_repeated_with_count.yaml | 2 +- tests/scenarios/cmd/uniq/errors/group_with_count.yaml | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/interp/builtins/uniq/uniq.go b/interp/builtins/uniq/uniq.go index 5bc13b33..accc354f 100644 --- a/interp/builtins/uniq/uniq.go +++ b/interp/builtins/uniq/uniq.go @@ -198,10 +198,12 @@ func run(ctx context.Context, callCtx *builtins.CallContext, args []string) buil if useGroup && (*count || *repeated || useAllRepeated || *unique) { callCtx.Errf("uniq: --group is mutually exclusive with -c/-d/-D/-u\n") + callCtx.Errf("Try 'uniq --help' for more information.\n") return builtins.Result{Code: 1} } if useAllRepeated && *count { callCtx.Errf("uniq: printing all duplicated lines and repeat counts is meaningless\n") + callCtx.Errf("Try 'uniq --help' for more information.\n") return builtins.Result{Code: 1} } diff --git a/tests/scenarios/cmd/uniq/errors/all_repeated_with_count.yaml b/tests/scenarios/cmd/uniq/errors/all_repeated_with_count.yaml index 09db2042..9d9a5143 100644 --- a/tests/scenarios/cmd/uniq/errors/all_repeated_with_count.yaml +++ b/tests/scenarios/cmd/uniq/errors/all_repeated_with_count.yaml @@ -5,5 +5,5 @@ input: uniq -D -c expect: stdout: "" - stderr: "uniq: printing all duplicated lines and repeat counts is meaningless\n" + stderr: "uniq: printing all duplicated lines and repeat counts is meaningless\nTry 'uniq --help' for more information.\n" exit_code: 1 diff --git a/tests/scenarios/cmd/uniq/errors/group_with_count.yaml b/tests/scenarios/cmd/uniq/errors/group_with_count.yaml index 73b2161b..64d8af26 100644 --- a/tests/scenarios/cmd/uniq/errors/group_with_count.yaml +++ b/tests/scenarios/cmd/uniq/errors/group_with_count.yaml @@ -5,5 +5,5 @@ input: uniq --group -c expect: stdout: "" - stderr: "uniq: --group is mutually exclusive with -c/-d/-D/-u\n" + stderr: "uniq: --group is mutually exclusive with 
-c/-d/-D/-u\nTry 'uniq --help' for more information.\n" exit_code: 1 From 57a059974a007a0bb8ecebe1287f8bfa7774c8fa Mon Sep 17 00:00:00 2001 From: Alexandre Yang Date: Tue, 10 Mar 2026 11:58:52 +0100 Subject: [PATCH 03/19] add .claude/skills/fix-tests/SKILL.md --- .claude/skills/fix-tests/SKILL.md | 111 ++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 .claude/skills/fix-tests/SKILL.md diff --git a/.claude/skills/fix-tests/SKILL.md b/.claude/skills/fix-tests/SKILL.md new file mode 100644 index 00000000..ba5abeec --- /dev/null +++ b/.claude/skills/fix-tests/SKILL.md @@ -0,0 +1,111 @@ +--- +name: fix-tests +description: Fix failing tests by prioritising shell implementation fixes to match bash behaviour +argument-hint: "[test filter or description of failure]" +--- + +Fix failing tests. **The implementation is more likely wrong than the test.** Always try to fix the shell implementation to match bash behaviour before touching the test expectations. + +--- + +## Workflow + +### 1. Reproduce the failures + +Run the relevant tests to capture the actual failures: + +```bash +# If a specific test filter was given, use it: +go test ./interp/... ./tests/... -run "$ARGUMENTS" -v 2>&1 | head -200 + +# Otherwise run the full suite: +go test ./interp/... ./tests/... -v 2>&1 | head -200 +``` + +If the failure involves YAML scenario tests, also run the bash comparison tests to see what bash actually produces: + +```bash +RSHELL_BASH_TEST=1 go test ./tests/ -run TestShellScenariosAgainstBash -timeout 120s -v 2>&1 | head -300 +``` + +Collect every distinct failure. For each one, note: +- The test name and file +- Expected vs actual output +- The exit code difference (if any) + +### 2. Determine what bash does + +For **every** failure, determine the correct bash behaviour before making any changes. 
Use one or more of these methods: + +**Method A — bash comparison test output.** If the `TestShellScenariosAgainstBash` output is available from step 1, it already shows what bash produces. Use that. + +**Method B — run in Docker.** For cases not covered by comparison tests or when you need to experiment: + +```bash +docker run --rm debian:bookworm-slim bash -c '<command>' +``` + +**Method C — run locally with bash.** For quick checks on macOS/Linux: + +```bash +bash -c '