diff --git a/README.md b/README.md index 2cdadbf0..b6690f57 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ Linux, macOS, and Windows. ``` tests/scenarios/ -├── cmd/ # builtin command tests (echo, cat, head, tail, uniq, wc, ...) +├── cmd/ # builtin command tests (echo, cat, grep, head, tail, uniq, wc, ...) └── shell/ # shell feature tests (pipes, variables, control flow, ...) ``` diff --git a/SHELL_FEATURES.md b/SHELL_FEATURES.md index 6dfa9ca9..9d24fc8c 100644 --- a/SHELL_FEATURES.md +++ b/SHELL_FEATURES.md @@ -12,6 +12,7 @@ Blocked features are rejected before execution with exit code 2. - ✅ `echo [-n] [-e] [ARG]...` — write arguments to stdout - ✅ `exit [N]` — exit the shell with status N (default 0) - ✅ `false` — return exit code 1 +- ✅ `grep [-EFGivclLnHhoqsxw] [-e PATTERN] [-m NUM] [-A NUM] [-B NUM] [-C NUM] PATTERN [FILE]...` — print lines that match patterns; uses RE2 regex engine (linear-time, no backtracking) - ✅ `head [-n N|-c N] [-q|-v] [-z] [FILE]...` — output the first part of files (default: first 10 lines) - ✅ `ls [-1aAdFhlpRrSt] [FILE]...` — list directory contents - ✅ `tail [-n N|-c N] [-q|-v] [-z] [FILE]...` — output the last part of files (default: last 10 lines); supports `+N` offset mode; `-f`/`--follow` is rejected diff --git a/interp/builtins/grep/builtin_grep_pentest_test.go b/interp/builtins/grep/builtin_grep_pentest_test.go new file mode 100644 index 00000000..6d96e241 --- /dev/null +++ b/interp/builtins/grep/builtin_grep_pentest_test.go @@ -0,0 +1,298 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package grep_test + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/DataDog/rshell/interp" + "github.com/DataDog/rshell/interp/builtins/grep" + "github.com/DataDog/rshell/interp/builtins/testutil" +) + +const pentestTimeout = 10 * time.Second + +func grepRun(t *testing.T, script, dir string, extraPaths ...string) (string, string, int) { + t.Helper() + paths := append([]string{dir}, extraPaths...) + return testutil.RunScript(t, script, dir, interp.AllowedPaths(paths)) +} + +func grepRunCtx(ctx context.Context, t *testing.T, script, dir string) (string, string, int) { + t.Helper() + return testutil.RunScriptCtx(ctx, t, script, dir, interp.AllowedPaths([]string{dir})) +} + +func mustNotHang(t *testing.T, fn func()) { + t.Helper() + done := make(chan struct{}) + go func() { + fn() + close(done) + }() + select { + case <-done: + case <-time.After(pentestTimeout): + t.Fatal("operation did not complete within timeout") + } +} + +func pentestWriteFile(t *testing.T, dir, name, content string) string { + t.Helper() + require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte(content), 0644)) + return name +} + +// --- Flag and argument injection --- + +func TestGrepPentestUnknownLongFlag(t *testing.T) { + dir := t.TempDir() + _, stderr, code := grepRun(t, "grep --recursive foo", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "grep:") +} + +func TestGrepPentestUnknownShortFlag(t *testing.T) { + dir := t.TempDir() + _, stderr, code := grepRun(t, "grep -P foo file.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "grep:") +} + +func TestGrepPentestFlagViaExpansion(t *testing.T) { + dir := t.TempDir() + pentestWriteFile(t, dir, "file.txt", "ok\n") + _, stderr, code := grepRun(t, `for flag in -P; do grep $flag foo file.txt; done`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "grep:") +} + +func TestGrepPentestDoubleDashThenFlagLikeArg(t *testing.T) { + dir := t.TempDir() + pentestWriteFile(t, dir, "file.txt", "-v\nfoo\n") + stdout, _, code := grepRun(t, "grep -- -v file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "-v\n", stdout) +} + +// --- Path and filename edge cases --- + +func TestGrepPentestNonexistentFile(t *testing.T) { + dir := t.TempDir() + _, stderr, code := grepRun(t, "grep foo does_not_exist", dir) + assert.Equal(t, 2, code) + assert.Contains(t, stderr, "grep:") +} + +func TestGrepPentestDirectoryAsFile(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.Mkdir(filepath.Join(dir, "subdir"), 0755)) + _, stderr, code := grepRun(t, "grep foo subdir", dir) + assert.Equal(t, 2, code) + assert.Contains(t, stderr, "grep:") +} + +func TestGrepPentestEmptyStringFilename(t *testing.T) { + dir := t.TempDir() + _, stderr, code := grepRun(t, `grep foo ""`, dir) + assert.Equal(t, 2, code) + assert.Contains(t, stderr, "grep:") +} + +func TestGrepPentestPathTraversal(t *testing.T) { + dir := t.TempDir() + outer := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(outer, "secret.txt"), []byte("secret"), 0644)) + outerPath := strings.ReplaceAll(filepath.Join(outer, "secret.txt"), `\`, `/`) + _, stderr, code := grepRun(t, "grep secret "+outerPath, dir) + assert.Equal(t, 2, code) + assert.Contains(t, stderr, "grep:") +} + +// --- Long lines --- + +func TestGrepPentestLineAtCapMinus1(t *testing.T) { + dir := t.TempDir() + content := make([]byte, grep.MaxLineBytes-1) + for i := range content { + content[i] = 'a' + } + content = append(content, '\n') + require.NoError(t, os.WriteFile(filepath.Join(dir, "file.txt"), content, 0644)) + stdout, _, code := grepRun(t, "grep a file.txt", dir) + assert.Equal(t, 0, code) + assert.NotEmpty(t, stdout) +} + +func TestGrepPentestLineAtCapExact(t *testing.T) { + dir := t.TempDir() + content := make([]byte, grep.MaxLineBytes) + for i := range content { + content[i] = 'a' + } + require.NoError(t, os.WriteFile(filepath.Join(dir, "file.txt"), content, 0644)) + _, _, code := grepRun(t, "grep a file.txt", dir) + assert.Equal(t, 2, code) +} + +func TestGrepPentestLineAtCapPlus1(t *testing.T) { + dir := t.TempDir() + content := make([]byte, grep.MaxLineBytes+1) + for i := range content { + content[i] = 'a' + } + require.NoError(t, os.WriteFile(filepath.Join(dir, "file.txt"), content, 0644)) + _, _, code := grepRun(t, "grep a file.txt", dir) + assert.Equal(t, 2, code) +} + +// --- Memory / resource exhaustion --- + +func TestGrepPentestManyFiles(t *testing.T) { + dir := t.TempDir() + var args []string + for i := 0; i < 200; i++ { + name := "f" + strings.Repeat("0", 3-len(string(rune('0'+i/100)))) + string(rune('0'+i/100)) + string(rune('0'+i%100/10)) + string(rune('0'+i%10)) + ".txt" + pentestWriteFile(t, dir, name, "line\n") + args = append(args, name) + } + mustNotHang(t, func() { + _, _, code := grepRun(t, "grep line "+strings.Join(args, " "), dir) + assert.Equal(t, 0, code) + }) +} + +func TestGrepPentestLargeFile(t *testing.T) { + dir := t.TempDir() + var sb strings.Builder + for i := 0; i < 10000; i++ { + sb.WriteString("line\n") + } + pentestWriteFile(t, dir, "big.txt", sb.String()) + mustNotHang(t, func() { + stdout, _, code := grepRun(t, "grep -c line big.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "10000\n", stdout) + }) +} + +// --- ReDoS protection --- + +func TestGrepPentestLinearTimeRegex(t *testing.T) { + // Go's regexp uses RE2, which guarantees linear-time matching. + // This test verifies that pathological patterns complete quickly. + dir := t.TempDir() + // Create a string that would cause exponential backtracking in PCRE + pentestWriteFile(t, dir, "file.txt", strings.Repeat("a", 100)+"\n") + mustNotHang(t, func() { + _, _, _ = grepRun(t, "grep -E 'a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaa' file.txt", dir) + }) +} + +// --- Pattern edge cases --- + +func TestGrepPentestEmptyPattern(t *testing.T) { + dir := t.TempDir() + pentestWriteFile(t, dir, "file.txt", "foo\nbar\n") + mustNotHang(t, func() { + stdout, _, code := grepRun(t, `grep '' file.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "foo\nbar\n", stdout) + }) +} + +func TestGrepPentestNullByteInFile(t *testing.T) { + dir := t.TempDir() + pentestWriteFile(t, dir, "file.txt", "foo\x00bar\nbaz\n") + mustNotHang(t, func() { + _, _, code := grepRun(t, "grep foo file.txt", dir) + assert.Equal(t, 0, code) + }) +} + +func TestGrepPentestFixedStringsRegexInjection(t *testing.T) { + // Verify -F prevents regex metacharacter interpretation. + dir := t.TempDir() + pentestWriteFile(t, dir, "file.txt", "a.*b\naxb\n") + stdout, _, code := grepRun(t, "grep -F 'a.*b' file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a.*b\n", stdout) +} + +// --- Special files / infinite sources --- + +func TestGrepPentestDevNull(t *testing.T) { + if os.DevNull == "NUL" { + dir := t.TempDir() + mustNotHang(t, func() { + _, stderr, code := grepRun(t, "grep foo NUL", dir) + assert.Equal(t, 2, code) + assert.Contains(t, stderr, "grep:") + }) + return + } + dir := t.TempDir() + mustNotHang(t, func() { + _, _, code := grepRun(t, "grep foo "+os.DevNull, dir, filepath.Dir(os.DevNull)) + assert.Equal(t, 1, code) // no match in empty file + }) +} + +func TestGrepPentestContextCancelledDuringSearch(t *testing.T) { + dir := t.TempDir() + pentestWriteFile(t, dir, "file.txt", strings.Repeat("x\n", 10000)) + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + mustNotHang(t, func() { + grepRunCtx(ctx, t, "grep x file.txt", dir) + }) +} + +// --- No-messages flag edge cases --- + +func TestGrepPentestNoMessagesSuppressesErrors(t *testing.T) { + dir := t.TempDir() + _, stderr, code := grepRun(t, "grep -s foo nonexistent.txt", dir) + assert.Equal(t, 2, code) + assert.Equal(t, "", stderr) +} + +// --- Multiple -e patterns --- + +func TestGrepPentestManyPatterns(t *testing.T) { + dir := t.TempDir() + pentestWriteFile(t, dir, "file.txt", "target\n") + // Build a command with many -e flags + var args []string + for i := 0; i < 50; i++ { + args = append(args, "-e", "pattern"+strings.Repeat("x", i)) + } + args = append(args, "-e", "target") + mustNotHang(t, func() { + stdout, _, code := grepRun(t, "grep "+strings.Join(args, " ")+" file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "target\n", stdout) + }) +} + +// --- Quiet mode with error --- + +func TestGrepPentestQuietWithMatch(t *testing.T) { + dir := t.TempDir() + pentestWriteFile(t, dir, "file.txt", "foo\n") + stdout, stderr, code := grepRun(t, "grep -q foo file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) + assert.Equal(t, "", stderr) +} diff --git a/interp/builtins/grep/grep.go b/interp/builtins/grep/grep.go new file mode 100644 index 00000000..281efe50 --- /dev/null +++ b/interp/builtins/grep/grep.go @@ -0,0 +1,682 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +// Package grep implements the grep builtin command. +// +// grep — print lines that match patterns +// +// Usage: grep [OPTION]... PATTERN [FILE]... +// +// grep [OPTION]... -e PATTERN [-e PATTERN]... [FILE]... +// +// Search for PATTERN in each FILE. When FILE is -, read standard input. +// With no FILE, read standard input. +// +// Accepted flags: +// +// -E, --extended-regexp +// Interpret PATTERN as an extended regular expression (ERE). +// +// -F, --fixed-strings +// Interpret PATTERN as a list of fixed strings (not regexps), +// separated by newlines, any of which is to be matched. +// +// -G, --basic-regexp +// Interpret PATTERN as a basic regular expression (BRE). This is +// the default. +// +// -i, --ignore-case +// Ignore case distinctions in patterns and input data. +// +// -v, --invert-match +// Invert the sense of matching, to select non-matching lines. +// +// -c, --count +// Suppress normal output; instead print a count of matching lines +// for each input file. +// +// -l, --files-with-matches +// Suppress normal output; instead print the name of each input +// file from which output would normally have been printed. +// +// -L, --files-without-match +// Suppress normal output; instead print the name of each input +// file from which no output would normally have been printed. +// +// -n, --line-number +// Prefix each line of output with the 1-based line number within +// its input file. +// +// -H, --with-filename +// Print the file name for each match. This is the default when +// there is more than one file to search. +// +// -h, --no-filename +// Suppress the prefixing of file names on output. +// +// -o, --only-matching +// Print only the matched (non-empty) parts of a matching line, +// with each such part on a separate output line. +// +// -q, --quiet, --silent +// Quiet. Do not write anything to standard output. Exit with zero +// status if any match is found, even if an error was detected. +// +// -s, --no-messages +// Suppress error messages about nonexistent or unreadable files. +// +// -x, --line-regexp +// Select only those matches that exactly match the whole line. +// +// -w, --word-regexp +// Select only those lines containing matches that form whole +// words. +// +// -e PATTERN, --regexp=PATTERN +// Use PATTERN as the pattern. If this option is used multiple +// times, search for all patterns given. +// +// -m NUM, --max-count=NUM +// Stop reading a file after NUM matching lines. +// +// -A NUM, --after-context=NUM +// Print NUM lines of trailing context after matching lines. +// +// -B NUM, --before-context=NUM +// Print NUM lines of leading context before matching lines. +// +// -C NUM, --context=NUM +// Print NUM lines of output context. Equivalent to -A NUM -B NUM. +// +// Exit codes: +// +// 0 At least one match was found. +// 1 No matches were found. +// 2 An error occurred. +// +// Memory safety: +// +// All processing is streaming: input is read line-by-line with a per-line +// cap of MaxLineBytes (1 MiB). Lines exceeding this cap cause an error +// rather than an unbounded allocation. All read loops check ctx.Err() at +// each iteration to honour the shell's execution timeout and support +// graceful cancellation. Go's regexp package uses the RE2 engine which +// guarantees linear-time matching, preventing ReDoS attacks. +package grep + +import ( + "bufio" + "context" + "errors" + "io" + "os" + "regexp" + "strconv" + "strings" + + "github.com/DataDog/rshell/interp/builtins" +) + +// Cmd is the grep builtin command descriptor. +var Cmd = builtins.Command{Name: "grep", MakeFlags: registerFlags} + +// MaxLineBytes is the per-line buffer cap for the line scanner. Lines +// longer than this are reported as an error instead of being buffered. +const MaxLineBytes = 1 << 20 // 1 MiB + +// MaxContextLines caps -A/-B/-C to prevent excessive memory use. +const MaxContextLines = 1_000 // 1k lines + +const ( + scanBufInit = 4096 // initial scanner buffer +) + +// Exit code constants matching POSIX grep convention. +const ( + exitMatch = 0 + exitNoMatch = 1 + exitError = 2 +) + +func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { + // Pattern mode flags. + extendedRegexp := fs.BoolP("extended-regexp", "E", false, "use extended regular expressions") + fixedStrings := fs.BoolP("fixed-strings", "F", false, "interpret pattern as fixed strings") + basicRegexp := fs.BoolP("basic-regexp", "G", false, "use basic regular expressions (default)") + + // Matching flags. + ignoreCase := fs.BoolP("ignore-case", "i", false, "ignore case distinctions") + invertMatch := fs.BoolP("invert-match", "v", false, "select non-matching lines") + wordRegexp := fs.BoolP("word-regexp", "w", false, "match only whole words") + lineRegexp := fs.BoolP("line-regexp", "x", false, "match only whole lines") + + // Output flags. + count := fs.BoolP("count", "c", false, "print only a count of matching lines per file") + var outputSeq int + filesWithMatches := newOrderedBoolFlag(&outputSeq) + filesWithoutMatch := newOrderedBoolFlag(&outputSeq) + fs.VarP(filesWithMatches, "files-with-matches", "l", "print only names of files with matches") + fs.VarP(filesWithoutMatch, "files-without-match", "L", "print only names of files without matches") + fs.Lookup("files-with-matches").NoOptDefVal = "true" + fs.Lookup("files-without-match").NoOptDefVal = "true" + lineNumber := fs.BoolP("line-number", "n", false, "prefix output with line numbers") + var filenameSeq int + withFilename := newOrderedBoolFlag(&filenameSeq) + noFilename := newOrderedBoolFlag(&filenameSeq) + fs.VarP(withFilename, "with-filename", "H", "always print filename prefix") + fs.VarP(noFilename, "no-filename", "h", "suppress filename prefix") + fs.Lookup("with-filename").NoOptDefVal = "true" + fs.Lookup("no-filename").NoOptDefVal = "true" + onlyMatching := fs.BoolP("only-matching", "o", false, "print only the matched parts") + quiet := fs.BoolP("quiet", "q", false, "suppress all output") + _ = fs.Bool("silent", false, "alias for --quiet") + noMessages := fs.BoolP("no-messages", "s", false, "suppress error messages") + maxCount := fs.IntP("max-count", "m", -1, "stop after NUM matches per file") + + // Context flags. + afterContext := fs.IntP("after-context", "A", 0, "print NUM lines after each match") + beforeContext := fs.IntP("before-context", "B", 0, "print NUM lines before each match") + contextLines := fs.IntP("context", "C", -1, "print NUM lines of context around each match") + + // Pattern flags (multiple -e allowed). + var patterns patternSlice + fs.VarP(&patterns, "regexp", "e", "use PATTERN as the pattern") + + return func(ctx context.Context, callCtx *builtins.CallContext, args []string) builtins.Result { + // --silent is an alias for --quiet. + if fs.Changed("silent") { + *quiet = true + } + + // Determine context sizes: -C sets both if -A/-B not explicitly set. + after := *afterContext + before := *beforeContext + if *contextLines >= 0 { + if !fs.Changed("after-context") { + after = *contextLines + } + if !fs.Changed("before-context") { + before = *contextLines + } + } + // Clamp context values. + if after < 0 { + after = 0 + } + if before < 0 { + before = 0 + } + if after > MaxContextLines { + after = MaxContextLines + } + if before > MaxContextLines { + before = MaxContextLines + } + + // Collect patterns: from -e flags and/or first positional argument. + // Each pattern may contain newline-separated sub-patterns (GNU grep behavior). + var allPatterns []string + for _, p := range []string(patterns) { + allPatterns = append(allPatterns, strings.Split(p, "\n")...) + } + if len(allPatterns) == 0 { + if len(args) == 0 { + callCtx.Errf("grep: no pattern specified\n") + return builtins.Result{Code: exitError} + } + allPatterns = append(allPatterns, strings.Split(args[0], "\n")...) + args = args[1:] + } + + // Determine regex mode. GNU grep rejects conflicting matchers. + matcherCount := 0 + if *extendedRegexp { + matcherCount++ + } + if *fixedStrings { + matcherCount++ + } + if *basicRegexp { + matcherCount++ + } + if matcherCount > 1 { + callCtx.Errf("grep: conflicting matchers specified\n") + return builtins.Result{Code: exitError} + } + + mode := modeBRE + if *extendedRegexp { + mode = modeERE + } + if *fixedStrings { + mode = modeFixed + } + + // Compile pattern(s). + re, err := compilePatterns(allPatterns, mode, *ignoreCase, *wordRegexp, *lineRegexp) + if err != nil { + callCtx.Errf("grep: %s\n", err.Error()) + return builtins.Result{Code: exitError} + } + + files := args + if len(files) == 0 { + files = []string{"-"} + } + + // Determine filename printing behavior: last of -h/-H wins. + showFilename := len(files) > 1 + if withFilename.pos > 0 || noFilename.pos > 0 { + showFilename = withFilename.pos > noFilename.pos + } + + contextFlagUsed := fs.Changed("after-context") || fs.Changed("before-context") || fs.Changed("context") + + // GNU grep: -o suppresses context output. + if *onlyMatching { + after = 0 + before = 0 + contextFlagUsed = false + } + + resolvedFilesWithMatches := filesWithMatches.pos > filesWithoutMatch.pos + resolvedFilesWithoutMatch := filesWithoutMatch.pos > filesWithMatches.pos + resolvedCount := *count && !resolvedFilesWithMatches && !resolvedFilesWithoutMatch + + opts := &grepOpts{ + re: re, + invertMatch: *invertMatch, + count: resolvedCount, + filesWithMatches: resolvedFilesWithMatches, + filesWithoutMatch: resolvedFilesWithoutMatch, + lineNumber: *lineNumber, + showFilename: showFilename, + onlyMatching: *onlyMatching, + quiet: *quiet, + noMessages: *noMessages, + maxCount: *maxCount, + afterContext: after, + beforeContext: before, + contextRequested: contextFlagUsed, + } + + anyMatch := false + anyError := false + + for _, file := range files { + if ctx.Err() != nil { + break + } + matched, err := grepFile(ctx, callCtx, file, opts) + if err != nil { + if !opts.noMessages { + name := file + if file == "-" { + name = "(standard input)" + } + callCtx.Errf("grep: %s: %s\n", name, callCtx.PortableErr(err)) + } + anyError = true + continue + } + if matched { + anyMatch = true + if opts.quiet { + return builtins.Result{Code: exitMatch} + } + } + } + + // GNU grep: error takes precedence over match (exit 2 > exit 0). + if anyError { + return builtins.Result{Code: exitError} + } + if anyMatch { + return builtins.Result{Code: exitMatch} + } + return builtins.Result{Code: exitNoMatch} + } +} + +type regexMode int + +const ( + modeBRE regexMode = iota + modeERE + modeFixed +) + +type grepOpts struct { + re *regexp.Regexp + invertMatch bool + count bool + filesWithMatches bool + filesWithoutMatch bool + lineNumber bool + showFilename bool + onlyMatching bool + quiet bool + noMessages bool + maxCount int + afterContext int + beforeContext int + contextRequested bool // true when any -A/-B/-C flag was used (even with 0) +} + +type orderedBoolFlag struct { + seq *int + pos int +} + +func newOrderedBoolFlag(seq *int) *orderedBoolFlag { + return &orderedBoolFlag{seq: seq} +} + +func (f *orderedBoolFlag) String() string { + if f.pos > 0 { + return "true" + } + return "false" +} + +func (f *orderedBoolFlag) Set(s string) error { + b, err := strconv.ParseBool(s) + if err != nil { + return err + } + if !b { + f.pos = 0 + return nil + } + *f.seq = *f.seq + 1 + f.pos = *f.seq + return nil +} + +func (f *orderedBoolFlag) Type() string { return "bool" } + +func (f *orderedBoolFlag) IsBoolFlag() bool { return true } + +// patternSlice collects multiple -e PATTERN values. +type patternSlice []string + +func (p *patternSlice) String() string { return strings.Join(*p, "\n") } +func (p *patternSlice) Set(val string) error { + *p = append(*p, val) + return nil +} +func (p *patternSlice) Type() string { return "string" } + +// compilePatterns builds a single regexp from one or more patterns. +func compilePatterns(patterns []string, mode regexMode, ignoreCase, wordRegexp, lineRegexp bool) (*regexp.Regexp, error) { + var parts []string + for _, p := range patterns { + converted, err := convertPattern(p, mode) + if err != nil { + return nil, err + } + parts = append(parts, converted) + } + + combined := strings.Join(parts, "|") + + if wordRegexp && !lineRegexp { + combined = `(?:\b)(?:` + combined + `)(?:\b)` + } + if lineRegexp { + combined = `^(?:` + combined + `)$` + } + + if ignoreCase { + combined = "(?i)" + combined + } + + re, err := regexp.Compile(combined) + if err != nil { + return nil, errors.New("invalid regular expression: " + err.Error()) + } + return re, nil +} + +// convertPattern translates a pattern to Go RE2 syntax based on the mode. +func convertPattern(pattern string, mode regexMode) (string, error) { + switch mode { + case modeFixed: + return regexp.QuoteMeta(pattern), nil + case modeERE: + // Go's regexp is already ERE-compatible. Just validate. + if _, err := regexp.Compile(pattern); err != nil { + return "", errors.New("invalid regular expression: " + err.Error()) + } + return pattern, nil + case modeBRE: + return breToERE(pattern), nil + default: + return pattern, nil + } +} + +// breToERE converts a POSIX Basic Regular Expression to an Extended Regular +// Expression compatible with Go's RE2 engine. +// +// In BRE: +// - (, ), {, }, +, ? are literal unless backslash-escaped +// - \(, \), \{, \}, \+, \? are metacharacters +// +// In ERE (and Go regex): +// - (, ), {, }, +, ? are metacharacters +// - \(, \), \{, \}, \+, \? are literal +// +// So the conversion swaps the escaping for these characters. +func breToERE(bre string) string { + var out strings.Builder + out.Grow(len(bre)) + i := 0 + for i < len(bre) { + if bre[i] == '\\' && i+1 < len(bre) { + next := bre[i+1] + switch next { + case '(', ')', '{', '}', '+', '?', '|': + // BRE \X → ERE X (metacharacter) + out.WriteByte(next) + i += 2 + default: + // Pass through other escapes + out.WriteByte('\\') + out.WriteByte(next) + i += 2 + } + } else { + ch := bre[i] + switch ch { + case '(', ')', '{', '}', '+', '?', '|': + // BRE literal X → ERE \X (escaped literal) + out.WriteByte('\\') + out.WriteByte(ch) + default: + out.WriteByte(ch) + } + i++ + } + } + return out.String() +} + +func openReader(ctx context.Context, callCtx *builtins.CallContext, file string) (io.ReadCloser, error) { + if file == "-" { + if callCtx.Stdin == nil { + return nil, nil + } + return io.NopCloser(callCtx.Stdin), nil + } + return callCtx.OpenFile(ctx, file, os.O_RDONLY, 0) +} + +// grepFile searches a single file. Returns (matched, error). +func grepFile(ctx context.Context, callCtx *builtins.CallContext, file string, opts *grepOpts) (bool, error) { + rc, err := openReader(ctx, callCtx, file) + if err != nil { + return false, err + } + if rc == nil { + return false, nil + } + defer rc.Close() + + displayName := file + if file == "-" { + displayName = "(standard input)" + } + + sc := bufio.NewScanner(rc) + buf := make([]byte, scanBufInit) + sc.Buffer(buf, MaxLineBytes) + + var matchCount int + lineNum := 0 + + // Context tracking. contextRequested is true when any -A/-B/-C flag was + // used, even with value 0. This controls the "--" group separator. + contextRequested := opts.afterContext > 0 || opts.beforeContext > 0 || opts.contextRequested + var beforeBuf []contextLine // ring buffer for before-context + afterRemaining := 0 // lines of after-context still to print + lastPrintedLine := 0 // last line number we printed (for separator) + printedSeparator := false // have we ever printed a match group? + + for sc.Scan() { + if ctx.Err() != nil { + return matchCount > 0, ctx.Err() + } + lineNum++ + line := sc.Text() + + matched := opts.re.MatchString(line) + if opts.invertMatch { + matched = !matched + } + + if matched { + // Check max-count limit before incrementing/printing. + if opts.maxCount >= 0 && matchCount >= opts.maxCount { + break + } + + matchCount++ + + if opts.quiet { + return true, nil + } + + if opts.count || opts.filesWithMatches || opts.filesWithoutMatch { + continue + } + + // Print group separator if needed. + if contextRequested && printedSeparator && lastPrintedLine > 0 && lineNum > lastPrintedLine+1 { + callCtx.Out("--\n") + } + + // Print before-context lines. + if opts.beforeContext > 0 { + for _, cl := range beforeBuf { + if cl.num <= lastPrintedLine { + continue + } + printContextLine(callCtx, displayName, cl.num, cl.text, opts, '-') + lastPrintedLine = cl.num + } + } + + // Print the match. + if opts.onlyMatching && opts.invertMatch { + // -o -v: line was selected by inversion (doesn't contain + // pattern), so there are no matching parts to print. + } else if opts.onlyMatching { + matches := opts.re.FindAllString(line, -1) + for _, m := range matches { + if m == "" { + continue // suppress empty matches (GNU grep behavior) + } + printMatchLine(callCtx, displayName, lineNum, m, opts) + } + } else { + printMatchLine(callCtx, displayName, lineNum, line, opts) + } + lastPrintedLine = lineNum + printedSeparator = true + afterRemaining = opts.afterContext + + // Clear before buffer since we've consumed it. + beforeBuf = beforeBuf[:0] + } else { + // Non-matching line: might be after-context or before-context. + if afterRemaining > 0 && !opts.quiet && !opts.count && !opts.filesWithMatches && !opts.filesWithoutMatch { + printContextLine(callCtx, displayName, lineNum, line, opts, '-') + lastPrintedLine = lineNum + afterRemaining-- + } + + // Add to before-context ring buffer. + if opts.beforeContext > 0 { + if len(beforeBuf) >= opts.beforeContext { + beforeBuf = beforeBuf[1:] + } + beforeBuf = append(beforeBuf, contextLine{num: lineNum, text: line}) + } + } + } + + if err := sc.Err(); err != nil { + return matchCount > 0, err + } + + // Handle -c, -l, -L output. + if opts.count { + if opts.showFilename { + callCtx.Outf("%s:%s\n", displayName, strconv.Itoa(matchCount)) + } else { + callCtx.Outf("%s\n", strconv.Itoa(matchCount)) + } + } + if opts.filesWithMatches && matchCount > 0 { + callCtx.Outf("%s\n", displayName) + } + if opts.filesWithoutMatch && matchCount == 0 { + callCtx.Outf("%s\n", displayName) + } + + return matchCount > 0, nil +} + +type contextLine struct { + num int + text string +} + +func printMatchLine(callCtx *builtins.CallContext, filename string, lineNum int, line string, opts *grepOpts) { + var prefix strings.Builder + if opts.showFilename { + prefix.WriteString(filename) + prefix.WriteByte(':') + } + if opts.lineNumber { + prefix.WriteString(strconv.Itoa(lineNum)) + prefix.WriteByte(':') + } + callCtx.Outf("%s%s\n", prefix.String(), line) +} + +func printContextLine(callCtx *builtins.CallContext, filename string, lineNum int, line string, opts *grepOpts, sep byte) { + var prefix strings.Builder + if opts.showFilename { + prefix.WriteString(filename) + prefix.WriteByte(sep) + } + if opts.lineNumber { + prefix.WriteString(strconv.Itoa(lineNum)) + prefix.WriteByte(sep) + } + callCtx.Outf("%s%s\n", prefix.String(), line) +} diff --git a/interp/builtins/grep/grep_test.go b/interp/builtins/grep/grep_test.go new file mode 100644 index 00000000..64a5803a --- /dev/null +++ b/interp/builtins/grep/grep_test.go @@ -0,0 +1,707 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package grep_test + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/DataDog/rshell/interp" + "github.com/DataDog/rshell/interp/builtins/testutil" +) + +func cmdRun(t *testing.T, script, dir string) (string, string, int) { + t.Helper() + return testutil.RunScript(t, script, dir, interp.AllowedPaths([]string{dir})) +} + +func cmdRunCtx(ctx context.Context, t *testing.T, script, dir string) (string, string, int) { + t.Helper() + return testutil.RunScriptCtx(ctx, t, script, dir, interp.AllowedPaths([]string{dir})) +} + +func writeFile(t *testing.T, dir, name, content string) string { + t.Helper() + require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte(content), 0644)) + return name +} + +const sampleText = "apple\nbanana\ncherry\ndate\nelderberry\n" + +// --- Basic matching --- + +func TestGrepBasicMatch(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", sampleText) + stdout, _, code := cmdRun(t, "grep banana file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "banana\n", stdout) +} + +func TestGrepNoMatch(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", sampleText) + stdout, _, code := cmdRun(t, "grep fig file.txt", dir) + assert.Equal(t, 1, code) + assert.Equal(t, "", stdout) +} + +func TestGrepMultipleMatches(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nbar\nfoo baz\nqux\n") + stdout, _, code := cmdRun(t, "grep foo file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "foo\nfoo baz\n", stdout) +} + +func TestGrepStdinPipe(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", sampleText) + stdout, _, code := cmdRun(t, "cat file.txt | grep cherry", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "cherry\n", stdout) +} + +func TestGrepStdinDash(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "src.txt", sampleText) + stdout, _, code := cmdRun(t, "grep apple - < src.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "apple\n", stdout) +} + +func TestGrepStdinImplicit(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "src.txt", sampleText) + stdout, _, code := cmdRun(t, "grep banana < src.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "banana\n", stdout) +} + +func TestGrepEmptyFile(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "empty.txt", "") + stdout, _, code := cmdRun(t, "grep anything empty.txt", dir) + assert.Equal(t, 1, code) + assert.Equal(t, "", stdout) +} + +// --- Regex matching --- + +func TestGrepBREDefault(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "abc\nabc123\n123\n") + stdout, _, code := cmdRun(t, "grep 'abc.*123' file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc123\n", stdout) +} + +func TestGrepBREGrouping(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "abcabc\nabc\nab\n") + // BRE: \(abc\) is a group, no repetition — matches lines containing "abc" + stdout, _, code := cmdRun(t, `grep '\(abc\)' file.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abcabc\nabc\n", stdout) +} + +func TestGrepERE(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "cat\nbat\nhat\ndog\n") + stdout, _, code := cmdRun(t, "grep -E '(c|b)at' file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "cat\nbat\n", stdout) +} + +func TestGrepEREAlternation(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nbar\nbaz\n") + stdout, _, code := cmdRun(t, "grep -E 'foo|baz' file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "foo\nbaz\n", stdout) +} + +func TestGrepFixedStrings(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "a.b\na*b\naxb\n") + stdout, _, code := cmdRun(t, "grep -F 'a.b' file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a.b\n", stdout) +} + +func TestGrepFixedStringsRegexChars(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo(bar)\nfoo bar\n") + stdout, _, code := cmdRun(t, "grep -F 'foo(bar)' file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "foo(bar)\n", stdout) +} + +// --- Case insensitive --- + +func TestGrepIgnoreCase(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "Hello\nhello\nHELLO\nworld\n") + stdout, _, code := cmdRun(t, "grep -i hello file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "Hello\nhello\nHELLO\n", stdout) +} + +// --- Invert match --- + +func TestGrepInvertMatch(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nbar\nbaz\n") + stdout, _, code := cmdRun(t, "grep -v foo file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "bar\nbaz\n", stdout) +} + +func TestGrepInvertMatchNoMatch(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nfoo\n") + stdout, _, code := cmdRun(t, "grep -v foo file.txt", dir) + assert.Equal(t, 1, code) + assert.Equal(t, "", stdout) +} + +// --- Count --- + +func TestGrepCount(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nbar\nfoo baz\nqux\n") + stdout, _, code := cmdRun(t, "grep -c foo file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "2\n", stdout) +} + +func TestGrepCountNoMatch(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nbar\n") + stdout, _, code := cmdRun(t, "grep -c xyz file.txt", dir) + assert.Equal(t, 1, code) + assert.Equal(t, "0\n", stdout) +} + +func TestGrepCountMultipleFiles(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "foo\nbar\n") + writeFile(t, dir, "b.txt", "foo\nfoo\n") + stdout, _, code := cmdRun(t, "grep -c foo a.txt b.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a.txt:1\nb.txt:2\n", stdout) +} + +// --- Files with/without matches --- + +func TestGrepFilesWithMatches(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "foo\n") + writeFile(t, dir, "b.txt", "bar\n") + writeFile(t, dir, "c.txt", "foo bar\n") + stdout, _, code := cmdRun(t, "grep -l foo a.txt b.txt c.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a.txt\nc.txt\n", stdout) +} + +func TestGrepFilesWithoutMatch(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "foo\n") + writeFile(t, dir, "b.txt", "bar\n") + writeFile(t, dir, "c.txt", "foo bar\n") + stdout, _, code := cmdRun(t, "grep -L foo a.txt b.txt c.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "b.txt\n", stdout) +} + +// --- Line number --- + +func TestGrepLineNumber(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "aaa\nbbb\nccc\nbbb\n") + stdout, _, code := cmdRun(t, "grep -n bbb file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "2:bbb\n4:bbb\n", stdout) +} + +// --- Filename control --- + +func TestGrepWithFilenameMultipleFiles(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "foo\n") + writeFile(t, dir, "b.txt", "foo\n") + stdout, _, code := cmdRun(t, "grep foo a.txt b.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a.txt:foo\nb.txt:foo\n", stdout) +} + +func TestGrepNoFilenameSingleFile(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "foo\n") + stdout, _, code := cmdRun(t, "grep foo a.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "foo\n", stdout) +} + +func TestGrepForceFilenameH(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "foo\n") + stdout, _, code := cmdRun(t, "grep -H foo a.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a.txt:foo\n", stdout) +} + +func TestGrepSuppressFilenameh(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "foo\n") + writeFile(t, dir, "b.txt", "foo\n") + stdout, _, code := cmdRun(t, "grep -h foo a.txt b.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "foo\nfoo\n", stdout) +} + +// --- Only matching --- + +func TestGrepOnlyMatching(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foobar\nbazfoo\n") + stdout, _, code := cmdRun(t, "grep -o foo file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "foo\nfoo\n", stdout) +} + +func TestGrepOnlyMatchingMultiple(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "abcabc\n") + stdout, _, code := cmdRun(t, "grep -o abc file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abc\nabc\n", stdout) +} + +// --- Quiet mode --- + +func TestGrepQuiet(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nbar\n") + stdout, _, code := cmdRun(t, "grep -q foo file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +func TestGrepQuietNoMatch(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nbar\n") + stdout, _, code := cmdRun(t, "grep -q xyz file.txt", dir) + assert.Equal(t, 1, code) + assert.Equal(t, "", stdout) +} + +// --- No messages --- + +func TestGrepNoMessages(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "grep -s foo nonexistent.txt", dir) + assert.Equal(t, 2, code) + assert.Equal(t, "", stderr) +} + +// --- Word regexp --- + +func TestGrepWordRegexp(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nfoobar\nbar foo baz\n") + stdout, _, code := cmdRun(t, "grep -w foo file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "foo\nbar foo baz\n", stdout) +} + +// --- Line regexp --- + +func TestGrepLineRegexp(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nfoo bar\nbar foo\n") + stdout, _, code := cmdRun(t, "grep -x foo file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "foo\n", stdout) +} + +// --- Multiple patterns with -e --- + +func TestGrepMultiplePatterns(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nbar\nbaz\n") + stdout, _, code := cmdRun(t, "grep -e foo -e baz file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "foo\nbaz\n", stdout) +} + +// --- Max count --- + +func TestGrepMaxCount(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nfoo\nfoo\nbar\n") + stdout, _, code := cmdRun(t, "grep -m 2 foo file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "foo\nfoo\n", stdout) +} + +func TestGrepMaxCountZero(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nbar\n") + stdout, _, code := cmdRun(t, "grep -m 0 foo file.txt", dir) + assert.Equal(t, 1, code) + assert.Equal(t, "", stdout) +} + +// --- Context lines --- + +func TestGrepAfterContext(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "aaa\nbbb\nccc\nddd\neee\n") + stdout, _, code := cmdRun(t, "grep -A 1 bbb file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "bbb\nccc\n", stdout) +} + +func TestGrepBeforeContext(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "aaa\nbbb\nccc\nddd\neee\n") + stdout, _, code := cmdRun(t, "grep -B 1 ccc file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "bbb\nccc\n", stdout) +} + +func TestGrepContextBoth(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "aaa\nbbb\nccc\nddd\neee\n") + stdout, _, code := cmdRun(t, "grep -C 1 ccc file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "bbb\nccc\nddd\n", stdout) +} + +func TestGrepContextGroupSeparator(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "aaa\nbbb\nccc\nddd\neee\nfff\nggg\n") + stdout, _, code := cmdRun(t, "grep -C 0 -e bbb -e fff file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "bbb\n--\nfff\n", stdout) +} + +func TestGrepContextOverlapping(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "aaa\nbbb\nccc\nddd\neee\n") + stdout, _, code := cmdRun(t, "grep -C 1 -e bbb -e ddd file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "aaa\nbbb\nccc\nddd\neee\n", stdout) +} + +func TestGrepAfterContextWithLineNumbers(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "aaa\nbbb\nccc\nddd\neee\n") + stdout, _, code := cmdRun(t, "grep -n -A 1 bbb file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "2:bbb\n3-ccc\n", stdout) +} + +func TestGrepBeforeContextWithLineNumbers(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "aaa\nbbb\nccc\nddd\neee\n") + stdout, _, code := cmdRun(t, "grep -n -B 1 ccc file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "2-bbb\n3:ccc\n", stdout) +} + +func TestGrepContextWithFilenames(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "aaa\nbbb\nccc\n") + stdout, _, code := cmdRun(t, "grep -H -A 1 aaa file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "file.txt:aaa\nfile.txt-bbb\n", stdout) +} + +// --- Combined flags --- + +func TestGrepLineNumberWithFilename(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "foo\nbar\n") + writeFile(t, dir, "b.txt", "baz\nfoo\n") + stdout, _, code := cmdRun(t, "grep -n foo a.txt b.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a.txt:1:foo\nb.txt:2:foo\n", stdout) +} + +func TestGrepCountInvert(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nbar\nbaz\n") + stdout, _, code := cmdRun(t, "grep -vc foo file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "2\n", stdout) +} + +// --- Error cases --- + +func TestGrepNoPattern(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "grep", dir) + assert.Equal(t, 2, code) + assert.Contains(t, stderr, "grep:") +} + +func TestGrepInvalidRegex(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\n") + _, stderr, code := cmdRun(t, "grep -E '[invalid' file.txt", dir) + assert.Equal(t, 2, code) + assert.Contains(t, stderr, "grep:") +} + +func TestGrepMissingFile(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "grep foo nonexistent.txt", dir) + assert.Equal(t, 2, code) + assert.Contains(t, stderr, "grep:") +} + +func TestGrepUnknownFlag(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "grep --recursive foo", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "grep:") +} + +// --- Exit code semantics --- + +func TestGrepExitCodeMatchMultiFile(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "foo\n") + writeFile(t, dir, "b.txt", "bar\n") + _, _, code := cmdRun(t, "grep foo a.txt b.txt", dir) + assert.Equal(t, 0, code) +} + +func TestGrepExitCodeNoMatchMultiFile(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "bar\n") + writeFile(t, dir, "b.txt", "baz\n") + _, _, code := cmdRun(t, "grep foo a.txt b.txt", dir) + assert.Equal(t, 1, code) +} + +// --- Context cancellation --- + +func TestGrepContextCancellation(t *testing.T) { + dir := t.TempDir() + var sb strings.Builder + for i := 0; i < 10000; i++ { + sb.WriteString("line\n") + } + writeFile(t, dir, "big.txt", sb.String()) + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + done := make(chan struct{}) + go func() { + cmdRunCtx(ctx, t, "grep line big.txt", dir) + close(done) + }() + select { + case <-done: + case <-time.After(5 * time.Second): + t.Fatal("grep did not complete within timeout") + } +} + +// --- Multiple files with some errors --- + +func TestGrepMultipleFilesSomeErrors(t *testing.T) { + // GNU grep returns 2 when errors occur, even if matches were found. + dir := t.TempDir() + writeFile(t, dir, "good.txt", "foo\n") + stdout, stderr, code := cmdRun(t, "grep foo good.txt nonexistent.txt", dir) + assert.Equal(t, 2, code) + assert.Contains(t, stdout, "foo") + assert.Contains(t, stderr, "grep:") +} + +func TestGrepQuietMultipleFilesSomeErrors(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "good.txt", "foo\n") + stdout, stderr, code := cmdRun(t, "grep -q foo good.txt nonexistent.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) + assert.Equal(t, "", stderr) +} + +// --- Pipe chain --- + +func TestGrepPipeChain(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "apple pie\nbanana split\ncherry pie\n") + stdout, _, code := cmdRun(t, "cat file.txt | grep pie | grep -v cherry", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "apple pie\n", stdout) +} + +// --- Double dash --- + +func TestGrepDoubleDash(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "-v\nfoo\n") + stdout, _, code := cmdRun(t, "grep -- -v file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "-v\n", stdout) +} + +// --- Empty pattern --- + +func TestGrepEmptyPattern(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nbar\n") + stdout, _, code := cmdRun(t, `grep '' file.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "foo\nbar\n", stdout) +} + +// --- -o -v combination (GNU compat) --- + +func TestGrepOnlyMatchingInvert(t *testing.T) { + // GNU grep: -o -v produces no output but exits 0. + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nbar\n") + stdout, _, code := cmdRun(t, "grep -o -v foo file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +// --- -o suppresses empty matches --- + +func TestGrepOnlyMatchingSuppressEmpty(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nbar\n") + stdout, _, code := cmdRun(t, "grep -o -E 'o*' file.txt", dir) + assert.Equal(t, 0, code) + // Only non-empty matches should be printed + assert.Equal(t, "oo\n", stdout) +} + +// --- Conflicting matchers --- + +func TestGrepConflictingMatchersEG(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "grep -E -G foo", dir) + assert.Equal(t, 2, code) + assert.Contains(t, stderr, "grep: conflicting matchers specified") +} + +func TestGrepConflictingMatchersFE(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, "grep -F -E foo", dir) + assert.Equal(t, 2, code) + assert.Contains(t, stderr, "grep: conflicting matchers specified") +} + +func TestGrepSingleMatcherGNotConflict(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\n") + stdout, _, code := cmdRun(t, "grep -G foo file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "foo\n", stdout) +} + +// --- -l/-L precedence and -c interaction --- + +func TestGrepFilesWithAndWithoutMatchLastFlagWins(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "foo\n") + writeFile(t, dir, "b.txt", "bar\n") + + stdout, _, code := cmdRun(t, "grep -l -L foo a.txt b.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "b.txt\n", stdout) + + stdout, _, code = cmdRun(t, "grep -L -l foo a.txt b.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a.txt\n", stdout) +} + +func TestGrepCountSuppressedByFileListModes(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "foo\nbar\n") + writeFile(t, dir, "b.txt", "bar\n") + + stdout, _, code := cmdRun(t, "grep -c -l foo a.txt b.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a.txt\n", stdout) + + stdout, _, code = cmdRun(t, "grep -c -L foo a.txt b.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "b.txt\n", stdout) +} + +// --- -h/-H last-option precedence --- + +func TestGrepFilenameHhLastFlagWins(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "a.txt", "foo\n") + writeFile(t, dir, "b.txt", "foo\n") + + // -h -H: last flag is -H, so show filenames + stdout, _, code := cmdRun(t, "grep -h -H foo a.txt b.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a.txt:foo\nb.txt:foo\n", stdout) + + // -H -h: last flag is -h, so suppress filenames + stdout, _, code = cmdRun(t, "grep -H -h foo a.txt b.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "foo\nfoo\n", stdout) +} + +// --- -o suppresses context --- + +func TestGrepOnlyMatchingSuppressesContext(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "aaa\nbbb\nccc\nddd\neee\n") + // -o -A1: GNU grep outputs only matched parts, no context + stdout, _, code := cmdRun(t, "grep -o -A 1 bbb file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "bbb\n", stdout) +} + +func TestGrepOnlyMatchingSuppressesBeforeContext(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "aaa\nbbb\nccc\nddd\neee\n") + stdout, _, code := cmdRun(t, "grep -o -B 1 ccc file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "ccc\n", stdout) +} + +// --- Newline-delimited patterns --- + +func TestGrepNewlineDelimitedPattern(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\nbar\nbaz\n") + // Pattern with embedded newline should match both + stdout, _, code := cmdRun(t, "grep -e $'foo\\nbar' file.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "foo\nbar\n", stdout) +} + +// --- Stdin display name --- + +func TestGrepStdinDisplayName(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "file.txt", "foo\n") + writeFile(t, dir, "src.txt", "foo\n") + stdout, _, code := cmdRun(t, "grep foo file.txt - < src.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "file.txt:foo\n(standard input):foo\n", stdout) +} diff --git a/interp/register_builtins.go b/interp/register_builtins.go index 772e6e40..91c3d728 100644 --- a/interp/register_builtins.go +++ b/interp/register_builtins.go @@ -16,6 +16,7 @@ import ( "github.com/DataDog/rshell/interp/builtins/echo" "github.com/DataDog/rshell/interp/builtins/exit" falsecmd "github.com/DataDog/rshell/interp/builtins/false" + "github.com/DataDog/rshell/interp/builtins/grep" "github.com/DataDog/rshell/interp/builtins/head" "github.com/DataDog/rshell/interp/builtins/ls" "github.com/DataDog/rshell/interp/builtins/tail" @@ -37,6 +38,7 @@ func registerBuiltins() { echo.Cmd, exit.Cmd, falsecmd.Cmd, + grep.Cmd, head.Cmd, ls.Cmd, tail.Cmd, diff --git a/tests/allowed_symbols_test.go b/tests/allowed_symbols_test.go index 60499f3e..e74d5d17 100644 --- a/tests/allowed_symbols_test.go +++ b/tests/allowed_symbols_test.go @@ -78,14 +78,28 @@ var builtinAllowedSymbols = []string{ "os.FileInfo", // os.O_RDONLY — read-only file flag constant; cannot open files by itself. "os.O_RDONLY", + // regexp.Compile — compiles a regular expression; pure function, no I/O. Uses RE2 engine (linear-time, no backtracking). + "regexp.Compile", + // regexp.QuoteMeta — escapes all special regex characters in a string; pure function, no I/O. + "regexp.QuoteMeta", + // regexp.Regexp — compiled regular expression type; no I/O side effects. All matching methods are linear-time (RE2). + "regexp.Regexp", // slices.Reverse — reverses a slice in-place; pure function, no I/O. "slices.Reverse", // slices.SortFunc — sorts a slice with a comparison function; pure function, no I/O. "slices.SortFunc", // strings.Builder — efficient string concatenation; pure in-memory buffer, no I/O. "strings.Builder", + // strings.Join — concatenates a slice of strings with a separator; pure function, no I/O. + "strings.Join", + // strings.Split — splits a string by separator into a slice; pure function, no I/O. + "strings.Split", // strconv.Atoi — string-to-int conversion; pure function, no I/O. "strconv.Atoi", + // strconv.ParseBool — string-to-bool conversion; pure function, no I/O. + "strconv.ParseBool", + // strconv.Itoa — int-to-string conversion; pure function, no I/O. + "strconv.Itoa", // strconv.ErrRange — sentinel error value for overflow; pure constant. "strconv.ErrRange", // strconv.NumError — error type for numeric conversion failures; pure type. @@ -98,8 +112,6 @@ var builtinAllowedSymbols = []string{ "strings.HasPrefix", // strings.IndexByte — finds byte in string; pure function, no I/O. "strings.IndexByte", - // strings.Split — splits string by separator; pure function, no I/O. - "strings.Split", // strings.TrimSpace — removes leading/trailing whitespace; pure function. "strings.TrimSpace", // io.WriteString — writes a string to a writer; no filesystem access, delegates to Write. diff --git a/tests/scenarios/cmd/grep/basic/empty_file.yaml b/tests/scenarios/cmd/grep/basic/empty_file.yaml new file mode 100644 index 00000000..bec11632 --- /dev/null +++ b/tests/scenarios/cmd/grep/basic/empty_file.yaml @@ -0,0 +1,14 @@ +description: grep on an empty file exits 1 with no output. +setup: + files: + - path: empty.txt + content: "" + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep foo empty.txt +expect: + stdout: "" + stderr: "" + exit_code: 1 diff --git a/tests/scenarios/cmd/grep/basic/empty_pattern.yaml b/tests/scenarios/cmd/grep/basic/empty_pattern.yaml new file mode 100644 index 00000000..88034b7f --- /dev/null +++ b/tests/scenarios/cmd/grep/basic/empty_pattern.yaml @@ -0,0 +1,18 @@ +description: grep with empty pattern matches every line. +setup: + files: + - path: file.txt + content: |+ + foo + bar + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep '' file.txt +expect: + stdout: |+ + foo + bar + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/basic/multiple_files.yaml b/tests/scenarios/cmd/grep/basic/multiple_files.yaml new file mode 100644 index 00000000..b932e8f0 --- /dev/null +++ b/tests/scenarios/cmd/grep/basic/multiple_files.yaml @@ -0,0 +1,21 @@ +description: grep prefixes output with filenames when multiple files are given. +setup: + files: + - path: a.txt + content: |+ + foo + chmod: 0644 + - path: b.txt + content: |+ + foo + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep foo a.txt b.txt +expect: + stdout: |+ + a.txt:foo + b.txt:foo + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/basic/multiple_matches.yaml b/tests/scenarios/cmd/grep/basic/multiple_matches.yaml new file mode 100644 index 00000000..446d7645 --- /dev/null +++ b/tests/scenarios/cmd/grep/basic/multiple_matches.yaml @@ -0,0 +1,20 @@ +description: grep prints all matching lines. +setup: + files: + - path: file.txt + content: |+ + foo + bar + foo baz + qux + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep foo file.txt +expect: + stdout: |+ + foo + foo baz + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/basic/no_match.yaml b/tests/scenarios/cmd/grep/basic/no_match.yaml new file mode 100644 index 00000000..00bd0f73 --- /dev/null +++ b/tests/scenarios/cmd/grep/basic/no_match.yaml @@ -0,0 +1,16 @@ +description: grep exits 1 when no lines match. +setup: + files: + - path: file.txt + content: |+ + foo + bar + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep xyz file.txt +expect: + stdout: "" + stderr: "" + exit_code: 1 diff --git a/tests/scenarios/cmd/grep/basic/pipe_chain.yaml b/tests/scenarios/cmd/grep/basic/pipe_chain.yaml new file mode 100644 index 00000000..478d2583 --- /dev/null +++ b/tests/scenarios/cmd/grep/basic/pipe_chain.yaml @@ -0,0 +1,18 @@ +description: grep can be chained in a pipeline. +setup: + files: + - path: file.txt + content: |+ + apple pie + banana split + cherry pie + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + cat file.txt | grep pie | grep -v cherry +expect: + stdout: |+ + apple pie + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/basic/piped_stdin.yaml b/tests/scenarios/cmd/grep/basic/piped_stdin.yaml new file mode 100644 index 00000000..efbd8685 --- /dev/null +++ b/tests/scenarios/cmd/grep/basic/piped_stdin.yaml @@ -0,0 +1,9 @@ +description: grep reads from stdin when piped. +input: + script: |+ + echo "hello world" | grep hello +expect: + stdout: |+ + hello world + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/basic/single_file.yaml b/tests/scenarios/cmd/grep/basic/single_file.yaml new file mode 100644 index 00000000..ec63c9fe --- /dev/null +++ b/tests/scenarios/cmd/grep/basic/single_file.yaml @@ -0,0 +1,18 @@ +description: grep matches a pattern in a single file. +setup: + files: + - path: file.txt + content: |+ + apple + banana + cherry + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep banana file.txt +expect: + stdout: |+ + banana + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/context/after_context.yaml b/tests/scenarios/cmd/grep/context/after_context.yaml new file mode 100644 index 00000000..7254be37 --- /dev/null +++ b/tests/scenarios/cmd/grep/context/after_context.yaml @@ -0,0 +1,21 @@ +description: grep -A prints lines after each match. +setup: + files: + - path: file.txt + content: |+ + aaa + bbb + ccc + ddd + eee + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -A 1 bbb file.txt +expect: + stdout: |+ + bbb + ccc + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/context/before_context.yaml b/tests/scenarios/cmd/grep/context/before_context.yaml new file mode 100644 index 00000000..b2cc1d39 --- /dev/null +++ b/tests/scenarios/cmd/grep/context/before_context.yaml @@ -0,0 +1,21 @@ +description: grep -B prints lines before each match. +setup: + files: + - path: file.txt + content: |+ + aaa + bbb + ccc + ddd + eee + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -B 1 ccc file.txt +expect: + stdout: |+ + bbb + ccc + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/context/context_both.yaml b/tests/scenarios/cmd/grep/context/context_both.yaml new file mode 100644 index 00000000..940b9b1a --- /dev/null +++ b/tests/scenarios/cmd/grep/context/context_both.yaml @@ -0,0 +1,22 @@ +description: grep -C prints lines before and after each match. +setup: + files: + - path: file.txt + content: |+ + aaa + bbb + ccc + ddd + eee + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -C 1 ccc file.txt +expect: + stdout: |+ + bbb + ccc + ddd + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/context/group_separator.yaml b/tests/scenarios/cmd/grep/context/group_separator.yaml new file mode 100644 index 00000000..66d271bf --- /dev/null +++ b/tests/scenarios/cmd/grep/context/group_separator.yaml @@ -0,0 +1,24 @@ +description: grep prints -- between non-contiguous match groups. +setup: + files: + - path: file.txt + content: |+ + aaa + bbb + ccc + ddd + eee + fff + ggg + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -C 0 -e bbb -e fff file.txt +expect: + stdout: |+ + bbb + -- + fff + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/errors/conflicting_matchers.yaml b/tests/scenarios/cmd/grep/errors/conflicting_matchers.yaml new file mode 100644 index 00000000..89448913 --- /dev/null +++ b/tests/scenarios/cmd/grep/errors/conflicting_matchers.yaml @@ -0,0 +1,9 @@ +skip_assert_against_bash: true +description: grep rejects conflicting matcher flags with exit code 2. +input: + script: |+ + grep -E -G foo +expect: + stdout: "" + stderr_contains: ["grep: conflicting matchers specified"] + exit_code: 2 diff --git a/tests/scenarios/cmd/grep/errors/invalid_regex.yaml b/tests/scenarios/cmd/grep/errors/invalid_regex.yaml new file mode 100644 index 00000000..560cdfd2 --- /dev/null +++ b/tests/scenarios/cmd/grep/errors/invalid_regex.yaml @@ -0,0 +1,16 @@ +skip_assert_against_bash: true +description: grep with invalid regex exits 2. +setup: + files: + - path: file.txt + content: |+ + foo + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -E '[invalid' file.txt +expect: + stdout: "" + stderr_contains: ["grep:"] + exit_code: 2 diff --git a/tests/scenarios/cmd/grep/errors/no_pattern.yaml b/tests/scenarios/cmd/grep/errors/no_pattern.yaml new file mode 100644 index 00000000..819a2426 --- /dev/null +++ b/tests/scenarios/cmd/grep/errors/no_pattern.yaml @@ -0,0 +1,9 @@ +skip_assert_against_bash: true +description: grep with no pattern exits 2. +input: + script: |+ + grep +expect: + stdout: "" + stderr_contains: ["grep:"] + exit_code: 2 diff --git a/tests/scenarios/cmd/grep/errors/nonexistent_file.yaml b/tests/scenarios/cmd/grep/errors/nonexistent_file.yaml new file mode 100644 index 00000000..3e2de845 --- /dev/null +++ b/tests/scenarios/cmd/grep/errors/nonexistent_file.yaml @@ -0,0 +1,12 @@ +skip_assert_against_bash: true +description: grep on a nonexistent file prints an error to stderr and exits with code 2. +input: + allowed_paths: ["$DIR"] + script: |+ + grep foo nonexistent.txt +expect: + stdout: "" + stderr_contains: + - "grep: nonexistent.txt:" + - "no such file or directory" + exit_code: 2 diff --git a/tests/scenarios/cmd/grep/flags/count.yaml b/tests/scenarios/cmd/grep/flags/count.yaml new file mode 100644 index 00000000..bee0ee38 --- /dev/null +++ b/tests/scenarios/cmd/grep/flags/count.yaml @@ -0,0 +1,19 @@ +description: grep -c prints a count of matching lines. +setup: + files: + - path: file.txt + content: |+ + foo + bar + foo baz + qux + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -c foo file.txt +expect: + stdout: |+ + 2 + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/flags/extended_regexp.yaml b/tests/scenarios/cmd/grep/flags/extended_regexp.yaml new file mode 100644 index 00000000..0e72ad81 --- /dev/null +++ b/tests/scenarios/cmd/grep/flags/extended_regexp.yaml @@ -0,0 +1,20 @@ +description: grep -E uses extended regular expressions. +setup: + files: + - path: file.txt + content: |+ + cat + bat + hat + dog + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -E '(c|b)at' file.txt +expect: + stdout: |+ + cat + bat + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/flags/files_with_matches.yaml b/tests/scenarios/cmd/grep/flags/files_with_matches.yaml new file mode 100644 index 00000000..e04dbe9d --- /dev/null +++ b/tests/scenarios/cmd/grep/flags/files_with_matches.yaml @@ -0,0 +1,25 @@ +description: grep -l prints only filenames of files with matches. +setup: + files: + - path: a.txt + content: |+ + foo + chmod: 0644 + - path: b.txt + content: |+ + bar + chmod: 0644 + - path: c.txt + content: |+ + foo bar + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -l foo a.txt b.txt c.txt +expect: + stdout: |+ + a.txt + c.txt + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/flags/files_without_match.yaml b/tests/scenarios/cmd/grep/flags/files_without_match.yaml new file mode 100644 index 00000000..37de54e7 --- /dev/null +++ b/tests/scenarios/cmd/grep/flags/files_without_match.yaml @@ -0,0 +1,20 @@ +description: grep -L prints only filenames of files without matches. +setup: + files: + - path: a.txt + content: |+ + foo + chmod: 0644 + - path: b.txt + content: |+ + bar + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -L foo a.txt b.txt +expect: + stdout: |+ + b.txt + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/flags/fixed_strings.yaml b/tests/scenarios/cmd/grep/flags/fixed_strings.yaml new file mode 100644 index 00000000..9d7ada01 --- /dev/null +++ b/tests/scenarios/cmd/grep/flags/fixed_strings.yaml @@ -0,0 +1,18 @@ +description: grep -F treats pattern as a literal string. +setup: + files: + - path: file.txt + content: |+ + a.b + a*b + axb + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -F 'a.b' file.txt +expect: + stdout: |+ + a.b + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/flags/ignore_case.yaml b/tests/scenarios/cmd/grep/flags/ignore_case.yaml new file mode 100644 index 00000000..ec0b878e --- /dev/null +++ b/tests/scenarios/cmd/grep/flags/ignore_case.yaml @@ -0,0 +1,21 @@ +description: grep -i matches case-insensitively. +setup: + files: + - path: file.txt + content: |+ + Hello + hello + HELLO + world + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -i hello file.txt +expect: + stdout: |+ + Hello + hello + HELLO + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/flags/invert_match.yaml b/tests/scenarios/cmd/grep/flags/invert_match.yaml new file mode 100644 index 00000000..31054035 --- /dev/null +++ b/tests/scenarios/cmd/grep/flags/invert_match.yaml @@ -0,0 +1,19 @@ +description: grep -v selects non-matching lines. +setup: + files: + - path: file.txt + content: |+ + foo + bar + baz + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -v foo file.txt +expect: + stdout: |+ + bar + baz + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/flags/line_number.yaml b/tests/scenarios/cmd/grep/flags/line_number.yaml new file mode 100644 index 00000000..4c35b212 --- /dev/null +++ b/tests/scenarios/cmd/grep/flags/line_number.yaml @@ -0,0 +1,20 @@ +description: grep -n prefixes output with line numbers. +setup: + files: + - path: file.txt + content: |+ + aaa + bbb + ccc + bbb + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -n bbb file.txt +expect: + stdout: |+ + 2:bbb + 4:bbb + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/flags/line_regexp.yaml b/tests/scenarios/cmd/grep/flags/line_regexp.yaml new file mode 100644 index 00000000..91205152 --- /dev/null +++ b/tests/scenarios/cmd/grep/flags/line_regexp.yaml @@ -0,0 +1,18 @@ +description: grep -x matches only whole lines. +setup: + files: + - path: file.txt + content: |+ + foo + foo bar + bar foo + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -x foo file.txt +expect: + stdout: |+ + foo + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/flags/max_count.yaml b/tests/scenarios/cmd/grep/flags/max_count.yaml new file mode 100644 index 00000000..48c5792d --- /dev/null +++ b/tests/scenarios/cmd/grep/flags/max_count.yaml @@ -0,0 +1,20 @@ +description: grep -m stops after NUM matching lines. +setup: + files: + - path: file.txt + content: |+ + foo + foo + foo + bar + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -m 2 foo file.txt +expect: + stdout: |+ + foo + foo + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/flags/multiple_patterns.yaml b/tests/scenarios/cmd/grep/flags/multiple_patterns.yaml new file mode 100644 index 00000000..921bc4db --- /dev/null +++ b/tests/scenarios/cmd/grep/flags/multiple_patterns.yaml @@ -0,0 +1,19 @@ +description: grep -e allows multiple patterns. +setup: + files: + - path: file.txt + content: |+ + foo + bar + baz + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -e foo -e baz file.txt +expect: + stdout: |+ + foo + baz + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/flags/no_filename.yaml b/tests/scenarios/cmd/grep/flags/no_filename.yaml new file mode 100644 index 00000000..940f817c --- /dev/null +++ b/tests/scenarios/cmd/grep/flags/no_filename.yaml @@ -0,0 +1,21 @@ +description: grep -h suppresses filename prefix even for multiple files. +setup: + files: + - path: a.txt + content: |+ + foo + chmod: 0644 + - path: b.txt + content: |+ + foo + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -h foo a.txt b.txt +expect: + stdout: |+ + foo + foo + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/flags/only_matching.yaml b/tests/scenarios/cmd/grep/flags/only_matching.yaml new file mode 100644 index 00000000..a88f1953 --- /dev/null +++ b/tests/scenarios/cmd/grep/flags/only_matching.yaml @@ -0,0 +1,18 @@ +description: grep -o prints only matched parts. +setup: + files: + - path: file.txt + content: |+ + foobar + bazfoo + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -o foo file.txt +expect: + stdout: |+ + foo + foo + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/flags/quiet.yaml b/tests/scenarios/cmd/grep/flags/quiet.yaml new file mode 100644 index 00000000..3c66dde8 --- /dev/null +++ b/tests/scenarios/cmd/grep/flags/quiet.yaml @@ -0,0 +1,16 @@ +description: grep -q produces no output but exits 0 on match. +setup: + files: + - path: file.txt + content: |+ + foo + bar + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -q foo file.txt +expect: + stdout: "" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/flags/with_filename.yaml b/tests/scenarios/cmd/grep/flags/with_filename.yaml new file mode 100644 index 00000000..8cb3c6ba --- /dev/null +++ b/tests/scenarios/cmd/grep/flags/with_filename.yaml @@ -0,0 +1,16 @@ +description: grep -H forces filename prefix even for a single file. +setup: + files: + - path: file.txt + content: |+ + foo + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -H foo file.txt +expect: + stdout: |+ + file.txt:foo + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/flags/word_regexp.yaml b/tests/scenarios/cmd/grep/flags/word_regexp.yaml new file mode 100644 index 00000000..58fed403 --- /dev/null +++ b/tests/scenarios/cmd/grep/flags/word_regexp.yaml @@ -0,0 +1,19 @@ +description: grep -w matches only whole words. +setup: + files: + - path: file.txt + content: |+ + foo + foobar + bar foo baz + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -w foo file.txt +expect: + stdout: |+ + foo + bar foo baz + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/hardening/double_dash_separator.yaml b/tests/scenarios/cmd/grep/hardening/double_dash_separator.yaml new file mode 100644 index 00000000..7878cd74 --- /dev/null +++ b/tests/scenarios/cmd/grep/hardening/double_dash_separator.yaml @@ -0,0 +1,17 @@ +description: grep -- separates flags from pattern. +setup: + files: + - path: file.txt + content: |+ + -v + foo + chmod: 0644 +input: + allowed_paths: ["$DIR"] + script: |+ + grep -- -v file.txt +expect: + stdout: |+ + -v + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/grep/hardening/unknown_flag.yaml b/tests/scenarios/cmd/grep/hardening/unknown_flag.yaml new file mode 100644 index 00000000..5f48c478 --- /dev/null +++ b/tests/scenarios/cmd/grep/hardening/unknown_flag.yaml @@ -0,0 +1,9 @@ +description: grep rejects unknown flags with exit code 1. +skip_assert_against_bash: true +input: + script: |+ + grep --recursive foo +expect: + stdout: "" + stderr_contains: ["grep:"] + exit_code: 1 diff --git a/tests/scenarios/cmd/unknown_cmd/common_progs/grep.yaml b/tests/scenarios/cmd/unknown_cmd/common_progs/grep.yaml deleted file mode 100644 index 8b8900e9..00000000 --- a/tests/scenarios/cmd/unknown_cmd/common_progs/grep.yaml +++ /dev/null @@ -1,10 +0,0 @@ -skip_assert_against_bash: true -description: The grep command is not a builtin and is rejected as unknown. -input: - script: |+ - grep pattern file.txt -expect: - stdout: "" - stderr: |+ - grep: command not found - exit_code: 127