diff --git a/SHELL_FEATURES.md b/SHELL_FEATURES.md index 40746388..5fd6c4b4 100644 --- a/SHELL_FEATURES.md +++ b/SHELL_FEATURES.md @@ -13,7 +13,7 @@ Blocked features are rejected before execution with exit code 2. - ✅ `exit [N]` — exit the shell with status N (default 0) - ✅ `false` — return exit code 1 - ✅ `grep [-EFGivclLnHhoqsxw] [-e PATTERN] [-m NUM] [-A NUM] [-B NUM] [-C NUM] PATTERN [FILE]...` — print lines that match patterns; uses RE2 regex engine (linear-time, no backtracking) -- ✅ `head [-n N|-c N] [-q|-v] [-z] [FILE]...` — output the first part of files (default: first 10 lines) +- ✅ `head [-n N|-c N] [-q|-v] [FILE]...` — output the first part of files (default: first 10 lines); `-z`/`--zero-terminated` and `--follow` are rejected - ✅ `ls [-1aAdFhlpRrSt] [FILE]...` — list directory contents - ✅ `strings [-a] [-n MIN] [-t o|d|x] [-o] [-f] [-s SEP] [FILE]...` — print printable character sequences in files (default min length 4); offsets via `-t`/`-o`; filename prefix via `-f`; custom separator via `-s` - ✅ `tail [-n N|-c N] [-q|-v] [-z] [FILE]...` — output the last part of files (default: last 10 lines); supports `+N` offset mode; `-f`/`--follow` is rejected diff --git a/interp/builtins/head/builtin_head_pentest_test.go b/interp/builtins/head/builtin_head_pentest_test.go index 1221c711..7f8548b3 100644 --- a/interp/builtins/head/builtin_head_pentest_test.go +++ b/interp/builtins/head/builtin_head_pentest_test.go @@ -480,19 +480,16 @@ func TestCmdPentestZeroTerminatedFlagRejected(t *testing.T) { } func TestCmdPentestMultipleDashes(t *testing.T) { - // Two '-' args: both read from the same stdin fd. - // The first '-' uses a bufio.Scanner which reads ahead in 4096-byte - // chunks. For a small file, the scanner consumes the entire stdin in one - // Read call, leaving nothing for the second '-'. - // This is Safer-than-GNU: our scanner-buffered implementation exhausts - // stdin after the first '-'. GNU head uses lower-level I/O that restores - // the fd position, but we are read-only and do not lseek. + // Two '-' args with seekable stdin (redirected from a file). + // readLines wraps the scanner in a byteCountReader and seeks back any + // bytes the scanner buffered but did not emit. This allows the second '-' + // to read from the correct stream position, matching GNU head behaviour. dir := t.TempDir() require.NoError(t, os.WriteFile(filepath.Join(dir, "src.txt"), []byte("alpha\nbeta\ngamma\n"), 0644)) stdout, _, code := headRun(t, "head -q -n 1 - - < src.txt", dir) assert.Equal(t, 0, code) - // First '-' outputs "alpha\n"; second '-' sees empty stdin (buffered ahead). - assert.Equal(t, "alpha\n", stdout) + // First '-' outputs "alpha\n"; second '-' reads from the seeked-back position → "beta\n". + assert.Equal(t, "alpha\nbeta\n", stdout) } func TestCmdPentestFlagViaExpansion(t *testing.T) { diff --git a/interp/builtins/head/head.go b/interp/builtins/head/head.go index 103a57d6..8ab22a7a 100644 --- a/interp/builtins/head/head.go +++ b/interp/builtins/head/head.go @@ -76,9 +76,19 @@ const MaxLineBytes = 1 << 20 // 1 MiB // framework calls Parse and passes positional arguments to the handler. func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { help := fs.BoolP("help", "h", false, "print usage and exit") - quiet := fs.BoolP("quiet", "q", false, "never print file name headers") - _ = fs.Bool("silent", false, "alias for --quiet") - verbose := fs.BoolP("verbose", "v", false, "always print file name headers") + + // quietFlag, silentFlag, and verboseFlag share a sequence counter so that + // after parsing we can determine which of -q/--quiet/--silent/-v/--verbose + // appeared last on the command line — the last flag wins, matching GNU head. + // NoOptDefVal is set to "true" so pflag treats these as boolean flags that + // can be given without a "=value" argument (e.g. "--quiet" not "--quiet=true"). + var headerSeq int + quietFlag := newBoolSeqFlag(&headerSeq) + silentFlag := newBoolSeqFlag(&headerSeq) + verboseFlag := newBoolSeqFlag(&headerSeq) + fs.VarPF(quietFlag, "quiet", "q", "never print file name headers").NoOptDefVal = "true" + fs.VarPF(silentFlag, "silent", "", "alias for --quiet").NoOptDefVal = "true" + fs.VarPF(verboseFlag, "verbose", "v", "always print file name headers").NoOptDefVal = "true" // linesFlag and bytesFlag share a sequence counter so that after parsing // we can compare their pos fields to determine which appeared last on the @@ -100,9 +110,20 @@ func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { return builtins.Result{} } - // --silent is an alias for --quiet. - if fs.Changed("silent") { - *quiet = true + // Validate all explicitly set mode flags upfront. GNU head rejects + // invalid values even for flags that are overridden by a later mode + // flag on the command line (e.g. "head -n xyz -c 1" fails). + if linesFlag.pos > 0 { + if _, ok := parseCount(linesFlag.val); !ok { + callCtx.Errf("head: invalid number of lines: %q\n", linesFlag.val) + return builtins.Result{Code: 1} + } + } + if bytesFlag.pos > 0 { + if _, ok := parseCount(bytesFlag.val); !ok { + callCtx.Errf("head: invalid number of bytes: %q\n", bytesFlag.val) + return builtins.Result{Code: 1} + } } // Bytes mode wins if -c/--bytes was parsed after -n/--lines. When neither @@ -110,7 +131,8 @@ func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { // the other stays 0, so the comparison selects correctly. useBytesMode := bytesFlag.pos > linesFlag.pos - // Parse the count for the chosen mode. + // Parse the count for the chosen mode (handles the default "10" for + // linesFlag when neither flag was explicitly given). countStr := linesFlag.val modeLabel := "lines" if useBytesMode { @@ -129,19 +151,29 @@ func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { files = []string{"-"} } - // Header printing: on by default for multiple files, suppressed by -q, - // forced for a single file by -v. - printHeaders := len(files) > 1 || *verbose - if *quiet { - printHeaders = false + // Header printing: the last of -q/--quiet/--silent and -v/--verbose wins + // (matching GNU head). --silent is an alias for --quiet and shares the + // same sequence counter. If none are specified, print headers only when + // multiple files are given. + lastQuietPos := max(quietFlag.pos, silentFlag.pos) + var printHeaders bool + switch { + case verboseFlag.pos > lastQuietPos: + printHeaders = true // -v was specified last + case lastQuietPos > verboseFlag.pos: + printHeaders = false // -q or --silent was specified last + default: + printHeaders = len(files) > 1 // neither: default multi-file behaviour } var failed bool - for i, file := range files { + var printedHeader bool + for _, file := range files { if ctx.Err() != nil { break } - if err := processFile(ctx, callCtx, file, i, printHeaders, useBytesMode, count); err != nil { + hp, err := processFile(ctx, callCtx, file, printedHeader, printHeaders, useBytesMode, count) + if err != nil { name := file if file == "-" { name = "standard input" @@ -149,6 +181,14 @@ func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { callCtx.Errf("head: %s: %s\n", name, callCtx.PortableErr(err)) failed = true } + if hp { + // A header was printed for this file; subsequent files should + // emit a blank-line separator before their header. This is set + // regardless of whether a read error occurred so that the + // separator is not lost when a file opens successfully (header + // printed) but reading later fails. + printedHeader = true + } } if failed { @@ -159,65 +199,121 @@ func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { } // processFile opens and processes one file (or stdin for "-"). -func processFile(ctx context.Context, callCtx *builtins.CallContext, file string, idx int, printHeaders, useBytesMode bool, count int64) error { - var rc io.ReadCloser +// prevHeaderPrinted reports whether a header was already emitted for a previous +// file; when true, a blank-line separator is printed before this file's header. +// It returns (headerPrinted, err): headerPrinted is true whenever a header line +// was actually written to output, regardless of whether a read error follows. +func processFile(ctx context.Context, callCtx *builtins.CallContext, file string, prevHeaderPrinted bool, printHeaders, useBytesMode bool, count int64) (headerPrinted bool, err error) { name := file if file == "-" { name = "standard input" // Print the header before the nil-stdin guard so that -v always // emits a header for stdin even when no input stream is present. if printHeaders { - if idx > 0 { + if prevHeaderPrinted { callCtx.Out("\n") } callCtx.Outf("==> %s <==\n", name) + headerPrinted = true } if callCtx.Stdin == nil { - return nil + return headerPrinted, nil } - rc = io.NopCloser(callCtx.Stdin) - } else { - f, err := callCtx.OpenFile(ctx, file, os.O_RDONLY, 0) - if err != nil { - return err + // Pass callCtx.Stdin directly (not wrapped in NopCloser) so that + // readLines can seek back buffered bytes when stdin is seekable + // (e.g. redirected from a file). This allows a second '-' operand + // to continue reading from the correct stream position, matching + // GNU head behaviour. + r := callCtx.Stdin + if useBytesMode { + return headerPrinted, readBytes(ctx, callCtx, r, count) } - defer f.Close() - rc = f - // Header is printed after a successful open so that a file that - // cannot be opened produces no header (matches GNU head behaviour). - if printHeaders { - if idx > 0 { - callCtx.Out("\n") - } - callCtx.Outf("==> %s <==\n", name) + return headerPrinted, readLines(ctx, callCtx, r, count) + } + f, ferr := callCtx.OpenFile(ctx, file, os.O_RDONLY, 0) + if ferr != nil { + return false, ferr + } + defer f.Close() + // Header is printed after a successful open so that a file that + // cannot be opened produces no header (matches GNU head behaviour). + if printHeaders { + if prevHeaderPrinted { + callCtx.Out("\n") } + callCtx.Outf("==> %s <==\n", name) + headerPrinted = true } - if useBytesMode { - return readBytes(ctx, callCtx, rc, count) + return headerPrinted, readBytes(ctx, callCtx, f, count) } - return readLines(ctx, callCtx, rc, count) + return headerPrinted, readLines(ctx, callCtx, f, count) } // readLines writes the first count lines of r to callCtx.Stdout, preserving // line endings exactly (including a missing final newline). +// +// When r implements io.ReadSeeker (e.g. stdin redirected from a file), readLines +// seeks back any bytes the internal scanner read ahead but did not include in +// the N-line output. This allows a subsequent read on the same stream (e.g. a +// second '-' operand) to start from the correct position, matching GNU head. func readLines(ctx context.Context, callCtx *builtins.CallContext, r io.Reader, count int64) error { - sc := bufio.NewScanner(r) + // Wrap r in a byte counter so we can compute how many bytes the scanner + // consumed from the underlying source — needed for the seek-back below. + cr := &byteCountReader{r: r} + sc := bufio.NewScanner(cr) buf := make([]byte, 4096) sc.Buffer(buf, MaxLineBytes) sc.Split(scanLinesPreservingNewline) var emitted int64 + var bytesEmitted int64 for emitted < count && sc.Scan() { if ctx.Err() != nil { return ctx.Err() } - if _, err := callCtx.Stdout.Write(sc.Bytes()); err != nil { + token := sc.Bytes() + if _, err := callCtx.Stdout.Write(token); err != nil { return err } emitted++ + bytesEmitted += int64(len(token)) + } + if err := sc.Err(); err != nil { + return err + } + // If the underlying reader supports seeking, rewind any bytes the scanner + // read ahead from the source but did not include in the N-line output. + // excess = bytes pulled from r by the scanner − bytes returned as tokens. + // + // *os.File always satisfies io.ReadSeeker but Seek fails at runtime for + // non-seekable fds (pipes, sockets). We probe with a no-op Seek(0, Current) + // first; if it fails the stream is non-seekable and we skip the rewind + // (pipe read-ahead is accepted as consumed, matching OS behaviour). + if rs, ok := r.(io.ReadSeeker); ok { + if excess := cr.total - bytesEmitted; excess > 0 { + if _, err := rs.Seek(0, io.SeekCurrent); err == nil { + if _, err := rs.Seek(-excess, io.SeekCurrent); err != nil { + return err + } + } + } } - return sc.Err() + return nil +} + +// byteCountReader is an io.Reader wrapper that counts the total bytes read +// from the underlying reader. Used by readLines to calculate scanner read-ahead +// so that seekable streams can be rewound to the correct position. +type byteCountReader struct { + r io.Reader + total int64 +} + +func (c *byteCountReader) Read(p []byte) (int, error) { + n, err := c.r.Read(p) + c.total += int64(n) + return n, err } // readBytes writes the first count bytes of r to callCtx.Stdout. It reads @@ -294,6 +390,41 @@ func (f *modeFlag) Set(s string) error { } func (f *modeFlag) Type() string { return "string" } +// boolSeqFlag is a pflag.Value implementation for boolean flags that share a +// sequence counter with other flags. After pflag.Parse, comparing the pos +// fields of flags that share a counter reveals which was specified last. +// This is used to implement last-flag-wins semantics for -q/--quiet/--silent +// versus -v/--verbose. +type boolSeqFlag struct { + seq *int + pos int +} + +func newBoolSeqFlag(seq *int) *boolSeqFlag { + return &boolSeqFlag{seq: seq} +} + +func (f *boolSeqFlag) String() string { return "false" } +func (f *boolSeqFlag) Set(s string) error { + // GNU head rejects --quiet= and --verbose= with an error. + // With NoOptDefVal = "true", pflag calls Set("true") for bare --quiet and + // Set("") when an explicit = is given. We accept only "true" + // (the NoOptDefVal) and reject any other value to match GNU head behaviour. + if s != "true" { + return errors.New("option doesn't allow an argument") + } + *f.seq++ + f.pos = *f.seq + return nil +} +func (f *boolSeqFlag) Type() string { return "bool" } + +// Note: pflag does NOT use an IsBoolFlag() method for flags registered via +// VarP/VarPF. The mechanism that makes these flags accept no value argument +// (e.g. "--quiet" rather than "--quiet=true") is NoOptDefVal = "true", set +// at registration time. IsBoolFlag() is intentionally absent here to avoid +// misleading future readers into thinking it is the active mechanism. + // scanLinesPreservingNewline is a bufio.SplitFunc that includes the line // terminator (\n) in the returned token. Unlike bufio.ScanLines, it does not // strip \r\n or \n, so the caller reproduces the exact file content. If the diff --git a/tests/allowed_symbols_test.go b/tests/allowed_symbols_test.go index e74d5d17..59d56f88 100644 --- a/tests/allowed_symbols_test.go +++ b/tests/allowed_symbols_test.go @@ -68,6 +68,10 @@ var builtinAllowedSymbols = []string{ "io.ReadCloser", // io.Reader — interface type; no side effects. "io.Reader", + // io.ReadSeeker — interface type combining Reader and Seeker; no side effects. + "io.ReadSeeker", + // io.SeekCurrent — whence constant for Seek(offset, SeekCurrent); pure constant. + "io.SeekCurrent", // math.MaxInt32 — integer constant; no side effects. "math.MaxInt32", // math.MaxInt64 — integer constant; no side effects. diff --git a/tests/scenarios/cmd/head/bytes/last_flag_wins_bytes.yaml b/tests/scenarios/cmd/head/bytes/last_flag_wins_bytes.yaml index 2a50d09d..1b9d1cd0 100644 --- a/tests/scenarios/cmd/head/bytes/last_flag_wins_bytes.yaml +++ b/tests/scenarios/cmd/head/bytes/last_flag_wins_bytes.yaml @@ -1,6 +1,5 @@ # Derived from GNU coreutils head behavior: last of -n/-c wins description: When -n and -c are both given, the last flag wins; here -c wins. -skip_assert_against_bash: true setup: files: - path: file.txt diff --git a/tests/scenarios/cmd/head/bytes/last_flag_wins_lines.yaml b/tests/scenarios/cmd/head/bytes/last_flag_wins_lines.yaml index 50a00ed3..c5f1fe02 100644 --- a/tests/scenarios/cmd/head/bytes/last_flag_wins_lines.yaml +++ b/tests/scenarios/cmd/head/bytes/last_flag_wins_lines.yaml @@ -1,6 +1,5 @@ # Derived from GNU coreutils head behavior: last of -n/-c wins description: When -c and -n are both given, the last flag wins; here -n wins. -skip_assert_against_bash: true setup: files: - path: file.txt diff --git a/tests/scenarios/cmd/head/errors/boolean_flag_with_argument.yaml b/tests/scenarios/cmd/head/errors/boolean_flag_with_argument.yaml new file mode 100644 index 00000000..5c090f48 --- /dev/null +++ b/tests/scenarios/cmd/head/errors/boolean_flag_with_argument.yaml @@ -0,0 +1,16 @@ +# GNU head rejects --quiet= and --verbose= (options don't allow arguments). +# Our error message differs from GNU head so bash comparison is skipped. +description: head exits 1 when --quiet or --verbose are given an explicit argument. +skip_assert_against_bash: true +setup: + files: + - path: file.txt + content: "hello\n" +input: + allowed_paths: ["$DIR"] + script: |+ + head --quiet=false file.txt +expect: + stdout: "" + stderr_contains: ["head:"] + exit_code: 1 diff --git a/tests/scenarios/cmd/head/errors/first_fails_second_succeeds.yaml b/tests/scenarios/cmd/head/errors/first_fails_second_succeeds.yaml new file mode 100644 index 00000000..0bca6347 --- /dev/null +++ b/tests/scenarios/cmd/head/errors/first_fails_second_succeeds.yaml @@ -0,0 +1,13 @@ +description: head continues after the first file fails to open; no spurious blank line before the second file's header. +setup: + files: + - path: good.txt + content: "hello\n" +input: + allowed_paths: ["$DIR"] + script: |+ + head missing.txt good.txt +expect: + stdout: "==> good.txt <==\nhello\n" + stderr_contains: ["missing.txt"] + exit_code: 1 diff --git a/tests/scenarios/cmd/head/errors/invalid_losing_flag.yaml b/tests/scenarios/cmd/head/errors/invalid_losing_flag.yaml new file mode 100644 index 00000000..b2e5992c --- /dev/null +++ b/tests/scenarios/cmd/head/errors/invalid_losing_flag.yaml @@ -0,0 +1,13 @@ +description: head validates all explicitly-given mode flags, not just the winner; -n xyz -c 1 fails even though -c wins. +setup: + files: + - path: file.txt + content: "hello\n" +input: + allowed_paths: ["$DIR"] + script: |+ + head -n xyz -c 1 file.txt +expect: + stdout: "" + stderr_contains: ["head:"] + exit_code: 1 diff --git a/tests/scenarios/cmd/head/errors/negative_bytes_count.yaml b/tests/scenarios/cmd/head/errors/negative_bytes_count.yaml new file mode 100644 index 00000000..fa67bf6d --- /dev/null +++ b/tests/scenarios/cmd/head/errors/negative_bytes_count.yaml @@ -0,0 +1,15 @@ +# We intentionally reject negative byte counts (we do not implement -c -N elide-tail mode) +description: head exits 1 when -c is given a negative count. +skip_assert_against_bash: true +setup: + files: + - path: file.txt + content: "hello\n" +input: + allowed_paths: ["$DIR"] + script: |+ + head -c -1 file.txt +expect: + stdout: "" + stderr_contains: ["head:"] + exit_code: 1 diff --git a/tests/scenarios/cmd/head/errors/unknown_flag.yaml b/tests/scenarios/cmd/head/errors/unknown_flag.yaml index 27469df7..de866172 100644 --- a/tests/scenarios/cmd/head/errors/unknown_flag.yaml +++ b/tests/scenarios/cmd/head/errors/unknown_flag.yaml @@ -1,6 +1,5 @@ # Per RULES.md: every dangerous/unsupported flag must have a test verifying rejection description: head exits 1 with an error when given an unknown flag. -skip_assert_against_bash: true setup: files: - path: file.txt diff --git a/tests/scenarios/cmd/head/hardening/double_dash_separator.yaml b/tests/scenarios/cmd/head/hardening/double_dash_separator.yaml index d0a997a5..00a7020c 100644 --- a/tests/scenarios/cmd/head/hardening/double_dash_separator.yaml +++ b/tests/scenarios/cmd/head/hardening/double_dash_separator.yaml @@ -1,6 +1,5 @@ # Standard POSIX -- end-of-flags behavior description: head treats arguments after -- as file names, even if they look like flags. -skip_assert_against_bash: true setup: files: - path: -n diff --git a/tests/scenarios/cmd/head/hardening/large_count_clamped.yaml b/tests/scenarios/cmd/head/hardening/large_count_clamped.yaml index 00d37f7a..e6c05ae3 100644 --- a/tests/scenarios/cmd/head/hardening/large_count_clamped.yaml +++ b/tests/scenarios/cmd/head/hardening/large_count_clamped.yaml @@ -1,6 +1,5 @@ # Per RULES.md: count values must be clamped to prevent allocation attacks description: head accepts very large -n counts by clamping; does not OOM on small files. -skip_assert_against_bash: true setup: files: - path: small.txt diff --git a/tests/scenarios/cmd/head/headers/last_flag_wins_quiet.yaml b/tests/scenarios/cmd/head/headers/last_flag_wins_quiet.yaml new file mode 100644 index 00000000..4d83fd98 --- /dev/null +++ b/tests/scenarios/cmd/head/headers/last_flag_wins_quiet.yaml @@ -0,0 +1,13 @@ +description: When -v and -q are both given, the last flag wins; here -q comes last so headers are suppressed. +setup: + files: + - path: file.txt + content: "hello\n" +input: + allowed_paths: ["$DIR"] + script: |+ + head -v -q file.txt +expect: + stdout: "hello\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/head/headers/last_flag_wins_verbose.yaml b/tests/scenarios/cmd/head/headers/last_flag_wins_verbose.yaml new file mode 100644 index 00000000..64297a10 --- /dev/null +++ b/tests/scenarios/cmd/head/headers/last_flag_wins_verbose.yaml @@ -0,0 +1,13 @@ +description: When -q and -v are both given, the last flag wins; here -v comes last so headers are printed. +setup: + files: + - path: file.txt + content: "hello\n" +input: + allowed_paths: ["$DIR"] + script: |+ + head -q -v file.txt +expect: + stdout: "==> file.txt <==\nhello\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/head/stdin/repeated_dash_seekable.yaml b/tests/scenarios/cmd/head/stdin/repeated_dash_seekable.yaml new file mode 100644 index 00000000..20570afb --- /dev/null +++ b/tests/scenarios/cmd/head/stdin/repeated_dash_seekable.yaml @@ -0,0 +1,13 @@ +description: When stdin is redirected from a file (seekable), repeated - operands each read independently from the current stream position. +setup: + files: + - path: file.txt + content: "alpha\nbeta\ngamma\n" +input: + allowed_paths: ["$DIR"] + script: |+ + head -q -n 1 - - < file.txt +expect: + stdout: "alpha\nbeta\n" + stderr: "" + exit_code: 0