diff --git a/SHELL_FEATURES.md b/SHELL_FEATURES.md index 2eb5d070..29d0fc23 100644 --- a/SHELL_FEATURES.md +++ b/SHELL_FEATURES.md @@ -16,6 +16,7 @@ Blocked features are rejected before execution with exit code 2. - ✅ `head [-n N|-c N] [-q|-v] [FILE]...` — output the first part of files (default: first 10 lines); `-z`/`--zero-terminated` and `--follow` are rejected - ✅ `ls [-1aAdFhlpRrSt] [FILE]...` — list directory contents - ✅ `printf FORMAT [ARGUMENT]...` — format and print data to stdout; supports `%s`, `%b`, `%c`, `%d`, `%i`, `%o`, `%u`, `%x`, `%X`, `%e`, `%E`, `%f`, `%F`, `%g`, `%G`, `%%`; format reuse for excess arguments; `%n` rejected (security risk); `-v` rejected +- ✅ `sed [-n] [-e SCRIPT] [-E|-r] [SCRIPT] [FILE]...` — stream editor for filtering and transforming text; uses RE2 regex engine; `-i`/`-f` rejected; `e`/`w`/`W`/`r`/`R` commands blocked - ✅ `strings [-a] [-n MIN] [-t o|d|x] [-o] [-f] [-s SEP] [FILE]...` — print printable character sequences in files (default min length 4); offsets via `-t`/`-o`; filename prefix via `-f`; custom separator via `-s` - ✅ `tail [-n N|-c N] [-q|-v] [-z] [FILE]...` — output the last part of files (default: last 10 lines); supports `+N` offset mode; `-f`/`--follow` is rejected - ✅ `tr [-cdsCt] SET1 [SET2]` — translate, squeeze, and/or delete characters from stdin diff --git a/interp/builtins/sed/engine.go b/interp/builtins/sed/engine.go new file mode 100644 index 00000000..d2168464 --- /dev/null +++ b/interp/builtins/sed/engine.go @@ -0,0 +1,785 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package sed + +import ( + "bufio" + "bytes" + "context" + "errors" + "fmt" + "io" + "os" + "regexp" + "strings" + + "github.com/DataDog/rshell/interp/builtins" +) + +// engine holds the state for executing a sed script. 
+type engine struct { + callCtx *builtins.CallContext + prog []*sedCmd + labelMap map[string]labelLocation // precomputed label locations for O(1) branch lookup + suppressPrint bool + lineNum int64 + lastLine bool + patternSpace string + holdSpace string + appendQueue []string // text queued by 'a' command, flushed after auto-print + appendQueueBytes int // total bytes in appendQueue for limit checking + subMade bool // set when s/// succeeds (cleared on new input line) + lastRe *regexp.Regexp // last regex used (for empty pattern in s///) + emptyReErr bool // set when // address has no previous regex + isRegularFile bool + isLastFile bool // whether we are processing the last file in the argument list +} + +// lineReader wraps a scanner with one-line look-ahead so we can determine +// whether the current line is the last one, while still allowing n/N commands +// to consume lines from the same scanner. +type lineReader struct { + sc *bufio.Scanner + nextLine string + hasNext bool + totalRead int64 + isRegularFile bool +} + +func newLineReader(sc *bufio.Scanner, isRegular bool) *lineReader { + lr := &lineReader{sc: sc, isRegularFile: isRegular} + lr.advance() // prime the look-ahead + return lr +} + +func (lr *lineReader) advance() bool { + if lr.sc.Scan() { + lr.nextLine = lr.sc.Text() + // Add +1 to account for the newline delimiter stripped by Scanner. + lr.totalRead += int64(len(lr.sc.Bytes())) + 1 + lr.hasNext = true + return true + } + lr.hasNext = false + return false +} + +func (lr *lineReader) readLine() (string, bool) { + if !lr.hasNext { + return "", false + } + line := lr.nextLine + lr.advance() + return line, true +} + +func (lr *lineReader) isLast() bool { + return !lr.hasNext +} + +func (lr *lineReader) checkLimit() error { + if !lr.isRegularFile && lr.totalRead > MaxTotalReadBytes { + return errors.New("input too large: read limit exceeded") + } + return nil +} + +// processFile reads a single file and runs the sed script on each line. 
+// isLastFile indicates whether this is the last file in the argument list; +// the $ address only matches when it is the last line of the last file +// (GNU sed treats multiple files as one continuous stream). +func (eng *engine) processFile(ctx context.Context, callCtx *builtins.CallContext, file string, isLastFile bool) error { + var rc io.ReadCloser + if file == "-" { + if callCtx.Stdin == nil { + return nil + } + eng.isRegularFile = isRegularFile(callCtx.Stdin) + rc = io.NopCloser(callCtx.Stdin) + } else { + f, err := callCtx.OpenFile(ctx, file, os.O_RDONLY, 0) + if err != nil { + return err + } + defer f.Close() + eng.isRegularFile = isRegularFile(f) + rc = f + } + + eng.isLastFile = isLastFile + + sc := bufio.NewScanner(rc) + buf := make([]byte, 4096) + sc.Buffer(buf, MaxLineBytes) + // Use a custom split function that only splits on \n (not \r\n). + // The default bufio.ScanLines strips \r from \r\n endings, but GNU sed + // preserves \r as part of the pattern space. + sc.Split(scanLinesPreserveCR) + + lr := newLineReader(sc, eng.isRegularFile) + + for { + if ctx.Err() != nil { + return ctx.Err() + } + + line, ok := lr.readLine() + if !ok { + break + } + if err := lr.checkLimit(); err != nil { + return err + } + + eng.lineNum++ + eng.patternSpace = line + eng.lastLine = lr.isLast() && isLastFile + + err := eng.runCycle(ctx, lr) + if err != nil { + return err + } + } + + if err := lr.sc.Err(); err != nil { + return err + } + return nil +} + +// runCycle executes the script for the current input line. +func (eng *engine) runCycle(ctx context.Context, lr *lineReader) error { + eng.subMade = false + eng.appendQueue = eng.appendQueue[:0] + eng.appendQueueBytes = 0 + for { + action, err := eng.execCommandsFrom(ctx, 0, lr, 0) + if err != nil { + return err + } + if action == actionRestart { + // D command requested a cycle restart with the remaining + // pattern space. subMade is intentionally preserved. 
+ eng.appendQueue = eng.appendQueue[:0] + eng.appendQueueBytes = 0 + continue + } + if action == actionBranch { + action = actionContinue + } + if action != actionDelete && !eng.suppressPrint { + eng.callCtx.Outf("%s\n", eng.patternSpace) + } + // Flush queued 'a' text after auto-print (even if auto-print was suppressed or deleted). + for _, text := range eng.appendQueue { + eng.callCtx.Outf("%s\n", text) + } + return nil + } +} + +// execCommandsFrom executes commands starting from index startIdx in the given +// command list. For branching, it always searches the full eng.prog for labels +// and restarts from there to handle backward branches correctly. +func (eng *engine) execCommandsFrom(ctx context.Context, startIdx int, lr *lineReader, depth int) (actionType, error) { + return eng.execCmds(ctx, eng.prog, startIdx, lr, depth) +} + +func (eng *engine) execCmds(ctx context.Context, cmds []*sedCmd, startIdx int, lr *lineReader, depth int) (actionType, error) { + if depth > MaxBranchIterations { + return actionContinue, errors.New("branch loop limit exceeded") + } + + for i := startIdx; i < len(cmds); i++ { + if ctx.Err() != nil { + return actionContinue, ctx.Err() + } + + cmd := cmds[i] + + if cmd.kind == cmdLabel { + continue + } + + if !eng.addressMatch(cmd) { + if eng.emptyReErr { + eng.emptyReErr = false + return actionContinue, errors.New("no previous regular expression") + } + continue + } + if eng.emptyReErr { + eng.emptyReErr = false + return actionContinue, errors.New("no previous regular expression") + } + + switch cmd.kind { + case cmdSubstitute: + if err := eng.execSubstitute(cmd); err != nil { + return actionContinue, err + } + + case cmdPrint: + eng.callCtx.Outf("%s\n", eng.patternSpace) + + case cmdDelete: + return actionDelete, nil + + case cmdPrintFirstLine: + if idx := strings.IndexByte(eng.patternSpace, '\n'); idx >= 0 { + eng.callCtx.Outf("%s\n", eng.patternSpace[:idx]) + } else { + eng.callCtx.Outf("%s\n", eng.patternSpace) + } + + case 
cmdDeleteFirstLine: + if idx := strings.IndexByte(eng.patternSpace, '\n'); idx >= 0 { + eng.patternSpace = eng.patternSpace[idx+1:] + // Restart the cycle with the remaining pattern space. + // Note: subMade is intentionally preserved across D restarts + // (GNU sed behaviour — t/T branching state survives D). + eng.appendQueue = eng.appendQueue[:0] + eng.appendQueueBytes = 0 + return actionRestart, nil + } + return actionDelete, nil + + case cmdQuit: + if !eng.suppressPrint { + eng.callCtx.Outf("%s\n", eng.patternSpace) + } + for _, text := range eng.appendQueue { + eng.callCtx.Outf("%s\n", text) + } + return actionContinue, &quitError{code: cmd.quitCode} + + case cmdQuitNoprint: + return actionContinue, &quitError{code: cmd.quitCode} + + case cmdTransliterate: + eng.patternSpace = eng.transliterate(eng.patternSpace, cmd.transMap) + + case cmdAppend: + eng.appendQueueBytes += len(cmd.text) + if eng.appendQueueBytes > MaxAppendQueueBytes { + return actionContinue, errors.New("append queue exceeded size limit") + } + eng.appendQueue = append(eng.appendQueue, cmd.text) + + case cmdInsert: + eng.callCtx.Outf("%s\n", cmd.text) + + case cmdChange: + // For range addresses, only output text at the end of the range. + if cmd.addr2 != nil && cmd.inRange { + // Still inside the range — delete silently without output. 
+ return actionDelete, nil + } + eng.callCtx.Outf("%s\n", cmd.text) + return actionDelete, nil + + case cmdLineNum: + eng.callCtx.Outf("%d\n", eng.lineNum) + + case cmdPrintUnambig: + eng.printUnambiguous() + + case cmdNext: + if !eng.suppressPrint { + eng.callCtx.Outf("%s\n", eng.patternSpace) + } + for _, text := range eng.appendQueue { + eng.callCtx.Outf("%s\n", text) + } + eng.appendQueue = eng.appendQueue[:0] + eng.appendQueueBytes = 0 + line, ok := lr.readLine() + if ok { + if err := lr.checkLimit(); err != nil { + return actionContinue, err + } + eng.lineNum++ + eng.patternSpace = line + eng.lastLine = lr.isLast() && eng.isLastFile + eng.subMade = false // n loads a new input line; reset substitution state + } else { + // n already printed the pattern space; suppress auto-print. + eng.lastLine = eng.isLastFile + return actionDelete, nil + } + + case cmdNextAppend: + // Flush queued 'a' text before reading the next line (GNU sed behaviour). + for _, text := range eng.appendQueue { + eng.callCtx.Outf("%s\n", text) + } + eng.appendQueue = eng.appendQueue[:0] + eng.appendQueueBytes = 0 + line, ok := lr.readLine() + if ok { + if err := lr.checkLimit(); err != nil { + return actionContinue, err + } + eng.lineNum++ + if len(eng.patternSpace)+1+len(line) > MaxSpaceBytes { + return actionContinue, errors.New("pattern space exceeded size limit") + } + eng.patternSpace += "\n" + line + eng.lastLine = lr.isLast() && eng.isLastFile + } else { + if !eng.suppressPrint { + eng.callCtx.Outf("%s\n", eng.patternSpace) + } + return actionDelete, nil + } + + case cmdHoldCopy: + eng.holdSpace = eng.patternSpace + + case cmdHoldAppend: + if len(eng.holdSpace)+1+len(eng.patternSpace) > MaxSpaceBytes { + return actionContinue, errors.New("hold space exceeded size limit") + } + eng.holdSpace += "\n" + eng.patternSpace + + case cmdGetCopy: + eng.patternSpace = eng.holdSpace + + case cmdGetAppend: + if len(eng.patternSpace)+1+len(eng.holdSpace) > MaxSpaceBytes { + return 
actionContinue, errors.New("pattern space exceeded size limit") + } + eng.patternSpace += "\n" + eng.holdSpace + + case cmdExchange: + eng.patternSpace, eng.holdSpace = eng.holdSpace, eng.patternSpace + + case cmdBranch: + return eng.branchTo(ctx, cmd.label, lr, depth) + + case cmdBranchIfSub: + if eng.subMade { + eng.subMade = false + return eng.branchTo(ctx, cmd.label, lr, depth) + } + + case cmdBranchIfNoSub: + if !eng.subMade { + return eng.branchTo(ctx, cmd.label, lr, depth) + } + eng.subMade = false + + case cmdGroup: + action, err := eng.execCmds(ctx, cmd.children, 0, lr, depth) + if err != nil || action != actionContinue { + return action, err + } + + case cmdNoop, cmdLabel: + // Do nothing. + } + } + + return actionContinue, nil +} + +// labelLocation describes where a label was found as a path through the +// command tree. path[0] is the index in the top-level command list, +// path[1] is the index inside the first-level group's children, etc. +type labelLocation struct { + path []int // indices at each nesting level; nil means not found +} + +// buildLabelMap precomputes the location of every label in the program +// for O(1) branch resolution instead of linear scanning on every branch. +func buildLabelMap(cmds []*sedCmd) map[string]labelLocation { + m := make(map[string]labelLocation) + buildLabelMapRecursive(cmds, nil, m) + return m +} + +func buildLabelMapRecursive(cmds []*sedCmd, prefix []int, m map[string]labelLocation) { + for i, cmd := range cmds { + currentPath := append(append([]int{}, prefix...), i) + if cmd.kind == cmdLabel && cmd.label != "" { + // Last definition wins — GNU sed branches to the most recently + // defined label when duplicates exist. + m[cmd.label] = labelLocation{path: currentPath} + } + if cmd.kind == cmdGroup { + buildLabelMapRecursive(cmd.children, currentPath, m) + } + } +} + +// branchTo resolves a label and continues execution from the command after it. 
+// An empty label branches to end of script (returns actionBranch). +func (eng *engine) branchTo(ctx context.Context, label string, lr *lineReader, depth int) (actionType, error) { + if label == "" { + // Branch to end of script. Return actionBranch so that a branch + // inside a group properly skips commands after the group. + return actionBranch, nil + } + loc, ok := eng.labelMap[label] + if !ok { + // This should not happen — labels are validated at parse time. + return actionContinue, errors.New("undefined label '" + label + "'") + } + action, err := eng.branchToPath(ctx, eng.prog, loc.path, lr, depth) + if err != nil { + return action, err + } + // Wrap actionContinue as actionBranch so callers (e.g. group execution) + // know a non-local jump occurred and don't fall through. + if action == actionContinue { + return actionBranch, nil + } + return action, nil +} + +// branchToPath executes commands starting from the label described by path. +// path[0] is the index in cmds; if len(path) > 1, cmds[path[0]] is a group +// and we recurse into its children with path[1:]. +func (eng *engine) branchToPath(ctx context.Context, cmds []*sedCmd, path []int, lr *lineReader, depth int) (actionType, error) { + if len(path) == 1 { + // Label is at this level — continue from path[0]+1. + return eng.execCmds(ctx, cmds, path[0]+1, lr, depth+1) + } + // Label is inside a nested group at cmds[path[0]]. + group := cmds[path[0]] + action, err := eng.branchToPath(ctx, group.children, path[1:], lr, depth) + if err != nil || action != actionContinue { + return action, err + } + // After the nested group finishes, continue with commands after it. + return eng.execCmds(ctx, cmds, path[0]+1, lr, depth+1) +} + +// --- Address matching --- + +// addressMatch checks whether the current line matches the command's address. 
+func (eng *engine) addressMatch(cmd *sedCmd) bool { + match := eng.rawAddressMatch(cmd) + if cmd.negated { + return !match + } + return match +} + +func (eng *engine) rawAddressMatch(cmd *sedCmd) bool { + if cmd.addr1 == nil { + return true // no address means match all + } + + if cmd.addr2 == nil { + // Single address. + return eng.matchAddr(cmd.addr1) + } + + // Two-address range: match from addr1 to addr2 inclusive. + return eng.matchRange(cmd) +} + +func (eng *engine) matchAddr(addr *address) bool { + switch addr.kind { + case addrLine: + // Line 0 is special: it only makes sense as the start of a 0,/re/ + // range. It matches on line 1 (the range starts "before line 1") + // so the regex addr2 can close on line 1. After line 1, it no + // longer matches so the range doesn't reopen. + if addr.line == 0 { + return eng.lineNum == 1 + } + return eng.lineNum == addr.line + case addrLast: + return eng.lastLine + case addrRegexp: + re := addr.re + if re == nil { + // Empty pattern: reuse last regex. + if eng.lastRe == nil { + // GNU sed fails with "no previous regular expression". + // Store a sentinel so the caller can detect and report this. + eng.emptyReErr = true + return false + } + re = eng.lastRe + } else { + eng.lastRe = re // Record the most recently used regex. + } + return re.MatchString(eng.patternSpace) + case addrStep: + if addr.first == 0 { + return eng.lineNum%addr.step == 0 + } + return eng.lineNum >= addr.first && (eng.lineNum-addr.first)%addr.step == 0 + } + return false +} + +func (eng *engine) matchRange(cmd *sedCmd) bool { + if cmd.inRange { + // We're inside the range. Check if addr2 closes it. + if eng.matchAddr(cmd.addr2) { + cmd.inRange = false + return true // addr2 line is still part of the range + } + return true + } + // Not in range — check if addr1 opens it. + if eng.matchAddr(cmd.addr1) { + // Special case: addr1 is line 0 (the GNU 0,/re/ form). 
+ // Unlike normal ranges, check addr2 on the very first line so the + // range can close immediately on line 1. + addr1IsZero := cmd.addr1.kind == addrLine && cmd.addr1.line == 0 + + // For regex addr2, GNU sed does not check it on the opening line — + // the range always extends to at least the next line. + // Exception: 0,/re/ DOES check addr2 on line 1. + // For line-number/$ addr2, check immediately for degenerate range. + if cmd.addr2.kind != addrRegexp || addr1IsZero { + // Descending numeric range (e.g. 4,2): treat as one-line range. + if cmd.addr2.kind == addrLine && cmd.addr2.line < eng.lineNum { + return true // one-line range + } + if eng.matchAddr(cmd.addr2) { + return true // one-line range, don't enter inRange state + } + } + cmd.inRange = true + return true + } + return false +} + +// --- Command implementations --- + +func (eng *engine) execSubstitute(cmd *sedCmd) error { + // Resolve the regex: nil means "reuse last regex". + // Note: case-insensitive flag (i/I) on empty regexp is rejected at parse + // time, so we don't need to handle it here. + re := cmd.subRe + if re == nil { + if eng.lastRe == nil { + return errors.New("no previous regular expression") + } + re = eng.lastRe + } + eng.lastRe = re + + // Validate backreferences in the replacement against the number of + // capture groups in the regex. GNU sed rejects invalid references. + // Skip when cmd.subRe is non-nil — validation was already done at parse time. + // Only needed for empty-pattern reuse (cmd.subRe == nil), since the + // previous regex may have a different number of capture groups. + if cmd.subRe == nil { + if err := validateBackrefs(cmd.subReplacement, re.NumSubexp()); err != nil { + return err + } + } + + var result string + var matched bool + if cmd.subGlobal && cmd.subNth > 0 { + // Combined Nth + global: replace from the Nth match onward. 
+ count := 0 + expanded := expandReplacement(cmd.subReplacement) + result = re.ReplaceAllStringFunc(eng.patternSpace, func(match string) string { + count++ + if count >= cmd.subNth { + matched = true + return re.ReplaceAllString(match, expanded) + } + return match + }) + } else if cmd.subGlobal { + expanded := expandReplacement(cmd.subReplacement) + matched = re.MatchString(eng.patternSpace) + result = re.ReplaceAllString(eng.patternSpace, expanded) + } else if cmd.subNth > 0 { + count := 0 + expanded := expandReplacement(cmd.subReplacement) + result = re.ReplaceAllStringFunc(eng.patternSpace, func(match string) string { + count++ + if count == cmd.subNth { + matched = true + return re.ReplaceAllString(match, expanded) + } + return match + }) + } else { + loc := re.FindStringIndex(eng.patternSpace) + if loc != nil { + matched = true + m := eng.patternSpace[loc[0]:loc[1]] + replacement := re.ReplaceAllString(m, expandReplacement(cmd.subReplacement)) + result = eng.patternSpace[:loc[0]] + replacement + eng.patternSpace[loc[1]:] + } else { + return nil + } + } + if matched { + if len(result) > MaxSpaceBytes { + return errors.New("pattern space exceeded size limit") + } + eng.subMade = true + eng.patternSpace = result + if cmd.subPrint { + eng.callCtx.Outf("%s\n", eng.patternSpace) + } + } + return nil +} + +// validateBackrefs checks that all \N backreferences in the replacement string +// refer to capture groups that exist in the regex. GNU sed errors on invalid +// references like \1 when there are no capture groups. +func validateBackrefs(repl string, numGroups int) error { + for i := 0; i < len(repl); i++ { + if repl[i] == '\\' && i+1 < len(repl) { + next := repl[i+1] + if next >= '1' && next <= '9' { + ref := int(next - '0') + if ref > numGroups { + return errors.New("invalid reference \\" + string(next) + " on `s' command's RHS") + } + } + i++ // skip next + } + } + return nil +} + +// expandReplacement converts sed replacement syntax to Go regexp replacement. 
+// In sed, & means the whole match. In Go regexp, that's ${0} or $0. +// Sed uses \1-\9 for groups, Go uses $1-$9. +func expandReplacement(repl string) string { + var sb strings.Builder + sb.Grow(len(repl)) + for i := 0; i < len(repl); i++ { + ch := repl[i] + if ch == '&' { + sb.WriteString("${0}") + } else if ch == '$' { + // Escape literal $ so Go's regexp engine doesn't interpret $1, $2, etc. + sb.WriteString("$$") + } else if ch == '\\' && i+1 < len(repl) { + next := repl[i+1] + if next == '0' { + // \0 is equivalent to & (entire match) in GNU sed. + sb.WriteString("${0}") + i++ + } else if next >= '1' && next <= '9' { + // Use braced form ${N} so that a following digit is not + // swallowed by Go's regexp replacement parser. + // e.g. sed's \10 means group-1 then literal '0', not group-10. + sb.WriteString("${") + sb.WriteByte(next) + sb.WriteString("}") + i++ + } else if next == '&' { + sb.WriteByte('&') + i++ + } else if next == '\\' { + sb.WriteByte('\\') + i++ + } else { + // GNU sed drops the backslash for non-special escapes + // (e.g. \q becomes q). + sb.WriteByte(next) + i++ + } + } else { + sb.WriteByte(ch) + } + } + return sb.String() +} + +func (eng *engine) transliterate(s string, mapping map[rune]rune) string { + runes := []rune(s) + for i, r := range runes { + if replacement, ok := mapping[r]; ok { + runes[i] = replacement + } + } + return string(runes) +} + +func (eng *engine) printUnambiguous() { + // l command: print pattern space showing non-printing characters. + var sb strings.Builder + col := 0 + for _, r := range eng.patternSpace { + var s string + switch { + case r == '\\': + s = "\\\\" + case r == '\a': + s = "\\a" + case r == '\b': + s = "\\b" + case r == '\f': + s = "\\f" + case r == '\r': + s = "\\r" + case r == '\t': + s = "\\t" + case r == '\n': + s = "\\n" + case r < 32 || r == 127: + s = fmt.Sprintf("\\%03o", r) + default: + if r > 127 { + // Output non-ASCII bytes as octal escapes like GNU sed. 
+ for _, b := range []byte(string(r)) { + s += fmt.Sprintf("\\%03o", b) + } + } else { + s = string(r) + } + } + if col+len(s) >= 70 { + sb.WriteString("\\\n") + col = 0 + } + sb.WriteString(s) + col += len(s) + } + sb.WriteByte('$') + sb.WriteByte('\n') + eng.callCtx.Out(sb.String()) +} + +// scanLinesPreserveCR is like bufio.ScanLines but does NOT strip trailing \r +// from \r\n endings. GNU sed treats \r as an ordinary character that is part +// of the pattern space, so we must preserve it. +func scanLinesPreserveCR(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + if i := bytes.IndexByte(data, '\n'); i >= 0 { + // Return the line up to (but not including) the \n. + return i + 1, data[:i], nil + } + // At EOF, deliver the last line without a trailing newline. + if atEOF { + return len(data), data, nil + } + // Request more data. + return 0, nil, nil +} + +// isRegularFile checks whether an io.Reader is backed by a regular file. +func isRegularFile(r any) bool { + type stater interface{ Stat() (os.FileInfo, error) } + sf, ok := r.(stater) + if !ok { + return false + } + fi, err := sf.Stat() + return err == nil && fi.Mode().IsRegular() +} diff --git a/interp/builtins/sed/engine_test.go b/interp/builtins/sed/engine_test.go new file mode 100644 index 00000000..29714f3f --- /dev/null +++ b/interp/builtins/sed/engine_test.go @@ -0,0 +1,63 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. 
+ +package sed + +import ( + "bufio" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestLineReaderCheckLimitNonRegularFile(t *testing.T) { + // Create a lineReader for a non-regular-file source with totalRead + // exceeding the limit, and verify checkLimit returns an error. + sc := bufio.NewScanner(strings.NewReader("")) + lr := &lineReader{sc: sc, isRegularFile: false} + + // Below the limit — no error. + lr.totalRead = MaxTotalReadBytes - 1 + require.NoError(t, lr.checkLimit()) + + // Exactly at the limit — no error (check is strictly greater-than). + lr.totalRead = MaxTotalReadBytes + require.NoError(t, lr.checkLimit()) + + // Above the limit — error. + lr.totalRead = MaxTotalReadBytes + 1 + err := lr.checkLimit() + require.Error(t, err) + assert.Contains(t, err.Error(), "input too large") +} + +func TestLineReaderCheckLimitRegularFile(t *testing.T) { + // Regular files are not subject to the read limit. + sc := bufio.NewScanner(strings.NewReader("")) + lr := &lineReader{sc: sc, isRegularFile: true} + lr.totalRead = MaxTotalReadBytes + 1 + require.NoError(t, lr.checkLimit()) +} + +func TestLineReaderTotalReadAccumulation(t *testing.T) { + // Verify that totalRead accumulates across multiple readLine calls. + input := "line1\nline2\nline3\n" + sc := bufio.NewScanner(strings.NewReader(input)) + lr := newLineReader(sc, false) + + var totalLines int + for { + _, ok := lr.readLine() + if !ok { + break + } + totalLines++ + } + assert.Equal(t, 3, totalLines) + // totalRead should be > 0 (exact value depends on scanner behavior). + assert.Greater(t, lr.totalRead, int64(0)) +} diff --git a/interp/builtins/sed/parser.go b/interp/builtins/sed/parser.go new file mode 100644 index 00000000..bc4e8945 --- /dev/null +++ b/interp/builtins/sed/parser.go @@ -0,0 +1,776 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. 
+// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package sed + +import ( + "errors" + "regexp" + "strconv" + "strings" +) + +// parser holds state during sed script parsing. +type parser struct { + input string + pos int + useERE bool + groupDepth int +} + +// maxGroupDepth is the maximum nesting depth for {...} groups. +const maxGroupDepth = 256 + +func parseScript(script string, useERE bool) ([]*sedCmd, error) { + p := &parser{input: script, useERE: useERE} + cmds, err := p.parseCommands(false) + if err != nil { + return nil, err + } + // Validate that all branch/conditional labels are defined — GNU sed + // rejects undefined labels at compile time. + if err := validateLabels(cmds); err != nil { + return nil, err + } + return cmds, nil +} + +// validateLabels checks that every b/t/T label referenced in cmds exists. +func validateLabels(cmds []*sedCmd) error { + labels := collectLabels(cmds) + return checkBranches(cmds, labels) +} + +func collectLabels(cmds []*sedCmd) map[string]bool { + m := make(map[string]bool) + for _, cmd := range cmds { + if cmd.kind == cmdLabel { + m[cmd.label] = true + } + if cmd.kind == cmdGroup { + for k, v := range collectLabels(cmd.children) { + m[k] = v + } + } + } + return m +} + +func checkBranches(cmds []*sedCmd, labels map[string]bool) error { + for _, cmd := range cmds { + if (cmd.kind == cmdBranch || cmd.kind == cmdBranchIfSub || cmd.kind == cmdBranchIfNoSub) && cmd.label != "" { + if !labels[cmd.label] { + return errors.New("undefined label '" + cmd.label + "'") + } + } + if cmd.kind == cmdGroup { + if err := checkBranches(cmd.children, labels); err != nil { + return err + } + } + } + return nil +} + +func (p *parser) parseCommands(inGroup bool) ([]*sedCmd, error) { + var cmds []*sedCmd + for p.pos < len(p.input) { + p.skipWhitespaceAndSemicolons() + if p.pos >= len(p.input) { + break + } + ch := p.input[p.pos] + if ch == '}' { + if inGroup { + 
p.pos++ // consume '}'
+				return cmds, nil
+			}
+			return nil, errors.New("unexpected '}'")
+		}
+		if ch == '#' {
+			// Comment — skip to end of line.
+			for p.pos < len(p.input) && p.input[p.pos] != '\n' {
+				p.pos++
+			}
+			continue
+		}
+		cmd, err := p.parseOneCommand()
+		if err != nil {
+			return nil, err
+		}
+		if cmd != nil {
+			cmds = append(cmds, cmd)
+		}
+	}
+	if inGroup {
+		return nil, errors.New("unterminated '{'")
+	}
+	return cmds, nil
+}
+
+// skipWhitespaceAndSemicolons advances past any run of spaces, tabs,
+// newlines, carriage returns and semicolons.
+func (p *parser) skipWhitespaceAndSemicolons() {
+	for p.pos < len(p.input) {
+		ch := p.input[p.pos]
+		if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == ';' {
+			p.pos++
+		} else {
+			break
+		}
+	}
+}
+
+// skipSpaces advances past spaces and tabs only; it never crosses a newline.
+func (p *parser) skipSpaces() {
+	for p.pos < len(p.input) && (p.input[p.pos] == ' ' || p.input[p.pos] == '\t') {
+		p.pos++
+	}
+}
+
+// requireSeparator checks that the next non-space character is a valid command
+// separator (;, newline, }, #, or EOF). GNU sed rejects extra characters after
+// zero-argument commands like p, d, n, etc.
+func (p *parser) requireSeparator() error {
+	// Skip optional trailing spaces/tabs (GNU sed allows "p ; d").
+	p.skipSpaces()
+	if p.pos >= len(p.input) {
+		return nil // EOF is fine
+	}
+	ch := p.input[p.pos]
+	if ch == ';' || ch == '\n' || ch == '\r' || ch == '}' || ch == '#' {
+		return nil
+	}
+	return errors.New("extra characters after command")
+}
+
+// parseOneCommand parses a single command starting at p.pos: an optional
+// address or address range, an optional '!' negation, the command letter and
+// that command's arguments. Commands that are blocked (e, w, W, r, R) are
+// rejected here with an error.
+func (p *parser) parseOneCommand() (*sedCmd, error) {
+	cmd := &sedCmd{}
+
+	// Parse first address.
+	addr1, err := p.parseAddress()
+	if err != nil {
+		return nil, err
+	}
+	cmd.addr1 = addr1
+
+	// Check for comma (address range).
+	if cmd.addr1 != nil && p.pos < len(p.input) && p.input[p.pos] == ',' {
+		p.pos++ // consume ','
+		p.skipSpaces()
+		addr2, err := p.parseAddress()
+		if err != nil {
+			return nil, err
+		}
+		if addr2 == nil {
+			return nil, errors.New("expected address after ','")
+		}
+		cmd.addr2 = addr2
+	}
+
+	// Reject line address 0 as a standalone address. GNU sed allows 0 only
+	// as the first address in a range (0,/re/).
+	if cmd.addr1 != nil && cmd.addr1.kind == addrLine && cmd.addr1.line == 0 && cmd.addr2 == nil {
+		return nil, errors.New("invalid usage of line address 0")
+	}
+
+	p.skipSpaces()
+
+	// Check for negation.
+	if p.pos < len(p.input) && p.input[p.pos] == '!' {
+		cmd.negated = true
+		p.pos++
+		p.skipSpaces()
+	}
+
+	if p.pos >= len(p.input) {
+		return nil, errors.New("missing command")
+	}
+
+	ch := p.input[p.pos]
+	p.pos++
+
+	switch ch {
+	case 's':
+		return p.parseSubstitute(cmd)
+	case 'y':
+		return p.parseTransliterate(cmd)
+	case 'p':
+		cmd.kind = cmdPrint
+	case 'P':
+		cmd.kind = cmdPrintFirstLine
+	case 'd':
+		cmd.kind = cmdDelete
+	case 'D':
+		cmd.kind = cmdDeleteFirstLine
+	case 'q':
+		cmd.kind = cmdQuit
+		code, err := p.parseOptionalExitCode()
+		if err != nil {
+			return nil, err
+		}
+		cmd.quitCode = code
+	case 'Q':
+		cmd.kind = cmdQuitNoprint
+		code, err := p.parseOptionalExitCode()
+		if err != nil {
+			return nil, err
+		}
+		cmd.quitCode = code
+	case 'a':
+		cmd.kind = cmdAppend
+		cmd.text = p.parseTextArg()
+	case 'i':
+		cmd.kind = cmdInsert
+		cmd.text = p.parseTextArg()
+	case 'c':
+		cmd.kind = cmdChange
+		cmd.text = p.parseTextArg()
+	case '=':
+		cmd.kind = cmdLineNum
+	case 'l':
+		cmd.kind = cmdPrintUnambig
+	case 'n':
+		cmd.kind = cmdNext
+	case 'N':
+		cmd.kind = cmdNextAppend
+	case 'h':
+		cmd.kind = cmdHoldCopy
+	case 'H':
+		cmd.kind = cmdHoldAppend
+	case 'g':
+		cmd.kind = cmdGetCopy
+	case 'G':
+		cmd.kind = cmdGetAppend
+	case 'x':
+		cmd.kind = cmdExchange
+	case 'b':
+		cmd.kind = cmdBranch
+		cmd.label = p.parseLabelArg()
+	case 't':
+		cmd.kind = cmdBranchIfSub
+		cmd.label = p.parseLabelArg()
+	case 'T':
+		cmd.kind = cmdBranchIfNoSub
+		cmd.label = p.parseLabelArg()
+	case ':':
+		cmd.kind = cmdLabel
+		cmd.label = p.parseLabelArg()
+		if cmd.label == "" {
+			return nil, errors.New("missing label name for ':'")
+		}
+	case '{':
+		if p.groupDepth >= maxGroupDepth {
+			return nil, errors.New("group nesting depth limit exceeded")
+		}
+		p.groupDepth++
+		children, err := p.parseCommands(true)
+		p.groupDepth--
+		if err != nil {
+			return nil, err
+		}
+		cmd.kind = cmdGroup
+		cmd.children = children
+	case 'e':
+		return nil, errors.New("'e' command is blocked: command execution is not allowed")
+	case 'w':
+		return nil, errors.New("'w' command is blocked: file writing is not allowed")
+	case 'W':
+		return nil, errors.New("'W' command is blocked: file writing is not allowed")
+	case 'r':
+		return nil, errors.New("'r' command is blocked: unsandboxed file reading is not allowed")
+	case 'R':
+		return nil, errors.New("'R' command is blocked: unsandboxed file reading is not allowed")
+	default:
+		return nil, errors.New("unknown command: '" + string(ch) + "'")
+	}
+
+	// Enforce command separators after zero-arg commands.
+	// GNU sed rejects "pp", "dp", etc. with "extra characters after command".
+	switch cmd.kind {
+	case cmdPrint, cmdPrintFirstLine, cmdDelete, cmdDeleteFirstLine,
+		cmdLineNum, cmdPrintUnambig, cmdNext, cmdNextAppend,
+		cmdHoldCopy, cmdHoldAppend, cmdGetCopy, cmdGetAppend, cmdExchange:
+		if err := p.requireSeparator(); err != nil {
+			return nil, err
+		}
+	}
+
+	return cmd, nil
+}
+
+// parseOptionalExitCode parses the optional decimal exit code that may follow
+// a q or Q command and returns it reduced modulo 256 (0 when absent).
+//
+// Whatever follows the (possibly empty) exit code must be a command
+// separator as defined by requireSeparator. Routing both the "no digits" and
+// the "digits" paths through requireSeparator keeps them consistent: the
+// previous digit path accepted a blank as the end of the command, so "q5 p"
+// was silently parsed as two commands while "q p" was rejected.
+func (p *parser) parseOptionalExitCode() (uint8, error) {
+	p.skipSpaces()
+	start := p.pos
+	for p.pos < len(p.input) && p.input[p.pos] >= '0' && p.input[p.pos] <= '9' {
+		p.pos++
+		// Cap digit scanning to avoid allocating huge substrings before
+		// strconv.Atoi rejects them.
+		if p.pos-start > 20 {
+			return 0, errors.New("invalid exit code for q/Q command")
+		}
+	}
+
+	var code uint8
+	if p.pos > start {
+		n, err := strconv.Atoi(p.input[start:p.pos])
+		if err != nil {
+			return 0, errors.New("invalid exit code for q/Q command")
+		}
+		// GNU sed truncates large exit codes modulo 256.
+		code = uint8(n % 256)
+	}
+
+	// GNU sed reports "extra characters after command" for inputs like
+	// "qp" or "q5 p"; only a separator, '#' (inline comment) or EOF may follow.
+	if err := p.requireSeparator(); err != nil {
+		return 0, err
+	}
+	return code, nil
+}
+
+// parseTextArg reads the text argument of an a, i or c command. The text runs
+// to the end of the line; a line ending in a backslash continues onto the
+// next line, with the backslash removed and the newline kept.
+func (p *parser) parseTextArg() string {
+	// GNU sed allows: a\text, a text, or a\text
+	// Multi-line continuation: lines ending with \ before \n are joined.
+	if p.pos < len(p.input) && p.input[p.pos] == '\\' {
+		p.pos++
+		if p.pos < len(p.input) && p.input[p.pos] == '\n' {
+			p.pos++ // consume newline after backslash
+		}
+	} else {
+		p.skipSpaces()
+	}
+
+	var sb strings.Builder
+	for {
+		start := p.pos
+		for p.pos < len(p.input) && p.input[p.pos] != '\n' {
+			p.pos++
+		}
+		line := p.input[start:p.pos]
+
+		// Check if the line ends with a backslash (continuation).
+		if len(line) > 0 && line[len(line)-1] == '\\' && p.pos < len(p.input) && p.input[p.pos] == '\n' {
+			sb.WriteString(line[:len(line)-1])
+			sb.WriteByte('\n')
+			p.pos++ // consume the newline
+			continue
+		}
+		sb.WriteString(line)
+		break
+	}
+	return sb.String()
+}
+
+// parseLabelArg reads the label that follows a b, t, T or ':' command. The
+// label ends at the first space, tab, newline, carriage return, ';' or '}',
+// or at end of input; the terminator is not consumed. An empty string means
+// no label was given.
+//
+// '\r' terminates the label because the other separator checks in this parser
+// (skipWhitespaceAndSemicolons, requireSeparator) already treat it as a
+// separator; without it, scripts with CRLF line endings produce labels such
+// as "end\r" that never match their definition.
+func (p *parser) parseLabelArg() string {
+	p.skipSpaces()
+	start := p.pos
+	for p.pos < len(p.input) {
+		switch p.input[p.pos] {
+		case ' ', '\t', '\n', '\r', ';', '}':
+			return p.input[start:p.pos]
+		}
+		p.pos++
+	}
+	return p.input[start:p.pos]
+}
+
+// parseAddress parses one address at p.pos: a line number, first~step, '$',
+// or a regex delimited by '/' or by '\' followed by a custom delimiter.
+// It returns (nil, nil) when no address is present.
+func (p *parser) parseAddress() (*address, error) {
+	if p.pos >= len(p.input) {
+		return nil, nil
+	}
+
+	ch := p.input[p.pos]
+
+	// Line number.
+	if ch >= '0' && ch <= '9' {
+		start := p.pos
+		for p.pos < len(p.input) && p.input[p.pos] >= '0' && p.input[p.pos] <= '9' {
+			p.pos++
+		}
+		// Check for first~step syntax.
+		if p.pos < len(p.input) && p.input[p.pos] == '~' {
+			first, err := strconv.ParseInt(p.input[start:p.pos], 10, 64)
+			if err != nil {
+				return nil, errors.New("invalid address: " + p.input[start:p.pos])
+			}
+			p.pos++ // consume '~'
+			stepStart := p.pos
+			for p.pos < len(p.input) && p.input[p.pos] >= '0' && p.input[p.pos] <= '9' {
+				p.pos++
+			}
+			step, err := strconv.ParseInt(p.input[stepStart:p.pos], 10, 64)
+			if err != nil || step <= 0 {
+				return nil, errors.New("invalid step in address")
+			}
+			return &address{kind: addrStep, first: first, step: step}, nil
+		}
+		n, err := strconv.ParseInt(p.input[start:p.pos], 10, 64)
+		if err != nil {
+			return nil, errors.New("invalid line number: " + p.input[start:p.pos])
+		}
+		// Line address 0 is only valid as the first address of a range (0,/re/).
+		// As a standalone address, GNU sed rejects it. We defer the check to
+		// parseOneCommand after we know whether a second address follows.
+		return &address{kind: addrLine, line: n}, nil
+	}
+
+	// Last line.
+	if ch == '$' {
+		p.pos++
+		return &address{kind: addrLast}, nil
+	}
+
+	// Regex address.
+	if ch == '/' || ch == '\\' {
+		var delim byte
+		if ch == '\\' {
+			p.pos++ // consume '\'
+			if p.pos >= len(p.input) {
+				return nil, errors.New("expected delimiter after '\\'")
+			}
+			delim = p.input[p.pos]
+		} else {
+			delim = '/'
+		}
+		p.pos++ // consume delimiter
+		pattern, err := p.readUntilDelimiter(delim)
+		if err != nil {
+			return nil, err
+		}
+		if pattern == "" {
+			// Empty pattern means "reuse last regex" — defer to runtime.
+			// re stays nil to signal this.
+			return &address{kind: addrRegexp, re: nil}, nil
+		}
+		re, err := p.compileRegex(pattern)
+		if err != nil {
+			return nil, err
+		}
+		return &address{kind: addrRegexp, re: re}, nil
+	}
+
+	return nil, nil
+}
+
+// readUntilDelimiter reads an address regex up to the closing delim and
+// consumes that delimiter. A backslash followed by delim yields the bare
+// delimiter; any other backslash pair is copied through unchanged so the
+// regex compiler sees it. Reaching end of input first is an error.
+func (p *parser) readUntilDelimiter(delim byte) (string, error) {
+	var sb strings.Builder
+	for p.pos < len(p.input) {
+		ch := p.input[p.pos]
+		if ch == '\\' && p.pos+1 < len(p.input) {
+			next := p.input[p.pos+1]
+			if next == delim {
+				sb.WriteByte(delim)
+				p.pos += 2
+				continue
+			}
+			sb.WriteByte('\\')
+			sb.WriteByte(next)
+			p.pos += 2
+			continue
+		}
+		if ch == delim {
+			p.pos++ // consume closing delimiter
+			return sb.String(), nil
+		}
+		sb.WriteByte(ch)
+		p.pos++
+	}
+	return "", errors.New("unterminated address regex")
+}
+
+// parseSubstitute parses the remainder of an s command (delimiter, pattern,
+// replacement and flags) into cmd. p.pos must point at the delimiter. The
+// w and e flags are rejected; an empty pattern leaves cmd.subRe nil so the
+// last-used regex is resolved at runtime.
+func (p *parser) parseSubstitute(cmd *sedCmd) (*sedCmd, error) {
+	if p.pos >= len(p.input) {
+		return nil, errors.New("missing delimiter for 's' command")
+	}
+	delim := p.input[p.pos]
+	if delim == '\\' || delim == '\n' {
+		return nil, errors.New("invalid delimiter for 's' command: '" + string(delim) + "'")
+	}
+	p.pos++ // consume delimiter
+
+	// Read pattern (isPattern=true: preserve \b as word boundary for RE2).
+	pattern, err := p.readSubstPart(delim, true)
+	if err != nil {
+		return nil, errors.New("unterminated 's' command: " + err.Error())
+	}
+
+	// Read replacement (isPattern=false: convert \b to backspace).
+	replacement, err := p.readSubstPart(delim, false)
+	if err != nil {
+		return nil, errors.New("unterminated 's' command: " + err.Error())
+	}
+
+	// Read flags.
+	cmd.kind = cmdSubstitute
+	cmd.subReplacement = replacement
+	caseInsensitive := false
+
+	for p.pos < len(p.input) {
+		ch := p.input[p.pos]
+		switch ch {
+		case 'g':
+			cmd.subGlobal = true
+			p.pos++
+		case 'p':
+			cmd.subPrint = true
+			p.pos++
+		case 'i', 'I':
+			caseInsensitive = true
+			p.pos++
+		case 'w':
+			return nil, errors.New("'w' flag in 's' command is blocked: file writing is not allowed")
+		case 'e':
+			return nil, errors.New("'e' flag in 's' command is blocked: command execution is not allowed")
+		default:
+			if ch == '0' {
+				return nil, errors.New("number option to 's' command may not be zero")
+			}
+			if ch >= '1' && ch <= '9' {
+				if cmd.subNth > 0 {
+					return nil, errors.New("multiple number options to 's' command")
+				}
+				start := p.pos
+				for p.pos < len(p.input) && p.input[p.pos] >= '0' && p.input[p.pos] <= '9' {
+					p.pos++
+				}
+				n, err := strconv.Atoi(p.input[start:p.pos])
+				if err != nil || n <= 0 {
+					return nil, errors.New("invalid substitution occurrence number")
+				}
+				cmd.subNth = n
+				// continue targets the enclosing for loop: keep reading flags.
+				continue
+			}
+			// Whitespace, semicolons, newlines, closing braces, and '#' end the flag list
+			// (they are command separators). '#' begins a comment that runs to end of line.
+			if ch == ';' || ch == '\n' || ch == '}' || ch == ' ' || ch == '\t' || ch == '\r' {
+				goto flagsDone
+			}
+			if ch == '#' {
+				// '#' starts a comment — skip to end of line.
+				for p.pos < len(p.input) && p.input[p.pos] != '\n' {
+					p.pos++
+				}
+				goto flagsDone
+			}
+			return nil, errors.New("unknown option to 's' command")
+		}
+	}
+flagsDone:
+
+	if pattern == "" {
+		// Empty pattern means "reuse last regex" — defer to runtime.
+		// cmd.subRe stays nil to signal this.
+		// GNU sed rejects modifiers (i/I) on empty regexp:
+		// "cannot specify modifiers on empty regexp".
+		if caseInsensitive {
+			return nil, errors.New("cannot specify modifiers on empty regexp")
+		}
+	} else {
+		re, err := p.compileRegex(pattern)
+		if err != nil {
+			return nil, err
+		}
+		// Apply case-insensitive flag after BRE-to-ERE conversion so (?i) isn't mangled.
+		if caseInsensitive {
+			re, err = regexp.Compile("(?i)" + re.String())
+			if err != nil {
+				return nil, errors.New("invalid regex with case-insensitive flag: " + err.Error())
+			}
+		}
+		cmd.subRe = re
+		// Validate backreferences at parse time. GNU sed rejects invalid
+		// references regardless of whether the command address matches.
+		if err := validateBackrefs(replacement, re.NumSubexp()); err != nil {
+			return nil, err
+		}
+	}
+	return cmd, nil
+}
+
+// readSubstPart reads one delimited part of an s/// command.
+// isPattern controls how certain escapes are handled: when true (reading the
+// regex pattern), \b is preserved as the two-character sequence \b so that
+// RE2 interprets it as a word boundary assertion. When false (reading the
+// replacement), \b is converted to a literal backspace (0x08).
+//
+// The closing delimiter is consumed. If the input ends before it is found,
+// the text read so far is returned together with an error.
+func (p *parser) readSubstPart(delim byte, isPattern bool) (string, error) {
+	var sb strings.Builder
+	for p.pos < len(p.input) {
+		ch := p.input[p.pos]
+		if ch == '\\' && p.pos+1 < len(p.input) {
+			next := p.input[p.pos+1]
+			if next == delim {
+				sb.WriteByte(delim)
+				p.pos += 2
+				continue
+			}
+			if next == 'n' {
+				sb.WriteByte('\n')
+				p.pos += 2
+				continue
+			}
+			if next == 't' {
+				sb.WriteByte('\t')
+				p.pos += 2
+				continue
+			}
+			if next == 'a' {
+				sb.WriteByte('\a')
+				p.pos += 2
+				continue
+			}
+			if next == 'b' {
+				if isPattern {
+					// In the pattern part, \b is a word boundary in RE2 —
+					// pass through as the literal two-character sequence \b.
+					sb.WriteByte('\\')
+					sb.WriteByte('b')
+				} else {
+					// In the replacement part, \b is a literal backspace.
+					sb.WriteByte('\b')
+				}
+				p.pos += 2
+				continue
+			}
+			if next == 'f' {
+				sb.WriteByte('\f')
+				p.pos += 2
+				continue
+			}
+			if next == 'r' {
+				sb.WriteByte('\r')
+				p.pos += 2
+				continue
+			}
+			if isPattern && !isSpecialPatternEscape(next) {
+				// In patterns, GNU sed drops the backslash for non-special
+				// escapes (for example, \q behaves like q).
+				sb.WriteByte(next)
+			} else {
+				// Preserve \+next; expandReplacement will strip the
+				// backslash for non-special escapes (e.g. \q -> q).
+				sb.WriteByte('\\')
+				sb.WriteByte(next)
+			}
+			p.pos += 2
+			continue
+		}
+		if ch == delim {
+			p.pos++ // consume closing delimiter
+			return sb.String(), nil
+		}
+		sb.WriteByte(ch)
+		p.pos++
+	}
+	return sb.String(), errors.New("unterminated delimiter")
+}
+
+// isSpecialPatternEscape reports whether a backslash followed by ch must keep
+// its backslash when it appears in an s/// pattern. readSubstPart drops the
+// backslash for every other character.
+//
+// NOTE(review): Go's regexp/syntax treats a backslash before a punctuation
+// character as that literal character, so the preserved `\<` and `\>` likely
+// match literal angle brackets rather than GNU sed word boundaries — confirm.
+func isSpecialPatternEscape(ch byte) bool {
+	if ch >= '1' && ch <= '9' {
+		// BRE backreferences (unsupported by RE2 but intentionally preserved
+		// so they fail as invalid regex rather than changing meaning).
+		return true
+	}
+
+	switch ch {
+	case '\\', '.', '^', '$', '*', '[', ']', '(', ')', '{', '}', '+', '?', '|':
+		// Common regex/BRE escapes and BRE-special operators.
+		return true
+	case '<', '>', 'B', 'A', 'Z', 'z':
+		// RE2 zero-width assertions.
+		return true
+	case 'w', 'W', 's', 'S', 'd', 'D':
+		// RE2 character classes.
+		return true
+	default:
+		return false
+	}
+}
+
+// parseTransliterate parses the remainder of a y command (delimiter, source
+// characters, destination characters) into cmd. p.pos must point at the
+// delimiter.
+//
+// The delimiter may not be a backslash or a newline, matching the check in
+// parseSubstitute. Within either list, an escaped backslash (`\\`) stands for
+// a single backslash, so `y/\\/x/` maps '\' to 'x' instead of failing the
+// length comparison. An error is returned when a delimiter is missing or
+// invalid, when a list is unterminated, or when the two lists differ in
+// length (counted in runes).
+func (p *parser) parseTransliterate(cmd *sedCmd) (*sedCmd, error) {
+	if p.pos >= len(p.input) {
+		return nil, errors.New("missing delimiter for 'y' command")
+	}
+	delim := p.input[p.pos]
+	if delim == '\\' || delim == '\n' {
+		return nil, errors.New("invalid delimiter for 'y' command: '" + string(delim) + "'")
+	}
+	p.pos++
+
+	srcStr, err := p.readSubstPart(delim, false)
+	if err != nil {
+		return nil, err
+	}
+	dstStr, err := p.readSubstPart(delim, false)
+	if err != nil {
+		return nil, err
+	}
+
+	// readSubstPart keeps `\\` as two bytes (the s/// replacement expander
+	// needs them); y has no later expansion step, so collapse them here.
+	src := []rune(unescapeTransliterateBackslashes(srcStr))
+	dst := []rune(unescapeTransliterateBackslashes(dstStr))
+	if len(src) != len(dst) {
+		return nil, errors.New("'y' command: source and destination must have the same length")
+	}
+
+	cmd.kind = cmdTransliterate
+	// Precompute the rune mapping at parse time for O(n) transliteration.
+	cmd.transMap = make(map[rune]rune, len(src))
+	for i, fr := range src {
+		// First occurrence wins (matches GNU sed behaviour).
+		if _, exists := cmd.transMap[fr]; !exists {
+			cmd.transMap[fr] = dst[i]
+		}
+	}
+	return cmd, nil
+}
+
+// unescapeTransliterateBackslashes rewrites every `\\` pair in s to a single
+// backslash. Any other backslash pair is copied through unchanged, so only
+// the escaped-backslash case changes behaviour.
+func unescapeTransliterateBackslashes(s string) string {
+	if !strings.Contains(s, `\\`) {
+		return s
+	}
+	var sb strings.Builder
+	sb.Grow(len(s))
+	for i := 0; i < len(s); i++ {
+		if s[i] == '\\' && i+1 < len(s) {
+			// Treat the backslash and the byte after it as one unit so that
+			// a pair is never split across two escapes.
+			i++
+			if s[i] != '\\' {
+				sb.WriteByte('\\')
+			}
+		}
+		sb.WriteByte(s[i])
+	}
+	return sb.String()
+}
+
+// compileRegex compiles a regex pattern, converting BRE to ERE if needed.
+func (p *parser) compileRegex(pattern string) (*regexp.Regexp, error) {
+	if !p.useERE {
+		pattern = breToERE(pattern)
+	}
+	re, err := regexp.Compile(pattern)
+	if err != nil {
+		return nil, errors.New("invalid regex: " + err.Error())
+	}
+	return re, nil
+}
+
+// breToERE converts a basic regular expression to an extended one.
+// In BRE: \( \) \{ \} \+ \? are special; ( ) { } + ? are literal.
+// In ERE: ( ) { } + ? are special; \( \) etc. are literal.
+func breToERE(pattern string) string {
+	var sb strings.Builder
+	sb.Grow(len(pattern))
+	i := 0
+	for i < len(pattern) {
+		if pattern[i] == '\\' && i+1 < len(pattern) {
+			next := pattern[i+1]
+			switch next {
+			case '(', ')', '{', '}', '+', '?', '|':
+				// BRE escaped special → ERE unescaped special.
+				sb.WriteByte(next)
+				i += 2
+			default:
+				// Includes backreferences (\1-\9) which RE2 doesn't support
+				// but are passed through unchanged.
+				sb.WriteByte('\\')
+				sb.WriteByte(next)
+				i += 2
+			}
+		} else {
+			ch := pattern[i]
+			switch ch {
+			case '(', ')', '{', '}', '+', '?', '|':
+				// In BRE these are literal; escape them for ERE.
+				sb.WriteByte('\\')
+				sb.WriteByte(ch)
+			default:
+				sb.WriteByte(ch)
+			}
+			i++
+		}
+	}
+	return sb.String()
+}
diff --git a/interp/builtins/sed/sed.go b/interp/builtins/sed/sed.go
new file mode 100644
index 00000000..4cae8b8b
--- /dev/null
+++ b/interp/builtins/sed/sed.go
@@ -0,0 +1,238 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2026-present Datadog, Inc.
+
+// Package sed implements the sed builtin command.
+//
+// sed — stream editor for filtering and transforming text
+//
+// Usage: sed [OPTION]... [script] [FILE]...
+//
+//	sed [OPTION]... -e script [-e script]... [FILE]...
+//
+// sed reads input files (or standard input if no files are given, or when
+// FILE is -), applies editing commands from the script, and writes the
+// result to standard output.
+//
+// Accepted flags:
+//
+//	-n, --quiet, --silent
+//	    Suppress automatic printing of pattern space. Only lines
+//	    explicitly printed via the p command are output.
+//
+//	-e script, --expression=script
+//	    Add the script commands to the set of commands to execute.
+//	    Multiple -e options are allowed; they are concatenated in order.
+//
+//	-E, --regexp-extended
+//	    Use extended regular expressions (ERE) rather than basic (BRE).
+//
+//	-r
+//	    GNU alias for -E (extended regular expressions).
+//
+//	-h, --help
+//	    Print this usage message to stdout and exit 0.
+//
+// Supported sed commands:
+//
+//	s/regex/replacement/[flags]  Substitute matches of regex with replacement.
+//	                             Flags: g (global), p (print), i/I (case-insensitive),
+//	                             N (replace Nth match).
+//	p                            Print the current pattern space.
+//	d                            Delete pattern space, start next cycle.
+//	q [code]                     Quit with optional exit code (prints pattern space first).
+//	Q [code]                     Quit with optional exit code (does not print).
+//	y/src/dst/                   Transliterate characters from src to dst.
+//	a\text / a text              Append text after the current line.
+//	i\text / i text              Insert text before the current line.
+//	c\text / c text              Replace line(s) with text.
+//	=                            Print the current line number.
+//	l                            Print pattern space unambiguously.
+//	n                            Read next input line into pattern space.
+//	N                            Append next input line to pattern space.
+//	h                            Copy pattern space to hold space.
+//	H                            Append pattern space to hold space.
+//	g                            Copy hold space to pattern space.
+//	G                            Append hold space to pattern space.
+//	x                            Exchange pattern and hold spaces.
+//	b [label]                    Branch to label (or end of script).
+//	: label                      Define a label for branching.
+//	t [label]                    Branch to label if s/// made a substitution.
+//	T [label]                    Branch to label if s/// did NOT make a substitution.
+//	{...}                        Group commands.
+//	!command                     Negate the address (apply to non-matching lines).
+//
+// Addressing:
+//
+//	N            Line number (1-based).
+//	$            Last line.
+//	/regex/      Lines matching regex.
+//	addr1,addr2  Range of lines.
+//	first~step   Every step-th line starting from first (GNU extension).
+//
+// Rejected commands (blocked for safety):
+//
+//	e       Execute pattern space as shell command (blocked: command execution).
+//	w file  Write pattern space to file (blocked: file write).
+//	W file  Write first line to file (blocked: file write).
+//	r file  Read file contents (blocked: unsandboxed file read).
+//	R file  Read one line from file (blocked: unsandboxed file read).
+//
+// Rejected flags:
+//
+//	-i, --in-place   Edit files in place (blocked: file write).
+//	-f, --file       Read script from file (not implemented).
+//	-s, --separate   Treat files as separate streams (not implemented).
+//	-z, --null-data  NUL-separated input (not implemented).
+//
+// Exit codes:
+//
+//	0  Success (or custom code via q/Q command).
+//	1  Invalid script syntax, missing file, or other error.
+//
+// Memory safety:
+//
+//	Input is processed line-by-line via a buffered scanner with a per-line
+//	cap of 1 MiB (MaxLineBytes). Pattern space and hold space are each
+//	bounded to MaxSpaceBytes (1 MiB). Branch loops are capped at
+//	MaxBranchIterations (10 000) per input line to prevent infinite loops.
+//	Non-regular-file inputs are subject to a MaxTotalReadBytes (256 MiB)
+//	limit to guard against infinite sources.
+//
+// Regex safety:
+//
+//	All regular expressions use Go's regexp package, which implements RE2
+//	(guaranteed linear-time matching, no backtracking). This prevents ReDoS
+//	attacks. BRE patterns are converted to ERE syntax before compilation.
+package sed
+
+import (
+	"context"
+	"errors"
+	"strings"
+
+	"github.com/DataDog/rshell/interp/builtins"
+)
+
+// Cmd is the sed builtin command descriptor.
+var Cmd = builtins.Command{Name: "sed", MakeFlags: registerFlags}
+
+// MaxLineBytes is the per-line buffer cap for the line scanner.
+const MaxLineBytes = 1 << 20 // 1 MiB
+
+// MaxSpaceBytes is the maximum size for pattern space and hold space.
+const MaxSpaceBytes = 1 << 20 // 1 MiB
+
+// MaxBranchIterations is the maximum number of branch iterations per
+// input line to prevent infinite loops.
+const MaxBranchIterations = 10_000
+
+// MaxTotalReadBytes is the maximum total bytes consumed from a single
+// non-regular-file input source.
+const MaxTotalReadBytes = 256 << 20 // 256 MiB
+
+// MaxAppendQueueBytes is the maximum total bytes that can be accumulated
+// in the append queue within a single cycle.
+const MaxAppendQueueBytes = 1 << 20 // 1 MiB
+
+// expressionSlice collects multiple -e values.
+type expressionSlice []string
+
+// String returns the collected expressions joined by newlines.
+func (e *expressionSlice) String() string { return strings.Join(*e, "\n") }
+
+// Set appends one -e value; it never fails.
+func (e *expressionSlice) Set(val string) error {
+	*e = append(*e, val)
+	return nil
+}
+
+// Type returns the type name reported for the flag value.
+func (e *expressionSlice) Type() string { return "string" }
+
+// registerFlags sets up sed flags and returns the handler.
+//
+// The returned handler chooses the script (all -e values, or else the first
+// positional argument), parses it, and runs it over each file in order,
+// defaulting to "-" when no files are given. A q/Q quitError ends processing
+// immediately with its exit code; any other per-file error is reported on
+// stderr and the remaining files are still processed, with a final exit
+// code of 1.
+func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc {
+	help := fs.BoolP("help", "h", false, "print usage and exit")
+	quiet := fs.BoolP("quiet", "n", false, "suppress automatic printing of pattern space")
+	fs.Lookup("quiet").NoOptDefVal = "true"
+	// --silent is an alias for --quiet.
+	silent := fs.Bool("silent", false, "alias for --quiet")
+	fs.Lookup("silent").NoOptDefVal = "true"
+
+	var expressions expressionSlice
+	fs.VarP(&expressions, "expression", "e", "add script commands")
+
+	extendedE := fs.BoolP("regexp-extended", "E", false, "use extended regular expressions")
+	extendedR := fs.BoolP("regexp-extended-r", "r", false, "use extended regular expressions (GNU alias for -E)")
+	fs.Lookup("regexp-extended-r").Hidden = true
+
+	return func(ctx context.Context, callCtx *builtins.CallContext, args []string) builtins.Result {
+		if *help {
+			callCtx.Out("Usage: sed [OPTION]... [script] [FILE]...\n")
+			callCtx.Out("Stream editor for filtering and transforming text.\n")
+			callCtx.Out("With no FILE, or when FILE is -, read standard input.\n\n")
+			fs.SetOutput(callCtx.Stdout)
+			fs.PrintDefaults()
+			return builtins.Result{}
+		}
+
+		suppressPrint := *quiet || *silent
+		useERE := *extendedE || *extendedR
+
+		// Determine script and files.
+		var scriptParts []string
+		var files []string
+
+		if len(expressions) > 0 {
+			scriptParts = []string(expressions)
+			files = args
+		} else if len(args) > 0 {
+			scriptParts = []string{args[0]}
+			files = args[1:]
+		} else {
+			callCtx.Errf("sed: no script command has been specified\n")
+			return builtins.Result{Code: 1}
+		}
+
+		// Parse the sed script.
+		prog, err := parseScript(strings.Join(scriptParts, "\n"), useERE)
+		if err != nil {
+			callCtx.Errf("sed: %s\n", err)
+			return builtins.Result{Code: 1}
+		}
+
+		if len(files) == 0 {
+			files = []string{"-"}
+		}
+
+		// Create the execution engine.
+		eng := &engine{
+			callCtx:       callCtx,
+			prog:          prog,
+			labelMap:      buildLabelMap(prog),
+			suppressPrint: suppressPrint,
+		}
+
+		var failed bool
+		for i, file := range files {
+			// NOTE(review): a cancelled context stops the loop without
+			// setting failed, so the handler can still return code 0 —
+			// confirm this is intended.
+			if ctx.Err() != nil {
+				break
+			}
+			isLastFile := i == len(files)-1
+			if err := eng.processFile(ctx, callCtx, file, isLastFile); err != nil {
+				var qe *quitError
+				if errors.As(err, &qe) {
+					// q command: print pattern space if requested, then exit.
+					return builtins.Result{Code: qe.code}
+				}
+				name := file
+				if file == "-" {
+					name = "standard input"
+				}
+				callCtx.Errf("sed: %s: %s\n", name, callCtx.PortableErr(err))
+				failed = true
+			}
+		}
+
+		if failed {
+			return builtins.Result{Code: 1}
+		}
+		return builtins.Result{}
+	}
+}
diff --git a/interp/builtins/sed/types.go b/interp/builtins/sed/types.go
new file mode 100644
index 00000000..f04053c3
--- /dev/null
+++ b/interp/builtins/sed/types.go
@@ -0,0 +1,123 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2026-present Datadog, Inc.
+
+package sed
+
+import (
+	"fmt"
+	"regexp"
+)
+
+// --- Error types ---
+
+// quitError signals a q or Q command with an exit code.
+type quitError struct {
+	code uint8
+}
+
+// Error implements the error interface and includes the exit code.
+func (e *quitError) Error() string {
+	return fmt.Sprintf("quit with code %d", e.code)
+}
+
+// --- Address types ---
+
+// addrType distinguishes different address kinds.
+type addrType int
+
+const (
+	addrNone   addrType = iota
+	addrLine            // specific line number
+	addrLast            // $ (last line)
+	addrRegexp          // /regex/
+	addrStep            // first~step (GNU extension)
+)
+
+// address represents a sed address (line number, regex, or $).
+// Only the fields that belong to kind are meaningful.
+type address struct {
+	kind  addrType
+	line  int64          // for addrLine
+	re    *regexp.Regexp // for addrRegexp
+	first int64          // for addrStep
+	step  int64          // for addrStep
+}
+
+// --- Command types ---
+
+// cmdType identifies the sed command.
+type cmdType int
+
+// Command kinds. The letter noted beside each constant is the script
+// character the parser maps to it.
+const (
+	cmdSubstitute      cmdType = iota // s
+	cmdPrint                          // p
+	cmdDelete                         // d
+	cmdQuit                           // q
+	cmdQuitNoprint                    // Q
+	cmdTransliterate                  // y
+	cmdAppend                         // a
+	cmdInsert                         // i
+	cmdChange                         // c
+	cmdLineNum                        // =
+	cmdPrintUnambig                   // l
+	cmdNext                           // n
+	cmdNextAppend                     // N
+	cmdHoldCopy                       // h
+	cmdHoldAppend                     // H
+	cmdGetCopy                        // g
+	cmdGetAppend                      // G
+	cmdExchange                       // x
+	cmdBranch                         // b
+	cmdLabel                          // :
+	cmdBranchIfSub                    // t
+	cmdBranchIfNoSub                  // T
+	cmdPrintFirstLine                 // P: print up to first embedded newline
+	cmdDeleteFirstLine                // D: delete up to first embedded newline, restart cycle
+	cmdGroup                          // { ... }
+	cmdNoop                           // NOTE(review): no parser branch shown here produces this kind — confirm where it is used
+)
+
+// sedCmd represents a single parsed sed command.
+// Fields below the common header are only meaningful for the command kinds
+// named in the comment that precedes them.
+type sedCmd struct {
+	addr1   *address // first address; nil when the command has no address
+	addr2   *address // second address of a range; nil for a single address
+	negated bool     // set when the command was prefixed with '!'
+	inRange bool     // stateful: tracks whether we're inside a two-address range
+	kind    cmdType
+
+	// For s command:
+	subRe          *regexp.Regexp // nil means "reuse last regex"
+	subReplacement string
+	subGlobal      bool // g flag
+	subPrint       bool // p flag
+	subNth         int  // numeric flag; 0 when no number was given
+	// Note: case-insensitive flag (i/I) on empty regexp is rejected at parse time
+	// (GNU sed: "cannot specify modifiers on empty regexp"), so no deferred flag is needed.
+
+	// For y command:
+	transMap map[rune]rune // precomputed rune mapping for O(1) lookup
+
+	// For a, i, c commands:
+	text string
+
+	// For q, Q commands:
+	quitCode uint8
+
+	// For b, t, T commands:
+	label string
+
+	// For { ... } grouping:
+	children []*sedCmd
+}
+
+// --- Action types ---
+
+// actionType signals how to proceed after executing a command.
+type actionType int
+
+const (
+	actionContinue actionType = iota
+	actionDelete              // d/D command: skip auto-print, start next cycle
+	actionRestart             // D command: restart the cycle (used internally)
+	actionBranch              // b/t/T command completed a non-local jump
+)
diff --git a/interp/builtins/tests/sed/sed_hardening_test.go b/interp/builtins/tests/sed/sed_hardening_test.go
new file mode 100644
index 00000000..d152a2a4
--- /dev/null
+++ b/interp/builtins/tests/sed/sed_hardening_test.go
@@ -0,0 +1,235 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2026-present Datadog, Inc.
+
+package sed_test
+
+import (
+	"context"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/DataDog/rshell/interp"
+	"github.com/stretchr/testify/assert"
+)
+
+// --- Memory Safety & Resource Limits ---
+
+// TestHardenLongLine runs a substitution over a single 512 KiB line and
+// expects it to succeed.
+func TestHardenLongLine(t *testing.T) {
+	dir := setupDir(t, map[string]string{
+		"long.txt": strings.Repeat("x", 512*1024) + "\n",
+	})
+	stdout, _, code := cmdRun(t, `sed 's/x/y/' long.txt`, dir)
+	assert.Equal(t, 0, code)
+	// First 'x' replaced with 'y', rest unchanged.
+	assert.True(t, strings.HasPrefix(stdout, "y"))
+}
+
+// TestHardenPatternSpaceLimit expects an error once N has accumulated more
+// input than the pattern space limit allows.
+func TestHardenPatternSpaceLimit(t *testing.T) {
+	// Use N command to accumulate lines until pattern space limit is hit.
+	var sb strings.Builder
+	for i := 0; i < 2000; i++ {
+		sb.WriteString(strings.Repeat("a", 600))
+		sb.WriteByte('\n')
+	}
+	dir := setupDir(t, map[string]string{
+		"big.txt": sb.String(),
+	})
+	_, stderr, code := cmdRun(t, `sed ':a;N;ba' big.txt`, dir)
+	// Should fail with pattern space limit error.
+	assert.Equal(t, 1, code)
+	assert.Contains(t, stderr, "pattern space exceeded size limit")
+}
+
+// TestHardenHoldSpaceLimit expects an error once H has appended more input
+// than the hold space limit allows.
+func TestHardenHoldSpaceLimit(t *testing.T) {
+	var sb strings.Builder
+	for i := 0; i < 2000; i++ {
+		sb.WriteString(strings.Repeat("b", 600))
+		sb.WriteByte('\n')
+	}
+	dir := setupDir(t, map[string]string{
+		"big.txt": sb.String(),
+	})
+	_, stderr, code := cmdRun(t, `sed 'H' big.txt`, dir)
+	assert.Equal(t, 1, code)
+	assert.Contains(t, stderr, "hold space exceeded size limit")
+}
+
+// TestHardenBranchLoopLimit expects an unconditional branch loop to be
+// stopped with an error instead of running forever.
+func TestHardenBranchLoopLimit(t *testing.T) {
+	dir := setupDir(t, map[string]string{
+		"input.txt": "test\n",
+	})
+	_, stderr, code := cmdRun(t, `sed ':loop;b loop' input.txt`, dir)
+	assert.Equal(t, 1, code)
+	assert.Contains(t, stderr, "branch loop limit exceeded")
+}
+
+// --- Context Cancellation ---
+
+// TestHardenContextCancellation runs sed under a 100ms deadline. It asserts
+// nothing about the result; it only checks that the call returns.
+func TestHardenContextCancellation(t *testing.T) {
+	// Create a large file that would take a while to process.
+	dir := setupDir(t, map[string]string{
+		"big.txt": strings.Repeat("line\n", 100000),
+	})
+	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
+	defer cancel()
+	_, _, code := runScriptCtx(ctx, t, `sed 's/line/LINE/g' big.txt`, dir)
+	// Should either complete or be cancelled — both are acceptable.
+	_ = code
+}
+
+// --- Blocked Commands ---
+
+func TestHardenBlockedExecuteCommand(t *testing.T) {
+	dir := setupDir(t, map[string]string{"f.txt": "test\n"})
+	_, stderr, code := cmdRun(t, `sed 'e' f.txt`, dir)
+	assert.Equal(t, 1, code)
+	assert.Contains(t, stderr, "blocked")
+}
+
+func TestHardenBlockedWriteCommand(t *testing.T) {
+	dir := setupDir(t, map[string]string{"f.txt": "test\n"})
+	_, stderr, code := cmdRun(t, `sed 'w /tmp/evil' f.txt`, dir)
+	assert.Equal(t, 1, code)
+	assert.Contains(t, stderr, "blocked")
+}
+
+func TestHardenBlockedReadCommand(t *testing.T) {
+	dir := setupDir(t, map[string]string{"f.txt": "test\n"})
+	_, stderr, code := cmdRun(t, `sed 'r /etc/passwd' f.txt`, dir)
+	assert.Equal(t, 1, code)
+	assert.Contains(t, stderr, "blocked")
+}
+
+func TestHardenBlockedBigRCommand(t *testing.T) {
+	dir := setupDir(t, map[string]string{"f.txt": "test\n"})
+	_, stderr, code := cmdRun(t, `sed 'R /etc/passwd' f.txt`, dir)
+	assert.Equal(t, 1, code)
+	assert.Contains(t, stderr, "blocked")
+}
+
+func TestHardenBlockedBigWCommand(t *testing.T) {
+	dir := setupDir(t, map[string]string{"f.txt": "test\n"})
+	_, stderr, code := cmdRun(t, `sed 'W /tmp/evil' f.txt`, dir)
+	assert.Equal(t, 1, code)
+	assert.Contains(t, stderr, "blocked")
+}
+
+func TestHardenBlockedSubstituteWriteFlag(t *testing.T) {
+	dir := setupDir(t, map[string]string{"f.txt": "test\n"})
+	_, stderr, code := cmdRun(t, `sed 's/t/T/w /tmp/evil' f.txt`, dir)
+	assert.Equal(t, 1, code)
+	assert.Contains(t, stderr, "blocked")
+}
+
+func TestHardenBlockedSubstituteExecuteFlag(t *testing.T) {
+	dir := setupDir(t, map[string]string{"f.txt": "test\n"})
+	_, stderr, code := cmdRun(t, `sed 's/t/T/e' f.txt`, dir)
+	assert.Equal(t, 1, code)
+	assert.Contains(t, stderr, "blocked")
+}
+
+// --- Input Validation ---
+
+func TestHardenInvalidRegex(t *testing.T) {
+	dir := setupDir(t, map[string]string{"f.txt": "test\n"})
+	_, stderr, code := cmdRun(t, `sed 's/[invalid/x/' f.txt`, dir)
+	assert.Equal(t, 1, code)
+	assert.Contains(t, stderr, "sed:")
+}
+
+// TestHardenEmptyScript only checks that an empty script does not crash;
+// no result is asserted.
+func TestHardenEmptyScript(t *testing.T) {
+	_, stderr, code := cmdRun(t, `sed '' /dev/null`, "")
+	// Empty script is valid — matches all lines with no commands.
+	_ = stderr
+	_ = code
+}
+
+func TestHardenNoScript(t *testing.T) {
+	_, stderr, code := cmdRun(t, `sed`, "")
+	assert.Equal(t, 1, code)
+	assert.Contains(t, stderr, "sed:")
+}
+
+// TestHardenUnterminatedSubstitution only checks that an unterminated s
+// command does not crash; no result is asserted.
+func TestHardenUnterminatedSubstitution(t *testing.T) {
+	dir := setupDir(t, map[string]string{"f.txt": "test\n"})
+	_, stderr, code := cmdRun(t, `sed 's/foo' f.txt`, dir)
+	// Unterminated s command — the parser may accept the last delimiter as optional.
+	// Just make sure it doesn't crash.
+	_ = stderr
+	_ = code
+}
+
+func TestHardenUnterminatedGroup(t *testing.T) {
+	dir := setupDir(t, map[string]string{"f.txt": "test\n"})
+	_, stderr, code := cmdRun(t, `sed '{p' f.txt`, dir)
+	assert.Equal(t, 1, code)
+	assert.Contains(t, stderr, "unterminated")
+}
+
+func TestHardenUnmatchedCloseBrace(t *testing.T) {
+	dir := setupDir(t, map[string]string{"f.txt": "test\n"})
+	_, stderr, code := cmdRun(t, `sed '}' f.txt`, dir)
+	assert.Equal(t, 1, code)
+	assert.Contains(t, stderr, "unexpected '}'")
+}
+
+// --- Multiple Files ---
+
+func TestHardenMultipleFiles(t *testing.T) {
+	dir := setupDir(t, map[string]string{
+		"a.txt": "alpha\n",
+		"b.txt": "beta\n",
+		"c.txt": "gamma\n",
+	})
+	stdout, _, code := cmdRun(t, `sed 's/a/A/g' a.txt b.txt c.txt`, dir)
+	assert.Equal(t, 0, code)
+	assert.Equal(t, "AlphA\nbetA\ngAmmA\n", stdout)
+}
+
+// TestHardenMissingFileContinues expects the readable file to be processed
+// and the missing one to be reported, with exit code 1.
+func TestHardenMissingFileContinues(t *testing.T) {
+	dir := setupDir(t, map[string]string{
+		"a.txt": "alpha\n",
+	})
+	stdout, stderr, code := cmdRun(t, `sed 's/a/A/' a.txt nonexistent.txt`, dir)
+	assert.Equal(t, 1, code)
+	assert.Equal(t, "Alpha\n", stdout)
+	assert.Contains(t, stderr, "nonexistent.txt")
+}
+
+// --- Regex Safety ---
+
+func TestHardenRegexComplexPattern(t *testing.T) {
+	// RE2 guarantees linear time, so complex patterns should not cause ReDoS.
+	dir := setupDir(t, map[string]string{
+		"f.txt": strings.Repeat("a", 100) + "\n",
+	})
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+	_, _, code := runScriptCtx(ctx, t, `sed -E 's/(a+)+b/x/' f.txt`, dir, interp.AllowedPaths([]string{dir}))
+	// Should complete without timeout (RE2 handles this in linear time).
+	assert.Equal(t, 0, code)
+}
+
+// --- Y command edge cases ---
+
+func TestHardenTransliterateMismatch(t *testing.T) {
+	dir := setupDir(t, map[string]string{"f.txt": "test\n"})
+	_, stderr, code := cmdRun(t, `sed 'y/abc/de/' f.txt`, dir)
+	assert.Equal(t, 1, code)
+	assert.Contains(t, stderr, "same length")
+}
+
+// --- Comments ---
+
+// TestHardenComments expects a leading '#' comment line in the script to be
+// ignored and the command on the following line to run.
+func TestHardenComments(t *testing.T) {
+	dir := setupDir(t, map[string]string{
+		"f.txt": "hello\n",
+	})
+	stdout, _, code := cmdRun(t, `sed '#this is a comment
+s/hello/world/' f.txt`, dir)
+	assert.Equal(t, 0, code)
+	assert.Equal(t, "world\n", stdout)
+}
diff --git a/interp/builtins/tests/sed/sed_pentest_test.go b/interp/builtins/tests/sed/sed_pentest_test.go
new file mode 100644
index 00000000..01dd768f
--- /dev/null
+++ b/interp/builtins/tests/sed/sed_pentest_test.go
@@ -0,0 +1,370 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2026-present Datadog, Inc.
+ +package sed_test + +import ( + "context" + "os" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + "github.com/DataDog/rshell/interp" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const pentestTimeout = 10 * time.Second + +func pentestDir(t *testing.T, files map[string]string) string { + t.Helper() + dir := t.TempDir() + for name, content := range files { + require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte(content), 0644)) + } + return dir +} + +// --- Flag and argument injection --- + +func TestPentestUnknownFlags(t *testing.T) { + dir := pentestDir(t, map[string]string{"f.txt": "test\n"}) + for _, flag := range []string{"-f", "--follow", "--no-such-flag", "-z", "-s"} { + t.Run(flag, func(t *testing.T) { + _, stderr, code := cmdRun(t, "sed "+flag+" 's/a/b/' f.txt", dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "sed:") + }) + } +} + +func TestPentestRejectedInPlaceFlag(t *testing.T) { + dir := pentestDir(t, map[string]string{"f.txt": "hello\n"}) + _, stderr, code := cmdRun(t, `sed -i 's/hello/bye/' f.txt`, dir) + assert.NotEqual(t, 0, code) + assert.Contains(t, stderr, "sed:") + // Verify file was NOT modified. + data, err := os.ReadFile(filepath.Join(dir, "f.txt")) + require.NoError(t, err) + assert.Equal(t, "hello\n", string(data)) +} + +func TestPentestDoubleDashEndOfFlags(t *testing.T) { + dir := pentestDir(t, map[string]string{"-n": "hello\n"}) + // -- should allow flag-like filenames. + stdout, _, code := runScript(t, `sed 's/hello/bye/' -- -n`, dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 0, code) + assert.Equal(t, "bye\n", stdout) +} + +func TestPentestFlagViaWordExpansion(t *testing.T) { + dir := pentestDir(t, map[string]string{"f.txt": "test\n"}) + // Flag injection via variable. 
+ _, stderr, code := cmdRun(t, `flag="-f"; sed $flag 's/a/b/' f.txt`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "sed:") +} + +// --- Blocked commands --- + +func TestPentestAllBlockedCommands(t *testing.T) { + dir := pentestDir(t, map[string]string{"f.txt": "test\n"}) + blocked := []struct { + name, script string + }{ + {"e-command", `sed 'e' f.txt`}, + {"w-command", `sed 'w /tmp/evil' f.txt`}, + {"W-command", `sed 'W /tmp/evil' f.txt`}, + {"r-command", `sed 'r /etc/passwd' f.txt`}, + {"R-command", `sed 'R /etc/passwd' f.txt`}, + {"s-w-flag", `sed 's/t/T/w /tmp/evil' f.txt`}, + {"s-e-flag", `sed 's/t/T/e' f.txt`}, + } + for _, tc := range blocked { + t.Run(tc.name, func(t *testing.T) { + _, stderr, code := cmdRun(t, tc.script, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "blocked") + }) + } +} + +// --- Memory / resource exhaustion --- + +func TestPentestLongLineNearLimit(t *testing.T) { + // Line of MaxLineBytes - 1 bytes should succeed. + const limit = 1 << 20 // 1 MiB + dir := pentestDir(t, map[string]string{ + "near.txt": strings.Repeat("x", limit-1) + "\n", + }) + stdout, _, code := cmdRun(t, `sed 's/x/y/' near.txt`, dir) + assert.Equal(t, 0, code) + assert.True(t, strings.HasPrefix(stdout, "y")) +} + +func TestPentestPatternSpaceExhaustion(t *testing.T) { + // Try to exceed MaxSpaceBytes via N command. 
+ var sb strings.Builder + for i := 0; i < 2000; i++ { + sb.WriteString(strings.Repeat("a", 600)) + sb.WriteByte('\n') + } + dir := pentestDir(t, map[string]string{"big.txt": sb.String()}) + _, stderr, code := cmdRun(t, `sed ':a;N;ba' big.txt`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "pattern space exceeded size limit") +} + +func TestPentestHoldSpaceExhaustion(t *testing.T) { + var sb strings.Builder + for i := 0; i < 2000; i++ { + sb.WriteString(strings.Repeat("b", 600)) + sb.WriteByte('\n') + } + dir := pentestDir(t, map[string]string{"big.txt": sb.String()}) + _, stderr, code := cmdRun(t, `sed 'H' big.txt`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "hold space exceeded size limit") +} + +func TestPentestBranchLoopLimit(t *testing.T) { + dir := pentestDir(t, map[string]string{"f.txt": "test\n"}) + ctx, cancel := context.WithTimeout(context.Background(), pentestTimeout) + defer cancel() + _, stderr, code := runScriptCtx(ctx, t, `sed ':loop;b loop' f.txt`, dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "branch loop limit exceeded") +} + +func TestPentestSubstitutionGrowsPatternSpace(t *testing.T) { + // s///g with a replacement much larger than the match can grow pattern space. + dir := pentestDir(t, map[string]string{ + "f.txt": strings.Repeat("x", 100000) + "\n", + }) + // Replace each 'x' with 20 chars — would grow to 2MB, exceeding MaxSpaceBytes (1MB). + _, stderr, code := cmdRun(t, `sed 's/x/xxxxxxxxxxxxxxxxxxxx/g' f.txt`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "pattern space exceeded size limit") +} + +func TestPentestSmallFileWithLargeNthSub(t *testing.T) { + // Substitution with very high N should not cause issues. + dir := pentestDir(t, map[string]string{"f.txt": "aaa\n"}) + stdout, _, code := cmdRun(t, `sed 's/a/X/999' f.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "aaa\n", stdout) // No 999th occurrence, no change. 
+} + +// --- Special files --- + +func TestPentestDevNull(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("skipping /dev/null test on Windows") + } + stdout, _, code := runScript(t, `sed 's/a/b/' /dev/null`, "", interp.AllowedPaths([]string{"/dev"})) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +func TestPentestContextCancelledDuringProcessing(t *testing.T) { + dir := pentestDir(t, map[string]string{ + "big.txt": strings.Repeat("line\n", 100000), + }) + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + _, _, code := runScriptCtx(ctx, t, `sed 's/line/LINE/g' big.txt`, dir, interp.AllowedPaths([]string{dir})) + // Should either complete or be cancelled — both are acceptable. + _ = code +} + +// --- Path and filename edge cases --- + +func TestPentestNonExistentFile(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `sed 's/a/b/' nonexistent.txt`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "nonexistent.txt") +} + +func TestPentestDirectoryAsFile(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.Mkdir(filepath.Join(dir, "subdir"), 0755)) + _, stderr, code := cmdRun(t, `sed 's/a/b/' subdir`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "sed:") +} + +func TestPentestEmptyStringFilename(t *testing.T) { + _, stderr, code := cmdRun(t, `sed 's/a/b/' ""`, "") + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "sed:") +} + +func TestPentestMultipleStdinArgs(t *testing.T) { + dir := pentestDir(t, map[string]string{"f.txt": "hello\n"}) + // Multiple - args: stdin should only be consumed once. + stdout, _, code := cmdRun(t, `echo hello | sed 's/hello/bye/' - -`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "bye\n", stdout) +} + +func TestPentestManyFiles(t *testing.T) { + // Verify no FD leak with many files. 
+ files := make(map[string]string) + var names []string + for i := 0; i < 50; i++ { + name := "f" + strings.Repeat("0", 3-len(strings.TrimLeft(string(rune('0'+i%10)), ""))) + ".txt" + name = filepath.Base(name) + actualName := "file_" + string(rune('a'+i%26)) + string(rune('a'+i/26)) + ".txt" + files[actualName] = "line\n" + names = append(names, actualName) + } + dir := pentestDir(t, files) + script := "sed 's/line/LINE/' " + strings.Join(names, " ") + stdout, _, code := cmdRun(t, script, dir) + assert.Equal(t, 0, code) + assert.Equal(t, strings.Repeat("LINE\n", len(names)), stdout) +} + +// --- Input validation --- + +func TestPentestInvalidRegexPatterns(t *testing.T) { + dir := pentestDir(t, map[string]string{"f.txt": "test\n"}) + patterns := []string{ + `sed 's/[/x/' f.txt`, // Unterminated character class + `sed '/[/d' f.txt`, // Invalid address regex + `sed -E 's/(/x/' f.txt`, // Unmatched paren in ERE + } + for _, script := range patterns { + t.Run(script, func(t *testing.T) { + _, stderr, code := cmdRun(t, script, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "sed:") + }) + } +} + +func TestPentestEmptyScript(t *testing.T) { + // Empty script is valid in GNU sed. + _, _, code := cmdRun(t, `sed '' /dev/null`, "") + _ = code // just don't crash +} + +func TestPentestNoScriptNoFiles(t *testing.T) { + _, stderr, code := cmdRun(t, `sed`, "") + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "sed:") +} + +func TestPentestUnterminatedSubstitution(t *testing.T) { + dir := pentestDir(t, map[string]string{"f.txt": "test\n"}) + _, _, code := cmdRun(t, `sed 's/foo' f.txt`, dir) + // Just don't crash. + _ = code +} + +func TestPentestUnterminatedGroup(t *testing.T) { + dir := pentestDir(t, map[string]string{"f.txt": "test\n"}) + _, stderr, code := cmdRun(t, `sed '{p' f.txt`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "unterminated") +} + +func TestPentestDeeplyNestedGroups(t *testing.T) { + // Many nested groups should not crash. 
+ dir := pentestDir(t, map[string]string{"f.txt": "test\n"}) + script := strings.Repeat("{", 50) + "p" + strings.Repeat("}", 50) + stdout, _, code := cmdRun(t, `sed '`+script+`' f.txt`, dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "test") +} + +// --- Regex safety (ReDoS) --- + +func TestPentestReDoSPattern(t *testing.T) { + // RE2 should handle this in linear time. + dir := pentestDir(t, map[string]string{ + "f.txt": strings.Repeat("a", 100) + "\n", + }) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + _, _, code := runScriptCtx(ctx, t, `sed -E 's/(a+)+b/x/' f.txt`, dir, interp.AllowedPaths([]string{dir})) + assert.Equal(t, 0, code) +} + +// --- P and D commands --- + +func TestPentestPCommandMultiline(t *testing.T) { + dir := pentestDir(t, map[string]string{ + "f.txt": "line1\nline2\n", + }) + stdout, _, code := cmdRun(t, `sed -n 'N;P' f.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "line1\n", stdout) +} + +func TestPentestDCommandMultiline(t *testing.T) { + dir := pentestDir(t, map[string]string{ + "f.txt": "a\nb\nc\n", + }) + // D deletes first line of pattern space and restarts cycle. + // N appends next line, D removes the first. 
+ stdout, _, code := cmdRun(t, `sed 'N;P;D' f.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "a\nb\nc\n", stdout) +} + +// --- Quit commands with exit codes --- + +func TestPentestQuitWithCode(t *testing.T) { + dir := pentestDir(t, map[string]string{"f.txt": "line1\nline2\nline3\n"}) + _, _, code := cmdRun(t, `sed 'q 42' f.txt`, dir) + assert.Equal(t, 42, code) +} + +func TestPentestQuitNoPrintWithCode(t *testing.T) { + dir := pentestDir(t, map[string]string{"f.txt": "line1\nline2\nline3\n"}) + stdout, _, code := cmdRun(t, `sed 'Q 7' f.txt`, dir) + assert.Equal(t, 7, code) + assert.Equal(t, "", stdout) +} + +// --- Transliterate edge cases --- + +func TestPentestTransliterateMismatch(t *testing.T) { + dir := pentestDir(t, map[string]string{"f.txt": "test\n"}) + _, stderr, code := cmdRun(t, `sed 'y/abc/de/' f.txt`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "same length") +} + +func TestPentestTransliterateEmpty(t *testing.T) { + dir := pentestDir(t, map[string]string{"f.txt": "test\n"}) + stdout, _, code := cmdRun(t, `sed 'y///' f.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "test\n", stdout) // No transliteration, identity. 
+} + +// --- Symlinks --- + +func TestPentestSymlinkToRegularFile(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(dir, "real.txt"), []byte("hello\n"), 0644)) + require.NoError(t, os.Symlink("real.txt", filepath.Join(dir, "link.txt"))) + stdout, _, code := cmdRun(t, `sed 's/hello/bye/' link.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "bye\n", stdout) +} + +func TestPentestDanglingSymlink(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.Symlink(filepath.Join(dir, "nonexistent"), filepath.Join(dir, "dangling.txt"))) + _, stderr, code := cmdRun(t, `sed 's/a/b/' dangling.txt`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "sed:") +} diff --git a/interp/builtins/tests/sed/sed_test.go b/interp/builtins/tests/sed/sed_test.go new file mode 100644 index 00000000..ef4e3d14 --- /dev/null +++ b/interp/builtins/tests/sed/sed_test.go @@ -0,0 +1,607 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package sed_test + +import ( + "bytes" + "context" + "errors" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/DataDog/rshell/interp" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "mvdan.cc/sh/v3/syntax" +) + +func runScript(t *testing.T, script, dir string, opts ...interp.RunnerOption) (string, string, int) { + t.Helper() + return runScriptCtx(context.Background(), t, script, dir, opts...) 
+} + +func runScriptCtx(ctx context.Context, t *testing.T, script, dir string, opts ...interp.RunnerOption) (string, string, int) { + t.Helper() + parser := syntax.NewParser() + prog, err := parser.Parse(strings.NewReader(script), "") + require.NoError(t, err) + var outBuf, errBuf bytes.Buffer + allOpts := append([]interp.RunnerOption{interp.StdIO(nil, &outBuf, &errBuf)}, opts...) + runner, err := interp.New(allOpts...) + require.NoError(t, err) + defer runner.Close() + if dir != "" { + runner.Dir = dir + } + err = runner.Run(ctx, prog) + exitCode := 0 + if err != nil { + var es interp.ExitStatus + if errors.As(err, &es) { + exitCode = int(es) + } else if ctx.Err() == nil { + t.Fatalf("unexpected error: %v", err) + } + } + return outBuf.String(), errBuf.String(), exitCode +} + +func cmdRun(t *testing.T, script, dir string) (stdout, stderr string, exitCode int) { + t.Helper() + return runScript(t, script, dir, interp.AllowedPaths([]string{dir})) +} + +func writeFile(t *testing.T, dir, name, content string) { + t.Helper() + err := os.WriteFile(filepath.Join(dir, name), []byte(content), 0644) + require.NoError(t, err) +} + +func setupDir(t *testing.T, files map[string]string) string { + t.Helper() + dir := t.TempDir() + for name, content := range files { + writeFile(t, dir, name, content) + } + return dir +} + +// --- Basic Substitution --- + +func TestSubstituteBasic(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "hello world\n", + }) + stdout, _, code := cmdRun(t, `sed 's/world/earth/' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "hello earth\n", stdout) +} + +func TestSubstituteGlobal(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "aaa bbb aaa\n", + }) + stdout, _, code := cmdRun(t, `sed 's/aaa/zzz/g' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "zzz bbb zzz\n", stdout) +} + +func TestSubstituteNth(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "ab ab ab ab\n", 
+ }) + stdout, _, code := cmdRun(t, `sed 's/ab/XY/2' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "ab XY ab ab\n", stdout) +} + +func TestSubstituteCaseInsensitive(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "Hello HELLO hello\n", + }) + stdout, _, code := cmdRun(t, `sed 's/hello/bye/i' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "bye HELLO hello\n", stdout) +} + +func TestSubstituteAlternateDelimiter(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "/usr/local/bin\n", + }) + stdout, _, code := cmdRun(t, `sed 's|/usr/local|/opt|' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "/opt/bin\n", stdout) +} + +func TestSubstituteAmpersand(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "hello\n", + }) + stdout, _, code := cmdRun(t, `sed 's/hello/[&]/' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "[hello]\n", stdout) +} + +func TestSubstituteEmptyPattern(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "hello\n", + }) + stdout, _, code := cmdRun(t, `sed 's/^/prefix: /' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "prefix: hello\n", stdout) +} + +func TestSubstituteWithPrint(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "aaa\nbbb\naaa\n", + }) + stdout, _, code := cmdRun(t, `sed -n 's/aaa/zzz/p' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "zzz\nzzz\n", stdout) +} + +// --- Print and Output --- + +func TestPrint(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "line1\nline2\n", + }) + stdout, _, code := cmdRun(t, `sed 'p' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "line1\nline1\nline2\nline2\n", stdout) +} + +func TestSuppressAutoPrint(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "line1\nline2\nline3\n", + }) + stdout, _, code := cmdRun(t, `sed -n 'p' input.txt`, dir) + assert.Equal(t, 0, 
code) + assert.Equal(t, "line1\nline2\nline3\n", stdout) +} + +func TestLineNumber(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "aaa\nbbb\nccc\n", + }) + stdout, _, code := cmdRun(t, `sed '=' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "1\naaa\n2\nbbb\n3\nccc\n", stdout) +} + +func TestPrintUnambiguous(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "hello\tworld\n", + }) + stdout, _, code := cmdRun(t, `sed -n 'l' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "hello\\tworld$\n", stdout) +} + +// --- Delete --- + +func TestDeleteBasic(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "line1\nline2\nline3\n", + }) + stdout, _, code := cmdRun(t, `sed '2d' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "line1\nline3\n", stdout) +} + +func TestDeleteRange(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "line1\nline2\nline3\nline4\nline5\n", + }) + stdout, _, code := cmdRun(t, `sed '2,4d' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "line1\nline5\n", stdout) +} + +// --- Addressing --- + +func TestAddressLine(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "first\nsecond\nthird\n", + }) + stdout, _, code := cmdRun(t, `sed '2s/second/SECOND/' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "first\nSECOND\nthird\n", stdout) +} + +func TestAddressLastLine(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "first\nsecond\nthird\n", + }) + stdout, _, code := cmdRun(t, `sed '$s/third/THIRD/' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "first\nsecond\nTHIRD\n", stdout) +} + +func TestAddressRegex(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "apple\nbanana\ncherry\n", + }) + stdout, _, code := cmdRun(t, `sed '/banana/d' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "apple\ncherry\n", stdout) +} + +func 
TestAddressRange(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "1\n2\n3\n4\n5\n", + }) + stdout, _, code := cmdRun(t, `sed -n '2,4p' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "2\n3\n4\n", stdout) +} + +func TestAddressRegexRange(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "start\nmiddle1\nmiddle2\nend\nafter\n", + }) + stdout, _, code := cmdRun(t, `sed -n '/start/,/end/p' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "start\nmiddle1\nmiddle2\nend\n", stdout) +} + +func TestAddressNegation(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "keep\ndelete\nkeep\n", + }) + stdout, _, code := cmdRun(t, `sed '/keep/!d' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "keep\nkeep\n", stdout) +} + +func TestAddressStep(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "1\n2\n3\n4\n5\n6\n", + }) + stdout, _, code := cmdRun(t, `sed -n '1~2p' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "1\n3\n5\n", stdout) +} + +// --- Text Commands --- + +func TestAppend(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "line1\nline2\n", + }) + stdout, _, code := cmdRun(t, `sed '1a\appended' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "line1\nappended\nline2\n", stdout) +} + +func TestInsert(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "line1\nline2\n", + }) + stdout, _, code := cmdRun(t, `sed '2i\inserted' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "line1\ninserted\nline2\n", stdout) +} + +func TestChange(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "line1\nline2\nline3\n", + }) + stdout, _, code := cmdRun(t, `sed '2c\changed' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "line1\nchanged\nline3\n", stdout) +} + +// --- Hold Space --- + +func TestHoldCopy(t *testing.T) { + dir := setupDir(t, map[string]string{ 
+ "input.txt": "first\nsecond\n", + }) + // Copy first line to hold space, on second line replace pattern with hold + stdout, _, code := cmdRun(t, `sed -n '1h;2{g;p}' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "first\n", stdout) +} + +func TestHoldAppend(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "a\nb\nc\n", + }) + // Accumulate all lines in hold space, print at end + stdout, _, code := cmdRun(t, `sed -n 'H;${g;p}' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\na\nb\nc\n", stdout) +} + +func TestExchange(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "pattern\n", + }) + // Exchange swaps pattern space (content) with hold space (initially empty) + stdout, _, code := cmdRun(t, `sed -n 'x;p' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "\n", stdout) +} + +// --- Branching --- + +func TestBranch(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "hello\n", + }) + // b with no label branches to end of script, skipping subsequent commands + stdout, _, code := cmdRun(t, `sed 'b;s/hello/bye/' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "hello\n", stdout) +} + +func TestBranchLabel(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "hello\n", + }) + stdout, _, code := cmdRun(t, "sed 'b skip;s/hello/bye/;:skip' input.txt", dir) + assert.Equal(t, 0, code) + assert.Equal(t, "hello\n", stdout) +} + +func TestBranchConditional(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "aXb\n", + }) + // t branches if substitution was made + stdout, _, code := cmdRun(t, `sed 's/X/Y/;t done;s/a/Z/;:done' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "aYb\n", stdout) +} + +func TestBranchConditionalNoSub(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "hello\nworld\n", + }) + // T branches if NO substitution was made. 
+ // On "hello": s/hello/HI/ succeeds, T does not branch, s/HI/BYE/ runs → "BYE" + // On "world": s/hello/HI/ fails, T branches to done, s/HI/BYE/ skipped → "world" + stdout, _, code := cmdRun(t, `sed 's/hello/HI/;T done;s/HI/BYE/;:done' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "BYE\nworld\n", stdout) +} + +// --- Next Line --- + +func TestNext(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "line1\nline2\nline3\nline4\n", + }) + // n prints current line (unless -n), reads next line into pattern space + stdout, _, code := cmdRun(t, `sed -n 'n;p' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "line2\nline4\n", stdout) +} + +func TestNextAppend(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "line1\nline2\n", + }) + // N appends next line to pattern space with embedded newline + stdout, _, code := cmdRun(t, `sed -n 'N;p' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "line1") + assert.Contains(t, stdout, "line2") +} + +// --- Transliterate --- + +func TestTransliterate(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "hello\n", + }) + stdout, _, code := cmdRun(t, `sed 'y/helo/HELO/' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "HELLO\n", stdout) +} + +// --- Quit --- + +func TestQuit(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "line1\nline2\nline3\n", + }) + // q prints current line then exits + stdout, _, code := cmdRun(t, `sed '2q' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "line1\nline2\n", stdout) +} + +func TestQuitNoPrint(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "line1\nline2\nline3\n", + }) + // Q exits without printing current line + stdout, _, code := cmdRun(t, `sed '2Q' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "line1\n", stdout) +} + +// --- Multiple Expressions --- + +func TestMultipleExpressions(t *testing.T) { + 
dir := setupDir(t, map[string]string{ + "input.txt": "hello world\n", + }) + stdout, _, code := cmdRun(t, `sed -e 's/hello/hi/' -e 's/world/earth/' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "hi earth\n", stdout) +} + +func TestSemicolonSeparator(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "hello world\n", + }) + stdout, _, code := cmdRun(t, `sed 's/hello/hi/;s/world/earth/' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "hi earth\n", stdout) +} + +// --- Extended Regex --- + +func TestExtendedRegex(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "abc123def\n", + }) + stdout, _, code := cmdRun(t, `sed -E 's/[0-9]+/NUM/' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abcNUMdef\n", stdout) +} + +func TestExtendedRegexR(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "abc123def\n", + }) + stdout, _, code := cmdRun(t, `sed -r 's/[0-9]+/NUM/' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "abcNUMdef\n", stdout) +} + +// --- Stdin --- + +func TestStdinPipe(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "hello world\n", + }) + stdout, _, code := cmdRun(t, `cat input.txt | sed 's/world/earth/'`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "hello earth\n", stdout) +} + +func TestStdinDash(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "hello world\n", + }) + stdout, _, code := cmdRun(t, `cat input.txt | sed 's/world/earth/' -`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "hello earth\n", stdout) +} + +// --- Edge Cases --- + +func TestEmptyFile(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "", + }) + stdout, _, code := cmdRun(t, `sed 's/a/b/' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "", stdout) +} + +func TestSingleLine(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "only line\n", + }) + stdout, _, code := 
cmdRun(t, `sed 's/only/single/' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "single line\n", stdout) +} + +func TestNoTrailingNewline(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "no newline", + }) + stdout, _, code := cmdRun(t, `sed 's/no/with/' input.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "with newline\n", stdout) +} + +func TestMultipleFiles(t *testing.T) { + dir := setupDir(t, map[string]string{ + "a.txt": "alpha\n", + "b.txt": "beta\n", + }) + stdout, _, code := cmdRun(t, `sed 's/^/> /' a.txt b.txt`, dir) + assert.Equal(t, 0, code) + assert.Equal(t, "> alpha\n> beta\n", stdout) +} + +// --- Error Cases --- + +func TestMissingFile(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `sed 's/a/b/' nonexistent.txt`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "sed:") +} + +func TestNoScript(t *testing.T) { + dir := t.TempDir() + _, stderr, code := cmdRun(t, `sed`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "sed:") +} + +func TestInvalidRegex(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "hello\n", + }) + _, stderr, code := cmdRun(t, `sed 's/[invalid/replacement/' input.txt`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "sed:") +} + +func TestBlockedWriteCommand(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "hello\n", + }) + _, stderr, code := cmdRun(t, `sed 'w output.txt' input.txt`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "blocked") +} + +func TestBlockedExecuteCommand(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "hello\n", + }) + _, stderr, code := cmdRun(t, `sed 'e' input.txt`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "blocked") +} + +func TestBlockedInPlaceFlag(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "hello\n", + }) + _, stderr, code := cmdRun(t, `sed -i 's/hello/bye/' input.txt`, dir) + 
assert.NotEqual(t, 0, code) + assert.Contains(t, stderr, "sed:") +} + +func TestBlockedReadCommand(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "hello\n", + }) + _, stderr, code := cmdRun(t, `sed 'r other.txt' input.txt`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "blocked") +} + +func TestBlockedWriteFlag(t *testing.T) { + dir := setupDir(t, map[string]string{ + "input.txt": "hello\n", + }) + _, stderr, code := cmdRun(t, `sed 's/hello/bye/w output.txt' input.txt`, dir) + assert.Equal(t, 1, code) + assert.Contains(t, stderr, "blocked") +} + +// --- Help --- + +func TestHelp(t *testing.T) { + dir := t.TempDir() + stdout, _, code := cmdRun(t, `sed --help`, dir) + assert.Equal(t, 0, code) + assert.Contains(t, stdout, "Usage:") +} diff --git a/interp/register_builtins.go b/interp/register_builtins.go index 1a360bbc..03608922 100644 --- a/interp/register_builtins.go +++ b/interp/register_builtins.go @@ -20,6 +20,7 @@ import ( "github.com/DataDog/rshell/interp/builtins/head" "github.com/DataDog/rshell/interp/builtins/ls" printfcmd "github.com/DataDog/rshell/interp/builtins/printf" + "github.com/DataDog/rshell/interp/builtins/sed" "github.com/DataDog/rshell/interp/builtins/strings_cmd" "github.com/DataDog/rshell/interp/builtins/tail" "github.com/DataDog/rshell/interp/builtins/testcmd" @@ -45,6 +46,7 @@ func registerBuiltins() { head.Cmd, ls.Cmd, printfcmd.Cmd, + sed.Cmd, strings_cmd.Cmd, tail.Cmd, testcmd.Cmd, diff --git a/tests/allowed_symbols_test.go b/tests/allowed_symbols_test.go index 0b272b2f..940df447 100644 --- a/tests/allowed_symbols_test.go +++ b/tests/allowed_symbols_test.go @@ -32,8 +32,12 @@ import ( // All packages not listed here are implicitly banned, including all // third-party packages and other internal module packages. var builtinAllowedSymbols = []string{ + // bytes.IndexByte — finds a byte in a byte slice; pure function, no I/O. + "bytes.IndexByte", // bufio.NewScanner — line-by-line input reading (e.g. 
head, cat); no write or exec capability. "bufio.NewScanner", + // bufio.Scanner — scanner type for buffered input reading; no write or exec capability. + "bufio.Scanner", // bufio.SplitFunc — type for custom scanner split functions; pure type, no I/O. "bufio.SplitFunc", // context.Context — deadline/cancellation plumbing; pure interface, no side effects. diff --git a/tests/scenarios/cmd/sed/address/degenerate_range.yaml b/tests/scenarios/cmd/sed/address/degenerate_range.yaml new file mode 100644 index 00000000..746d2962 --- /dev/null +++ b/tests/scenarios/cmd/sed/address/degenerate_range.yaml @@ -0,0 +1,13 @@ +description: A degenerate range where addr1 and addr2 match the same line is treated as a one-line match. +setup: + files: + - path: input.txt + content: "line1\nline2\nline3\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '2,2s/line/LINE/' input.txt +expect: + stdout: "line1\nLINE2\nline3\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/address/descending_range.yaml b/tests/scenarios/cmd/sed/address/descending_range.yaml new file mode 100644 index 00000000..007c90de --- /dev/null +++ b/tests/scenarios/cmd/sed/address/descending_range.yaml @@ -0,0 +1,13 @@ +description: "Descending numeric range (e.g. 4,2) is treated as a one-line range." +setup: + files: + - path: input.txt + content: "a\nb\nc\nd\ne\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n '4,2p' input.txt +expect: + stdout: "d\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/address/dollar_multi_file.yaml b/tests/scenarios/cmd/sed/address/dollar_multi_file.yaml new file mode 100644 index 00000000..a91c4b8e --- /dev/null +++ b/tests/scenarios/cmd/sed/address/dollar_multi_file.yaml @@ -0,0 +1,15 @@ +description: "The $ address matches only the last line of the last file (continuous stream)." 
+setup: + files: + - path: a.txt + content: "a1\na2\n" + - path: b.txt + content: "b1\nb2\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n '$p' a.txt b.txt +expect: + stdout: "b2\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/address/empty_regex_reuse.yaml b/tests/scenarios/cmd/sed/address/empty_regex_reuse.yaml new file mode 100644 index 00000000..216b28ff --- /dev/null +++ b/tests/scenarios/cmd/sed/address/empty_regex_reuse.yaml @@ -0,0 +1,12 @@ +description: "Empty // address reuses the last regex." +setup: + files: + - path: input.txt + content: "abc\ndef\nabc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n '/abc/p;//p' input.txt +expect: + stdout: "abc\nabc\nabc\nabc\n" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/address/last_line.yaml b/tests/scenarios/cmd/sed/address/last_line.yaml new file mode 100644 index 00000000..2839e4d5 --- /dev/null +++ b/tests/scenarios/cmd/sed/address/last_line.yaml @@ -0,0 +1,13 @@ +description: The $ address matches the last line of input. +setup: + files: + - path: input.txt + content: "first\nmiddle\nlast\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '$s/last/LAST/' input.txt +expect: + stdout: "first\nmiddle\nLAST\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/address/line.yaml b/tests/scenarios/cmd/sed/address/line.yaml new file mode 100644 index 00000000..bb405cc4 --- /dev/null +++ b/tests/scenarios/cmd/sed/address/line.yaml @@ -0,0 +1,13 @@ +description: Specific line number address applies command to that line only. 
+setup: + files: + - path: input.txt + content: "one\ntwo\nthree\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '2s/two/TWO/' input.txt +expect: + stdout: "one\nTWO\nthree\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/address/mixed_range.yaml b/tests/scenarios/cmd/sed/address/mixed_range.yaml new file mode 100644 index 00000000..df8cbb8c --- /dev/null +++ b/tests/scenarios/cmd/sed/address/mixed_range.yaml @@ -0,0 +1,13 @@ +description: Address ranges can mix different types like regex and line number. +setup: + files: + - path: input.txt + content: "start\nkeep1\nkeep2\nstop\nignore\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '/start/,3s/^/>> /' input.txt +expect: + stdout: ">> start\n>> keep1\n>> keep2\nstop\nignore\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/address/negation.yaml b/tests/scenarios/cmd/sed/address/negation.yaml new file mode 100644 index 00000000..eb8aa059 --- /dev/null +++ b/tests/scenarios/cmd/sed/address/negation.yaml @@ -0,0 +1,13 @@ +description: The ! negation applies command to non-matching lines. +setup: + files: + - path: input.txt + content: "keep\ndelete\nkeep\ndelete\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '/delete/!s/keep/KEEP/' input.txt +expect: + stdout: "KEEP\ndelete\nKEEP\ndelete\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/address/negation_delete.yaml b/tests/scenarios/cmd/sed/address/negation_delete.yaml new file mode 100644 index 00000000..7b1b4dfe --- /dev/null +++ b/tests/scenarios/cmd/sed/address/negation_delete.yaml @@ -0,0 +1,13 @@ +description: Negation with the delete command keeps only matching lines. 
+setup: + files: + - path: input.txt + content: "keep\nremove\nkeep\nremove\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '/keep/!d' input.txt +expect: + stdout: "keep\nkeep\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/address/negation_print.yaml b/tests/scenarios/cmd/sed/address/negation_print.yaml new file mode 100644 index 00000000..68cbf295 --- /dev/null +++ b/tests/scenarios/cmd/sed/address/negation_print.yaml @@ -0,0 +1,13 @@ +description: Negation with the print command. +setup: + files: + - path: input.txt + content: "aaa\nbbb\nccc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n '/bbb/!p' input.txt +expect: + stdout: "aaa\nccc\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/address/negation_range.yaml b/tests/scenarios/cmd/sed/address/negation_range.yaml new file mode 100644 index 00000000..a1105ee8 --- /dev/null +++ b/tests/scenarios/cmd/sed/address/negation_range.yaml @@ -0,0 +1,13 @@ +description: Negation applied to a range address. +setup: + files: + - path: input.txt + content: "line1\nline2\nline3\nline4\nline5\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '2,4!d' input.txt +expect: + stdout: "line2\nline3\nline4\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/address/range.yaml b/tests/scenarios/cmd/sed/address/range.yaml new file mode 100644 index 00000000..704fbbe6 --- /dev/null +++ b/tests/scenarios/cmd/sed/address/range.yaml @@ -0,0 +1,13 @@ +description: Address range applies command from first to second address inclusive. 
+setup: + files: + - path: input.txt + content: "line1\nline2\nline3\nline4\nline5\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '2,4s/line/LINE/' input.txt +expect: + stdout: "line1\nLINE2\nLINE3\nLINE4\nline5\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/address/regex.yaml b/tests/scenarios/cmd/sed/address/regex.yaml new file mode 100644 index 00000000..bed2a5cc --- /dev/null +++ b/tests/scenarios/cmd/sed/address/regex.yaml @@ -0,0 +1,13 @@ +description: Regex address applies command to matching lines. +setup: + files: + - path: input.txt + content: "apple\nbanana\napricot\ncherry\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '/^a/s/a/A/' input.txt +expect: + stdout: "Apple\nbanana\nApricot\ncherry\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/address/regex_addr_sets_lastRe.yaml b/tests/scenarios/cmd/sed/address/regex_addr_sets_lastRe.yaml new file mode 100644 index 00000000..18cc37ba --- /dev/null +++ b/tests/scenarios/cmd/sed/address/regex_addr_sets_lastRe.yaml @@ -0,0 +1,13 @@ +description: "Regex address updates lastRe even when it does not match." +setup: + files: + - path: input.txt + content: "baz\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n -e '/foo/ p' -e 's//REPLACED/p' input.txt +expect: + stdout: "" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/address/regex_range_same_pattern.yaml b/tests/scenarios/cmd/sed/address/regex_range_same_pattern.yaml new file mode 100644 index 00000000..230cb372 --- /dev/null +++ b/tests/scenarios/cmd/sed/address/regex_range_same_pattern.yaml @@ -0,0 +1,13 @@ +description: "Regex addr2 is not checked on the opening line of a range." 
+setup: + files: + - path: input.txt + content: "a\nb\nc\na\nd\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n '/a/,/a/p' input.txt +expect: + stdout: "a\nb\nc\na\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/address/step.yaml b/tests/scenarios/cmd/sed/address/step.yaml new file mode 100644 index 00000000..80fbcb54 --- /dev/null +++ b/tests/scenarios/cmd/sed/address/step.yaml @@ -0,0 +1,13 @@ +description: The first~step address matches every step-th line starting from first. +setup: + files: + - path: input.txt + content: "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n '1~3p' input.txt +expect: + stdout: "1\n4\n7\n10\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/address/zero_comma_regex.yaml b/tests/scenarios/cmd/sed/address/zero_comma_regex.yaml new file mode 100644 index 00000000..07a5f06c --- /dev/null +++ b/tests/scenarios/cmd/sed/address/zero_comma_regex.yaml @@ -0,0 +1,13 @@ +description: "The 0,/re/ address allows the regex to match on line 1 (unlike 1,/re/)." +setup: + files: + - path: input.txt + content: "foo\nbar\nfoo\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '0,/foo/s/foo/FOO/' input.txt +expect: + stdout: "FOO\nbar\nfoo\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/branch/T_resets_subMade.yaml b/tests/scenarios/cmd/sed/branch/T_resets_subMade.yaml new file mode 100644 index 00000000..c5516db0 --- /dev/null +++ b/tests/scenarios/cmd/sed/branch/T_resets_subMade.yaml @@ -0,0 +1,12 @@ +description: The T command resets the substitution flag after evaluation. 
+setup: + files: + - path: input.txt + content: "a\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n 's/a/b/;T end;p;:end' input.txt +expect: + stdout: "b\n" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/branch/basic.yaml b/tests/scenarios/cmd/sed/branch/basic.yaml new file mode 100644 index 00000000..c51d1fc2 --- /dev/null +++ b/tests/scenarios/cmd/sed/branch/basic.yaml @@ -0,0 +1,13 @@ +description: The b command with no label branches to end of script. +setup: + files: + - path: input.txt + content: "aaa\nbbb\nccc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '/bbb/b; s/^/>> /' input.txt +expect: + stdout: ">> aaa\nbbb\n>> ccc\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/branch/conditional.yaml b/tests/scenarios/cmd/sed/branch/conditional.yaml new file mode 100644 index 00000000..cc077eeb --- /dev/null +++ b/tests/scenarios/cmd/sed/branch/conditional.yaml @@ -0,0 +1,13 @@ +description: The t command branches only if a substitution was made. +setup: + files: + - path: input.txt + content: "foo bar\nhello world\nfoo baz\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/foo/FOO/; t done; s/hello/HELLO/; :done' input.txt +expect: + stdout: "FOO bar\nHELLO world\nFOO baz\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/branch/duplicate_label_last_wins.yaml b/tests/scenarios/cmd/sed/branch/duplicate_label_last_wins.yaml new file mode 100644 index 00000000..e9a4f871 --- /dev/null +++ b/tests/scenarios/cmd/sed/branch/duplicate_label_last_wins.yaml @@ -0,0 +1,17 @@ +description: "Duplicate labels: branch goes to the last definition (GNU sed behavior)." 
+setup: + files: + - path: input.txt + content: "x\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed ':a + s/x/1/ + ba + s/1/2/ + :a + s/1/3/' input.txt +expect: + stdout: "3\n" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/branch/label.yaml b/tests/scenarios/cmd/sed/branch/label.yaml new file mode 100644 index 00000000..2218011e --- /dev/null +++ b/tests/scenarios/cmd/sed/branch/label.yaml @@ -0,0 +1,13 @@ +description: The b command branches to a named label defined with colon. +setup: + files: + - path: input.txt + content: "aaa\nbbb\nccc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n '/bbb/b skip; p; :skip' input.txt +expect: + stdout: "aaa\nccc\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/branch/not_substituted.yaml b/tests/scenarios/cmd/sed/branch/not_substituted.yaml new file mode 100644 index 00000000..0bae5a89 --- /dev/null +++ b/tests/scenarios/cmd/sed/branch/not_substituted.yaml @@ -0,0 +1,13 @@ +description: The T command branches only if no substitution was made. +setup: + files: + - path: input.txt + content: "foo bar\nhello world\nfoo baz\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/foo/FOO/; T skip; s/$/ MATCHED/; :skip' input.txt +expect: + stdout: "FOO bar MATCHED\nhello world\nFOO baz MATCHED\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/delete/D_preserves_subMade.yaml b/tests/scenarios/cmd/sed/delete/D_preserves_subMade.yaml new file mode 100644 index 00000000..53b22c35 --- /dev/null +++ b/tests/scenarios/cmd/sed/delete/D_preserves_subMade.yaml @@ -0,0 +1,13 @@ +description: "D restarts the cycle but preserves subMade for t/T branching." 
+setup: + files: + - path: input.txt + content: "ax\nby\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -e '/a/{' -e 'N' -e 's/a/A/' -e 'D' -e '}' -e 't s' -e 's/b/B/' -e ':s' input.txt +expect: + stdout: "by\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/delete/basic.yaml b/tests/scenarios/cmd/sed/delete/basic.yaml new file mode 100644 index 00000000..07be9ddd --- /dev/null +++ b/tests/scenarios/cmd/sed/delete/basic.yaml @@ -0,0 +1,13 @@ +description: The d command deletes lines matching an address. +setup: + files: + - path: input.txt + content: "line1\nline2\nline3\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '2d' input.txt +expect: + stdout: "line1\nline3\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/delete/first_line.yaml b/tests/scenarios/cmd/sed/delete/first_line.yaml new file mode 100644 index 00000000..0584d265 --- /dev/null +++ b/tests/scenarios/cmd/sed/delete/first_line.yaml @@ -0,0 +1,13 @@ +description: The D command deletes up to the first newline and restarts the cycle. +setup: + files: + - path: input.txt + content: "line1\nline2\nline3\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 'N; P; D' input.txt +expect: + stdout: "line1\nline2\nline3\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/delete/range.yaml b/tests/scenarios/cmd/sed/delete/range.yaml new file mode 100644 index 00000000..3450eea3 --- /dev/null +++ b/tests/scenarios/cmd/sed/delete/range.yaml @@ -0,0 +1,13 @@ +description: The d command deletes a range of lines. 
+setup: + files: + - path: input.txt + content: "line1\nline2\nline3\nline4\nline5\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '2,4d' input.txt +expect: + stdout: "line1\nline5\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/edge/crlf_preserved.yaml b/tests/scenarios/cmd/sed/edge/crlf_preserved.yaml new file mode 100644 index 00000000..f101eb6a --- /dev/null +++ b/tests/scenarios/cmd/sed/edge/crlf_preserved.yaml @@ -0,0 +1,7 @@ +description: Carriage returns in CRLF line endings are preserved in pattern space. +input: + script: "echo -e 'hello\r\nworld\r' | sed 's/\r$//'" +expect: + stdout: "hello\nworld\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/edge/empty_file.yaml b/tests/scenarios/cmd/sed/edge/empty_file.yaml new file mode 100644 index 00000000..67509b3b --- /dev/null +++ b/tests/scenarios/cmd/sed/edge/empty_file.yaml @@ -0,0 +1,13 @@ +description: Sed handles an empty input file without error. +setup: + files: + - path: empty.txt + content: "" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/a/b/' empty.txt +expect: + stdout: "" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/edge/no_trailing_newline.yaml b/tests/scenarios/cmd/sed/edge/no_trailing_newline.yaml new file mode 100644 index 00000000..9d192bd7 --- /dev/null +++ b/tests/scenarios/cmd/sed/edge/no_trailing_newline.yaml @@ -0,0 +1,18 @@ +description: >- + Sed processes a file without a trailing newline. + Intentional divergence: rshell sed always adds a trailing newline for + consistent output for AI agent consumers. GNU sed preserves the absence + of a trailing newline. 
+setup: + files: + - path: input.txt + content: "no newline at end" +skip_assert_against_bash: true +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/end/END/' input.txt +expect: + stdout: "no newline at END\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/edge/single_line.yaml b/tests/scenarios/cmd/sed/edge/single_line.yaml new file mode 100644 index 00000000..97b06b61 --- /dev/null +++ b/tests/scenarios/cmd/sed/edge/single_line.yaml @@ -0,0 +1,13 @@ +description: Sed processes a single line file correctly. +setup: + files: + - path: input.txt + content: "only line\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/only/the only/' input.txt +expect: + stdout: "the only line\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/errors/address_zero.yaml b/tests/scenarios/cmd/sed/errors/address_zero.yaml new file mode 100644 index 00000000..b049f7aa --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/address_zero.yaml @@ -0,0 +1,9 @@ +description: Line address 0 as standalone address is rejected. +skip_assert_against_bash: true +input: + script: |+ + echo 'hello' | sed -n '0p' +expect: + stdout: "" + stderr_contains: ["invalid usage of line address 0"] + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/blocked_R_cmd.yaml b/tests/scenarios/cmd/sed/errors/blocked_R_cmd.yaml new file mode 100644 index 00000000..f5064b10 --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/blocked_R_cmd.yaml @@ -0,0 +1,9 @@ +description: The R command is blocked for safety. 
+skip_assert_against_bash: true +input: + script: |+ + echo "test" | sed 'R input.txt' +expect: + stdout: "" + stderr: "sed: 'R' command is blocked: unsandboxed file reading is not allowed\n" + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/blocked_W_cmd.yaml b/tests/scenarios/cmd/sed/errors/blocked_W_cmd.yaml new file mode 100644 index 00000000..831847db --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/blocked_W_cmd.yaml @@ -0,0 +1,9 @@ +description: The W command is blocked for safety. +skip_assert_against_bash: true +input: + script: |+ + echo "test" | sed 'W output.txt' +expect: + stdout: "" + stderr: "sed: 'W' command is blocked: file writing is not allowed\n" + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/blocked_e_flag.yaml b/tests/scenarios/cmd/sed/errors/blocked_e_flag.yaml new file mode 100644 index 00000000..665114f5 --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/blocked_e_flag.yaml @@ -0,0 +1,9 @@ +description: The e flag in s command is blocked for safety. +skip_assert_against_bash: true +input: + script: |+ + echo "test" | sed 's/test/replaced/e' +expect: + stdout: "" + stderr: "sed: 'e' flag in 's' command is blocked: command execution is not allowed\n" + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/blocked_execute.yaml b/tests/scenarios/cmd/sed/errors/blocked_execute.yaml new file mode 100644 index 00000000..89678dcb --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/blocked_execute.yaml @@ -0,0 +1,9 @@ +description: The e command is blocked for safety. 
+skip_assert_against_bash: true +input: + script: |+ + echo "test" | sed 'e' +expect: + stdout: "" + stderr: "sed: 'e' command is blocked: command execution is not allowed\n" + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/blocked_inplace.yaml b/tests/scenarios/cmd/sed/errors/blocked_inplace.yaml new file mode 100644 index 00000000..8db911f8 --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/blocked_inplace.yaml @@ -0,0 +1,9 @@ +description: The -i flag is blocked for safety. +skip_assert_against_bash: true +input: + script: |+ + echo "test" | sed -i 's/test/replaced/' +expect: + stdout: "" + stderr_contains: ["unknown shorthand flag"] + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/blocked_read.yaml b/tests/scenarios/cmd/sed/errors/blocked_read.yaml new file mode 100644 index 00000000..5b069997 --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/blocked_read.yaml @@ -0,0 +1,9 @@ +description: The r command is blocked for safety. +skip_assert_against_bash: true +input: + script: |+ + echo "test" | sed 'r somefile.txt' +expect: + stdout: "" + stderr: "sed: 'r' command is blocked: unsandboxed file reading is not allowed\n" + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/blocked_write.yaml b/tests/scenarios/cmd/sed/errors/blocked_write.yaml new file mode 100644 index 00000000..7336792c --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/blocked_write.yaml @@ -0,0 +1,9 @@ +description: The w flag of the s command is blocked for safety. 
+skip_assert_against_bash: true +input: + script: |+ + echo "test" | sed 's/test/replaced/w output.txt' +expect: + stdout: "" + stderr: "sed: 'w' flag in 's' command is blocked: file writing is not allowed\n" + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/blocked_write_cmd.yaml b/tests/scenarios/cmd/sed/errors/blocked_write_cmd.yaml new file mode 100644 index 00000000..77967c58 --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/blocked_write_cmd.yaml @@ -0,0 +1,9 @@ +description: The w command is blocked for safety. +skip_assert_against_bash: true +input: + script: |+ + echo "test" | sed 'w output.txt' +expect: + stdout: "" + stderr: "sed: 'w' command is blocked: file writing is not allowed\n" + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/empty_regex_addr_no_previous.yaml b/tests/scenarios/cmd/sed/errors/empty_regex_addr_no_previous.yaml new file mode 100644 index 00000000..70e65efa --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/empty_regex_addr_no_previous.yaml @@ -0,0 +1,9 @@ +description: "Empty regex address // with no previous regex produces an error." +input: + script: |+ + echo test | sed -n '//p' +skip_assert_against_bash: true +expect: + stdout: "" + stderr_contains: ["no previous regular expression"] + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/extra_chars_after_p.yaml b/tests/scenarios/cmd/sed/errors/extra_chars_after_p.yaml new file mode 100644 index 00000000..fcdacaef --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/extra_chars_after_p.yaml @@ -0,0 +1,13 @@ +description: Extra characters after zero-arg command p are rejected. 
+setup: + files: + - path: input.txt + content: "hello\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 'pp' input.txt +expect: + stdout: "" + stderr_contains: ["extra characters after command"] + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/extra_chars_after_quit.yaml b/tests/scenarios/cmd/sed/errors/extra_chars_after_quit.yaml new file mode 100644 index 00000000..2f75ff8b --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/extra_chars_after_quit.yaml @@ -0,0 +1,13 @@ +description: Extra characters after q/Q command are rejected. +setup: + files: + - path: input.txt + content: "line1\nline2\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 'q1x' input.txt +expect: + stdout: "" + stderr_contains: ["extra characters after command"] + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/extra_chars_after_quit_no_code.yaml b/tests/scenarios/cmd/sed/errors/extra_chars_after_quit_no_code.yaml new file mode 100644 index 00000000..da71ef36 --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/extra_chars_after_quit_no_code.yaml @@ -0,0 +1,13 @@ +description: Extra characters after q command without exit code are rejected. +setup: + files: + - path: input.txt + content: "line1\nline2\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 'qp' input.txt +expect: + stdout: "" + stderr_contains: ["extra characters after command"] + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/invalid_backref_parse_time.yaml b/tests/scenarios/cmd/sed/errors/invalid_backref_parse_time.yaml new file mode 100644 index 00000000..581ee279 --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/invalid_backref_parse_time.yaml @@ -0,0 +1,9 @@ +description: "Invalid backreference in s/// rejected at parse time even when address doesn't match." 
+input: + script: |+ + echo x | sed '/y/s/a/\1/' +skip_assert_against_bash: true +expect: + stdout: "" + stderr_contains: ["invalid reference"] + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/invalid_regex.yaml b/tests/scenarios/cmd/sed/errors/invalid_regex.yaml new file mode 100644 index 00000000..9d2c8f2c --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/invalid_regex.yaml @@ -0,0 +1,8 @@ +description: Sed reports an error for an invalid regular expression. +input: + script: |+ + echo "test" | sed 's/[invalid/replace/' +expect: + stdout: "" + stderr_contains: ["sed:"] + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/invalid_s_flag.yaml b/tests/scenarios/cmd/sed/errors/invalid_s_flag.yaml new file mode 100644 index 00000000..88b37961 --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/invalid_s_flag.yaml @@ -0,0 +1,14 @@ +description: "Unknown flags in s/// are rejected as errors." +setup: + files: + - path: input.txt + content: "abc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/a/b/z' input.txt +skip_assert_against_bash: true +expect: + stdout: "" + stderr_contains: ["unknown option to 's' command"] + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/missing_file.yaml b/tests/scenarios/cmd/sed/errors/missing_file.yaml new file mode 100644 index 00000000..dace29ba --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/missing_file.yaml @@ -0,0 +1,14 @@ +description: Sed reports an error for a non-existent input file. 
+setup: + files: + - path: dummy.txt + content: "" +skip_assert_against_bash: true +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/a/b/' nonexistent.txt +expect: + stdout: "" + stderr_contains: ["nonexistent.txt"] + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/modifiers_on_empty_regexp.yaml b/tests/scenarios/cmd/sed/errors/modifiers_on_empty_regexp.yaml new file mode 100644 index 00000000..d3b9c5bd --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/modifiers_on_empty_regexp.yaml @@ -0,0 +1,13 @@ +description: Case-insensitive flag on empty regexp reuse is rejected. +setup: + files: + - path: input.txt + content: "abc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/a/x/; s//y/I' input.txt +expect: + stdout: "" + stderr_contains: ["cannot specify modifiers on empty regexp"] + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/multiple_number_flags.yaml b/tests/scenarios/cmd/sed/errors/multiple_number_flags.yaml new file mode 100644 index 00000000..eb7dd347 --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/multiple_number_flags.yaml @@ -0,0 +1,13 @@ +description: Repeated numeric occurrence flags in s/// are rejected. +setup: + files: + - path: input.txt + content: "aaa\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/a/b/2g3' input.txt +expect: + stdout: "" + stderr_contains: ["multiple number options"] + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/no_script.yaml b/tests/scenarios/cmd/sed/errors/no_script.yaml new file mode 100644 index 00000000..b3031e7d --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/no_script.yaml @@ -0,0 +1,9 @@ +description: Sed reports an error when no script is provided. 
+input: + script: |+ + sed +skip_assert_against_bash: true +expect: + stdout: "" + stderr: "sed: no script command has been specified\n" + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/undefined_label.yaml b/tests/scenarios/cmd/sed/errors/undefined_label.yaml new file mode 100644 index 00000000..e4aab3e3 --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/undefined_label.yaml @@ -0,0 +1,9 @@ +description: Undefined branch labels are rejected at parse time. +skip_assert_against_bash: true +input: + script: |+ + echo 'hello' | sed 'bmissing' +expect: + stdout: "" + stderr_contains: ["undefined label"] + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/unterminated_s.yaml b/tests/scenarios/cmd/sed/errors/unterminated_s.yaml new file mode 100644 index 00000000..d460a67e --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/unterminated_s.yaml @@ -0,0 +1,14 @@ +description: Unterminated s command is rejected. +setup: + files: + - path: input.txt + content: "abc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/a/b' input.txt +skip_assert_against_bash: true +expect: + stdout: "" + stderr_contains: ["unterminated"] + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/errors/zero_occurrence.yaml b/tests/scenarios/cmd/sed/errors/zero_occurrence.yaml new file mode 100644 index 00000000..b3fcda68 --- /dev/null +++ b/tests/scenarios/cmd/sed/errors/zero_occurrence.yaml @@ -0,0 +1,9 @@ +description: The s command rejects 0 as an occurrence number. +skip_assert_against_bash: true +input: + script: |+ + echo 'aaa' | sed 's/a/b/0' +expect: + stdout: "" + stderr_contains: ["number option to 's' command may not be zero"] + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/flags/extended_E.yaml b/tests/scenarios/cmd/sed/flags/extended_E.yaml new file mode 100644 index 00000000..a0b79514 --- /dev/null +++ b/tests/scenarios/cmd/sed/flags/extended_E.yaml @@ -0,0 +1,13 @@ +description: The -E flag enables extended regular expressions. 
+setup: + files: + - path: input.txt + content: "foo123bar\nhello456world\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -E 's/[0-9]+/NUM/' input.txt +expect: + stdout: "fooNUMbar\nhelloNUMworld\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/flags/extended_r.yaml b/tests/scenarios/cmd/sed/flags/extended_r.yaml new file mode 100644 index 00000000..5a104952 --- /dev/null +++ b/tests/scenarios/cmd/sed/flags/extended_r.yaml @@ -0,0 +1,13 @@ +description: The -r flag is a GNU alias for -E (extended regular expressions). +setup: + files: + - path: input.txt + content: "abc123def\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -r 's/[0-9]+/NUM/' input.txt +expect: + stdout: "abcNUMdef\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/flags/help.yaml b/tests/scenarios/cmd/sed/flags/help.yaml new file mode 100644 index 00000000..d3043440 --- /dev/null +++ b/tests/scenarios/cmd/sed/flags/help.yaml @@ -0,0 +1,9 @@ +description: The -h flag prints usage information and exits 0. +skip_assert_against_bash: true +input: + script: |+ + sed -h +expect: + stdout_contains: ["Usage: sed"] + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/group/basic.yaml b/tests/scenarios/cmd/sed/group/basic.yaml new file mode 100644 index 00000000..438dc2cf --- /dev/null +++ b/tests/scenarios/cmd/sed/group/basic.yaml @@ -0,0 +1,13 @@ +description: Group commands with braces apply multiple commands to matching lines. 
+setup: + files: + - path: input.txt + content: "apple\nbanana\ncherry\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '/banana/{ s/banana/BANANA/; s/$/!/; }' input.txt +expect: + stdout: "apple\nBANANA!\ncherry\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/group/branch_skips_after_group.yaml b/tests/scenarios/cmd/sed/group/branch_skips_after_group.yaml new file mode 100644 index 00000000..ac0b5c41 --- /dev/null +++ b/tests/scenarios/cmd/sed/group/branch_skips_after_group.yaml @@ -0,0 +1,13 @@ +description: "A branch command inside a group skips commands after the group." +setup: + files: + - path: input.txt + content: "a\nb\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n '{b};p' input.txt +expect: + stdout: "" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/group/branch_to_label_in_group.yaml b/tests/scenarios/cmd/sed/group/branch_to_label_in_group.yaml new file mode 100644 index 00000000..d5ca6888 --- /dev/null +++ b/tests/scenarios/cmd/sed/group/branch_to_label_in_group.yaml @@ -0,0 +1,12 @@ +description: Branch to a label inside a group works correctly. +setup: + files: + - path: input.txt + content: "aaa\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n '{:a;s/a/b/;ta;p}' input.txt +expect: + stdout: "bbb\n" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/group/deeply_nested_label.yaml b/tests/scenarios/cmd/sed/group/deeply_nested_label.yaml new file mode 100644 index 00000000..55365abe --- /dev/null +++ b/tests/scenarios/cmd/sed/group/deeply_nested_label.yaml @@ -0,0 +1,13 @@ +description: Labels inside a group nested within another group are resolved correctly. 
+setup: + files: + - path: input.txt + content: "aaa\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n '{{:a;s/a/b/;ta;p}}' input.txt +expect: + stdout: "bbb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/group/nested.yaml b/tests/scenarios/cmd/sed/group/nested.yaml new file mode 100644 index 00000000..93cf7073 --- /dev/null +++ b/tests/scenarios/cmd/sed/group/nested.yaml @@ -0,0 +1,13 @@ +description: Nested group commands work correctly. +setup: + files: + - path: input.txt + content: "foo bar\nhello world\nfoo baz\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '/foo/{ s/foo/FOO/; /bar/{ s/bar/BAR/; } }' input.txt +expect: + stdout: "FOO BAR\nhello world\nFOO baz\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/group/with_address.yaml b/tests/scenarios/cmd/sed/group/with_address.yaml new file mode 100644 index 00000000..383129a4 --- /dev/null +++ b/tests/scenarios/cmd/sed/group/with_address.yaml @@ -0,0 +1,13 @@ +description: Group commands with line address ranges. +setup: + files: + - path: input.txt + content: "line1\nline2\nline3\nline4\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '2,3{ s/line/LINE/; s/$/!/; }' input.txt +expect: + stdout: "line1\nLINE2!\nLINE3!\nline4\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/hold/append.yaml b/tests/scenarios/cmd/sed/hold/append.yaml new file mode 100644 index 00000000..5e6af229 --- /dev/null +++ b/tests/scenarios/cmd/sed/hold/append.yaml @@ -0,0 +1,13 @@ +description: The H command appends each line to the hold space; g copies the hold space back into the pattern space. 
+setup: + files: + - path: input.txt + content: "aaa\nbbb\nccc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n 'H; ${g;p}' input.txt +expect: + stdout: "\naaa\nbbb\nccc\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/hold/copy.yaml b/tests/scenarios/cmd/sed/hold/copy.yaml new file mode 100644 index 00000000..4e415ef5 --- /dev/null +++ b/tests/scenarios/cmd/sed/hold/copy.yaml @@ -0,0 +1,13 @@ +description: The h and g commands copy between pattern and hold space. +setup: + files: + - path: input.txt + content: "first\nsecond\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n '1h; 2{g;p}' input.txt +expect: + stdout: "first\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/hold/exchange.yaml b/tests/scenarios/cmd/sed/hold/exchange.yaml new file mode 100644 index 00000000..248641de --- /dev/null +++ b/tests/scenarios/cmd/sed/hold/exchange.yaml @@ -0,0 +1,13 @@ +description: The x command exchanges pattern and hold space. +setup: + files: + - path: input.txt + content: "first\nsecond\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n '1{h;d}; 2{x;p;x;p}' input.txt +expect: + stdout: "first\nsecond\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/multiple/comments.yaml b/tests/scenarios/cmd/sed/multiple/comments.yaml new file mode 100644 index 00000000..ad9a5311 --- /dev/null +++ b/tests/scenarios/cmd/sed/multiple/comments.yaml @@ -0,0 +1,13 @@ +description: Comments in sed scripts are ignored. 
+setup: + files: + - path: input.txt + content: "hello\nworld\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -e '# This is a comment' -e 's/hello/HELLO/' input.txt +expect: + stdout: "HELLO\nworld\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/multiple/multi_e.yaml b/tests/scenarios/cmd/sed/multiple/multi_e.yaml new file mode 100644 index 00000000..efebb9dc --- /dev/null +++ b/tests/scenarios/cmd/sed/multiple/multi_e.yaml @@ -0,0 +1,13 @@ +description: Multiple -e flags apply expressions in order. +setup: + files: + - path: input.txt + content: "hello world\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -e 's/hello/goodbye/' -e 's/world/earth/' input.txt +expect: + stdout: "goodbye earth\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/multiple/same_address.yaml b/tests/scenarios/cmd/sed/multiple/same_address.yaml new file mode 100644 index 00000000..9e29bf87 --- /dev/null +++ b/tests/scenarios/cmd/sed/multiple/same_address.yaml @@ -0,0 +1,13 @@ +description: Multiple commands targeting the same address are applied in sequence. +setup: + files: + - path: input.txt + content: "aaa\nbbb\nccc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '2s/bbb/BBB/; 2s/BBB/xxx/' input.txt +expect: + stdout: "aaa\nxxx\nccc\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/multiple/semicolon.yaml b/tests/scenarios/cmd/sed/multiple/semicolon.yaml new file mode 100644 index 00000000..f634e56f --- /dev/null +++ b/tests/scenarios/cmd/sed/multiple/semicolon.yaml @@ -0,0 +1,13 @@ +description: Semicolons separate multiple commands within a single script. 
+setup: + files: + - path: input.txt + content: "hello world\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/hello/goodbye/; s/world/earth/' input.txt +expect: + stdout: "goodbye earth\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/next/N_eof_graceful.yaml b/tests/scenarios/cmd/sed/next/N_eof_graceful.yaml new file mode 100644 index 00000000..3c11ee86 --- /dev/null +++ b/tests/scenarios/cmd/sed/next/N_eof_graceful.yaml @@ -0,0 +1,12 @@ +description: N at EOF prints pattern space and exits gracefully. +setup: + files: + - path: input.txt + content: "a\nb\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 'N' input.txt +expect: + stdout: "a\nb\nc\n" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/next/N_flushes_append_queue.yaml b/tests/scenarios/cmd/sed/next/N_flushes_append_queue.yaml new file mode 100644 index 00000000..f4160586 --- /dev/null +++ b/tests/scenarios/cmd/sed/next/N_flushes_append_queue.yaml @@ -0,0 +1,7 @@ +description: The N command flushes queued 'a' text before reading the next input line. +input: + script: "echo -e 'line1\nline2' | sed -e 'a\\X' -e 'N' -e 'p'" +expect: + stdout: "X\nline1\nline2\nline1\nline2\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/next/append_next.yaml b/tests/scenarios/cmd/sed/next/append_next.yaml new file mode 100644 index 00000000..7c22f4e3 --- /dev/null +++ b/tests/scenarios/cmd/sed/next/append_next.yaml @@ -0,0 +1,13 @@ +description: The N command appends next line to pattern space with embedded newline. 
+setup: + files: + - path: input.txt + content: "line1\nline2\nline3\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 'N; s/\n/ /' input.txt +expect: + stdout: "line1 line2\nline3\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/next/append_next_dollar_multifile.yaml b/tests/scenarios/cmd/sed/next/append_next_dollar_multifile.yaml new file mode 100644 index 00000000..381852f7 --- /dev/null +++ b/tests/scenarios/cmd/sed/next/append_next_dollar_multifile.yaml @@ -0,0 +1,15 @@ +description: "N (append next) command with $ address only matches last line of last file." +setup: + files: + - path: a.txt + content: "a\nb\n" + - path: b.txt + content: "c\nd\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n 'N;$p' a.txt b.txt +expect: + stdout: "c\nd\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/next/basic.yaml b/tests/scenarios/cmd/sed/next/basic.yaml new file mode 100644 index 00000000..65411675 --- /dev/null +++ b/tests/scenarios/cmd/sed/next/basic.yaml @@ -0,0 +1,13 @@ +description: The n command prints current line and reads the next into pattern space. +setup: + files: + - path: input.txt + content: "line1\nline2\nline3\nline4\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 'n; s/line/LINE/' input.txt +expect: + stdout: "line1\nLINE2\nline3\nLINE4\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/next/n_dollar_multifile.yaml b/tests/scenarios/cmd/sed/next/n_dollar_multifile.yaml new file mode 100644 index 00000000..6312c25b --- /dev/null +++ b/tests/scenarios/cmd/sed/next/n_dollar_multifile.yaml @@ -0,0 +1,15 @@ +description: "n command with $ address only matches last line of last file." 
+setup: + files: + - path: a.txt + content: "a\nb\n" + - path: b.txt + content: "c\nd\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 'n;$s/./LAST/' a.txt b.txt +expect: + stdout: "a\nb\nc\nLAST\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/next/n_eof_no_duplicate.yaml b/tests/scenarios/cmd/sed/next/n_eof_no_duplicate.yaml new file mode 100644 index 00000000..739daf51 --- /dev/null +++ b/tests/scenarios/cmd/sed/next/n_eof_no_duplicate.yaml @@ -0,0 +1,12 @@ +description: The n command at EOF does not duplicate the last line. +setup: + files: + - path: input.txt + content: "a\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 'n' input.txt +expect: + stdout: "a\n" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/next/n_odd_lines.yaml b/tests/scenarios/cmd/sed/next/n_odd_lines.yaml new file mode 100644 index 00000000..eae4e2a4 --- /dev/null +++ b/tests/scenarios/cmd/sed/next/n_odd_lines.yaml @@ -0,0 +1,12 @@ +description: The n command on odd number of lines prints all lines. +setup: + files: + - path: input.txt + content: "a\nb\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 'n' input.txt +expect: + stdout: "a\nb\nc\n" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/next/n_resets_subMade.yaml b/tests/scenarios/cmd/sed/next/n_resets_subMade.yaml new file mode 100644 index 00000000..31f6da14 --- /dev/null +++ b/tests/scenarios/cmd/sed/next/n_resets_subMade.yaml @@ -0,0 +1,13 @@ +description: "The n command resets subMade so t/T branch correctly after reading a new line." 
+setup: + files: + - path: input.txt + content: "a\nb\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n -e 's/a/x/' -e 'n' -e 't end' -e 'p' -e ':end' input.txt +expect: + stdout: "b\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/print/first_line.yaml b/tests/scenarios/cmd/sed/print/first_line.yaml new file mode 100644 index 00000000..bed9574c --- /dev/null +++ b/tests/scenarios/cmd/sed/print/first_line.yaml @@ -0,0 +1,13 @@ +description: The P command prints up to the first embedded newline in the pattern space. +setup: + files: + - path: input.txt + content: "line1\nline2\nline3\nline4\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n 'N; P' input.txt +expect: + stdout: "line1\nline3\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/print/line_number.yaml b/tests/scenarios/cmd/sed/print/line_number.yaml new file mode 100644 index 00000000..f1126371 --- /dev/null +++ b/tests/scenarios/cmd/sed/print/line_number.yaml @@ -0,0 +1,13 @@ +description: The = command prints the current line number. +setup: + files: + - path: input.txt + content: "aaa\nbbb\nccc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n '=' input.txt +expect: + stdout: "1\n2\n3\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/print/suppress.yaml b/tests/scenarios/cmd/sed/print/suppress.yaml new file mode 100644 index 00000000..59a0caf0 --- /dev/null +++ b/tests/scenarios/cmd/sed/print/suppress.yaml @@ -0,0 +1,13 @@ +description: The -n flag suppresses auto-print and p command prints explicitly. 
+setup: + files: + - path: input.txt + content: "line1\nline2\nline3\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n '2p' input.txt +expect: + stdout: "line2\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/print/unambiguous.yaml b/tests/scenarios/cmd/sed/print/unambiguous.yaml new file mode 100644 index 00000000..07f7700b --- /dev/null +++ b/tests/scenarios/cmd/sed/print/unambiguous.yaml @@ -0,0 +1,13 @@ +description: The l command prints the pattern space unambiguously with $ at end. +setup: + files: + - path: input.txt + content: "hello\tworld\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n 'l' input.txt +expect: + stdout: "hello\\tworld$\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/print/unambiguous_non_ascii.yaml b/tests/scenarios/cmd/sed/print/unambiguous_non_ascii.yaml new file mode 100644 index 00000000..31a93aca --- /dev/null +++ b/tests/scenarios/cmd/sed/print/unambiguous_non_ascii.yaml @@ -0,0 +1,13 @@ +description: The l command octal-escapes non-ASCII bytes like GNU sed. +setup: + files: + - path: input.txt + content: "caf\u00e9\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n 'l' input.txt +expect: + stdout: "caf\\303\\251$\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/quit/basic.yaml b/tests/scenarios/cmd/sed/quit/basic.yaml new file mode 100644 index 00000000..9dd998e1 --- /dev/null +++ b/tests/scenarios/cmd/sed/quit/basic.yaml @@ -0,0 +1,13 @@ +description: The q command prints the current line and quits. 
+setup: + files: + - path: input.txt + content: "line1\nline2\nline3\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '2q' input.txt +expect: + stdout: "line1\nline2\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/quit/exit_code_modulo.yaml b/tests/scenarios/cmd/sed/quit/exit_code_modulo.yaml new file mode 100644 index 00000000..5c01773a --- /dev/null +++ b/tests/scenarios/cmd/sed/quit/exit_code_modulo.yaml @@ -0,0 +1,12 @@ +description: "q with exit code > 255 wraps modulo 256." +setup: + files: + - path: input.txt + content: "hello\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 'q 257' input.txt +expect: + stdout: "hello\n" + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/quit/flush_append.yaml b/tests/scenarios/cmd/sed/quit/flush_append.yaml new file mode 100644 index 00000000..d75172f2 --- /dev/null +++ b/tests/scenarios/cmd/sed/quit/flush_append.yaml @@ -0,0 +1,13 @@ +description: The q command flushes pending append text before quitting. +setup: + files: + - path: input.txt + content: "a\nb\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -e 'a\appended' -e 'q' input.txt +expect: + stdout: "a\nappended\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/quit/inline_comment.yaml b/tests/scenarios/cmd/sed/quit/inline_comment.yaml new file mode 100644 index 00000000..91bcd961 --- /dev/null +++ b/tests/scenarios/cmd/sed/quit/inline_comment.yaml @@ -0,0 +1,12 @@ +description: "Inline # comment after q/Q is accepted." 
+setup: + files: + - path: input.txt + content: "line1\nline2\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 'q#note' input.txt +expect: + stdout: "line1\n" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/quit/inline_comment_with_code.yaml b/tests/scenarios/cmd/sed/quit/inline_comment_with_code.yaml new file mode 100644 index 00000000..b2027553 --- /dev/null +++ b/tests/scenarios/cmd/sed/quit/inline_comment_with_code.yaml @@ -0,0 +1,12 @@ +description: "Inline # comment after q with exit code is accepted." +setup: + files: + - path: input.txt + content: "line1\nline2\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 'q1#done' input.txt +expect: + stdout: "line1\n" + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/quit/noprint.yaml b/tests/scenarios/cmd/sed/quit/noprint.yaml new file mode 100644 index 00000000..c555b23f --- /dev/null +++ b/tests/scenarios/cmd/sed/quit/noprint.yaml @@ -0,0 +1,13 @@ +description: The Q command quits without printing the current line. +setup: + files: + - path: input.txt + content: "line1\nline2\nline3\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '2Q' input.txt +expect: + stdout: "line1\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/stdin/dash.yaml b/tests/scenarios/cmd/sed/stdin/dash.yaml new file mode 100644 index 00000000..6463e257 --- /dev/null +++ b/tests/scenarios/cmd/sed/stdin/dash.yaml @@ -0,0 +1,8 @@ +description: Sed reads from stdin when - is given as the file argument. +input: + script: |+ + echo "hello world" | sed 's/world/earth/' - +expect: + stdout: "hello earth\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/stdin/pipe.yaml b/tests/scenarios/cmd/sed/stdin/pipe.yaml new file mode 100644 index 00000000..cd4fd026 --- /dev/null +++ b/tests/scenarios/cmd/sed/stdin/pipe.yaml @@ -0,0 +1,8 @@ +description: Sed reads from piped stdin when no file is given. 
+input: + script: |+ + echo "hello world" | sed 's/world/earth/' +expect: + stdout: "hello earth\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/ampersand.yaml b/tests/scenarios/cmd/sed/substitute/ampersand.yaml new file mode 100644 index 00000000..727de0dd --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/ampersand.yaml @@ -0,0 +1,13 @@ +description: Ampersand in replacement refers to the matched text. +setup: + files: + - path: input.txt + content: "foo bar\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/foo/[&]/' input.txt +expect: + stdout: "[foo] bar\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/backref_followed_by_digit.yaml b/tests/scenarios/cmd/sed/substitute/backref_followed_by_digit.yaml new file mode 100644 index 00000000..7a1d0325 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/backref_followed_by_digit.yaml @@ -0,0 +1,13 @@ +description: Backreference followed by a literal digit (e.g. \10 means \1 then '0'). +setup: + files: + - path: input.txt + content: "a\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -E 's/(a)/\10/' input.txt +expect: + stdout: "a0\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/backref_zero.yaml b/tests/scenarios/cmd/sed/substitute/backref_zero.yaml new file mode 100644 index 00000000..ad8f3cbf --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/backref_zero.yaml @@ -0,0 +1,12 @@ +description: "\\0 in replacement refers to the entire match (equivalent to &)." 
+setup: + files: + - path: input.txt + content: "hello\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/hello/[\0]/' input.txt +expect: + stdout: "[hello]\n" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/backreference.yaml b/tests/scenarios/cmd/sed/substitute/backreference.yaml new file mode 100644 index 00000000..6a3ddbe6 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/backreference.yaml @@ -0,0 +1,13 @@ +description: Backreference in replacement using capture groups with -E flag. +setup: + files: + - path: input.txt + content: "hello world\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -E 's/(hello) (world)/\2 \1/' input.txt +expect: + stdout: "world hello\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/basic.yaml b/tests/scenarios/cmd/sed/substitute/basic.yaml new file mode 100644 index 00000000..533a7e68 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/basic.yaml @@ -0,0 +1,13 @@ +description: Basic sed substitution replaces first occurrence on each line. +setup: + files: + - path: input.txt + content: "hello world hello\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/hello/goodbye/' input.txt +expect: + stdout: "goodbye world hello\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/case_insensitive.yaml b/tests/scenarios/cmd/sed/substitute/case_insensitive.yaml new file mode 100644 index 00000000..1e2ad35e --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/case_insensitive.yaml @@ -0,0 +1,13 @@ +description: Case-insensitive substitution flag matches regardless of case. 
+setup: + files: + - path: input.txt + content: "Hello HELLO hello\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/hello/world/I' input.txt +expect: + stdout: "world HELLO hello\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/combined_flags.yaml b/tests/scenarios/cmd/sed/substitute/combined_flags.yaml new file mode 100644 index 00000000..41275830 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/combined_flags.yaml @@ -0,0 +1,13 @@ +description: Substitute flags can be combined (e.g., gI for global case-insensitive). +setup: + files: + - path: input.txt + content: "Foo foo FOO\nBar BAR bar\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/foo/X/gI' input.txt +expect: + stdout: "X X X\nBar BAR bar\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/delimiter.yaml b/tests/scenarios/cmd/sed/substitute/delimiter.yaml new file mode 100644 index 00000000..bfbe9e6e --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/delimiter.yaml @@ -0,0 +1,13 @@ +description: Substitution with alternate delimiter character. +setup: + files: + - path: input.txt + content: "/usr/local/bin\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's|/usr/local|/opt|' input.txt +expect: + stdout: "/opt/bin\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/dollar_in_replacement.yaml b/tests/scenarios/cmd/sed/substitute/dollar_in_replacement.yaml new file mode 100644 index 00000000..23ebb4e7 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/dollar_in_replacement.yaml @@ -0,0 +1,12 @@ +description: Literal dollar sign in replacement is treated literally. 
+setup: + files: + - path: input.txt + content: "abc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/b/$1/' input.txt +expect: + stdout: "a$1c\n" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/empty_match.yaml b/tests/scenarios/cmd/sed/substitute/empty_match.yaml new file mode 100644 index 00000000..281c4ee2 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/empty_match.yaml @@ -0,0 +1,13 @@ +description: Substitution matching beginning of line inserts a prefix. +setup: + files: + - path: input.txt + content: "line1\nline2\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/^/>> /' input.txt +expect: + stdout: ">> line1\n>> line2\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/empty_pattern_no_previous.yaml b/tests/scenarios/cmd/sed/substitute/empty_pattern_no_previous.yaml new file mode 100644 index 00000000..851226ce --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/empty_pattern_no_previous.yaml @@ -0,0 +1,9 @@ +description: "Empty pattern in s/// with no previous regex produces an error." +input: + script: |+ + echo test | sed 's//X/' +skip_assert_against_bash: true +expect: + stdout: "" + stderr_contains: ["no previous regular expression"] + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/substitute/empty_pattern_reuse.yaml b/tests/scenarios/cmd/sed/substitute/empty_pattern_reuse.yaml new file mode 100644 index 00000000..0fa1f8f0 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/empty_pattern_reuse.yaml @@ -0,0 +1,13 @@ +description: "Empty pattern in s/// reuses the last regex from an address or previous s command." 
+setup: + files: + - path: input.txt + content: "test\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '/test/s//replaced/' input.txt +expect: + stdout: "replaced\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/empty_replacement.yaml b/tests/scenarios/cmd/sed/substitute/empty_replacement.yaml new file mode 100644 index 00000000..be197fd9 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/empty_replacement.yaml @@ -0,0 +1,13 @@ +description: Substitution with empty replacement string deletes the match. +setup: + files: + - path: input.txt + content: "hello world\nfoo bar baz\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/world//' input.txt +expect: + stdout: "hello \nfoo bar baz\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/escape_ampersand.yaml b/tests/scenarios/cmd/sed/substitute/escape_ampersand.yaml new file mode 100644 index 00000000..36692823 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/escape_ampersand.yaml @@ -0,0 +1,13 @@ +description: The \& escape in replacement inserts a literal ampersand instead of the matched text. +setup: + files: + - path: input.txt + content: "foo\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/foo/\&/' input.txt +expect: + stdout: "&\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/escape_backslash.yaml b/tests/scenarios/cmd/sed/substitute/escape_backslash.yaml new file mode 100644 index 00000000..950aa599 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/escape_backslash.yaml @@ -0,0 +1,13 @@ +description: The \\ escape in replacement inserts a literal backslash. 
+setup: + files: + - path: input.txt + content: "foo\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/foo/a\\b/' input.txt +expect: + stdout: "a\\b\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/escape_newline.yaml b/tests/scenarios/cmd/sed/substitute/escape_newline.yaml new file mode 100644 index 00000000..95366e8a --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/escape_newline.yaml @@ -0,0 +1,13 @@ +description: The \n escape in replacement inserts a literal newline. +setup: + files: + - path: input.txt + content: "hello world\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/ /\n/' input.txt +expect: + stdout: "hello\nworld\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/escape_sequences.yaml b/tests/scenarios/cmd/sed/substitute/escape_sequences.yaml new file mode 100644 index 00000000..9d912f49 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/escape_sequences.yaml @@ -0,0 +1,13 @@ +description: Replacement strings support escape sequences like \n, \t, \\, and \&. +setup: + files: + - path: input.txt + content: "foo bar\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/foo/a\tb/' input.txt +expect: + stdout: "a\tb bar\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/escape_sequences_abfr.yaml b/tests/scenarios/cmd/sed/substitute/escape_sequences_abfr.yaml new file mode 100644 index 00000000..716b06e8 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/escape_sequences_abfr.yaml @@ -0,0 +1,13 @@ +description: Replacement strings support \a, \b, \f, \r escape sequences like GNU sed. 
+setup: + files: + - path: input.txt + content: "abcd\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/b/\a/' input.txt +expect: + stdout: "a\x07cd\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/global.yaml b/tests/scenarios/cmd/sed/substitute/global.yaml new file mode 100644 index 00000000..923aae99 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/global.yaml @@ -0,0 +1,13 @@ +description: Global substitution flag replaces all occurrences on each line. +setup: + files: + - path: input.txt + content: "aaa bbb aaa\naaa ccc aaa\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/aaa/XXX/g' input.txt +expect: + stdout: "XXX bbb XXX\nXXX ccc XXX\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/global_print.yaml b/tests/scenarios/cmd/sed/substitute/global_print.yaml new file mode 100644 index 00000000..4a4f11fb --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/global_print.yaml @@ -0,0 +1,13 @@ +description: Combined gp flags do global replacement and print on match. +setup: + files: + - path: input.txt + content: "foo foo\nbar\nfoo\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n 's/foo/FOO/gp' input.txt +expect: + stdout: "FOO FOO\nFOO\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/identity_sub_p_flag.yaml b/tests/scenarios/cmd/sed/substitute/identity_sub_p_flag.yaml new file mode 100644 index 00000000..498e2924 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/identity_sub_p_flag.yaml @@ -0,0 +1,12 @@ +description: Identity substitution (s/a/a/p) prints when matching with -n. 
+setup: + files: + - path: input.txt + content: "abc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n 's/a/a/p' input.txt +expect: + stdout: "abc\n" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/identity_sub_t_branch.yaml b/tests/scenarios/cmd/sed/substitute/identity_sub_t_branch.yaml new file mode 100644 index 00000000..f3114041 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/identity_sub_t_branch.yaml @@ -0,0 +1,12 @@ +description: Identity substitution (s/a/a/) is considered successful for t branching. +setup: + files: + - path: input.txt + content: "abc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -e 's/a/a/' -e 't found' -e 's/b/X/' -e ':found' input.txt +expect: + stdout: "abc\n" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/inline_comment.yaml b/tests/scenarios/cmd/sed/substitute/inline_comment.yaml new file mode 100644 index 00000000..bee0685f --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/inline_comment.yaml @@ -0,0 +1,8 @@ +description: "Inline # comment after s/// flags is treated as comment (GNU sed compat)." +input: + script: |+ + echo hello | sed 's/hello/world/# this is a comment' +expect: + stdout: "world\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/invalid_backref.yaml b/tests/scenarios/cmd/sed/substitute/invalid_backref.yaml new file mode 100644 index 00000000..1478d65a --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/invalid_backref.yaml @@ -0,0 +1,9 @@ +description: "Invalid backreference in s/// replacement is rejected." 
+input: + script: |+ + echo test | sed 's/a/\1/' +skip_assert_against_bash: true +expect: + stdout: "" + stderr_contains: ["invalid reference \\1 on `s' command's RHS"] + exit_code: 1 diff --git a/tests/scenarios/cmd/sed/substitute/nonspecial_escape.yaml b/tests/scenarios/cmd/sed/substitute/nonspecial_escape.yaml new file mode 100644 index 00000000..79347dd9 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/nonspecial_escape.yaml @@ -0,0 +1,13 @@ +description: "Non-special backslash escapes in replacement drop the backslash (GNU sed behaviour)." +setup: + files: + - path: input.txt + content: "test\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/t/\q/g' input.txt +expect: + stdout: "qesq\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/nth.yaml b/tests/scenarios/cmd/sed/substitute/nth.yaml new file mode 100644 index 00000000..9f62aba0 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/nth.yaml @@ -0,0 +1,13 @@ +description: Numeric flag replaces only the Nth occurrence. +setup: + files: + - path: input.txt + content: "one one one one\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/one/TWO/2' input.txt +expect: + stdout: "one TWO one one\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/nth_global.yaml b/tests/scenarios/cmd/sed/substitute/nth_global.yaml new file mode 100644 index 00000000..c30db3b3 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/nth_global.yaml @@ -0,0 +1,8 @@ +description: "Combined Nth + global flag replaces from the Nth match onward." +input: + script: |+ + echo 'aXaXaXa' | sed 's/a/b/2g' +expect: + stdout: "aXbXbXb\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/nth_print.yaml b/tests/scenarios/cmd/sed/substitute/nth_print.yaml new file mode 100644 index 00000000..15909f8b --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/nth_print.yaml @@ -0,0 +1,13 @@ +description: Combined Nth occurrence and print flags. 
+setup: + files: + - path: input.txt + content: "aaa aaa aaa\nbbb\naaa aaa\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n 's/aaa/XXX/2p' input.txt +expect: + stdout: "aaa XXX aaa\naaa XXX\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/pattern_nonspecial_escape.yaml b/tests/scenarios/cmd/sed/substitute/pattern_nonspecial_escape.yaml new file mode 100644 index 00000000..17d35617 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/pattern_nonspecial_escape.yaml @@ -0,0 +1,12 @@ +description: "Non-special escapes in s/// patterns drop the backslash (GNU sed compatibility)." +setup: + files: + - path: input.txt + content: "q\n\\q\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n 's/\q/X/p' input.txt +expect: + stdout: "X\n\\X\n" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/print_flag.yaml b/tests/scenarios/cmd/sed/substitute/print_flag.yaml new file mode 100644 index 00000000..b6b28a34 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/print_flag.yaml @@ -0,0 +1,13 @@ +description: The p flag in substitution prints the line if a substitution was made. +setup: + files: + - path: input.txt + content: "foo\nbar\nfoo\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed -n 's/foo/FOO/p' input.txt +expect: + stdout: "FOO\nFOO\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/substitute/word_boundary.yaml b/tests/scenarios/cmd/sed/substitute/word_boundary.yaml new file mode 100644 index 00000000..b55dd475 --- /dev/null +++ b/tests/scenarios/cmd/sed/substitute/word_boundary.yaml @@ -0,0 +1,12 @@ +description: "\\b in s/// pattern is a word boundary, not a backspace." 
+setup: + files: + - path: input.txt + content: "hello world\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 's/\bworld\b/EARTH/' input.txt +expect: + stdout: "hello EARTH\n" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/text/append.yaml b/tests/scenarios/cmd/sed/text/append.yaml new file mode 100644 index 00000000..f8718ca0 --- /dev/null +++ b/tests/scenarios/cmd/sed/text/append.yaml @@ -0,0 +1,13 @@ +description: The a command appends text after the current line. +setup: + files: + - path: input.txt + content: "line1\nline2\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '1a appended' input.txt +expect: + stdout: "line1\nappended\nline2\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/text/append_multiline.yaml b/tests/scenarios/cmd/sed/text/append_multiline.yaml new file mode 100644 index 00000000..9ad79989 --- /dev/null +++ b/tests/scenarios/cmd/sed/text/append_multiline.yaml @@ -0,0 +1,12 @@ +description: The a command supports multi-line text via backslash continuation. +setup: + files: + - path: input.txt + content: "line1\nline2\n" +input: + allowed_paths: ["$DIR"] + script: "sed '1a\\\nfirst line\\\nsecond line' input.txt\n" +expect: + stdout: "line1\nfirst line\nsecond line\nline2\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/text/change.yaml b/tests/scenarios/cmd/sed/text/change.yaml new file mode 100644 index 00000000..b2be4d02 --- /dev/null +++ b/tests/scenarios/cmd/sed/text/change.yaml @@ -0,0 +1,13 @@ +description: The c command replaces the current line with new text. 
+setup: + files: + - path: input.txt + content: "line1\nline2\nline3\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '2c replaced' input.txt +expect: + stdout: "line1\nreplaced\nline3\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/text/change_range.yaml b/tests/scenarios/cmd/sed/text/change_range.yaml new file mode 100644 index 00000000..19266db5 --- /dev/null +++ b/tests/scenarios/cmd/sed/text/change_range.yaml @@ -0,0 +1,12 @@ +description: The c command with a range outputs replacement text only once. +setup: + files: + - path: input.txt + content: "a\nb\nc\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '1,3c\REPLACED' input.txt +expect: + stdout: "REPLACED\n" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/text/insert.yaml b/tests/scenarios/cmd/sed/text/insert.yaml new file mode 100644 index 00000000..75c270ae --- /dev/null +++ b/tests/scenarios/cmd/sed/text/insert.yaml @@ -0,0 +1,13 @@ +description: The i command inserts text before the current line. +setup: + files: + - path: input.txt + content: "line1\nline2\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed '2i inserted' input.txt +expect: + stdout: "line1\ninserted\nline2\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/text/semicolon_in_text.yaml b/tests/scenarios/cmd/sed/text/semicolon_in_text.yaml new file mode 100644 index 00000000..c08f863a --- /dev/null +++ b/tests/scenarios/cmd/sed/text/semicolon_in_text.yaml @@ -0,0 +1,13 @@ +description: "Semicolons in a/i/c text arguments are literal, not command separators." 
+setup: + files: + - path: input.txt + content: "test\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 'a\hello;world' input.txt +expect: + stdout: "test\nhello;world\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/sed/transliterate/basic.yaml b/tests/scenarios/cmd/sed/transliterate/basic.yaml new file mode 100644 index 00000000..921f108f --- /dev/null +++ b/tests/scenarios/cmd/sed/transliterate/basic.yaml @@ -0,0 +1,13 @@ +description: The y command transliterates characters from source to destination set. +setup: + files: + - path: input.txt + content: "hello world\n" +input: + allowed_paths: ["$DIR"] + script: |+ + sed 'y/helo/HELO/' input.txt +expect: + stdout: "HELLO wOrLd\n" + stderr: "" + exit_code: 0 diff --git a/tests/scenarios/cmd/unknown_cmd/common_progs/sed.yaml b/tests/scenarios/cmd/unknown_cmd/common_progs/sed.yaml deleted file mode 100644 index e4c28f58..00000000 --- a/tests/scenarios/cmd/unknown_cmd/common_progs/sed.yaml +++ /dev/null @@ -1,10 +0,0 @@ -skip_assert_against_bash: true -description: The sed command is not a builtin and is rejected as unknown. -input: - script: |+ - sed s/foo/bar/ file.txt -expect: - stdout: "" - stderr: |+ - sed: command not found - exit_code: 127