Skip to content
90 changes: 90 additions & 0 deletions interp/builtins/cat/cat_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2026-present Datadog, Inc.

//go:build !race

package cat_test

import (
"io"
"os"
"path/filepath"
"testing"

"github.com/DataDog/rshell/interp"
"github.com/DataDog/rshell/interp/builtins/testutil"
)

// createLargeFileCat creates dir/filename and fills it with up to totalBytes
// bytes read from testutil.NewRepeatReader(line) (presumably line repeated
// back to back — confirm against testutil). It returns the path of the file.
//
// Any failure to create, write or close the file aborts the calling test or
// benchmark via tb.Fatal.
func createLargeFileCat(tb testing.TB, dir, filename, line string, totalBytes int) string {
	tb.Helper()
	path := filepath.Join(dir, filename)
	f, err := os.Create(path)
	if err != nil {
		tb.Fatal(err)
	}

	// LimitReader caps the generated content at totalBytes.
	src := io.LimitReader(testutil.NewRepeatReader(line), int64(totalBytes))
	_, copyErr := io.Copy(f, src)

	// Close explicitly instead of deferring: a Close error on a file that was
	// just written can mean the data never reached disk, so it has to fail the
	// test rather than be silently dropped. The file is closed before either
	// error is reported so the descriptor is released on every path.
	closeErr := f.Close()
	if copyErr != nil {
		tb.Fatal(copyErr)
	}
	if closeErr != nil {
		tb.Fatal(closeErr)
	}
	return path
}

// cmdRunBCat runs a cat command with AllowedPaths set to dir (bench variant).
func cmdRunBCat(b *testing.B, script, dir string) (string, string, int) {
b.Helper()
return testutil.RunScript(b, script, dir, interp.AllowedPaths([]string{dir}))
}

// BenchmarkCat times "cat input.txt" against a 1MB file made of a repeated
// short line, and reports allocations alongside the timings.
func BenchmarkCat(b *testing.B) {
	const (
		line = "the quick brown fox jumps over the lazy dog\n"
		size = 1 << 20
	)
	workDir := b.TempDir()
	createLargeFileCat(b, workDir, "input.txt", line, size)

	// Fixture creation above is excluded from the measurement.
	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		cmdRunBCat(b, "cat input.txt", workDir)
	}
}

// BenchmarkCatNumbered times "cat -n input.txt" against a 1MB file made of a
// repeated short line, and reports allocations alongside the timings.
func BenchmarkCatNumbered(b *testing.B) {
	const (
		line = "the quick brown fox jumps over the lazy dog\n"
		size = 1 << 20
	)
	workDir := b.TempDir()
	createLargeFileCat(b, workDir, "input.txt", line, size)

	// Fixture creation above is excluded from the measurement.
	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		cmdRunBCat(b, "cat -n input.txt", workDir)
	}
}

// TestCatMemoryBounded checks that cat does not hold its whole input in
// memory: running "cat input.txt" over a 10MB file must allocate at most 4MB
// per run, as measured by testing.Benchmark. The limit is well below the
// input size, so an implementation that buffered the entire file would fail.
func TestCatMemoryBounded(t *testing.T) {
	dir := t.TempDir()
	createLargeFileCat(t, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20)

	result := testing.Benchmark(func(b *testing.B) {
		b.ReportAllocs()
		for b.Loop() {
			testutil.RunScriptDiscard(b, "cat input.txt", dir, interp.AllowedPaths([]string{dir}))
		}
	})

	// testing.Benchmark hands back a zero BenchmarkResult when the benchmark
	// function fails. AllocedBytesPerOp would then report 0 and the bound
	// below would pass without anything having been measured.
	if result.N == 0 {
		t.Fatal("cat benchmark did not complete; no allocation data to check")
	}

	const maxBytesPerOp = 4 << 20
	if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp {
		// Exactly maxBytesPerOp is accepted, hence "<=" in the message.
		t.Errorf("cat allocated %d bytes/op on 10MB input; want <= %d", bpo, maxBytesPerOp)
	}
}

// BenchmarkCatDiscard times "cat input.txt" against a 10MB file using
// testutil.RunScriptDiscard (presumably the variant that throws the script's
// output away — confirm against testutil), and reports allocations.
func BenchmarkCatDiscard(b *testing.B) {
	const (
		line = "the quick brown fox jumps over the lazy dog\n"
		size = 10 << 20
	)
	workDir := b.TempDir()
	createLargeFileCat(b, workDir, "input.txt", line, size)

	// Fixture creation above is excluded from the measurement.
	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		testutil.RunScriptDiscard(b, "cat input.txt", workDir, interp.AllowedPaths([]string{workDir}))
	}
}
122 changes: 72 additions & 50 deletions interp/builtins/cut/cut.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,10 @@ func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc {
}
}

// newline holds the single line-feed byte written after each output line.
// It lives at package level so every line-terminator Write shares one slice
// rather than building a fresh one per line. Callers must treat it as
// read-only.
var newline = []byte("\n")

// cutConfig holds the parsed configuration for a cut invocation.
type cutConfig struct {
mode mode
Expand Down Expand Up @@ -392,30 +396,46 @@ func processBytes(callCtx *builtins.CallContext, raw []byte, cfg *cutConfig) {
if cfg.outDelimSet {
processBytesComplementWithOutDelim(callCtx, raw, cfg)
} else {
var sb strings.Builder
start := -1
for i := range n {
pos := i + 1
if !inRanges(pos, cfg.ranges) {
sb.WriteByte(raw[i])
if !inRanges(i+1, cfg.ranges) {
if start < 0 {
start = i
}
} else {
if start >= 0 {
callCtx.Stdout.Write(raw[start:i]) //nolint:errcheck
start = -1
}
}
}
callCtx.Out(sb.String())
if start >= 0 {
callCtx.Stdout.Write(raw[start:]) //nolint:errcheck
}
}
} else {
if cfg.outDelimSet {
processBytesWithOutDelim(callCtx, raw, cfg)
} else {
var sb strings.Builder
start := -1
for i := range n {
pos := i + 1
if inRanges(pos, cfg.ranges) {
sb.WriteByte(raw[i])
if inRanges(i+1, cfg.ranges) {
if start < 0 {
start = i
}
} else {
if start >= 0 {
callCtx.Stdout.Write(raw[start:i]) //nolint:errcheck
start = -1
}
}
}
callCtx.Out(sb.String())
if start >= 0 {
callCtx.Stdout.Write(raw[start:]) //nolint:errcheck
}
}
}
callCtx.Out("\n")
callCtx.Stdout.Write(newline) //nolint:errcheck
}

// processBytesWithOutDelim outputs selected byte ranges with the output
Expand Down Expand Up @@ -455,56 +475,58 @@ func processBytesComplementWithOutDelim(callCtx *builtins.CallContext, raw []byt

// processFields selects fields from a line.
func processFields(callCtx *builtins.CallContext, raw []byte, cfg *cutConfig) {
line := string(raw)
delimStr := string(cfg.delimByte)

// Check if line contains the delimiter.
if strings.IndexByte(line, cfg.delimByte) < 0 {
hasDelim := false
for _, b := range raw {
if b == cfg.delimByte {
hasDelim = true
break
}
}
if !hasDelim {
if cfg.onlyDelimited {
return // suppress line
return
}
// No delimiter: print the whole line + newline.
callCtx.Out(line)
callCtx.Out("\n")
callCtx.Stdout.Write(raw) //nolint:errcheck
callCtx.Stdout.Write(newline) //nolint:errcheck
return
}

fields := strings.Split(line, delimStr)
nFields := len(fields)
nFields := 1
for _, b := range raw {
if b == cfg.delimByte {
nFields++
}
}

// Determine which fields to select.
var selected []int
if cfg.complement {
compRanges := complementRanges(cfg.ranges, nFields)
for _, r := range compRanges {
for i := r[0]; i <= r[1] && i <= nFields; i++ {
selected = append(selected, i)
}
fieldIdx := 0
fieldStart := 0
firstOutput := true

for i := 0; i <= len(raw); i++ {
if i < len(raw) && raw[i] != cfg.delimByte {
continue
}
} else {
for _, r := range cfg.ranges {
start := r[0]
end := r[1]
if start > nFields {
break
}
if end > nFields {
end = nFields
}
for i := start; i <= end; i++ {
selected = append(selected, i)
}
fieldIdx++
fieldNum := fieldIdx

selected := false
if cfg.complement {
selected = !inRanges(fieldNum, cfg.ranges)
} else {
selected = inRanges(fieldNum, cfg.ranges)
}
}

// Output selected fields joined by the output delimiter.
for i, idx := range selected {
if i > 0 {
callCtx.Out(cfg.outDelim)
if selected {
if !firstOutput {
callCtx.Out(cfg.outDelim)
}
callCtx.Stdout.Write(raw[fieldStart:i]) //nolint:errcheck
firstOutput = false
}
callCtx.Out(fields[idx-1])

fieldStart = i + 1
}
callCtx.Out("\n")
callCtx.Stdout.Write(newline) //nolint:errcheck
}

// complementRanges returns the complement of the given sorted, merged ranges
Expand Down
131 changes: 131 additions & 0 deletions interp/builtins/cut/cut_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2026-present Datadog, Inc.

//go:build !race

package cut_test

import (
"io"
"os"
"path/filepath"
"testing"

"github.com/DataDog/rshell/interp"
"github.com/DataDog/rshell/interp/builtins/testutil"
)

// createLargeFileCut creates dir/filename and fills it with up to totalBytes
// bytes read from testutil.NewRepeatReader(line) (presumably line repeated
// back to back — confirm against testutil). It returns the path of the file.
//
// Any failure to create, write or close the file aborts the calling test or
// benchmark via tb.Fatal.
func createLargeFileCut(tb testing.TB, dir, filename, line string, totalBytes int) string {
	tb.Helper()
	path := filepath.Join(dir, filename)
	f, err := os.Create(path)
	if err != nil {
		tb.Fatal(err)
	}

	// LimitReader caps the generated content at totalBytes.
	src := io.LimitReader(testutil.NewRepeatReader(line), int64(totalBytes))
	_, copyErr := io.Copy(f, src)

	// Close explicitly instead of deferring: a Close error on a file that was
	// just written can mean the data never reached disk, so it has to fail the
	// test rather than be silently dropped. The file is closed before either
	// error is reported so the descriptor is released on every path.
	closeErr := f.Close()
	if copyErr != nil {
		tb.Fatal(copyErr)
	}
	if closeErr != nil {
		tb.Fatal(closeErr)
	}
	return path
}

func cmdRunBCut(b *testing.B, script, dir string) (string, string, int) {
b.Helper()
return testutil.RunScript(b, script, dir, interp.AllowedPaths([]string{dir}))
}

// BenchmarkCutBytes times "cut -b 1-10 input.txt" against a 10MB file of
// short lines, and reports allocations alongside the timings.
func BenchmarkCutBytes(b *testing.B) {
	const (
		line = "the quick brown fox jumps over the lazy dog\n"
		size = 10 << 20
	)
	workDir := b.TempDir()
	createLargeFileCut(b, workDir, "input.txt", line, size)

	// Fixture creation above is excluded from the measurement.
	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		cmdRunBCut(b, "cut -b 1-10 input.txt", workDir)
	}
}

// BenchmarkCutFields measures cut -f 1 on a 10MB file of short tab-separated
// lines. No -d flag is passed, so the benchmark exercises whatever delimiter
// cut uses by default (presumably tab, matching the input — confirm against
// cut's flag defaults).
func BenchmarkCutFields(b *testing.B) {
	dir := b.TempDir()
	// Four tab-separated fields per line: "alpha\tbeta\tgamma\tdelta".
	createLargeFileCut(b, dir, "input.txt", "alpha\tbeta\tgamma\tdelta\n", 10<<20)
	b.ResetTimer()
	b.ReportAllocs()
	for b.Loop() {
		cmdRunBCut(b, "cut -f 1 input.txt", dir)
	}
}

// BenchmarkCutFieldsMultiple times "cut -f 1,3 input.txt" against a 10MB file
// of tab-separated lines, and reports allocations alongside the timings.
func BenchmarkCutFieldsMultiple(b *testing.B) {
	const (
		line = "alpha\tbeta\tgamma\tdelta\n"
		size = 10 << 20
	)
	workDir := b.TempDir()
	createLargeFileCut(b, workDir, "input.txt", line, size)

	// Fixture creation above is excluded from the measurement.
	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		cmdRunBCut(b, "cut -f 1,3 input.txt", workDir)
	}
}

// TestCutMemoryBounded checks that cut -b does not hold its whole input in
// memory: running "cut -b 1-10 input.txt" over a 10MB file must allocate at
// most 4MB per run, as measured by testing.Benchmark. The limit is well below
// the input size, so an implementation that buffered the entire file would
// fail.
func TestCutMemoryBounded(t *testing.T) {
	dir := t.TempDir()
	createLargeFileCut(t, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20)

	result := testing.Benchmark(func(b *testing.B) {
		b.ReportAllocs()
		for b.Loop() {
			testutil.RunScriptDiscard(b, "cut -b 1-10 input.txt", dir, interp.AllowedPaths([]string{dir}))
		}
	})

	// testing.Benchmark hands back a zero BenchmarkResult when the benchmark
	// function fails. AllocedBytesPerOp would then report 0 and the bound
	// below would pass without anything having been measured.
	if result.N == 0 {
		t.Fatal("cut -b 1-10 benchmark did not complete; no allocation data to check")
	}

	const maxBytesPerOp = 4 << 20
	if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp {
		// Exactly maxBytesPerOp is accepted, hence "<=" in the message.
		t.Errorf("cut -b 1-10 allocated %d bytes/op on 10MB input; want <= %d", bpo, maxBytesPerOp)
	}
}

// TestCutFieldsMemoryBounded checks that cut -f does not hold its whole input
// in memory: running "cut -f 1 input.txt" over a 10MB tab-separated file must
// allocate at most 4MB per run, as measured by testing.Benchmark. The limit is
// well below the input size, so an implementation that buffered the entire
// file would fail.
func TestCutFieldsMemoryBounded(t *testing.T) {
	dir := t.TempDir()
	createLargeFileCut(t, dir, "input.txt", "alpha\tbeta\tgamma\tdelta\n", 10<<20)

	result := testing.Benchmark(func(b *testing.B) {
		b.ReportAllocs()
		for b.Loop() {
			testutil.RunScriptDiscard(b, "cut -f 1 input.txt", dir, interp.AllowedPaths([]string{dir}))
		}
	})

	// testing.Benchmark hands back a zero BenchmarkResult when the benchmark
	// function fails. AllocedBytesPerOp would then report 0 and the bound
	// below would pass without anything having been measured.
	if result.N == 0 {
		t.Fatal("cut -f 1 benchmark did not complete; no allocation data to check")
	}

	const maxBytesPerOp = 4 << 20
	if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp {
		// Exactly maxBytesPerOp is accepted, hence "<=" in the message.
		t.Errorf("cut -f 1 allocated %d bytes/op on 10MB input; want <= %d", bpo, maxBytesPerOp)
	}
}

// BenchmarkCutBytesDiscard times "cut -b 1-10 input.txt" against a 10MB file
// using testutil.RunScriptDiscard (presumably the variant that throws the
// script's output away — confirm against testutil), and reports allocations.
func BenchmarkCutBytesDiscard(b *testing.B) {
	const (
		line = "the quick brown fox jumps over the lazy dog\n"
		size = 10 << 20
	)
	workDir := b.TempDir()
	createLargeFileCut(b, workDir, "input.txt", line, size)

	// Fixture creation above is excluded from the measurement.
	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		testutil.RunScriptDiscard(b, "cut -b 1-10 input.txt", workDir, interp.AllowedPaths([]string{workDir}))
	}
}

// BenchmarkCutFieldsDiscard times "cut -f 1 input.txt" against a 10MB file of
// tab-separated lines using testutil.RunScriptDiscard (presumably the variant
// that throws the script's output away — confirm against testutil), and
// reports allocations.
func BenchmarkCutFieldsDiscard(b *testing.B) {
	const (
		line = "alpha\tbeta\tgamma\tdelta\n"
		size = 10 << 20
	)
	workDir := b.TempDir()
	createLargeFileCut(b, workDir, "input.txt", line, size)

	// Fixture creation above is excluded from the measurement.
	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		testutil.RunScriptDiscard(b, "cut -f 1 input.txt", workDir, interp.AllowedPaths([]string{workDir}))
	}
}
Loading
Loading