DataDog · thieman · Mar 13, 2026 · Mar 12, 2026 · Mar 12, 2026 · Mar 12, 2026
@@ -0,0 +1,90 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2026-present Datadog, Inc.
+
+//go:build !race
+
+package cat_test
+
+import (
+	"io"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/DataDog/rshell/interp"
+	"github.com/DataDog/rshell/interp/builtins/testutil"
+)
+
+// createLargeFileCat creates dir/filename and fills it with up to totalBytes
+// bytes read from testutil.NewRepeatReader(line), returning the file's path.
+// Any create, write, or close failure aborts the calling test or benchmark
+// via tb.Fatal.
+func createLargeFileCat(tb testing.TB, dir, filename, line string, totalBytes int) string {
+	tb.Helper()
+	path := filepath.Join(dir, filename)
+	f, err := os.Create(path)
+	if err != nil {
+		tb.Fatal(err)
+	}
+	_, err = io.Copy(f, io.LimitReader(testutil.NewRepeatReader(line), int64(totalBytes)))
+	// Close explicitly rather than via defer so that an error surfaced at
+	// close time is reported instead of being silently dropped. The copy
+	// error, if any, takes precedence.
+	if closeErr := f.Close(); err == nil {
+		err = closeErr
+	}
+	if err != nil {
+		tb.Fatal(err)
+	}
+	return path
+}
+
+// cmdRunBCat is the benchmark-side runner for cat scripts: it passes script
+// and dir to testutil.RunScript, allowing dir as the only entry in
+// interp.AllowedPaths, and returns RunScript's three results unchanged.
+func cmdRunBCat(b *testing.B, script, dir string) (string, string, int) {
+	b.Helper()
+	allowed := interp.AllowedPaths([]string{dir})
+	return testutil.RunScript(b, script, dir, allowed)
+}
+
+// BenchmarkCat times "cat input.txt" over a 1 MiB input file and reports
+// allocations per iteration.
+func BenchmarkCat(b *testing.B) {
+	const (
+		line   = "the quick brown fox jumps over the lazy dog\n"
+		script = "cat input.txt"
+	)
+	workDir := b.TempDir()
+	createLargeFileCat(b, workDir, "input.txt", line, 1<<20)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for b.Loop() {
+		cmdRunBCat(b, script, workDir)
+	}
+}
+
+// BenchmarkCatNumbered times "cat -n input.txt" over a 1 MiB input file and
+// reports allocations per iteration.
+func BenchmarkCatNumbered(b *testing.B) {
+	const (
+		line   = "the quick brown fox jumps over the lazy dog\n"
+		script = "cat -n input.txt"
+	)
+	workDir := b.TempDir()
+	createLargeFileCat(b, workDir, "input.txt", line, 1<<20)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for b.Loop() {
+		cmdRunBCat(b, script, workDir)
+	}
+}
+
+// TestCatMemoryBounded guards against cat buffering its whole input. It runs
+// "cat input.txt" on a 10 MiB file under testing.Benchmark and fails when the
+// average number of bytes allocated per run exceeds maxBytesPerOp (4 MiB),
+// which is well below the input size.
+func TestCatMemoryBounded(t *testing.T) {
+	dir := t.TempDir()
+	createLargeFileCat(t, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20)
+
+	result := testing.Benchmark(func(b *testing.B) {
+		b.ReportAllocs()
+		for b.Loop() {
+			testutil.RunScriptDiscard(b, "cat input.txt", dir, interp.AllowedPaths([]string{dir}))
+		}
+	})
+	// testing.Benchmark discards the inner benchmark's failure output and
+	// yields a zero-valued result if the benchmark function fails. Without
+	// this guard such a run would report 0 bytes/op and pass vacuously.
+	if result.N == 0 {
+		t.Fatal("benchmark did not complete; cannot measure allocations")
+	}
+
+	const maxBytesPerOp = 4 << 20
+	if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp {
+		t.Errorf("cat allocated %d bytes/op on 10MB input; want <= %d", bpo, maxBytesPerOp)
+	}
+}
+
+// BenchmarkCatDiscard times "cat input.txt" over a 10 MiB input file through
+// testutil.RunScriptDiscard and reports allocations per iteration.
+func BenchmarkCatDiscard(b *testing.B) {
+	const (
+		line   = "the quick brown fox jumps over the lazy dog\n"
+		script = "cat input.txt"
+	)
+	workDir := b.TempDir()
+	createLargeFileCat(b, workDir, "input.txt", line, 10<<20)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for b.Loop() {
+		testutil.RunScriptDiscard(b, script, workDir, interp.AllowedPaths([]string{workDir}))
+	}
+}
@@ -213,6 +213,10 @@ func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc {
 	}
 }
 
+// newline is the shared one-byte line terminator passed to every
+// Stdout.Write that ends an output line, so no terminator slice has to be
+// built per line.
+var newline = []byte("\n")
+
 // cutConfig holds the parsed configuration for a cut invocation.
 type cutConfig struct {
 	mode          mode
@@ -392,30 +396,46 @@ func processBytes(callCtx *builtins.CallContext, raw []byte, cfg *cutConfig) {
 		if cfg.outDelimSet {
 			processBytesComplementWithOutDelim(callCtx, raw, cfg)
 		} else {
-			var sb strings.Builder
+			start := -1
 			for i := range n {
-				pos := i + 1
-				if !inRanges(pos, cfg.ranges) {
-					sb.WriteByte(raw[i])
+				if !inRanges(i+1, cfg.ranges) {
+					if start < 0 {
+						start = i
+					}
+				} else {
+					if start >= 0 {
+						callCtx.Stdout.Write(raw[start:i]) //nolint:errcheck
+						start = -1
+					}
 				}
 			}
-			callCtx.Out(sb.String())
+			if start >= 0 {
+				callCtx.Stdout.Write(raw[start:]) //nolint:errcheck
+			}
 		}
 	} else {
 		if cfg.outDelimSet {
 			processBytesWithOutDelim(callCtx, raw, cfg)
 		} else {
-			var sb strings.Builder
+			start := -1
 			for i := range n {
-				pos := i + 1
-				if inRanges(pos, cfg.ranges) {
-					sb.WriteByte(raw[i])
+				if inRanges(i+1, cfg.ranges) {
+					if start < 0 {
+						start = i
+					}
+				} else {
+					if start >= 0 {
+						callCtx.Stdout.Write(raw[start:i]) //nolint:errcheck
+						start = -1
+					}
 				}
 			}
-			callCtx.Out(sb.String())
+			if start >= 0 {
+				callCtx.Stdout.Write(raw[start:]) //nolint:errcheck
+			}
 		}
 	}
-	callCtx.Out("\n")
+	callCtx.Stdout.Write(newline) //nolint:errcheck
 }
 
 // processBytesWithOutDelim outputs selected byte ranges with the output
@@ -455,56 +475,58 @@ func processBytesComplementWithOutDelim(callCtx *builtins.CallContext, raw []byt
 
 // processFields selects fields from a line.
 func processFields(callCtx *builtins.CallContext, raw []byte, cfg *cutConfig) {
-	line := string(raw)
-	delimStr := string(cfg.delimByte)
-
-	// Check if line contains the delimiter.
-	if strings.IndexByte(line, cfg.delimByte) < 0 {
+	hasDelim := false
+	for _, b := range raw {
+		if b == cfg.delimByte {
+			hasDelim = true
+			break
+		}
+	}
+	if !hasDelim {
 		if cfg.onlyDelimited {
-			return // suppress line
+			return
 		}
-		// No delimiter: print the whole line + newline.
-		callCtx.Out(line)
-		callCtx.Out("\n")
+		callCtx.Stdout.Write(raw)     //nolint:errcheck
+		callCtx.Stdout.Write(newline) //nolint:errcheck
 		return
 	}
 
-	fields := strings.Split(line, delimStr)
-	nFields := len(fields)
+	nFields := 1
+	for _, b := range raw {
+		if b == cfg.delimByte {
+			nFields++
+		}
+	}
 
-	// Determine which fields to select.
-	var selected []int
-	if cfg.complement {
-		compRanges := complementRanges(cfg.ranges, nFields)
-		for _, r := range compRanges {
-			for i := r[0]; i <= r[1] && i <= nFields; i++ {
-				selected = append(selected, i)
-			}
+	fieldIdx := 0
+	fieldStart := 0
+	firstOutput := true
+
+	for i := 0; i <= len(raw); i++ {
+		if i < len(raw) && raw[i] != cfg.delimByte {
+			continue
 		}
-	} else {
-		for _, r := range cfg.ranges {
-			start := r[0]
-			end := r[1]
-			if start > nFields {
-				break
-			}
-			if end > nFields {
-				end = nFields
-			}
-			for i := start; i <= end; i++ {
-				selected = append(selected, i)
-			}
+		fieldIdx++
+		fieldNum := fieldIdx
+
+		selected := false
+		if cfg.complement {
+			selected = !inRanges(fieldNum, cfg.ranges)
+		} else {
+			selected = inRanges(fieldNum, cfg.ranges)
 		}
-	}
 
-	// Output selected fields joined by the output delimiter.
-	for i, idx := range selected {
-		if i > 0 {
-			callCtx.Out(cfg.outDelim)
+		if selected {
+			if !firstOutput {
+				callCtx.Out(cfg.outDelim)
+			}
+			callCtx.Stdout.Write(raw[fieldStart:i]) //nolint:errcheck
+			firstOutput = false
 		}
-		callCtx.Out(fields[idx-1])
+
+		fieldStart = i + 1
 	}
-	callCtx.Out("\n")
+	callCtx.Stdout.Write(newline) //nolint:errcheck
 }
 
 // complementRanges returns the complement of the given sorted, merged ranges

@@ -0,0 +1,131 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2026-present Datadog, Inc.
+
+//go:build !race
+
+package cut_test
+
+import (
+	"io"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/DataDog/rshell/interp"
+	"github.com/DataDog/rshell/interp/builtins/testutil"
+)
+
+// createLargeFileCut creates dir/filename and fills it with up to totalBytes
+// bytes read from testutil.NewRepeatReader(line), returning the file's path.
+// Any create, write, or close failure aborts the calling test or benchmark
+// via tb.Fatal.
+func createLargeFileCut(tb testing.TB, dir, filename, line string, totalBytes int) string {
+	tb.Helper()
+	path := filepath.Join(dir, filename)
+	f, err := os.Create(path)
+	if err != nil {
+		tb.Fatal(err)
+	}
+	_, err = io.Copy(f, io.LimitReader(testutil.NewRepeatReader(line), int64(totalBytes)))
+	// Close explicitly rather than via defer so that an error surfaced at
+	// close time is reported instead of being silently dropped. The copy
+	// error, if any, takes precedence.
+	if closeErr := f.Close(); err == nil {
+		err = closeErr
+	}
+	if err != nil {
+		tb.Fatal(err)
+	}
+	return path
+}
+
+// cmdRunBCut is the benchmark-side runner for cut scripts: it passes script
+// and dir to testutil.RunScript, allowing dir as the only entry in
+// interp.AllowedPaths, and returns RunScript's three results unchanged.
+func cmdRunBCut(b *testing.B, script, dir string) (string, string, int) {
+	b.Helper()
+	allowed := interp.AllowedPaths([]string{dir})
+	return testutil.RunScript(b, script, dir, allowed)
+}
+
+// BenchmarkCutBytes times "cut -b 1-10 input.txt" over a 10 MiB file of short
+// lines and reports allocations per iteration.
+func BenchmarkCutBytes(b *testing.B) {
+	const (
+		line   = "the quick brown fox jumps over the lazy dog\n"
+		script = "cut -b 1-10 input.txt"
+	)
+	workDir := b.TempDir()
+	createLargeFileCut(b, workDir, "input.txt", line, 10<<20)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for b.Loop() {
+		cmdRunBCut(b, script, workDir)
+	}
+}
+
+// BenchmarkCutFields times "cut -f 1 input.txt" (no -d flag is passed) over a
+// 10 MiB file of short tab-separated lines and reports allocations per
+// iteration.
+func BenchmarkCutFields(b *testing.B) {
+	const (
+		// Four tab-separated fields per line.
+		line   = "alpha\tbeta\tgamma\tdelta\n"
+		script = "cut -f 1 input.txt"
+	)
+	workDir := b.TempDir()
+	createLargeFileCut(b, workDir, "input.txt", line, 10<<20)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for b.Loop() {
+		cmdRunBCut(b, script, workDir)
+	}
+}
+
+// BenchmarkCutFieldsMultiple times "cut -f 1,3 input.txt" over a 10 MiB file
+// of short tab-separated lines and reports allocations per iteration.
+func BenchmarkCutFieldsMultiple(b *testing.B) {
+	const (
+		line   = "alpha\tbeta\tgamma\tdelta\n"
+		script = "cut -f 1,3 input.txt"
+	)
+	workDir := b.TempDir()
+	createLargeFileCut(b, workDir, "input.txt", line, 10<<20)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for b.Loop() {
+		cmdRunBCut(b, script, workDir)
+	}
+}
+
+// TestCutMemoryBounded guards against cut -b buffering its whole input. It
+// runs "cut -b 1-10 input.txt" on a 10 MiB file under testing.Benchmark and
+// fails when the average number of bytes allocated per run exceeds
+// maxBytesPerOp (4 MiB), which is well below the input size.
+func TestCutMemoryBounded(t *testing.T) {
+	dir := t.TempDir()
+	createLargeFileCut(t, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20)
+
+	result := testing.Benchmark(func(b *testing.B) {
+		b.ReportAllocs()
+		for b.Loop() {
+			testutil.RunScriptDiscard(b, "cut -b 1-10 input.txt", dir, interp.AllowedPaths([]string{dir}))
+		}
+	})
+	// testing.Benchmark discards the inner benchmark's failure output and
+	// yields a zero-valued result if the benchmark function fails. Without
+	// this guard such a run would report 0 bytes/op and pass vacuously.
+	if result.N == 0 {
+		t.Fatal("benchmark did not complete; cannot measure allocations")
+	}
+
+	const maxBytesPerOp = 4 << 20
+	if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp {
+		t.Errorf("cut -b 1-10 allocated %d bytes/op on 10MB input; want <= %d", bpo, maxBytesPerOp)
+	}
+}
+
+// TestCutFieldsMemoryBounded guards against cut -f buffering its whole input
+// or allocating per line. It runs "cut -f 1 input.txt" on a 10 MiB
+// tab-separated file under testing.Benchmark and fails when the average
+// number of bytes allocated per run exceeds maxBytesPerOp (4 MiB), which is
+// well below the input size.
+func TestCutFieldsMemoryBounded(t *testing.T) {
+	dir := t.TempDir()
+	createLargeFileCut(t, dir, "input.txt", "alpha\tbeta\tgamma\tdelta\n", 10<<20)
+
+	result := testing.Benchmark(func(b *testing.B) {
+		b.ReportAllocs()
+		for b.Loop() {
+			testutil.RunScriptDiscard(b, "cut -f 1 input.txt", dir, interp.AllowedPaths([]string{dir}))
+		}
+	})
+	// testing.Benchmark discards the inner benchmark's failure output and
+	// yields a zero-valued result if the benchmark function fails. Without
+	// this guard such a run would report 0 bytes/op and pass vacuously.
+	if result.N == 0 {
+		t.Fatal("benchmark did not complete; cannot measure allocations")
+	}
+
+	const maxBytesPerOp = 4 << 20
+	if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp {
+		t.Errorf("cut -f 1 allocated %d bytes/op on 10MB input; want <= %d", bpo, maxBytesPerOp)
+	}
+}
+
+// BenchmarkCutBytesDiscard times "cut -b 1-10 input.txt" over a 10 MiB file
+// of short lines through testutil.RunScriptDiscard and reports allocations
+// per iteration.
+func BenchmarkCutBytesDiscard(b *testing.B) {
+	const (
+		line   = "the quick brown fox jumps over the lazy dog\n"
+		script = "cut -b 1-10 input.txt"
+	)
+	workDir := b.TempDir()
+	createLargeFileCut(b, workDir, "input.txt", line, 10<<20)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for b.Loop() {
+		testutil.RunScriptDiscard(b, script, workDir, interp.AllowedPaths([]string{workDir}))
+	}
+}
+
+// BenchmarkCutFieldsDiscard times "cut -f 1 input.txt" over a 10 MiB file of
+// short tab-separated lines through testutil.RunScriptDiscard and reports
+// allocations per iteration.
+func BenchmarkCutFieldsDiscard(b *testing.B) {
+	const (
+		line   = "alpha\tbeta\tgamma\tdelta\n"
+		script = "cut -f 1 input.txt"
+	)
+	workDir := b.TempDir()
+	createLargeFileCut(b, workDir, "input.txt", line, 10<<20)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for b.Loop() {
+		testutil.RunScriptDiscard(b, script, workDir, interp.AllowedPaths([]string{workDir}))
+	}
+}