diff --git a/interp/builtins/cat/cat_bench_test.go b/interp/builtins/cat/cat_bench_test.go new file mode 100644 index 00000000..b9cd0835 --- /dev/null +++ b/interp/builtins/cat/cat_bench_test.go @@ -0,0 +1,90 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +//go:build !race + +package cat_test + +import ( + "io" + "os" + "path/filepath" + "testing" + + "github.com/DataDog/rshell/interp" + "github.com/DataDog/rshell/interp/builtins/testutil" +) + +// createLargeFileCat writes totalBytes of repeating content to dir/filename. +func createLargeFileCat(tb testing.TB, dir, filename, line string, totalBytes int) string { + tb.Helper() + path := filepath.Join(dir, filename) + f, err := os.Create(path) + if err != nil { + tb.Fatal(err) + } + defer f.Close() + if _, err := io.Copy(f, io.LimitReader(testutil.NewRepeatReader(line), int64(totalBytes))); err != nil { + tb.Fatal(err) + } + return path +} + +// cmdRunBCat runs a cat command with AllowedPaths set to dir (bench variant). +func cmdRunBCat(b *testing.B, script, dir string) (string, string, int) { + b.Helper() + return testutil.RunScript(b, script, dir, interp.AllowedPaths([]string{dir})) +} + +// BenchmarkCat measures cat on a 1MB file. +func BenchmarkCat(b *testing.B) { + dir := b.TempDir() + createLargeFileCat(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 1<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBCat(b, "cat input.txt", dir) + } +} + +// BenchmarkCatNumbered measures cat -n on a 1MB file. 
+func BenchmarkCatNumbered(b *testing.B) { + dir := b.TempDir() + createLargeFileCat(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 1<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBCat(b, "cat -n input.txt", dir) + } +} + +// TestCatMemoryBounded asserts that cat uses O(1) memory regardless of input +// size. cat streams input to output in fixed chunks with no per-line allocation. +func TestCatMemoryBounded(t *testing.T) { + dir := t.TempDir() + createLargeFileCat(t, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + + result := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + testutil.RunScriptDiscard(b, "cat input.txt", dir, interp.AllowedPaths([]string{dir})) + } + }) + + const maxBytesPerOp = 4 << 20 + if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp { + t.Errorf("cat allocated %d bytes/op on 10MB input; want < %d", bpo, maxBytesPerOp) + } +} + +func BenchmarkCatDiscard(b *testing.B) { + dir := b.TempDir() + createLargeFileCat(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + testutil.RunScriptDiscard(b, "cat input.txt", dir, interp.AllowedPaths([]string{dir})) + } +} diff --git a/interp/builtins/cut/cut.go b/interp/builtins/cut/cut.go index a27ad0cc..dba26259 100644 --- a/interp/builtins/cut/cut.go +++ b/interp/builtins/cut/cut.go @@ -213,6 +213,10 @@ func registerFlags(fs *builtins.FlagSet) builtins.HandlerFunc { } } +// newline is a package-level buffer reused for every line-terminator Write, +// avoiding a heap allocation per line. +var newline = []byte{'\n'} + // cutConfig holds the parsed configuration for a cut invocation. 
type cutConfig struct { mode mode @@ -392,30 +396,46 @@ func processBytes(callCtx *builtins.CallContext, raw []byte, cfg *cutConfig) { if cfg.outDelimSet { processBytesComplementWithOutDelim(callCtx, raw, cfg) } else { - var sb strings.Builder + start := -1 for i := range n { - pos := i + 1 - if !inRanges(pos, cfg.ranges) { - sb.WriteByte(raw[i]) + if !inRanges(i+1, cfg.ranges) { + if start < 0 { + start = i + } + } else { + if start >= 0 { + callCtx.Stdout.Write(raw[start:i]) //nolint:errcheck + start = -1 + } } } - callCtx.Out(sb.String()) + if start >= 0 { + callCtx.Stdout.Write(raw[start:]) //nolint:errcheck + } } } else { if cfg.outDelimSet { processBytesWithOutDelim(callCtx, raw, cfg) } else { - var sb strings.Builder + start := -1 for i := range n { - pos := i + 1 - if inRanges(pos, cfg.ranges) { - sb.WriteByte(raw[i]) + if inRanges(i+1, cfg.ranges) { + if start < 0 { + start = i + } + } else { + if start >= 0 { + callCtx.Stdout.Write(raw[start:i]) //nolint:errcheck + start = -1 + } } } - callCtx.Out(sb.String()) + if start >= 0 { + callCtx.Stdout.Write(raw[start:]) //nolint:errcheck + } } } - callCtx.Out("\n") + callCtx.Stdout.Write(newline) //nolint:errcheck } // processBytesWithOutDelim outputs selected byte ranges with the output @@ -455,56 +475,58 @@ func processBytesComplementWithOutDelim(callCtx *builtins.CallContext, raw []byt // processFields selects fields from a line. func processFields(callCtx *builtins.CallContext, raw []byte, cfg *cutConfig) { - line := string(raw) - delimStr := string(cfg.delimByte) - - // Check if line contains the delimiter. - if strings.IndexByte(line, cfg.delimByte) < 0 { + hasDelim := false + for _, b := range raw { + if b == cfg.delimByte { + hasDelim = true + break + } + } + if !hasDelim { if cfg.onlyDelimited { - return // suppress line + return } - // No delimiter: print the whole line + newline. 
- callCtx.Out(line) - callCtx.Out("\n") + callCtx.Stdout.Write(raw) //nolint:errcheck + callCtx.Stdout.Write(newline) //nolint:errcheck return } - fields := strings.Split(line, delimStr) - nFields := len(fields) + nFields := 1 + for _, b := range raw { + if b == cfg.delimByte { + nFields++ + } + } - // Determine which fields to select. - var selected []int - if cfg.complement { - compRanges := complementRanges(cfg.ranges, nFields) - for _, r := range compRanges { - for i := r[0]; i <= r[1] && i <= nFields; i++ { - selected = append(selected, i) - } + fieldIdx := 0 + fieldStart := 0 + firstOutput := true + + for i := 0; i <= len(raw); i++ { + if i < len(raw) && raw[i] != cfg.delimByte { + continue } - } else { - for _, r := range cfg.ranges { - start := r[0] - end := r[1] - if start > nFields { - break - } - if end > nFields { - end = nFields - } - for i := start; i <= end; i++ { - selected = append(selected, i) - } + fieldIdx++ + fieldNum := fieldIdx + + selected := false + if cfg.complement { + selected = !inRanges(fieldNum, cfg.ranges) + } else { + selected = inRanges(fieldNum, cfg.ranges) } - } - // Output selected fields joined by the output delimiter. - for i, idx := range selected { - if i > 0 { - callCtx.Out(cfg.outDelim) + if selected { + if !firstOutput { + callCtx.Out(cfg.outDelim) + } + callCtx.Stdout.Write(raw[fieldStart:i]) //nolint:errcheck + firstOutput = false } - callCtx.Out(fields[idx-1]) + + fieldStart = i + 1 } - callCtx.Out("\n") + callCtx.Stdout.Write(newline) //nolint:errcheck } // complementRanges returns the complement of the given sorted, merged ranges diff --git a/interp/builtins/cut/cut_bench_test.go b/interp/builtins/cut/cut_bench_test.go new file mode 100644 index 00000000..9852e442 --- /dev/null +++ b/interp/builtins/cut/cut_bench_test.go @@ -0,0 +1,131 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. 
+// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +//go:build !race + +package cut_test + +import ( + "io" + "os" + "path/filepath" + "testing" + + "github.com/DataDog/rshell/interp" + "github.com/DataDog/rshell/interp/builtins/testutil" +) + +func createLargeFileCut(tb testing.TB, dir, filename, line string, totalBytes int) string { + tb.Helper() + path := filepath.Join(dir, filename) + f, err := os.Create(path) + if err != nil { + tb.Fatal(err) + } + defer f.Close() + if _, err := io.Copy(f, io.LimitReader(testutil.NewRepeatReader(line), int64(totalBytes))); err != nil { + tb.Fatal(err) + } + return path +} + +func cmdRunBCut(b *testing.B, script, dir string) (string, string, int) { + b.Helper() + return testutil.RunScript(b, script, dir, interp.AllowedPaths([]string{dir})) +} + +// BenchmarkCutBytes measures cut -b 1-10 on a 10MB file of short lines. +func BenchmarkCutBytes(b *testing.B) { + dir := b.TempDir() + createLargeFileCut(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBCut(b, "cut -b 1-10 input.txt", dir) + } +} + +// BenchmarkCutFields measures cut -f 1 -d ' ' on a 10MB file of short lines. +func BenchmarkCutFields(b *testing.B) { + dir := b.TempDir() + // Tab-delimited: "field1\tfield2\tfield3" + createLargeFileCut(b, dir, "input.txt", "alpha\tbeta\tgamma\tdelta\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBCut(b, "cut -f 1 input.txt", dir) + } +} + +// BenchmarkCutFieldsMultiple measures cut selecting multiple fields on a 10MB file. 
+func BenchmarkCutFieldsMultiple(b *testing.B) { + dir := b.TempDir() + createLargeFileCut(b, dir, "input.txt", "alpha\tbeta\tgamma\tdelta\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBCut(b, "cut -f 1,3 input.txt", dir) + } +} + +// TestCutMemoryBounded asserts that cut -b uses O(1) memory regardless of +// input size. cut is a streaming command that writes selected byte ranges +// directly to Stdout with no per-line string allocation. +func TestCutMemoryBounded(t *testing.T) { + dir := t.TempDir() + createLargeFileCut(t, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + + result := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + testutil.RunScriptDiscard(b, "cut -b 1-10 input.txt", dir, interp.AllowedPaths([]string{dir})) + } + }) + + const maxBytesPerOp = 4 << 20 + if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp { + t.Errorf("cut -b 1-10 allocated %d bytes/op on 10MB input; want < %d", bpo, maxBytesPerOp) + } +} + +// TestCutFieldsMemoryBounded asserts that cut -f uses O(1) memory regardless +// of input size. Field mode scans raw bytes for the delimiter without +// converting to string or allocating a []string per line. 
+func TestCutFieldsMemoryBounded(t *testing.T) { + dir := t.TempDir() + createLargeFileCut(t, dir, "input.txt", "alpha\tbeta\tgamma\tdelta\n", 10<<20) + + result := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + testutil.RunScriptDiscard(b, "cut -f 1 input.txt", dir, interp.AllowedPaths([]string{dir})) + } + }) + + const maxBytesPerOp = 4 << 20 + if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp { + t.Errorf("cut -f 1 allocated %d bytes/op on 10MB input; want < %d", bpo, maxBytesPerOp) + } +} + +func BenchmarkCutBytesDiscard(b *testing.B) { + dir := b.TempDir() + createLargeFileCut(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + testutil.RunScriptDiscard(b, "cut -b 1-10 input.txt", dir, interp.AllowedPaths([]string{dir})) + } +} + +func BenchmarkCutFieldsDiscard(b *testing.B) { + dir := b.TempDir() + createLargeFileCut(b, dir, "input.txt", "alpha\tbeta\tgamma\tdelta\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + testutil.RunScriptDiscard(b, "cut -f 1 input.txt", dir, interp.AllowedPaths([]string{dir})) + } +} diff --git a/interp/builtins/grep/grep.go b/interp/builtins/grep/grep.go index cb4622e1..e1feebbe 100644 --- a/interp/builtins/grep/grep.go +++ b/interp/builtins/grep/grep.go @@ -562,9 +562,9 @@ func grepFile(ctx context.Context, callCtx *builtins.CallContext, file string, o return matchCount > 0, ctx.Err() } lineNum++ - line := sc.Text() + lineBytes := sc.Bytes() - matched := opts.re.MatchString(line) + matched := opts.re.Match(lineBytes) if opts.invertMatch { matched = !matched } @@ -606,15 +606,15 @@ func grepFile(ctx context.Context, callCtx *builtins.CallContext, file string, o // -o -v: line was selected by inversion (doesn't contain // pattern), so there are no matching parts to print. 
} else if opts.onlyMatching { - matches := opts.re.FindAllString(line, -1) - for _, m := range matches { - if m == "" { + indices := opts.re.FindAllIndex(lineBytes, -1) + for _, idx := range indices { + if idx[0] == idx[1] { continue // suppress empty matches (GNU grep behavior) } - printMatchLine(callCtx, displayName, lineNum, m, opts) + printMatchLine(callCtx, displayName, lineNum, lineBytes[idx[0]:idx[1]], opts) } } else { - printMatchLine(callCtx, displayName, lineNum, line, opts) + printMatchLine(callCtx, displayName, lineNum, lineBytes, opts) } lastPrintedLine = lineNum printedSeparator = true @@ -625,7 +625,7 @@ func grepFile(ctx context.Context, callCtx *builtins.CallContext, file string, o } else { // Non-matching line: might be after-context or before-context. if afterRemaining > 0 && !opts.quiet && !opts.count && !opts.filesWithMatches && !opts.filesWithoutMatch { - printContextLine(callCtx, displayName, lineNum, line, opts, '-') + printContextLine(callCtx, displayName, lineNum, lineBytes, opts, '-') lastPrintedLine = lineNum afterRemaining-- } @@ -635,7 +635,9 @@ func grepFile(ctx context.Context, callCtx *builtins.CallContext, file string, o if len(beforeBuf) >= opts.beforeContext { beforeBuf = beforeBuf[1:] } - beforeBuf = append(beforeBuf, contextLine{num: lineNum, text: line}) + cp := make([]byte, len(lineBytes)) + copy(cp, lineBytes) + beforeBuf = append(beforeBuf, contextLine{num: lineNum, text: cp}) } } } @@ -664,31 +666,31 @@ func grepFile(ctx context.Context, callCtx *builtins.CallContext, file string, o type contextLine struct { num int - text string + text []byte } -func printMatchLine(callCtx *builtins.CallContext, filename string, lineNum int, line string, opts *grepOpts) { - var prefix strings.Builder +func printMatchLine(callCtx *builtins.CallContext, filename string, lineNum int, line []byte, opts *grepOpts) { if opts.showFilename { - prefix.WriteString(filename) - prefix.WriteByte(':') + callCtx.Stdout.Write([]byte(filename)) 
//nolint:errcheck + callCtx.Stdout.Write([]byte{':'}) //nolint:errcheck } if opts.lineNumber { - prefix.WriteString(strconv.Itoa(lineNum)) - prefix.WriteByte(':') + callCtx.Stdout.Write([]byte(strconv.Itoa(lineNum))) //nolint:errcheck + callCtx.Stdout.Write([]byte{':'}) //nolint:errcheck } - callCtx.Outf("%s%s\n", prefix.String(), line) + callCtx.Stdout.Write(line) //nolint:errcheck + callCtx.Stdout.Write([]byte{'\n'}) //nolint:errcheck } -func printContextLine(callCtx *builtins.CallContext, filename string, lineNum int, line string, opts *grepOpts, sep byte) { - var prefix strings.Builder +func printContextLine(callCtx *builtins.CallContext, filename string, lineNum int, line []byte, opts *grepOpts, sep byte) { if opts.showFilename { - prefix.WriteString(filename) - prefix.WriteByte(sep) + callCtx.Stdout.Write([]byte(filename)) //nolint:errcheck + callCtx.Stdout.Write([]byte{sep}) //nolint:errcheck } if opts.lineNumber { - prefix.WriteString(strconv.Itoa(lineNum)) - prefix.WriteByte(sep) + callCtx.Stdout.Write([]byte(strconv.Itoa(lineNum))) //nolint:errcheck + callCtx.Stdout.Write([]byte{sep}) //nolint:errcheck } - callCtx.Outf("%s%s\n", prefix.String(), line) + callCtx.Stdout.Write(line) //nolint:errcheck + callCtx.Stdout.Write([]byte{'\n'}) //nolint:errcheck } diff --git a/interp/builtins/grep/grep_bench_test.go b/interp/builtins/grep/grep_bench_test.go new file mode 100644 index 00000000..a50fd9d3 --- /dev/null +++ b/interp/builtins/grep/grep_bench_test.go @@ -0,0 +1,112 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. 
+ +//go:build !race + +package grep_test + +import ( + "io" + "os" + "path/filepath" + "testing" + + "github.com/DataDog/rshell/interp" + "github.com/DataDog/rshell/interp/builtins/testutil" +) + +func createLargeFileGrep(tb testing.TB, dir, filename, line string, totalBytes int) string { + tb.Helper() + path := filepath.Join(dir, filename) + f, err := os.Create(path) + if err != nil { + tb.Fatal(err) + } + defer f.Close() + if _, err := io.Copy(f, io.LimitReader(testutil.NewRepeatReader(line), int64(totalBytes))); err != nil { + tb.Fatal(err) + } + return path +} + +func cmdRunBGrep(b *testing.B, script, dir string) (string, string, int) { + b.Helper() + return testutil.RunScript(b, script, dir, interp.AllowedPaths([]string{dir})) +} + +// BenchmarkGrepMatch measures grep on a 10MB file where every line matches. +func BenchmarkGrepMatch(b *testing.B) { + dir := b.TempDir() + createLargeFileGrep(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBGrep(b, "grep fox input.txt", dir) + } +} + +// BenchmarkGrepNoMatch measures grep on a 10MB file where no lines match. +func BenchmarkGrepNoMatch(b *testing.B) { + dir := b.TempDir() + createLargeFileGrep(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBGrep(b, "grep NOMATCH input.txt", dir) + } +} + +// BenchmarkGrepFixedStrings measures grep -F on a 10MB file. +func BenchmarkGrepFixedStrings(b *testing.B) { + dir := b.TempDir() + createLargeFileGrep(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBGrep(b, "grep -F fox input.txt", dir) + } +} + +// BenchmarkGrepCount measures grep -c on a 10MB file. 
+func BenchmarkGrepCount(b *testing.B) { + dir := b.TempDir() + createLargeFileGrep(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBGrep(b, "grep -c fox input.txt", dir) + } +} + +// TestGrepMemoryBounded asserts that grep uses O(1) memory when processing +// large files. grep is a streaming command that reads one line at a time via +// sc.Bytes() (no per-line string allocation). Total allocations are dominated +// by the shell/runner overhead, not input size. +func TestGrepMemoryBounded(t *testing.T) { + dir := t.TempDir() + createLargeFileGrep(t, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + + result := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + testutil.RunScriptDiscard(b, "grep fox input.txt", dir, interp.AllowedPaths([]string{dir})) + } + }) + + const maxBytesPerOp = 4 << 20 + if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp { + t.Errorf("grep allocated %d bytes/op on 10MB input; want < %d", bpo, maxBytesPerOp) + } +} + +func BenchmarkGrepMatchDiscard(b *testing.B) { + dir := b.TempDir() + createLargeFileGrep(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + testutil.RunScriptDiscard(b, "grep fox input.txt", dir, interp.AllowedPaths([]string{dir})) + } +} diff --git a/interp/builtins/head/head_bench_test.go b/interp/builtins/head/head_bench_test.go new file mode 100644 index 00000000..c6bf134d --- /dev/null +++ b/interp/builtins/head/head_bench_test.go @@ -0,0 +1,126 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. 
+ +//go:build !race + +package head_test + +import ( + "io" + "os" + "path/filepath" + "testing" + + "github.com/DataDog/rshell/interp" + "github.com/DataDog/rshell/interp/builtins/testutil" +) + +// createLargeFile writes totalBytes of repeating line content to dir/filename. +func createLargeFile(tb testing.TB, dir, filename, line string, totalBytes int) string { + tb.Helper() + path := filepath.Join(dir, filename) + f, err := os.Create(path) + if err != nil { + tb.Fatal(err) + } + defer f.Close() + if _, err := io.Copy(f, io.LimitReader(testutil.NewRepeatReader(line), int64(totalBytes))); err != nil { + tb.Fatal(err) + } + return path +} + +// cmdRunB runs a head command with AllowedPaths set to dir (bench variant). +// Uses testutil.RunScript which accepts testing.TB. +func cmdRunB(b *testing.B, script, dir string) (string, string, int) { + b.Helper() + return testutil.RunScript(b, script, dir, interp.AllowedPaths([]string{dir})) +} + +// BenchmarkHeadTenLines measures head -n 10 on a 10MB file of short lines. +func BenchmarkHeadTenLines(b *testing.B) { + dir := b.TempDir() + createLargeFile(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunB(b, "head -n 10 input.txt", dir) + } +} + +// BenchmarkHeadBytes measures head -c 1024 on a 10MB file. +func BenchmarkHeadBytes(b *testing.B) { + dir := b.TempDir() + createLargeFile(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunB(b, "head -c 1024 input.txt", dir) + } +} + +// BenchmarkHeadSingleLineNearCap measures head -n 1 on a file with one line +// just below MaxLineBytes (1MiB). Lines exceeding MaxLineBytes trigger an +// error path; this benchmark exercises the successful large-line path. +func BenchmarkHeadSingleLineNearCap(b *testing.B) { + dir := b.TempDir() + // 900KB line -- safely below MaxLineBytes (1MiB) so head succeeds. 
+ createLargeFile(b, dir, "input.txt", "x", 900<<10) + // Append a newline to complete the line. + f, err := os.OpenFile(filepath.Join(dir, "input.txt"), os.O_APPEND|os.O_WRONLY, 0) + if err != nil { + b.Fatal(err) + } + defer func() { + if err := f.Close(); err != nil { + b.Errorf("close input.txt: %v", err) + } + }() + if _, err := f.WriteString("\n"); err != nil { + b.Fatal(err) + } + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunB(b, "head -n 1 input.txt", dir) + } +} + +// TestHeadMemoryBoundedLines asserts that head -n 10 uses O(1) memory +// regardless of input file size. +func TestHeadMemoryBoundedLines(t *testing.T) { + dir := t.TempDir() + createLargeFile(t, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + + result := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + cmdRunB(b, "head -n 10 input.txt", dir) + } + }) + + const maxBytesPerOp = 1 << 20 // 1MB ceiling + if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp { + t.Errorf("head -n 10 allocated %d bytes/op on 10MB input; want < %d", bpo, maxBytesPerOp) + } +} + +// TestHeadMemoryBoundedBytes asserts that head -c 1024 uses O(1) memory. 
+func TestHeadMemoryBoundedBytes(t *testing.T) { + dir := t.TempDir() + createLargeFile(t, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + + result := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + cmdRunB(b, "head -c 1024 input.txt", dir) + } + }) + + const maxBytesPerOp = 1 << 20 // 1MB ceiling + if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp { + t.Errorf("head -c 1024 allocated %d bytes/op on 10MB input; want < %d", bpo, maxBytesPerOp) + } +} diff --git a/interp/builtins/ls/ls_bench_test.go b/interp/builtins/ls/ls_bench_test.go new file mode 100644 index 00000000..988ce1b6 --- /dev/null +++ b/interp/builtins/ls/ls_bench_test.go @@ -0,0 +1,94 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +//go:build !race + +package ls_test + +import ( + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/DataDog/rshell/interp" + "github.com/DataDog/rshell/interp/builtins/testutil" +) + +// createFileDir creates a directory containing n empty files named +// file0000.txt … fileNNNN.txt and returns the directory path. +func createFileDir(tb testing.TB, n int) string { + tb.Helper() + dir := tb.TempDir() + for i := range n { + name := filepath.Join(dir, fmt.Sprintf("file%04d.txt", i)) + f, err := os.Create(name) + if err != nil { + tb.Fatal(err) + } + f.Close() + } + return dir +} + +func cmdRunBLs(b *testing.B, script, dir string) (string, string, int) { + b.Helper() + return testutil.RunScript(b, script, dir, interp.AllowedPaths([]string{dir})) +} + +// BenchmarkLs measures ls on a directory with 1000 entries. 
+func BenchmarkLs(b *testing.B) { + dir := createFileDir(b, 1000) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBLs(b, "ls .", dir) + } +} + +// BenchmarkLsLong measures ls -l on a directory with 1000 entries. +func BenchmarkLsLong(b *testing.B) { + dir := createFileDir(b, 1000) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBLs(b, "ls -l .", dir) + } +} + +// BenchmarkLsSmallDir measures ls on a small directory (10 entries). +func BenchmarkLsSmallDir(b *testing.B) { + dir := createFileDir(b, 10) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBLs(b, "ls .", dir) + } +} + +// TestLsMemoryBounded asserts that ls allocation scales linearly with the +// number of directory entries rather than diverging to pathological levels. +// ls must load all directory entries into memory to sort them (O(n) live heap), +// but should not buffer additional data beyond what os.ReadDir returns. +// +// With 1000 entries of ~12-byte names the expected allocation is roughly +// 1000 × (name string + FileInfo struct) ≈ a few hundred KB. A 10MB ceiling +// catches regressions that accidentally buffer full file contents or loop +// without bound. +func TestLsMemoryBounded(t *testing.T) { + dir := createFileDir(t, 1000) + + result := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + cmdRunBLs(b, "ls .", dir) + } + }) + + const maxBytesPerOp = 10 << 20 // 10MB ceiling for 1000-entry directory + if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp { + t.Errorf("ls allocated %d bytes/op on 1000-entry dir; want < %d", bpo, maxBytesPerOp) + } +} diff --git a/interp/builtins/strings_cmd/strings_bench_test.go b/interp/builtins/strings_cmd/strings_bench_test.go new file mode 100644 index 00000000..7864b400 --- /dev/null +++ b/interp/builtins/strings_cmd/strings_bench_test.go @@ -0,0 +1,95 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. 
+// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +//go:build !race + +package strings_cmd_test + +import ( + "io" + "os" + "path/filepath" + "testing" + + "github.com/DataDog/rshell/interp" + "github.com/DataDog/rshell/interp/builtins/testutil" +) + +func createLargeFileStrings(tb testing.TB, dir, filename, line string, totalBytes int) string { + tb.Helper() + path := filepath.Join(dir, filename) + f, err := os.Create(path) + if err != nil { + tb.Fatal(err) + } + defer f.Close() + if _, err := io.Copy(f, io.LimitReader(testutil.NewRepeatReader(line), int64(totalBytes))); err != nil { + tb.Fatal(err) + } + return path +} + +func cmdRunBStrings(b *testing.B, script, dir string) (string, string, int) { + b.Helper() + return testutil.RunScript(b, script, dir, interp.AllowedPaths([]string{dir})) +} + +// BenchmarkStrings measures strings on a 1MB file containing many short +// printable sequences separated by null bytes. Each line is a 43-byte printable +// string followed by a null byte, producing ~24k strings. +func BenchmarkStrings(b *testing.B) { + dir := b.TempDir() + // Mix of printable chars + null byte so strings emits many short tokens. + createLargeFileStrings(b, dir, "input.bin", "the quick brown fox jumps over lazy\x00", 1<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBStrings(b, "strings input.bin", dir) + } +} + +// BenchmarkStringsPrintableOnly measures strings on a 1MB fully-printable file. +// The entire file is one continuous printable run that exceeds maxStringLen +// (1 MiB cap), so only the first 1 MiB is emitted. 
+func BenchmarkStringsPrintableOnly(b *testing.B) { + dir := b.TempDir() + createLargeFileStrings(b, dir, "input.txt", "abcdefghij", 1<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBStrings(b, "strings input.txt", dir) + } +} + +// TestStringsMemoryBounded asserts that strings uses O(1) memory regardless +// of input size. strings reads in 32 KiB chunks and caps individual string +// accumulation at maxStringLen (1 MiB). With short printable sequences +// separated by non-printable bytes the current string buffer stays small. +func TestStringsMemoryBounded(t *testing.T) { + dir := t.TempDir() + createLargeFileStrings(t, dir, "input.bin", "the quick brown fox jumps over lazy\x00", 10<<20) + + result := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + testutil.RunScriptDiscard(b, "strings input.bin", dir, interp.AllowedPaths([]string{dir})) + } + }) + + const maxBytesPerOp = 4 << 20 + if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp { + t.Errorf("strings allocated %d bytes/op on 10MB input; want < %d", bpo, maxBytesPerOp) + } +} + +func BenchmarkStringsDiscard(b *testing.B) { + dir := b.TempDir() + createLargeFileStrings(b, dir, "input.bin", "the quick brown fox jumps over lazy\x00", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + testutil.RunScriptDiscard(b, "strings input.bin", dir, interp.AllowedPaths([]string{dir})) + } +} diff --git a/interp/builtins/tail/tail.go b/interp/builtins/tail/tail.go index 35d231e3..878ef1db 100644 --- a/interp/builtins/tail/tail.go +++ b/interp/builtins/tail/tail.go @@ -320,8 +320,6 @@ func readLastLines(ctx context.Context, callCtx *builtins.CallContext, r io.Read if !isRegularFile && totalRead > MaxTotalReadBytes { return errors.New("input too large: read limit exceeded") } - cp := make([]byte, len(raw)) - copy(cp, raw) // When the ring is full, evict the oldest entry before writing. 
if ringCount == ringSize { // If count exceeds the ring capacity, we cannot deliver the full @@ -331,8 +329,8 @@ func readLastLines(ctx context.Context, callCtx *builtins.CallContext, r io.Read } ringBytes -= int64(len(ring[ringHead])) } - ring[ringHead] = cp - ringBytes += int64(len(cp)) + ring[ringHead] = append(ring[ringHead][:0], raw...) + ringBytes += int64(len(ring[ringHead])) if ringBytes > MaxRingBytes { return errors.New("input too large: ring buffer memory limit exceeded") } diff --git a/interp/builtins/tail/tail_bench_test.go b/interp/builtins/tail/tail_bench_test.go new file mode 100644 index 00000000..ec6c59f9 --- /dev/null +++ b/interp/builtins/tail/tail_bench_test.go @@ -0,0 +1,84 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +//go:build !race + +package tail_test + +import ( + "io" + "os" + "path/filepath" + "testing" + + "github.com/DataDog/rshell/interp" + "github.com/DataDog/rshell/interp/builtins/testutil" +) + +// createLargeFileTail writes totalBytes of repeating content to dir/filename. +func createLargeFileTail(tb testing.TB, dir, filename, line string, totalBytes int) string { + tb.Helper() + path := filepath.Join(dir, filename) + f, err := os.Create(path) + if err != nil { + tb.Fatal(err) + } + defer f.Close() + if _, err := io.Copy(f, io.LimitReader(testutil.NewRepeatReader(line), int64(totalBytes))); err != nil { + tb.Fatal(err) + } + return path +} + +// cmdRunBTail runs a tail command with AllowedPaths set to dir (bench variant). +func cmdRunBTail(b *testing.B, script, dir string) (string, string, int) { + b.Helper() + return testutil.RunScript(b, script, dir, interp.AllowedPaths([]string{dir})) +} + +// BenchmarkTailTenLines measures tail -n 10 on a 10MB file. 
+func BenchmarkTailTenLines(b *testing.B) { + dir := b.TempDir() + createLargeFileTail(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBTail(b, "tail -n 10 input.txt", dir) + } +} + +// BenchmarkTailBytes measures tail -c 1024 on a 10MB file. +func BenchmarkTailBytes(b *testing.B) { + dir := b.TempDir() + createLargeFileTail(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBTail(b, "tail -c 1024 input.txt", dir) + } +} + +// TestTailMemoryBounded asserts that tail -n 10 uses O(1) memory regardless of +// input size. The ring buffer slots are reused via append(slot[:0], raw...), +// so no per-line allocation occurs after the first pass fills the ring. +func TestTailMemoryBounded(t *testing.T) { + const line = "the quick brown fox jumps over the lazy dog\n" // 44 bytes + const inputSize = 10 << 20 // 10 MB + + dir := t.TempDir() + createLargeFileTail(t, dir, "input.txt", line, inputSize) + + result := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + testutil.RunScriptDiscard(b, "tail -n 10 input.txt", dir, interp.AllowedPaths([]string{dir})) + } + }) + + const maxBytesPerOp = 4 << 20 + if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp { + t.Errorf("tail -n 10 allocated %d bytes/op on %d-byte input; want < %d", bpo, inputSize, maxBytesPerOp) + } +} diff --git a/interp/builtins/testutil/testutil.go b/interp/builtins/testutil/testutil.go index f262617c..bed6dead 100644 --- a/interp/builtins/testutil/testutil.go +++ b/interp/builtins/testutil/testutil.go @@ -10,6 +10,7 @@ import ( "bytes" "context" "errors" + "io" "strings" "testing" @@ -19,9 +20,37 @@ import ( "github.com/DataDog/rshell/interp" ) +// repeatReader is an io.Reader that repeats a fixed line pattern indefinitely. 
+type repeatReader struct { + line []byte + pos int +} + +func (r *repeatReader) Read(p []byte) (int, error) { + n := 0 + for n < len(p) { + if r.pos >= len(r.line) { + r.pos = 0 + } + copied := copy(p[n:], r.line[r.pos:]) + r.pos += copied + n += copied + } + return n, nil +} + +// NewRepeatReader returns an io.Reader that yields the given line pattern +// indefinitely. Use io.LimitReader to cap the total bytes produced. +// It is intended for benchmark setup — generating large synthetic files +// without keeping the full content in memory. +func NewRepeatReader(line string) io.Reader { + return &repeatReader{line: []byte(line)} +} + // RunScriptCtx runs a shell script with a context and returns stdout, stderr, -// and the exit code. -func RunScriptCtx(ctx context.Context, t *testing.T, script, dir string, opts ...interp.RunnerOption) (string, string, int) { +// and the exit code. It accepts testing.TB so it can be used in both tests +// and benchmarks. +func RunScriptCtx(ctx context.Context, t testing.TB, script, dir string, opts ...interp.RunnerOption) (string, string, int) { t.Helper() parser := syntax.NewParser() prog, err := parser.Parse(strings.NewReader(script), "") @@ -51,7 +80,46 @@ func RunScriptCtx(ctx context.Context, t *testing.T, script, dir string, opts .. } // RunScript runs a shell script and returns stdout, stderr, and the exit code. -func RunScript(t *testing.T, script, dir string, opts ...interp.RunnerOption) (string, string, int) { +// It accepts testing.TB so it can be used in both tests and benchmarks. +func RunScript(t testing.TB, script, dir string, opts ...interp.RunnerOption) (string, string, int) { t.Helper() return RunScriptCtx(context.Background(), t, script, dir, opts...) } + +// RunScriptDiscard runs a shell script and returns stderr and the exit code. +// Stdout is discarded (io.Discard). Use this in memory-allocation tests to +// prevent output buffering from dominating the AllocedBytesPerOp measurement. 
+func RunScriptDiscard(t testing.TB, script, dir string, opts ...interp.RunnerOption) (string, int) { + t.Helper() + return RunScriptDiscardCtx(context.Background(), t, script, dir, opts...) +} + +// RunScriptDiscardCtx is RunScriptDiscard with an explicit context. +func RunScriptDiscardCtx(ctx context.Context, t testing.TB, script, dir string, opts ...interp.RunnerOption) (string, int) { + t.Helper() + parser := syntax.NewParser() + prog, err := parser.Parse(strings.NewReader(script), "") + require.NoError(t, err) + + var errBuf bytes.Buffer + allOpts := append([]interp.RunnerOption{interp.StdIO(nil, io.Discard, &errBuf)}, opts...) + runner, err := interp.New(allOpts...) + require.NoError(t, err) + defer runner.Close() + + if dir != "" { + runner.Dir = dir + } + + err = runner.Run(ctx, prog) + exitCode := 0 + if err != nil { + var es interp.ExitStatus + if errors.As(err, &es) { + exitCode = int(es) + } else if ctx.Err() == nil { + t.Fatalf("unexpected error: %v", err) + } + } + return errBuf.String(), exitCode +} diff --git a/interp/builtins/tr/tr_bench_test.go b/interp/builtins/tr/tr_bench_test.go new file mode 100644 index 00000000..327b48ab --- /dev/null +++ b/interp/builtins/tr/tr_bench_test.go @@ -0,0 +1,106 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. 
+ +//go:build !race + +package tr_test + +import ( + "io" + "os" + "path/filepath" + "testing" + + "github.com/DataDog/rshell/interp" + "github.com/DataDog/rshell/interp/builtins/testutil" +) + +func createLargeFileTr(tb testing.TB, dir, filename, line string, totalBytes int) string { + tb.Helper() + path := filepath.Join(dir, filename) + f, err := os.Create(path) + if err != nil { + tb.Fatal(err) + } + defer f.Close() + if _, err := io.Copy(f, io.LimitReader(testutil.NewRepeatReader(line), int64(totalBytes))); err != nil { + tb.Fatal(err) + } + return path +} + +func cmdRunBTr(b *testing.B, script, dir string) (string, string, int) { + b.Helper() + return testutil.RunScript(b, script, dir, interp.AllowedPaths([]string{dir})) +} + +// BenchmarkTrTranslateDiscard measures tr with stdout discarded to isolate +// tr's own allocations from output buffering. Used to calibrate the ceiling +// in TestTrMemoryBounded. +func BenchmarkTrTranslateDiscard(b *testing.B) { + dir := b.TempDir() + createLargeFileTr(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + testutil.RunScriptDiscard(b, "cat input.txt | tr 'a-z' 'A-Z'", dir, interp.AllowedPaths([]string{dir})) + } +} + +// BenchmarkTrTranslate measures tr 'a-z' 'A-Z' on a 1MB file piped through tr. +// tr reads input from stdin in fixed 32 KiB chunks and translates byte-by-byte +// using a pre-built 256-entry lookup table. +func BenchmarkTrTranslate(b *testing.B) { + dir := b.TempDir() + createLargeFileTr(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 1<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBTr(b, "cat input.txt | tr 'a-z' 'A-Z'", dir) + } +} + +// BenchmarkTrDelete measures tr -d on a 1MB file. 
+func BenchmarkTrDelete(b *testing.B) { + dir := b.TempDir() + createLargeFileTr(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 1<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBTr(b, "cat input.txt | tr -d ' '", dir) + } +} + +// BenchmarkTrSqueeze measures tr -s on a 1MB file. +func BenchmarkTrSqueeze(b *testing.B) { + dir := b.TempDir() + createLargeFileTr(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 1<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBTr(b, "cat input.txt | tr -s ' '", dir) + } +} + +// TestTrMemoryBounded asserts that tr uses O(1) memory regardless of input +// size. tr operates on a 256-entry lookup table built once at startup. Input +// is read in fixed 32 KiB chunks and translated in-place; no allocation is +// proportional to input length. +func TestTrMemoryBounded(t *testing.T) { + dir := t.TempDir() + createLargeFileTr(t, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + + result := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + testutil.RunScriptDiscard(b, "cat input.txt | tr 'a-z' 'A-Z'", dir, interp.AllowedPaths([]string{dir})) + } + }) + + const maxBytesPerOp = 4 << 20 + if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp { + t.Errorf("tr allocated %d bytes/op on 10MB input; want < %d", bpo, maxBytesPerOp) + } +} diff --git a/interp/builtins/uniq/uniq.go b/interp/builtins/uniq/uniq.go index 4b44598a..6cabbaf9 100644 --- a/interp/builtins/uniq/uniq.go +++ b/interp/builtins/uniq/uniq.go @@ -73,6 +73,7 @@ package uniq import ( "bufio" + "bytes" "context" "io" "math" @@ -282,7 +283,6 @@ func processInput(ctx context.Context, callCtx *builtins.CallContext, r io.Reade sc.Split(makeSplitFunc(cfg.delim)) w := callCtx.Stdout - delimStr := string([]byte{cfg.delim}) reportWrite := func(err error) error { if err != nil { @@ -291,8 +291,16 @@ func processInput(ctx context.Context, callCtx 
*builtins.CallContext, r io.Reade return err } - var prevLine string - var prevKey string + writeLine := func(line []byte) error { + if _, err := w.Write(line); err != nil { + return err + } + _, err := w.Write([]byte{cfg.delim}) + return err + } + + var prevLine []byte + var prevKey []byte var lineCount int64 first := true groupNum := 0 @@ -301,77 +309,77 @@ func processInput(ctx context.Context, callCtx *builtins.CallContext, r io.Reade if ctx.Err() != nil { return ctx.Err() } - curLine := sc.Text() - curKey := compareKey(curLine, cfg) + curBytes := sc.Bytes() + curKey := compareKeyBytes(curBytes, cfg) if first { - prevLine = curLine - prevKey = curKey + prevLine = append(prevLine[:0], curBytes...) + prevKey = append(prevKey[:0], curKey...) lineCount = 1 first = false if cfg.useGroup { if cfg.grpMethod == groupPrepend || cfg.grpMethod == groupBoth { - if err := reportWrite(writeStr(w, delimStr)); err != nil { + if err := reportWrite(writeLine(nil)); err != nil { return err } } - if err := reportWrite(writeStr(w, curLine+delimStr)); err != nil { + if err := reportWrite(writeLine(prevLine)); err != nil { return err } } continue } - same := prevKey == curKey + same := bytes.Equal(prevKey, curKey) if same { if lineCount < math.MaxInt64 { lineCount++ } if cfg.useGroup { - if err := reportWrite(writeStr(w, curLine+delimStr)); err != nil { + if err := reportWrite(writeLine(curBytes)); err != nil { return err } } else if cfg.useAllRepeated { if lineCount == 2 { if groupNum > 0 && cfg.arMethod != allRepeatedNone { - if err := reportWrite(writeStr(w, delimStr)); err != nil { + if err := reportWrite(writeLine(nil)); err != nil { return err } } if groupNum == 0 && cfg.arMethod == allRepeatedPrepend { - if err := reportWrite(writeStr(w, delimStr)); err != nil { + if err := reportWrite(writeLine(nil)); err != nil { return err } } - if err := reportWrite(writeStr(w, prevLine+delimStr)); err != nil { + if err := reportWrite(writeLine(prevLine)); err != nil { return err } 
groupNum++ } - if err := reportWrite(writeStr(w, curLine+delimStr)); err != nil { + if err := reportWrite(writeLine(curBytes)); err != nil { return err } } } else { if cfg.useGroup { - if err := reportWrite(writeStr(w, delimStr)); err != nil { + if err := reportWrite(writeLine(nil)); err != nil { return err } - if err := reportWrite(writeStr(w, curLine+delimStr)); err != nil { + if err := reportWrite(writeLine(curBytes)); err != nil { return err } groupNum++ } else if cfg.useAllRepeated { // Nothing to do — non-repeated last group is simply dropped. } else { - if err := reportWrite(emitStandard(w, cfg, prevLine, lineCount, delimStr)); err != nil { + if err := reportWrite(emitStandard(w, cfg, prevLine, lineCount)); err != nil { return err } } - prevLine = curLine - prevKey = curKey + prevLine = append(prevLine[:0], curBytes...) + prevKey = append(prevKey[:0], curKey...) lineCount = 1 } } @@ -388,17 +396,17 @@ func processInput(ctx context.Context, callCtx *builtins.CallContext, r io.Reade // Flush last group. 
if cfg.useGroup { if cfg.grpMethod == groupAppend || cfg.grpMethod == groupBoth { - return reportWrite(writeStr(w, delimStr)) + return reportWrite(writeLine(nil)) } return nil } if cfg.useAllRepeated { return nil } - return reportWrite(emitStandard(w, cfg, prevLine, lineCount, delimStr)) + return reportWrite(emitStandard(w, cfg, prevLine, lineCount)) } -func emitStandard(w io.Writer, cfg *uniqConfig, line string, count int64, delimStr string) error { +func emitStandard(w io.Writer, cfg *uniqConfig, line []byte, count int64) error { if cfg.repeated && cfg.unique { return nil } @@ -413,22 +421,30 @@ func emitStandard(w io.Writer, cfg *uniqConfig, line string, count int64, delimS for len(s) < countFieldWidth { s = " " + s } - return writeStr(w, s+" "+line+delimStr) + if _, err := io.WriteString(w, s+" "); err != nil { + return err + } + if _, err := w.Write(line); err != nil { + return err + } + _, err := w.Write([]byte{cfg.delim}) + return err } - return writeStr(w, line+delimStr) -} - -func writeStr(w io.Writer, s string) error { - _, err := io.WriteString(w, s) + if _, err := w.Write(line); err != nil { + return err + } + _, err := w.Write([]byte{cfg.delim}) return err } -// compareKey extracts the portion of line used for comparison, applying +// compareKeyBytes extracts the portion of line used for comparison, applying // field skipping, char skipping, check-chars, and case folding. -func compareKey(line string, cfg *uniqConfig) string { +// For the ignore-case path it returns a newly allocated lowercased copy; +// otherwise it returns a subslice of line (no allocation). 
+func compareKeyBytes(line []byte, cfg *uniqConfig) []byte { s := line if cfg.skipFields > 0 { - s = skipFieldsN(s, cfg.skipFields) + s = skipFieldsBytesN(s, cfg.skipFields) } if cfg.skipChars > 0 && len(s) > 0 { skip := cfg.skipChars @@ -441,37 +457,28 @@ func compareKey(line string, cfg *uniqConfig) string { s = s[:cfg.checkChars] } if cfg.ignoreCase { - s = asciiToLower(s) + s = asciiToLowerBytes(s) } return s } -// asciiToLower folds only ASCII A-Z to a-z, matching GNU uniq behavior -// in the default C/POSIX locale. Unlike strings.ToLower, this does not -// apply Unicode case folding, so non-ASCII characters are left unchanged. -func asciiToLower(s string) string { - for i := 0; i < len(s); i++ { - if s[i] >= 'A' && s[i] <= 'Z' { - b := make([]byte, len(s)) - copy(b, s[:i]) - b[i] = s[i] + ('a' - 'A') - for j := i + 1; j < len(s); j++ { - c := s[j] - if c >= 'A' && c <= 'Z' { - c += 'a' - 'A' - } - b[j] = c - } - return string(b) +// asciiToLowerBytes folds only ASCII A-Z to a-z in a byte slice, matching GNU +// uniq behavior in the default C/POSIX locale. It always returns a new copy. +func asciiToLowerBytes(s []byte) []byte { + b := make([]byte, len(s)) + for i, c := range s { + if c >= 'A' && c <= 'Z' { + c += 'a' - 'A' } + b[i] = c } - return s + return b } -// skipFieldsN skips the first n blank-delimited fields and returns the -// remainder of the string, starting immediately after the last character +// skipFieldsBytesN skips the first n blank-delimited fields in a byte slice +// and returns the remainder, starting immediately after the last character // of the n-th field (before any subsequent blanks). 
-func skipFieldsN(s string, n int64) string { +func skipFieldsBytesN(s []byte, n int64) []byte { i := 0 for field := int64(0); field < n && i < len(s); field++ { for i < len(s) && (s[i] == ' ' || s[i] == '\t') { diff --git a/interp/builtins/uniq/uniq_bench_test.go b/interp/builtins/uniq/uniq_bench_test.go new file mode 100644 index 00000000..beb3ce5b --- /dev/null +++ b/interp/builtins/uniq/uniq_bench_test.go @@ -0,0 +1,80 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +//go:build !race + +package uniq_test + +import ( + "io" + "os" + "path/filepath" + "testing" + + "github.com/DataDog/rshell/interp" + "github.com/DataDog/rshell/interp/builtins/testutil" +) + +func createLargeFileUniq(tb testing.TB, dir, filename, line string, totalBytes int) string { + tb.Helper() + path := filepath.Join(dir, filename) + f, err := os.Create(path) + if err != nil { + tb.Fatal(err) + } + defer f.Close() + if _, err := io.Copy(f, io.LimitReader(testutil.NewRepeatReader(line), int64(totalBytes))); err != nil { + tb.Fatal(err) + } + return path +} + +func cmdRunBUniq(b *testing.B, script, dir string) (string, string, int) { + b.Helper() + return testutil.RunScript(b, script, dir, interp.AllowedPaths([]string{dir})) +} + +// BenchmarkUniq measures uniq on a 10MB file of identical lines (all deduplicated to one). +func BenchmarkUniq(b *testing.B) { + dir := b.TempDir() + createLargeFileUniq(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBUniq(b, "uniq input.txt", dir) + } +} + +// BenchmarkUniqCount measures uniq -c on a 10MB file. 
+func BenchmarkUniqCount(b *testing.B) { + dir := b.TempDir() + createLargeFileUniq(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBUniq(b, "uniq -c input.txt", dir) + } +} + +// TestUniqMemoryBounded asserts that uniq uses O(1) memory when processing +// large files. uniq is a streaming command: only the current and previous lines +// are kept in memory at any time (live heap is O(1)) and sc.Bytes() avoids +// per-line string allocations. +func TestUniqMemoryBounded(t *testing.T) { + dir := t.TempDir() + createLargeFileUniq(t, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + + result := testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + testutil.RunScriptDiscard(b, "uniq input.txt", dir, interp.AllowedPaths([]string{dir})) + } + }) + + const maxBytesPerOp = 4 << 20 + if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp { + t.Errorf("uniq allocated %d bytes/op on 10MB input; want < %d", bpo, maxBytesPerOp) + } +} diff --git a/interp/builtins/wc/wc_bench_test.go b/interp/builtins/wc/wc_bench_test.go new file mode 100644 index 00000000..2411354a --- /dev/null +++ b/interp/builtins/wc/wc_bench_test.go @@ -0,0 +1,79 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +//go:build !race + +package wc_test + +import ( + "io" + "os" + "path/filepath" + "testing" + + "github.com/DataDog/rshell/interp" + "github.com/DataDog/rshell/interp/builtins/testutil" +) + +// createLargeFileWc writes totalBytes of repeating content to dir/filename. 
+func createLargeFileWc(tb testing.TB, dir, filename, line string, totalBytes int) string { + tb.Helper() + path := filepath.Join(dir, filename) + f, err := os.Create(path) + if err != nil { + tb.Fatal(err) + } + defer f.Close() + if _, err := io.Copy(f, io.LimitReader(testutil.NewRepeatReader(line), int64(totalBytes))); err != nil { + tb.Fatal(err) + } + return path +} + +// cmdRunBWc runs a wc command with AllowedPaths set to dir (bench variant). +func cmdRunBWc(b *testing.B, script, dir string) (string, string, int) { + b.Helper() + return testutil.RunScript(b, script, dir, interp.AllowedPaths([]string{dir})) +} + +// BenchmarkWcLines measures wc -l on a 10MB file. +func BenchmarkWcLines(b *testing.B) { + dir := b.TempDir() + createLargeFileWc(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBWc(b, "wc -l input.txt", dir) + } +} + +// BenchmarkWcAll measures wc (all counts) on a 10MB file. +func BenchmarkWcAll(b *testing.B) { + dir := b.TempDir() + createLargeFileWc(b, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20) + b.ResetTimer() + b.ReportAllocs() + for b.Loop() { + cmdRunBWc(b, "wc input.txt", dir) + } +} + +// TestWcMemoryBounded asserts that wc uses O(1) memory regardless of file size. 
+func TestWcMemoryBounded(t *testing.T) {
+	dir := t.TempDir()
+	createLargeFileWc(t, dir, "input.txt", "the quick brown fox jumps over the lazy dog\n", 10<<20)
+
+	result := testing.Benchmark(func(b *testing.B) {
+		b.ReportAllocs()
+		for b.Loop() {
+			cmdRunBWc(b, "wc -l input.txt", dir)
+		}
+	})
+
+	const maxBytesPerOp = 1 << 20 // 1MB ceiling for a streaming counter
+	if bpo := result.AllocedBytesPerOp(); bpo > maxBytesPerOp {
+		t.Errorf("wc -l allocated %d bytes/op on 10MB input; want < %d", bpo, maxBytesPerOp)
+	}
+}
diff --git a/tests/allowed_symbols_test.go b/tests/allowed_symbols_test.go
index 7a8adffb..38ce08c7 100644
--- a/tests/allowed_symbols_test.go
+++ b/tests/allowed_symbols_test.go
@@ -38,6 +38,8 @@ var builtinAllowedSymbols = []string{
 	"bufio.NewScanner",
 	// bufio.Scanner — scanner type for buffered input reading; no write or exec capability.
 	"bufio.Scanner",
 	// bufio.SplitFunc — type for custom scanner split functions; pure type, no I/O.
 	"bufio.SplitFunc",
+	// bytes.Equal — compares two byte slices for equality; pure function, no I/O.
+	"bytes.Equal",
 	// context.Context — deadline/cancellation plumbing; pure interface, no side effects.