Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
286 changes: 283 additions & 3 deletions packages/cli/src/ui/utils/textUtils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,289 @@ describe('textUtils', () => {
});

describe('stripUnsafeCharacters', () => {
it('should not strip tab characters', () => {
const input = 'hello world';
expect(stripUnsafeCharacters(input)).toBe('hello world');
describe('preserved characters', () => {
  // Each row: [label completing the test title, text that must be returned unchanged].
  const untouched: Array<[string, string]> = [
    ['TAB (0x09)', 'hello\tworld'],
    ['LF/newline (0x0A)', 'hello\nworld'],
    ['CR (0x0D)', 'hello\rworld'],
    ['CRLF (0x0D 0x0A)', 'hello\r\nworld'],
    ['DEL (0x7F)', 'hello\x7Fworld'],
    [
      'all printable ASCII (0x20-0x7E)',
      ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
    ],
    ['Unicode characters above 0x9F', 'Hello κόσμε 世界 🌍'],
    ['emojis', '🎉 Celebration! 🚀 Launch! 💯'],
    ['complex emoji sequences (ZWJ)', 'Family: 👨‍👩‍👧‍👦 Flag: 🏳️‍🌈'],
  ];

  // The expected output is the input itself: nothing in these strings is unsafe.
  it.each(untouched)('should preserve %s', (_label, text) => {
    expect(stripUnsafeCharacters(text)).toBe(text);
  });
});

describe('stripped C0 control characters (0x00-0x1F except TAB/LF/CR)', () => {
  // Each row: [label completing the test title, the single control character under test].
  // TAB (0x09), LF (0x0A) and CR (0x0D) are intentionally absent; ESC (0x1B) is
  // not listed in this table either.
  const c0Controls: Array<[string, string]> = [
    ['NULL (0x00)', '\x00'],
    ['SOH (0x01)', '\x01'],
    ['STX (0x02)', '\x02'],
    ['ETX (0x03)', '\x03'],
    ['EOT (0x04)', '\x04'],
    ['ENQ (0x05)', '\x05'],
    ['ACK (0x06)', '\x06'],
    ['BELL (0x07)', '\x07'],
    ['BACKSPACE (0x08)', '\x08'],
    ['VT/Vertical Tab (0x0B)', '\x0B'],
    ['FF/Form Feed (0x0C)', '\x0C'],
    ['SO (0x0E)', '\x0E'],
    ['SI (0x0F)', '\x0F'],
    ['DLE (0x10)', '\x10'],
    ['DC1 (0x11)', '\x11'],
    ['DC2 (0x12)', '\x12'],
    ['DC3 (0x13)', '\x13'],
    ['DC4 (0x14)', '\x14'],
    ['NAK (0x15)', '\x15'],
    ['SYN (0x16)', '\x16'],
    ['ETB (0x17)', '\x17'],
    ['CAN (0x18)', '\x18'],
    ['EM (0x19)', '\x19'],
    ['SUB (0x1A)', '\x1A'],
    ['FS (0x1C)', '\x1C'],
    ['GS (0x1D)', '\x1D'],
    ['RS (0x1E)', '\x1E'],
    ['US (0x1F)', '\x1F'],
  ];

  // The control character is sandwiched between two words; only the words should survive.
  it.each(c0Controls)('should strip %s', (_label, controlChar) => {
    const dirty = `hello${controlChar}world`;
    expect(stripUnsafeCharacters(dirty)).toBe('helloworld');
  });
});

describe('stripped C1 control characters (0x80-0x9F)', () => {
  it('should strip all C1 control characters', () => {
    // Test a few representative C1 control chars
    const samples = ['\x80', '\x85' /* NEL */, '\x8A', '\x90', '\x9F'];
    for (const c1 of samples) {
      expect(stripUnsafeCharacters(`hello${c1}world`)).toBe('helloworld');
    }
  });

  it('should preserve characters at 0xA0 and above (non-C1)', () => {
    // 0xA0 is non-breaking space, should be preserved
    const withNbsp = 'hello\xA0world';
    expect(stripUnsafeCharacters(withNbsp)).toBe('hello\xA0world');
  });
});

describe('ANSI escape sequence stripping', () => {
  // Each row: [label completing the test title, raw input, text expected after stripping].
  const ansiCases: Array<[string, string, string]> = [
    ['ANSI color codes', '\x1b[31mRed\x1b[0m text', 'Red text'],
    ['ANSI cursor movement codes', 'hello\x1b[9D\x1b[Kworld', 'helloworld'],
    [
      'complex ANSI sequences',
      '\x1b[1;32;40mBold Green on Black\x1b[0m',
      'Bold Green on Black',
    ],
  ];

  it.each(ansiCases)('should strip %s', (_label, raw, plain) => {
    expect(stripUnsafeCharacters(raw)).toBe(plain);
  });
});

describe('multiple control characters', () => {
  it('should strip multiple different control characters', () => {
    // Control characters interleaved with single letters.
    const cleaned = stripUnsafeCharacters('a\x00b\x01c\x02d\x07e\x08f');
    expect(cleaned).toBe('abcdef');
  });

  it('should handle consecutive control characters', () => {
    // A run of five control characters with no text between them.
    const cleaned = stripUnsafeCharacters('hello\x00\x01\x02\x03\x04world');
    expect(cleaned).toBe('helloworld');
  });

  it('should handle mixed preserved and stripped chars', () => {
    // LF, TAB and CRLF stay; NULL and BELL are dropped.
    const cleaned = stripUnsafeCharacters('line1\n\x00line2\t\x07line3\r\n');
    expect(cleaned).toBe('line1\nline2\tline3\r\n');
  });
});

describe('edge cases', () => {
  it('should handle empty string', () => {
    const nothing = '';
    expect(stripUnsafeCharacters(nothing)).toBe('');
  });

  it('should handle string with only control characters', () => {
    const onlyControls = '\x00\x01\x02\x03';
    expect(stripUnsafeCharacters(onlyControls)).toBe('');
  });

  it('should handle string with only preserved whitespace', () => {
    const onlyWhitespace = '\t\n\r';
    expect(stripUnsafeCharacters(onlyWhitespace)).toBe('\t\n\r');
  });

  it('should handle very long strings efficiently', () => {
    // 10000 'a's, one NULL, 10000 'b's; only the NULL should disappear.
    const head = 'a'.repeat(10000);
    const tail = 'b'.repeat(10000);
    const cleaned = stripUnsafeCharacters(head + '\x00' + tail);
    expect(cleaned).toBe(head + tail);
    expect(cleaned.length).toBe(20000);
  });

  it('should handle surrogate pairs correctly', () => {
    // 𝌆 is outside BMP (U+1D306)
    const astral = '𝌆hello𝌆';
    expect(stripUnsafeCharacters(astral)).toBe('𝌆hello𝌆');
  });

  it('should handle mixed BMP and non-BMP characters', () => {
    const mixedScripts = 'Hello 世界 🌍 привет';
    expect(stripUnsafeCharacters(mixedScripts)).toBe('Hello 世界 🌍 привет');
  });
});

describe('performance: regex vs array-based', () => {
  // NOTE(review): despite the suite title, this test only checks the output
  // string; it does not measure timing.
  it('should handle real-world terminal output with control chars', () => {
    // Simulate terminal output with various control sequences
    const cleaned = stripUnsafeCharacters(
      '\x1b[32mSuccess:\x1b[0m File saved\x07\n\x1b[?25hDone',
    );
    expect(cleaned).toBe('Success: File saved\nDone');
  });
});
});
describe('escapeAnsiCtrlCodes', () => {
Expand Down
29 changes: 6 additions & 23 deletions packages/cli/src/ui/utils/textUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ export function cpSlice(str: string, start: number, end?: number): string {
* Characters stripped:
* - ANSI escape sequences (via strip-ansi)
* - VT control sequences (via Node.js util.stripVTControlCharacters)
* - C0 control chars (0x00-0x1F) except CR/LF which are handled elsewhere
* - C0 control chars (0x00-0x1F) except TAB(0x09), LF(0x0A), CR(0x0D)
* - C1 control chars (0x80-0x9F) that can cause display issues
*
* Characters preserved:
Expand All @@ -117,28 +117,11 @@ export function stripUnsafeCharacters(str: string): string {
const strippedAnsi = stripAnsi(str);
const strippedVT = stripVTControlCharacters(strippedAnsi);

return toCodePoints(strippedVT)
.filter((char) => {
const code = char.codePointAt(0);
if (code === undefined) return false;

// Preserve CR/LF/TAB for line handling
if (code === 0x0a || code === 0x0d || code === 0x09) return true;

// Remove C0 control chars (except CR/LF) that can break display
// Examples: BELL(0x07) makes noise, BS(0x08) moves cursor, VT(0x0B), FF(0x0C)
if (code >= 0x00 && code <= 0x1f) return false;

// Remove C1 control chars (0x80-0x9f) - legacy 8-bit control codes
if (code >= 0x80 && code <= 0x9f) return false;

// Preserve DEL (0x7f) - it's handled functionally by applyOperations as backspace
// and doesn't cause rendering issues when displayed

// Preserve all other characters including Unicode/emojis
return true;
})
.join('');
// Use a regex to strip remaining unsafe control characters
// C0: 0x00-0x1F except 0x09 (TAB), 0x0A (LF), 0x0D (CR)
// C1: 0x80-0x9F
// DEL (0x7F) lies outside both ranges, so it is left in place.
// ESC (0x1B) falls inside 0x0E-0x1F, so any ESC still present in strippedVT
// is removed here as well.
// Every code unit in the class is below 0xA0, so matching without the `u`
// flag can never hit half of a surrogate pair; characters outside the BMP
// pass through intact.
// eslint-disable-next-line no-control-regex
return strippedVT.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x80-\x9F]/g, '');
}

/**
Expand Down
Loading