From bcbcfb025f1cce3278bc839966794f86ddacda82 Mon Sep 17 00:00:00 2001
From: Ruben Bridgewater
Date: Fri, 26 Nov 2021 05:37:10 +0100
Subject: [PATCH] util: escape lone surrogate code points using .inspect()

Unpaired surrogate code points have no representation in UTF8.
Therefore, such code points are just "random" output that is
unreadable. Instead, escape the code points similar to C0 and C1
control characters.

Refs: https://unicodebook.readthedocs.io/unicode_encodings.html#utf-16-surrogate-pairs

Signed-off-by: Ruben Bridgewater
---
 lib/internal/util/inspect.js       | 28 ++++++++++++++------
 test/parallel/test-util-inspect.js | 42 ++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 8 deletions(-)

diff --git a/lib/internal/util/inspect.js b/lib/internal/util/inspect.js
index 4936ab761ddccb..5ea70b6b8d7f5e 100644
--- a/lib/internal/util/inspect.js
+++ b/lib/internal/util/inspect.js
@@ -176,10 +176,10 @@ const kArrayType = 1;
 const kArrayExtrasType = 2;
 
 /* eslint-disable no-control-regex */
-const strEscapeSequencesRegExp = /[\x00-\x1f\x27\x5c\x7f-\x9f]/;
-const strEscapeSequencesReplacer = /[\x00-\x1f\x27\x5c\x7f-\x9f]/g;
-const strEscapeSequencesRegExpSingle = /[\x00-\x1f\x5c\x7f-\x9f]/;
-const strEscapeSequencesReplacerSingle = /[\x00-\x1f\x5c\x7f-\x9f]/g;
+const strEscapeSequencesRegExp = /[\x00-\x1f\x27\x5c\x7f-\x9f]|[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]/;
+const strEscapeSequencesReplacer = /[\x00-\x1f\x27\x5c\x7f-\x9f]|[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]/g;
+const strEscapeSequencesRegExpSingle = /[\x00-\x1f\x5c\x7f-\x9f]|[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]/;
+const strEscapeSequencesReplacerSingle = /[\x00-\x1f\x5c\x7f-\x9f]|[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]/g;
-const escapeFn = (str) => meta[StringPrototypeCharCodeAt(str)];
+function escapeFn(str) {
+  const charCode = StringPrototypeCharCodeAt(str);
+  return meta.length > charCode ? meta[charCode] : `\\u${charCode.toString(16)}`;
+}
 
 // Escape control characters, single quotes and the backslash.
 // This is similar to JSON stringify escaping.
@@ -496,8 +499,7 @@ function strEscape(str) {
 
   let result = '';
   let last = 0;
-  const lastIndex = str.length;
-  for (let i = 0; i < lastIndex; i++) {
+  for (let i = 0; i < str.length; i++) {
     const point = StringPrototypeCharCodeAt(str, i);
     if (point === singleQuote ||
         point === 92 ||
@@ -509,10 +511,20 @@ function strEscape(str) {
         result += `${StringPrototypeSlice(str, last, i)}${meta[point]}`;
       }
       last = i + 1;
+    } else if (point >= 0xd800 && point <= 0xdfff) {
+      if (point <= 0xdbff && i + 1 < str.length) {
+        const point = StringPrototypeCharCodeAt(str, i + 1);
+        if (point >= 0xdc00 && point <= 0xdfff) {
+          i++;
+          continue;
+        }
+      }
+      result += `${StringPrototypeSlice(str, last, i)}${`\\u${point.toString(16)}`}`;
+      last = i + 1;
     }
   }
 
-  if (last !== lastIndex) {
+  if (last !== str.length) {
     result += StringPrototypeSlice(str, last);
   }
   return addQuotes(result, singleQuote);
diff --git a/test/parallel/test-util-inspect.js b/test/parallel/test-util-inspect.js
index fe01c4c0f92858..0f7db93306edc1 100644
--- a/test/parallel/test-util-inspect.js
+++ b/test/parallel/test-util-inspect.js
@@ -837,6 +837,48 @@ assert.strictEqual(util.inspect(Object.create(Date.prototype)), 'Date {}');
   );
 }
 
+// Escape unpaired surrogate pairs.
+{
+  const edgeChar = String.fromCharCode(0xd799);
+
+  for (let charCode = 0xD800; charCode < 0xDFFF; charCode++) {
+    const surrogate = String.fromCharCode(charCode);
+
+    assert.strictEqual(
+      util.inspect(surrogate),
+      `'\\u${charCode.toString(16)}'`
+    );
+    assert.strictEqual(
+      util.inspect(`${'a'.repeat(200)}${surrogate}`),
+      `'${'a'.repeat(200)}\\u${charCode.toString(16)}'`
+    );
+    assert.strictEqual(
+      util.inspect(`${surrogate}${'a'.repeat(200)}`),
+      `'\\u${charCode.toString(16)}${'a'.repeat(200)}'`
+    );
+    if (charCode < 0xdc00) {
+      const highSurrogate = surrogate;
+      const lowSurrogate = String.fromCharCode(charCode + 1024);
+      assert(
+        !util.inspect(
+          `${edgeChar}${highSurrogate}${lowSurrogate}${edgeChar}`
+        ).includes('\\u')
+      );
+      assert.strictEqual(
+        (util.inspect(
+          `${highSurrogate}${highSurrogate}${lowSurrogate}`
+        ).match(/\\u/g) ?? []).length,
+        1
+      );
+    } else {
+      assert.strictEqual(
+        util.inspect(`${edgeChar}${surrogate}${edgeChar}`),
+        `'${edgeChar}\\u${charCode.toString(16)}${edgeChar}'`
+      );
+    }
+  }
+}
+
 // Test util.inspect.styles and util.inspect.colors.
 {
   function testColorStyle(style, input, implicit) {