diff --git a/lib/internal/bootstrap/switches/does_own_process_state.js b/lib/internal/bootstrap/switches/does_own_process_state.js
index 370da66a825f65..ced9c4cbaa0e6c 100644
--- a/lib/internal/bootstrap/switches/does_own_process_state.js
+++ b/lib/internal/bootstrap/switches/does_own_process_state.js
@@ -138,7 +138,17 @@ function wrappedUmask(mask) {
 }
 
 function wrappedCwd() {
-  if (cachedCwd === '')
-    cachedCwd = rawMethods.cwd();
+  if (cachedCwd === '') {
+    try {
+      cachedCwd = rawMethods.cwd();
+    } catch (err) {
+      // ENOENT means the working directory itself is gone. Replace the generic
+      // message with one that says so; any other error is rethrown unchanged.
+      if (err.code === 'ENOENT') {
+        err.message = 'process.cwd() failed: current working directory no longer exists';
+      }
+      throw err;
+    }
+  }
   return cachedCwd;
 }
diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js
index e054ca4dd5202d..45f5edfae8efba 100644
--- a/lib/internal/encoding.js
+++ b/lib/internal/encoding.js
@@ -29,6 +29,7 @@ const kDecoder = Symbol('decoder');
 const kFatal = Symbol('kFatal');
 const kUTF8FastPath = Symbol('kUTF8FastPath');
 const kLatin1FastPath = Symbol('kLatin1FastPath');
+const kWindows1252FastPath = Symbol('kWindows1252FastPath');
 const kIgnoreBOM = Symbol('kIgnoreBOM');
 
 const {
@@ -56,6 +57,7 @@ const {
   encodeUtf8String,
   decodeUTF8,
   decodeLatin1,
+  decodeWindows1252,
 } = binding;
 
 const { Buffer } = require('buffer');
@@ -418,12 +420,12 @@ function makeTextDecoderICU() {
       this[kEncoding] = enc;
       this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
       this[kFatal] = Boolean(options?.fatal);
-      // Only support fast path for UTF-8.
+      // Only UTF-8 and windows-1252 have a fast path that skips the converter.
       this[kUTF8FastPath] = enc === 'utf-8';
-      this[kLatin1FastPath] = enc === 'windows-1252';
+      this[kWindows1252FastPath] = enc === 'windows-1252';
       this[kHandle] = undefined;
 
-      if (!this[kUTF8FastPath] && !this[kLatin1FastPath]) {
+      if (!this[kUTF8FastPath] && !this[kWindows1252FastPath]) {
         this.#prepareConverter();
       }
     }
@@ -440,14 +442,14 @@ function makeTextDecoderICU() {
       validateDecoder(this);
 
       this[kUTF8FastPath] &&= !(options?.stream);
-      this[kLatin1FastPath] &&= !(options?.stream);
+      this[kWindows1252FastPath] &&= !(options?.stream);
 
       if (this[kUTF8FastPath]) {
         return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
       }
 
-      if (this[kLatin1FastPath]) {
-        return decodeLatin1(input, this[kIgnoreBOM], this[kFatal]);
+      if (this[kWindows1252FastPath]) {
+        return decodeWindows1252(input, this[kIgnoreBOM], this[kFatal]);
       }
 
       this.#prepareConverter();
diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc
index 88d49d7fcd134d..319ab491963b6f 100644
--- a/src/encoding_binding.cc
+++ b/src/encoding_binding.cc
@@ -79,6 +79,17 @@ InternalFieldInfoBase* BindingData::Serialize(int index) {
 // Copyright (c) 2017-2025 Cloudflare, Inc.
// Licensed under the Apache 2.0 license found in the LICENSE file or at:
 // https://opensource.org/licenses/Apache-2.0
+
+// Code points for windows-1252 bytes 0x80-0x9F, indexed by (byte - 0x80).
+// All other bytes decode to the code point of the same value (ISO-8859-1).
+// Reference: https://encoding.spec.whatwg.org/#windows-1252
+static constexpr uint16_t kWindows1252Table[32] = {
+  0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,  // 0x80-0x87
+  0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,  // 0x88-0x8F
+  0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,  // 0x90-0x97
+  0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178  // 0x98-0x9F
+};
+
 namespace {
 constexpr int MAX_SIZE_FOR_STACK_ALLOC = 4096;
 
@@ -415,6 +426,8 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
   SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
   SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
   SetMethodNoSideEffect(isolate, target, "decodeLatin1", DecodeLatin1);
+  SetMethodNoSideEffect(
+      isolate, target, "decodeWindows1252", DecodeWindows1252);
 }
 
 void BindingData::CreatePerContextProperties(Local<Object> target,
@@ -433,6 +446,7 @@ void BindingData::RegisterTimerExternalReferences(
   registry->Register(ToASCII);
   registry->Register(ToUnicode);
   registry->Register(DecodeLatin1);
+  registry->Register(DecodeWindows1252);
 }
 
 void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
@@ -481,6 +495,67 @@ void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
   }
 }
 
+// Decodes windows-1252 bytes into a JavaScript string.
+//
+// args[0] is the ArrayBuffer, SharedArrayBuffer or ArrayBufferView to decode.
+// The ignoreBOM and fatal flags passed by the JS caller are deliberately not
+// read: windows-1252 has no byte order mark (0xFF is U+00FF, not a BOM) and
+// every byte value has a mapping, so decoding can neither skip nor fail.
+void BindingData::DecodeWindows1252(const FunctionCallbackInfo<Value>& args) {
+  Environment* env = Environment::GetCurrent(args);
+
+  CHECK_GE(args.Length(), 1);
+  if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
+        args[0]->IsArrayBufferView())) {
+    return node::THROW_ERR_INVALID_ARG_TYPE(
+        env->isolate(),
+        "The \"input\" argument must be an instance of ArrayBuffer, "
+        "SharedArrayBuffer, or ArrayBufferView.");
+  }
+
+  ArrayBufferViewContents<uint8_t> buffer(args[0]);
+  const uint8_t* data = buffer.data();
+  const size_t length = buffer.length();
+
+  if (length == 0) {
+    return args.GetReturnValue().SetEmptyString();
+  }
+
+  // Transcode to UTF-8. Bytes 0x80-0x9F can map to code points as high as
+  // U+2122, which need three UTF-8 bytes, so reserve for the worst case.
+  std::string result;
+  result.reserve(length * 3);
+
+  for (size_t i = 0; i < length; i++) {
+    const uint8_t byte = data[i];
+    // Only 0x80-0x9F differ from ISO-8859-1; all other bytes are their own
+    // code point.
+    const uint32_t codepoint = (byte >= 0x80 && byte <= 0x9F)
+                                   ? kWindows1252Table[byte - 0x80]
+                                   : byte;
+
+    if (codepoint < 0x80) {
+      // 1-byte sequence (ASCII)
+      result += static_cast<char>(codepoint);
+    } else if (codepoint < 0x800) {
+      // 2-byte sequence
+      result += static_cast<char>(0xC0 | (codepoint >> 6));
+      result += static_cast<char>(0x80 | (codepoint & 0x3F));
+    } else {
+      // 3-byte sequence; the table holds no code point above U+FFFF.
+      result += static_cast<char>(0xE0 | (codepoint >> 12));
+      result += static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
+      result += static_cast<char>(0x80 | (codepoint & 0x3F));
+    }
+  }
+
+  Local<Value> ret;
+  if (ToV8Value(env->context(), std::string_view(result), env->isolate())
+          .ToLocal(&ret)) {
+    args.GetReturnValue().Set(ret);
+  }
+}
+
 }  // namespace encoding_binding
 }  // namespace node
 
diff --git a/src/encoding_binding.h b/src/encoding_binding.h
index 97f55394d27641..43f320e8fe2a7c 100644
--- a/src/encoding_binding.h
+++ b/src/encoding_binding.h
@@ -32,6 +32,7 @@ class BindingData : public
SnapshotableObject {
   static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
   static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);
   static void DecodeLatin1(const v8::FunctionCallbackInfo<v8::Value>& args);
+  static void DecodeWindows1252(const v8::FunctionCallbackInfo<v8::Value>& args);
   static void ToASCII(const v8::FunctionCallbackInfo<v8::Value>& args);
   static void ToUnicode(const v8::FunctionCallbackInfo<v8::Value>& args);
 
diff --git a/test/known_issues/test-cwd-enoent-file.js b/test/known_issues/test-cwd-enoent-file.js
index 6d99987895baf4..eeaca9cdb1197b 100644
--- a/test/known_issues/test-cwd-enoent-file.js
+++ b/test/known_issues/test-cwd-enoent-file.js
@@ -23,7 +23,7 @@ if (process.argv[2] === 'child') {
   process.chdir(dir);
   fs.rmdirSync(dir);
   assert.throws(process.cwd,
-                /^Error: ENOENT: no such file or directory, uv_cwd$/);
+                /^Error: process\.cwd\(\) failed: current working directory no longer exists$/);
 
   const r = cp.spawnSync(process.execPath, [__filename, 'child']);
 
diff --git a/test/parallel/test-process-cwd-deleted-dir.js b/test/parallel/test-process-cwd-deleted-dir.js
new file mode 100644
index 00000000000000..046336b3a3c945
--- /dev/null
+++ b/test/parallel/test-process-cwd-deleted-dir.js
@@ -0,0 +1,49 @@
+'use strict';
+const common = require('../common');
+const assert = require('assert');
+const fs = require('fs');
+const path = require('path');
+const { isMainThread } = require('worker_threads');
+
+if (common.isSunOS || common.isWindows || common.isAIX || common.isIBMi) {
+  // The current working directory cannot be removed on these platforms.
+  common.skip('cannot rmdir current working directory');
+}
+
+if (!isMainThread) {
+  common.skip('process.chdir is not available in Workers');
+}
+
+const tmpdir = require('../common/tmpdir');
+tmpdir.refresh();
+
+// Create a temporary directory
+const testDir = path.join(tmpdir.path, 'test-cwd-deleted');
+fs.mkdirSync(testDir);
+
+// Save original cwd
+const originalCwd = process.cwd();
+
+try {
+  // Change to the test directory
+  process.chdir(testDir);
+
+  // Delete the directory while we're in it
+  fs.rmdirSync(testDir);
+
+  // Verify that process.cwd() throws with improved error message
+  assert.throws(
+    () => process.cwd(),
+    {
+      code: 'ENOENT',
+      message: /process\.cwd\(\) failed: current working directory no longer exists/,
+    }
+  );
+} finally {
+  // Restore original cwd for cleanup
+  try {
+    process.chdir(originalCwd);
+  } catch {
+    // Ignore errors if we can't change back
+  }
+}
diff --git a/test/parallel/test-util-text-decoder.js b/test/parallel/test-util-text-decoder.js
index 0f6d0463f9da48..0c34913470c46c 100644
--- a/test/parallel/test-util-text-decoder.js
+++ b/test/parallel/test-util-text-decoder.js
@@ -15,3 +15,88 @@ test('TextDecoder correctly decodes windows-1252 encoded data', { skip: !common.
assert.strictEqual(decodedString, expectedString);
 });
+
+test('TextDecoder correctly decodes windows-1252 special characters (0x80-0x9F)', { skip: !common.hasIntl }, () => {
+  const decoder = new TextDecoder('windows-1252');
+
+  // Test byte 0x92 (right single quotation mark) - the main bug from issue #56542
+  assert.strictEqual(decoder.decode(new Uint8Array([0x92])), '\u2019');
+  assert.strictEqual(decoder.decode(new Uint8Array([0x92])).charCodeAt(0), 0x2019);
+
+  // Every byte in 0x80-0x9F to which windows-1252 assigns a character.
+  // The five unassigned bytes (0x81, 0x8D, 0x8F, 0x90, 0x9D) are not listed here.
+  const testCases = [
+    { byte: 0x80, expected: '\u20AC', name: 'Euro sign' },
+    { byte: 0x82, expected: '\u201A', name: 'Single low-9 quotation mark' },
+    { byte: 0x83, expected: '\u0192', name: 'Latin small letter f with hook' },
+    { byte: 0x84, expected: '\u201E', name: 'Double low-9 quotation mark' },
+    { byte: 0x85, expected: '\u2026', name: 'Horizontal ellipsis' },
+    { byte: 0x86, expected: '\u2020', name: 'Dagger' },
+    { byte: 0x87, expected: '\u2021', name: 'Double dagger' },
+    { byte: 0x88, expected: '\u02C6', name: 'Modifier letter circumflex accent' },
+    { byte: 0x89, expected: '\u2030', name: 'Per mille sign' },
+    { byte: 0x8A, expected: '\u0160', name: 'Latin capital letter S with caron' },
+    { byte: 0x8B, expected: '\u2039', name: 'Single left-pointing angle quotation mark' },
+    { byte: 0x8C, expected: '\u0152', name: 'Latin capital ligature OE' },
+    { byte: 0x8E, expected: '\u017D', name: 'Latin capital letter Z with caron' },
+    { byte: 0x91, expected: '\u2018', name: 'Left single quotation mark' },
+    { byte: 0x92, expected: '\u2019', name: 'Right single quotation mark' },
+    { byte: 0x93, expected: '\u201C', name: 'Left double quotation mark' },
+    { byte: 0x94, expected: '\u201D', name: 'Right double quotation mark' },
+    { byte: 0x95, expected: '\u2022', name: 'Bullet' },
+    { byte: 0x96, expected: '\u2013', name: 'En dash' },
+    { byte: 0x97, expected: '\u2014', name: 'Em dash' },
+    { byte: 0x98, expected: '\u02DC', name: 'Small tilde' },
+    { byte: 0x99, expected: '\u2122', name: 'Trade mark sign' },
+    { byte: 0x9A, expected: '\u0161', name: 'Latin small letter s with caron' },
+    { byte: 0x9B, expected: '\u203A', name: 'Single right-pointing angle quotation mark' },
+    { byte: 0x9C, expected: '\u0153', name: 'Latin small ligature oe' },
+    { byte: 0x9E, expected: '\u017E', name: 'Latin small letter z with caron' },
+    { byte: 0x9F, expected: '\u0178', name: 'Latin capital letter Y with diaeresis' },
+  ];
+
+  for (const { byte, expected, name } of testCases) {
+    const result = decoder.decode(new Uint8Array([byte]));
+    assert.strictEqual(result, expected, `Failed for ${name} (0x${byte.toString(16)})`);
+  }
+});
+
+test('TextDecoder windows-1252 handles undefined bytes correctly', { skip: !common.hasIntl }, () => {
+  const decoder = new TextDecoder('windows-1252');
+
+  // Bytes 0x81, 0x8D, 0x8F, 0x90, 0x9D have no character assigned in windows-1252.
+  // Each must decode to the code point with the same numeric value (e.g. 0x81 -> U+0081).
+  const undefinedBytes = [0x81, 0x8D, 0x8F, 0x90, 0x9D];
+
+  for (const byte of undefinedBytes) {
+    const result = decoder.decode(new Uint8Array([byte]));
+    assert.strictEqual(result.charCodeAt(0), byte,
+                       `Undefined byte 0x${byte.toString(16)} should map to U+00${byte.toString(16).toUpperCase()}`);
+  }
+});
+
+test('TextDecoder windows-1252 handles ASCII range correctly', { skip: !common.hasIntl }, () => {
+  const decoder = new TextDecoder('windows-1252');
+
+  // ASCII bytes (0x00-0x7F) decode to themselves; "Hello" is the sample used here.
+  const asciiBytes = new Uint8Array([0x48, 0x65, 0x6C, 0x6C, 0x6F]); // "Hello"
+  assert.strictEqual(decoder.decode(asciiBytes), 'Hello');
+});
+
+test('TextDecoder windows-1252 handles Latin-1 range correctly', { skip: !common.hasIntl }, () => {
+  const decoder = new TextDecoder('windows-1252');
+
+  // Sample bytes from 0xA0-0xFF, where each byte decodes to the code point of the same value.
+  const latin1Bytes = new Uint8Array([0xA0, 0xC0, 0xE0, 0xFF]);
+  const expected = '\u00A0\u00C0\u00E0\u00FF';
+  assert.strictEqual(decoder.decode(latin1Bytes), expected);
+});
+
+test('TextDecoder windows-1252 handles mixed content', { skip: !common.hasIntl }, () => {
+  const decoder = new TextDecoder('windows-1252');
+
+  // Mix of ASCII bytes and windows-1252-specific bytes from the 0x80-0x9F range.
+  // Spells "It's €100": the apostrophe is byte 0x92 (U+2019) and the euro sign is byte 0x80 (U+20AC).
+  const mixedBytes = new Uint8Array([0x49, 0x74, 0x92, 0x73, 0x20, 0x80, 0x31, 0x30, 0x30]);
+  assert.strictEqual(decoder.decode(mixedBytes), 'It\u2019s \u20AC100');
+});