Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions lib/internal/bootstrap/switches/does_own_process_state.js
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,17 @@ function wrappedUmask(mask) {
}

/**
 * Returns the process working directory, reading it through
 * `rawMethods.cwd()` only while `cachedCwd` is still the empty string.
 *
 * The raw lookup must happen exactly once and inside the `try`: an
 * additional unguarded call ahead of it would throw first and the
 * message rewrite below would never run.
 * @returns {string} The cached working directory.
 * @throws {Error} Whatever `rawMethods.cwd()` throws. For `ENOENT` the
 *   `message` is replaced with a clearer description; `code` and the
 *   other properties of the error object are left untouched.
 */
function wrappedCwd() {
  if (cachedCwd === '') {
    try {
      cachedCwd = rawMethods.cwd();
    } catch (err) {
      // Provide a clearer error message when the current directory
      // doesn't exist.
      if (err.code === 'ENOENT') {
        err.message = 'process.cwd() failed: current working directory no longer exists';
      }
      throw err;
    }
  }
  return cachedCwd;
}

14 changes: 8 additions & 6 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const kDecoder = Symbol('decoder');
const kFatal = Symbol('kFatal');
const kUTF8FastPath = Symbol('kUTF8FastPath');
const kLatin1FastPath = Symbol('kLatin1FastPath');
const kWindows1252FastPath = Symbol('kWindows1252FastPath');
const kIgnoreBOM = Symbol('kIgnoreBOM');

const {
Expand Down Expand Up @@ -56,6 +57,7 @@ const {
encodeUtf8String,
decodeUTF8,
decodeLatin1,
decodeWindows1252,
} = binding;

const { Buffer } = require('buffer');
Expand Down Expand Up @@ -418,12 +420,12 @@ function makeTextDecoderICU() {
this[kEncoding] = enc;
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
this[kFatal] = Boolean(options?.fatal);
// Only support fast path for UTF-8.
// Only support fast path for UTF-8 and Windows-1252.
this[kUTF8FastPath] = enc === 'utf-8';
this[kLatin1FastPath] = enc === 'windows-1252';
this[kWindows1252FastPath] = enc === 'windows-1252';
this[kHandle] = undefined;

if (!this[kUTF8FastPath] && !this[kLatin1FastPath]) {
if (!this[kUTF8FastPath] && !this[kWindows1252FastPath]) {
this.#prepareConverter();
}
}
Expand All @@ -440,14 +442,14 @@ function makeTextDecoderICU() {
validateDecoder(this);

this[kUTF8FastPath] &&= !(options?.stream);
this[kLatin1FastPath] &&= !(options?.stream);
this[kWindows1252FastPath] &&= !(options?.stream);

if (this[kUTF8FastPath]) {
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
}

if (this[kLatin1FastPath]) {
return decodeLatin1(input, this[kIgnoreBOM], this[kFatal]);
if (this[kWindows1252FastPath]) {
return decodeWindows1252(input, this[kIgnoreBOM], this[kFatal]);
}

this.#prepareConverter();
Expand Down
87 changes: 87 additions & 0 deletions src/encoding_binding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,17 @@ InternalFieldInfoBase* BindingData::Serialize(int index) {
// Copyright (c) 2017-2025 Cloudflare, Inc.
// Licensed under the Apache 2.0 license found in the LICENSE file or at:
// https://opensource.org/licenses/Apache-2.0

// Code points for the windows-1252 bytes 0x80 through 0x9F, indexed by
// (byte - 0x80). Every other byte value maps to the code point with the same
// numeric value, so only this 32-entry window needs a lookup table.
// Entries 0x81, 0x8D, 0x8F, 0x90 and 0x9D hold the byte's own value.
// Reference: https://encoding.spec.whatwg.org/#windows-1252
static constexpr uint16_t kWindows1252Table[32] = {
    0x20AC, 0x0081, 0x201A, 0x0192,  // 0x80-0x83
    0x201E, 0x2026, 0x2020, 0x2021,  // 0x84-0x87
    0x02C6, 0x2030, 0x0160, 0x2039,  // 0x88-0x8B
    0x0152, 0x008D, 0x017D, 0x008F,  // 0x8C-0x8F
    0x0090, 0x2018, 0x2019, 0x201C,  // 0x90-0x93
    0x201D, 0x2022, 0x2013, 0x2014,  // 0x94-0x97
    0x02DC, 0x2122, 0x0161, 0x203A,  // 0x98-0x9B
    0x0153, 0x009D, 0x017E, 0x0178,  // 0x9C-0x9F
};

namespace {
constexpr int MAX_SIZE_FOR_STACK_ALLOC = 4096;

Expand Down Expand Up @@ -415,6 +426,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
SetMethodNoSideEffect(isolate, target, "decodeLatin1", DecodeLatin1);
SetMethodNoSideEffect(isolate, target, "decodeWindows1252", DecodeWindows1252);
}

void BindingData::CreatePerContextProperties(Local<Object> target,
Expand All @@ -433,6 +445,7 @@ void BindingData::RegisterTimerExternalReferences(
registry->Register(ToASCII);
registry->Register(ToUnicode);
registry->Register(DecodeLatin1);
registry->Register(DecodeWindows1252);
}

void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
Expand Down Expand Up @@ -481,6 +494,80 @@ void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
}
}

// Decodes a windows-1252 byte sequence into a JavaScript string.
//
// args[0]: ArrayBuffer, SharedArrayBuffer or ArrayBufferView with the bytes
//          to decode; anything else throws ERR_INVALID_ARG_TYPE.
// args[1]: ignoreBOM flag (accepted, intentionally unused).
// args[2]: fatal flag (accepted, intentionally unused).
//
// windows-1252 is a single-byte encoding with no byte order mark, so no
// leading byte is ever stripped: 0xFF is the ordinary character U+00FF and
// must survive decoding regardless of ignoreBOM. Every byte value also has a
// mapping (0x80-0x9F through kWindows1252Table, everything else to the code
// point of the same value), so there is no invalid input for the fatal flag
// to report.
void BindingData::DecodeWindows1252(const FunctionCallbackInfo<Value>& args) {
  Environment* env = Environment::GetCurrent(args);

  CHECK_GE(args.Length(), 1);
  if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
        args[0]->IsArrayBufferView())) {
    return node::THROW_ERR_INVALID_ARG_TYPE(
        env->isolate(),
        "The \"input\" argument must be an instance of ArrayBuffer, "
        "SharedArrayBuffer, or ArrayBufferView.");
  }

  ArrayBufferViewContents<uint8_t> buffer(args[0]);
  const uint8_t* data = buffer.data();
  const size_t length = buffer.length();

  if (length == 0) {
    return args.GetReturnValue().SetEmptyString();
  }

  // Convert windows-1252 to UTF-8. The largest code point in the table is
  // below 0x10000, so one input byte expands to at most three UTF-8 bytes.
  std::string result;
  result.reserve(length * 3);

  for (size_t i = 0; i < length; i++) {
    const uint8_t byte = data[i];
    // 0x80-0x9F use the windows-1252 specific table; 0x00-0x7F and 0xA0-0xFF
    // map to the code point with the same value.
    const uint32_t codepoint = (byte >= 0x80 && byte <= 0x9F)
                                   ? kWindows1252Table[byte - 0x80]
                                   : byte;

    if (codepoint < 0x80) {
      // 1-byte sequence (ASCII)
      result += static_cast<char>(codepoint);
    } else if (codepoint < 0x800) {
      // 2-byte sequence
      result += static_cast<char>(0xC0 | (codepoint >> 6));
      result += static_cast<char>(0x80 | (codepoint & 0x3F));
    } else {
      // 3-byte sequence
      result += static_cast<char>(0xE0 | (codepoint >> 12));
      result += static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
      result += static_cast<char>(0x80 | (codepoint & 0x3F));
    }
  }

  const std::string_view view(result);

  Local<Value> ret;
  if (ToV8Value(env->context(), view, env->isolate()).ToLocal(&ret)) {
    args.GetReturnValue().Set(ret);
  }
}

} // namespace encoding_binding
} // namespace node

Expand Down
1 change: 1 addition & 0 deletions src/encoding_binding.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class BindingData : public SnapshotableObject {
static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);
static void DecodeLatin1(const v8::FunctionCallbackInfo<v8::Value>& args);
static void DecodeWindows1252(const v8::FunctionCallbackInfo<v8::Value>& args);

static void ToASCII(const v8::FunctionCallbackInfo<v8::Value>& args);
static void ToUnicode(const v8::FunctionCallbackInfo<v8::Value>& args);
Expand Down
2 changes: 1 addition & 1 deletion test/known_issues/test-cwd-enoent-file.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ if (process.argv[2] === 'child') {
process.chdir(dir);
fs.rmdirSync(dir);
assert.throws(process.cwd,
/^Error: ENOENT: no such file or directory, uv_cwd$/);
/^Error: process\.cwd\(\) failed: current working directory no longer exists$/);

const r = cp.spawnSync(process.execPath, [__filename, 'child']);

Expand Down
44 changes: 44 additions & 0 deletions test/parallel/test-process-cwd-deleted-dir.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
'use strict';
const common = require('../common');
const assert = require('assert');
const fs = require('fs');
const path = require('path');

// Removing the directory the process is currently in is not possible on
// these platforms, so there is nothing to exercise there.
const cwdRemovalUnsupported =
  common.isSunOS || common.isWindows || common.isAIX || common.isIBMi;
if (cwdRemovalUnsupported) {
  common.skip('cannot rmdir current working directory');
}

const tmpdir = require('../common/tmpdir');
tmpdir.refresh();

// Scratch directory that will be entered and then deleted.
const doomedDir = path.join(tmpdir.path, 'test-cwd-deleted');
fs.mkdirSync(doomedDir);

// Remembered so the process can be moved back once the check is done.
const startingCwd = process.cwd();

try {
  // Enter the scratch directory, then remove it from underneath the process.
  process.chdir(doomedDir);
  fs.rmdirSync(doomedDir);

  // process.cwd() must now throw ENOENT carrying the clearer message.
  const expectedError = {
    code: 'ENOENT',
    message: /process\.cwd\(\) failed: current working directory no longer exists/
  };
  assert.throws(() => process.cwd(), expectedError);
} finally {
  // Best effort: return to the starting directory so cleanup can proceed.
  try {
    process.chdir(startingCwd);
  } catch {
    // Ignore errors if we can't change back
  }
}
85 changes: 85 additions & 0 deletions test/parallel/test-util-text-decoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,88 @@ test('TextDecoder correctly decodes windows-1252 encoded data', { skip: !common.

assert.strictEqual(decodedString, expectedString);
});

test('TextDecoder correctly decodes windows-1252 special characters (0x80-0x9F)', { skip: !common.hasIntl }, () => {
  const decoder = new TextDecoder('windows-1252');
  const decodeByte = (value) => decoder.decode(new Uint8Array([value]));

  // Byte 0x92 (right single quotation mark) is the case reported in
  // issue #56542; check both the string and its code unit.
  assert.strictEqual(decodeByte(0x92), '\u2019');
  assert.strictEqual(decodeByte(0x92).charCodeAt(0), 0x2019);

  // [byte, expected character, description] for every byte in 0x80-0x9F
  // except 0x81, 0x8D, 0x8F, 0x90 and 0x9D, which a separate test covers.
  const rows = [
    [0x80, '\u20AC', 'Euro sign'],
    [0x82, '\u201A', 'Single low-9 quotation mark'],
    [0x83, '\u0192', 'Latin small letter f with hook'],
    [0x84, '\u201E', 'Double low-9 quotation mark'],
    [0x85, '\u2026', 'Horizontal ellipsis'],
    [0x86, '\u2020', 'Dagger'],
    [0x87, '\u2021', 'Double dagger'],
    [0x88, '\u02C6', 'Modifier letter circumflex accent'],
    [0x89, '\u2030', 'Per mille sign'],
    [0x8A, '\u0160', 'Latin capital letter S with caron'],
    [0x8B, '\u2039', 'Single left-pointing angle quotation mark'],
    [0x8C, '\u0152', 'Latin capital ligature OE'],
    [0x8E, '\u017D', 'Latin capital letter Z with caron'],
    [0x91, '\u2018', 'Left single quotation mark'],
    [0x92, '\u2019', 'Right single quotation mark'],
    [0x93, '\u201C', 'Left double quotation mark'],
    [0x94, '\u201D', 'Right double quotation mark'],
    [0x95, '\u2022', 'Bullet'],
    [0x96, '\u2013', 'En dash'],
    [0x97, '\u2014', 'Em dash'],
    [0x98, '\u02DC', 'Small tilde'],
    [0x99, '\u2122', 'Trade mark sign'],
    [0x9A, '\u0161', 'Latin small letter s with caron'],
    [0x9B, '\u203A', 'Single right-pointing angle quotation mark'],
    [0x9C, '\u0153', 'Latin small ligature oe'],
    [0x9E, '\u017E', 'Latin small letter z with caron'],
    [0x9F, '\u0178', 'Latin capital letter Y with diaeresis'],
  ];

  rows.forEach(([byte, expected, name]) => {
    assert.strictEqual(decodeByte(byte), expected, `Failed for ${name} (0x${byte.toString(16)})`);
  });
});

test('TextDecoder windows-1252 handles undefined bytes correctly', { skip: !common.hasIntl }, () => {
  const decoder = new TextDecoder('windows-1252');

  // 0x81, 0x8D, 0x8F, 0x90 and 0x9D have no dedicated windows-1252
  // character; each is expected to decode to the code unit of the same value.
  [0x81, 0x8D, 0x8F, 0x90, 0x9D].forEach((byte) => {
    const decoded = decoder.decode(Uint8Array.of(byte));
    assert.strictEqual(
      decoded.charCodeAt(0),
      byte,
      `Undefined byte 0x${byte.toString(16)} should map to U+00${byte.toString(16).toUpperCase()}`
    );
  });
});

test('TextDecoder windows-1252 handles ASCII range correctly', { skip: !common.hasIntl }, () => {
  // Bytes in 0x00-0x7F decode to the same characters as in UTF-8.
  const helloBytes = Uint8Array.of(0x48, 0x65, 0x6C, 0x6C, 0x6F); // "Hello"
  const decoded = new TextDecoder('windows-1252').decode(helloBytes);
  assert.strictEqual(decoded, 'Hello');
});

test('TextDecoder windows-1252 handles Latin-1 range correctly', { skip: !common.hasIntl }, () => {
  // Bytes in 0xA0-0xFF decode to the code unit with the same numeric value,
  // exactly as ISO-8859-1 does.
  const byteValues = [0xA0, 0xC0, 0xE0, 0xFF];
  const decoded = new TextDecoder('windows-1252').decode(Uint8Array.from(byteValues));
  assert.strictEqual(decoded, String.fromCharCode(...byteValues));
});

test('TextDecoder windows-1252 handles mixed content', { skip: !common.hasIntl }, () => {
  // ASCII interleaved with table-mapped bytes: 0x92 is the right single
  // quote and 0x80 is the euro sign.
  const input = Uint8Array.of(
    0x49, 0x74, 0x92, 0x73, // I t (0x92) s
    0x20,                   // space
    0x80, 0x31, 0x30, 0x30, // (0x80) 1 0 0
  );
  const decoded = new TextDecoder('windows-1252').decode(input);
  assert.strictEqual(decoded, 'It\u2019s \u20AC100');
});