From f44c31ce887b4878825b2cd4cda955dc8d7a698a Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Fri, 8 Mar 2024 21:53:50 -0800 Subject: [PATCH 1/8] module: fix detect-module not retrying as esm for cjs-only errors --- src/node_contextify.cc | 67 ++++++++++++++++++- test/es-module/test-esm-detect-ambiguous.mjs | 41 ++++++++++++ .../commonjs-wrapper-variables.js | 6 ++ 3 files changed, 111 insertions(+), 3 deletions(-) create mode 100644 test/fixtures/es-modules/package-without-type/commonjs-wrapper-variables.js diff --git a/src/node_contextify.cc b/src/node_contextify.cc index f39b4ec4c372a3..529337d8217283 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -1409,6 +1409,25 @@ constexpr std::array esm_syntax_error_messages = { "Unexpected token 'export'", // `export` statements "Cannot use 'import.meta' outside a module"}; // `import.meta` references +// Another class of error messages that we need to check for are syntax errors +// where the syntax throws when parsed as CommonJS but succeeds when parsed as +// ESM. So far, the cases we've found are: +// - CommonJS module variables (`module`, `exports`, `require`, `__filename`, +// `__dirname`): if the user writes code such as `const module =` in the top +// level of a CommonJS module, it will throw a syntax error; but the same +// code is valid in ESM. +// - Top-level `await`: if the user writes `await` at the top level of a +// CommonJS module, it will throw a syntax error; but the same code is valid +// in ESM. +constexpr std::array throws_only_in_cjs_error_messages = { + "Identifier 'module' has already been declared", + "Identifier 'exports' has already been declared", + "Identifier 'require' has already been declared", + "Identifier '__filename' has already been declared", + "Identifier '__dirname' has already been declared", + "await is only valid in async functions and " + "the top level bodies of modules"}; + void ContextifyContext::ContainsModuleSyntax( const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); @@ -1476,19 +1495,61 @@ void ContextifyContext::ContainsModuleSyntax( id_symbol, try_catch); - bool found_error_message_caused_by_module_syntax = false; + bool should_retry_as_esm = false; if (try_catch.HasCaught() && !try_catch.HasTerminated()) { Utf8Value message_value(env->isolate(), try_catch.Message()->Get()); auto message = message_value.ToStringView(); for (const auto& error_message : esm_syntax_error_messages) { if (message.find(error_message) != std::string_view::npos) { - found_error_message_caused_by_module_syntax = true; + should_retry_as_esm = true; + break; + } + } + + for (const auto& error_message : throws_only_in_cjs_error_messages) { + if (message.find(error_message) != std::string_view::npos) { + // Try parsing again where the user's code is wrapped within an async + // function. If the new parse succeeds, then the error was caused by + // either a top-level declaration of one of the CommonJS module + // variables, or a top-level `await`. + TryCatchScope second_parse_try_catch(env); + Local wrapped_code = + String::Concat(isolate, + String::NewFromUtf8(isolate, "(async function() {") + .ToLocalChecked(), + code); + wrapped_code = String::Concat( + isolate, + wrapped_code, + String::NewFromUtf8(isolate, "})();").ToLocalChecked()); + ScriptCompiler::Source wrapped_source = + GetCommonJSSourceInstance(isolate, + wrapped_code, + filename, + 0, + 0, + host_defined_options, + nullptr); + ContextifyContext::CompileFunctionAndCacheResult( + env, + context, + &wrapped_source, + std::move(params), + std::vector>(), + options, + true, + id_symbol, + second_parse_try_catch); + if (!second_parse_try_catch.HasCaught() && + !second_parse_try_catch.HasTerminated()) { + should_retry_as_esm = true; + } break; } } } - args.GetReturnValue().Set(found_error_message_caused_by_module_syntax); + args.GetReturnValue().Set(should_retry_as_esm); } static void CompileFunctionForCJSLoader( diff --git a/test/es-module/test-esm-detect-ambiguous.mjs b/test/es-module/test-esm-detect-ambiguous.mjs index 43537f26f5304a..fef504df487bed 100644 --- a/test/es-module/test-esm-detect-ambiguous.mjs +++ b/test/es-module/test-esm-detect-ambiguous.mjs @@ -234,6 +234,47 @@ describe('--experimental-detect-module', { concurrency: true }, () => { }); } }); + + // https://github.com/nodejs/node/issues/50917 + describe('syntax that errors in CommonJS but works in ESM', { concurrency: true }, () => { + it('permits top-level `await`', async () => { + const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--experimental-detect-module', + '--eval', + 'await Promise.resolve(); console.log("executed");', + ]); + + strictEqual(stderr, ''); + strictEqual(stdout, 'executed\n'); + strictEqual(code, 0); + strictEqual(signal, null); + }); + + it('still throws on `await` in an ordinary sync function', async () => { + const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--experimental-detect-module', + '--eval', + 'function fn() { await Promise.resolve(); } fn();', + ]); + + match(stderr, /SyntaxError: await is only valid in async function/); + strictEqual(stdout, ''); + strictEqual(code, 1); + strictEqual(signal, null); + }); + + it('permits declaration of CommonJS module variables', async () => { + const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--experimental-detect-module', + fixtures.path('es-modules/package-without-type/commonjs-wrapper-variables.js'), + ]); + + strictEqual(stderr, ''); + strictEqual(stdout, 'exports require module __filename __dirname\n'); + strictEqual(code, 0); + strictEqual(signal, null); + }); + }); }); // Validate temporarily disabling `--abort-on-uncaught-exception` diff --git a/test/fixtures/es-modules/package-without-type/commonjs-wrapper-variables.js b/test/fixtures/es-modules/package-without-type/commonjs-wrapper-variables.js new file mode 100644 index 00000000000000..946bc690f27baa --- /dev/null +++ b/test/fixtures/es-modules/package-without-type/commonjs-wrapper-variables.js @@ -0,0 +1,6 @@ +const exports = "exports"; +const require = "require"; +const module = "module"; +const __filename = "__filename"; +const __dirname = "__dirname"; +console.log(exports, require, module, __filename, __dirname); From b28976ca39d45bf883595e9d02ed04e2d26fa983 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Sun, 10 Mar 2024 15:50:25 -0700 Subject: [PATCH 2/8] fix tla above import/export --- src/node_contextify.cc | 25 +++++++++++++++++--- test/es-module/test-esm-detect-ambiguous.mjs | 13 ++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/src/node_contextify.cc b/src/node_contextify.cc index 529337d8217283..bfe2b32d089a84 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -1541,9 +1541,28 @@ void ContextifyContext::ContainsModuleSyntax( true, id_symbol, second_parse_try_catch); - if (!second_parse_try_catch.HasCaught() && - !second_parse_try_catch.HasTerminated()) { - should_retry_as_esm = true; + if (!second_parse_try_catch.HasTerminated()) { + if (second_parse_try_catch.HasCaught()) { + // If on the second parse an error is thrown by ESM syntax, then + // what happened was that the user had top-level `await` or a + // top-level declaration of one of the CommonJS module variables + // above the first `import` or `export`. + Utf8Value second_message_value( + env->isolate(), second_parse_try_catch.Message()->Get()); + auto second_message = second_message_value.ToStringView(); + for (const auto& error_message : esm_syntax_error_messages) { + if (second_message.find(error_message) != + std::string_view::npos) { + should_retry_as_esm = true; + break; + } + } + } else { + // No errors thrown in the second parse, so most likely the error + // was caused by a top-level `await` or a top-level declaration of + // one of the CommonJS module variables. + should_retry_as_esm = true; + } } break; } diff --git a/test/es-module/test-esm-detect-ambiguous.mjs b/test/es-module/test-esm-detect-ambiguous.mjs index fef504df487bed..7fa74753498b89 100644 --- a/test/es-module/test-esm-detect-ambiguous.mjs +++ b/test/es-module/test-esm-detect-ambiguous.mjs @@ -250,6 +250,19 @@ describe('--experimental-detect-module', { concurrency: true }, () => { strictEqual(signal, null); }); + it('permits top-level `await` above import/export syntax', async () => { + const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--experimental-detect-module', + '--eval', + 'await Promise.resolve(); import "node:os"; console.log("executed");', + ]); + + strictEqual(stderr, ''); + strictEqual(stdout, 'executed\n'); + strictEqual(code, 0); + strictEqual(signal, null); + }); + it('still throws on `await` in an ordinary sync function', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ '--experimental-detect-module', From 91143388fcf00ec2c8f6aab7b723ce00d118e426 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Sun, 10 Mar 2024 16:13:13 -0700 Subject: [PATCH 3/8] add test for redeclaration above import/export --- test/es-module/test-esm-detect-ambiguous.mjs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/es-module/test-esm-detect-ambiguous.mjs b/test/es-module/test-esm-detect-ambiguous.mjs index 7fa74753498b89..d15a63dec98cf4 100644 --- a/test/es-module/test-esm-detect-ambiguous.mjs +++ b/test/es-module/test-esm-detect-ambiguous.mjs @@ -287,6 +287,19 @@ describe('--experimental-detect-module', { concurrency: true }, () => { strictEqual(code, 0); strictEqual(signal, null); }); + + it('permits declaration of CommonJS module variables above import/export', async () => { + const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--experimental-detect-module', + '--eval', + 'const module = 3; import "node:os"; console.log("executed");', + ]); + + strictEqual(stderr, ''); + strictEqual(stdout, 'executed\n'); + strictEqual(code, 0); + strictEqual(signal, null); + }); }); }); From 7f99f57bd373ecf211bdfba6eb0ca15c036908c2 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Sun, 10 Mar 2024 17:27:19 -0700 Subject: [PATCH 4/8] add double declaration test case --- test/es-module/test-esm-detect-ambiguous.mjs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/es-module/test-esm-detect-ambiguous.mjs b/test/es-module/test-esm-detect-ambiguous.mjs index d15a63dec98cf4..5276439118f6d7 100644 --- a/test/es-module/test-esm-detect-ambiguous.mjs +++ b/test/es-module/test-esm-detect-ambiguous.mjs @@ -300,6 +300,19 @@ describe('--experimental-detect-module', { concurrency: true }, () => { strictEqual(code, 0); strictEqual(signal, null); }); + + it('still throws on double `const` declaration not at the top level', async () => { + const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--experimental-detect-module', + '--eval', + 'function fn() { const require = 1; const require = 2; } fn();', + ]); + + match(stderr, /SyntaxError: Identifier 'require' has already been declared/); + strictEqual(stdout, ''); + strictEqual(code, 1); + strictEqual(signal, null); + }); }); }); From ecd8096297fd10c8a608a6ca91e28171da230cdb Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Sun, 10 Mar 2024 18:00:01 -0700 Subject: [PATCH 5/8] don't check second error message set unnecessarily --- src/node_contextify.cc | 110 +++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 54 deletions(-) diff --git a/src/node_contextify.cc b/src/node_contextify.cc index bfe2b32d089a84..57b49354958f98 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -1507,64 +1507,66 @@ void ContextifyContext::ContainsModuleSyntax( } } - for (const auto& error_message : throws_only_in_cjs_error_messages) { - if (message.find(error_message) != std::string_view::npos) { - // Try parsing again where the user's code is wrapped within an async - // function. If the new parse succeeds, then the error was caused by - // either a top-level declaration of one of the CommonJS module - // variables, or a top-level `await`. - TryCatchScope second_parse_try_catch(env); - Local wrapped_code = - String::Concat(isolate, - String::NewFromUtf8(isolate, "(async function() {") - .ToLocalChecked(), - code); - wrapped_code = String::Concat( - isolate, - wrapped_code, - String::NewFromUtf8(isolate, "})();").ToLocalChecked()); - ScriptCompiler::Source wrapped_source = - GetCommonJSSourceInstance(isolate, - wrapped_code, - filename, - 0, - 0, - host_defined_options, - nullptr); - ContextifyContext::CompileFunctionAndCacheResult( - env, - context, - &wrapped_source, - std::move(params), - std::vector>(), - options, - true, - id_symbol, - second_parse_try_catch); - if (!second_parse_try_catch.HasTerminated()) { - if (second_parse_try_catch.HasCaught()) { - // If on the second parse an error is thrown by ESM syntax, then - // what happened was that the user had top-level `await` or a - // top-level declaration of one of the CommonJS module variables - // above the first `import` or `export`. - Utf8Value second_message_value( - env->isolate(), second_parse_try_catch.Message()->Get()); - auto second_message = second_message_value.ToStringView(); - for (const auto& error_message : esm_syntax_error_messages) { - if (second_message.find(error_message) != - std::string_view::npos) { - should_retry_as_esm = true; - break; + if (!should_retry_as_esm) { + for (const auto& error_message : throws_only_in_cjs_error_messages) { + if (message.find(error_message) != std::string_view::npos) { + // Try parsing again where the user's code is wrapped within an async + // function. If the new parse succeeds, then the error was caused by + // either a top-level declaration of one of the CommonJS module + // variables, or a top-level `await`. + TryCatchScope second_parse_try_catch(env); + Local wrapped_code = + String::Concat(isolate, + String::NewFromUtf8(isolate, "(async function() {") + .ToLocalChecked(), + code); + wrapped_code = String::Concat( + isolate, + wrapped_code, + String::NewFromUtf8(isolate, "})();").ToLocalChecked()); + ScriptCompiler::Source wrapped_source = + GetCommonJSSourceInstance(isolate, + wrapped_code, + filename, + 0, + 0, + host_defined_options, + nullptr); + ContextifyContext::CompileFunctionAndCacheResult( + env, + context, + &wrapped_source, + std::move(params), + std::vector>(), + options, + true, + id_symbol, + second_parse_try_catch); + if (!second_parse_try_catch.HasTerminated()) { + if (second_parse_try_catch.HasCaught()) { + // If on the second parse an error is thrown by ESM syntax, then + // what happened was that the user had top-level `await` or a + // top-level declaration of one of the CommonJS module variables + // above the first `import` or `export`. + Utf8Value second_message_value( + env->isolate(), second_parse_try_catch.Message()->Get()); + auto second_message = second_message_value.ToStringView(); + for (const auto& error_message : esm_syntax_error_messages) { + if (second_message.find(error_message) != + std::string_view::npos) { + should_retry_as_esm = true; + break; + } } + } else { + // No errors thrown in the second parse, so most likely the error + // was caused by a top-level `await` or a top-level declaration of + // one of the CommonJS module variables. + should_retry_as_esm = true; } - } else { - // No errors thrown in the second parse, so most likely the error - // was caused by a top-level `await` or a top-level declaration of - // one of the CommonJS module variables. - should_retry_as_esm = true; } + break; } - break; } } } From b1c17bbca60241c9c19d681f91f216a7a1800826 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Sun, 10 Mar 2024 18:04:10 -0700 Subject: [PATCH 6/8] update docs --- doc/api/cli.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/doc/api/cli.md b/doc/api/cli.md index df90e9ae306b45..9d24741a395660 100644 --- a/doc/api/cli.md +++ b/doc/api/cli.md @@ -777,7 +777,7 @@ added: - v20.10.0 --> -> Stability: 1.0 - Early development +> Stability: 1.1 - Active development Node.js will inspect the source code of ambiguous input to determine whether it contains ES module syntax; if such syntax is detected, the input will be treated @@ -792,9 +792,15 @@ Ambiguous input is defined as: `--experimental-default-type` are specified. ES module syntax is defined as syntax that would throw when evaluated as -CommonJS. This includes `import` and `export` statements and `import.meta` -references. It does _not_ include `import()` expressions, which are valid in -CommonJS. +CommonJS. This includes the following: + +* `import` statements (but _not_ `import()` expressions, which are valid in + CommonJS). +* `export` statements. +* `import.meta` references. +* `await` at the top level of a module. +* `const` declarations of the CommonJS wrapper variables (`require`, `module`, + `exports`, `__dirname`, `__filename`). ### `--experimental-import-meta-resolve` From 026d6c1dc2ae86bd89f364fc05783179ae342a83 Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Sun, 10 Mar 2024 18:06:57 -0700 Subject: [PATCH 7/8] lint --- src/node_contextify.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/node_contextify.cc b/src/node_contextify.cc index 57b49354958f98..36d0f745d42a78 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -1517,9 +1517,9 @@ void ContextifyContext::ContainsModuleSyntax( TryCatchScope second_parse_try_catch(env); Local wrapped_code = String::Concat(isolate, - String::NewFromUtf8(isolate, "(async function() {") - .ToLocalChecked(), - code); + String::NewFromUtf8(isolate, "(async function() {") + .ToLocalChecked(), + code); wrapped_code = String::Concat( isolate, wrapped_code, From 228f5c793ea45fe31c7df624d3e98e536244e19b Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Sun, 10 Mar 2024 19:10:27 -0700 Subject: [PATCH 8/8] use plain async function wrapper instead of CommonJS module wrapper around async function wrapper; add test --- doc/api/cli.md | 2 +- src/node_contextify.cc | 37 ++++++++------------ test/es-module/test-esm-detect-ambiguous.mjs | 13 +++++++ 3 files changed, 29 insertions(+), 23 deletions(-) diff --git a/doc/api/cli.md b/doc/api/cli.md index 9d24741a395660..d7b514694595b0 100644 --- a/doc/api/cli.md +++ b/doc/api/cli.md @@ -799,7 +799,7 @@ CommonJS. This includes the following: * `export` statements. * `import.meta` references. * `await` at the top level of a module. -* `const` declarations of the CommonJS wrapper variables (`require`, `module`, +* Lexical redeclarations of the CommonJS wrapper variables (`require`, `module`, `exports`, `__dirname`, `__filename`). ### `--experimental-import-meta-resolve` diff --git a/src/node_contextify.cc b/src/node_contextify.cc index 36d0f745d42a78..09079e963c7036 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -1510,38 +1510,31 @@ void ContextifyContext::ContainsModuleSyntax( if (!should_retry_as_esm) { for (const auto& error_message : throws_only_in_cjs_error_messages) { if (message.find(error_message) != std::string_view::npos) { - // Try parsing again where the user's code is wrapped within an async - // function. If the new parse succeeds, then the error was caused by - // either a top-level declaration of one of the CommonJS module - // variables, or a top-level `await`. + // Try parsing again where the CommonJS wrapper is replaced by an + // async function wrapper. If the new parse succeeds, then the error + // was caused by either a top-level declaration of one of the CommonJS + // module variables, or a top-level `await`. TryCatchScope second_parse_try_catch(env); - Local wrapped_code = + code = String::Concat(isolate, String::NewFromUtf8(isolate, "(async function() {") .ToLocalChecked(), code); - wrapped_code = String::Concat( + code = String::Concat( isolate, - wrapped_code, + code, String::NewFromUtf8(isolate, "})();").ToLocalChecked()); - ScriptCompiler::Source wrapped_source = - GetCommonJSSourceInstance(isolate, - wrapped_code, - filename, - 0, - 0, - host_defined_options, - nullptr); - ContextifyContext::CompileFunctionAndCacheResult( - env, + ScriptCompiler::Source wrapped_source = GetCommonJSSourceInstance( + isolate, code, filename, 0, 0, host_defined_options, nullptr); + std::ignore = ScriptCompiler::CompileFunction( context, &wrapped_source, - std::move(params), - std::vector>(), + params.size(), + params.data(), + 0, + nullptr, options, - true, - id_symbol, - second_parse_try_catch); + v8::ScriptCompiler::NoCacheReason::kNoCacheNoReason); if (!second_parse_try_catch.HasTerminated()) { if (second_parse_try_catch.HasCaught()) { // If on the second parse an error is thrown by ESM syntax, then diff --git a/test/es-module/test-esm-detect-ambiguous.mjs b/test/es-module/test-esm-detect-ambiguous.mjs index 5276439118f6d7..2067040114a86b 100644 --- a/test/es-module/test-esm-detect-ambiguous.mjs +++ b/test/es-module/test-esm-detect-ambiguous.mjs @@ -276,6 +276,19 @@ describe('--experimental-detect-module', { concurrency: true }, () => { strictEqual(signal, null); }); + it('throws on undefined `require` when top-level `await` triggers ESM parsing', async () => { + const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ + '--experimental-detect-module', + '--eval', + 'const fs = require("node:fs"); await Promise.resolve();', + ]); + + match(stderr, /ReferenceError: require is not defined in ES module scope/); + strictEqual(stdout, ''); + strictEqual(code, 1); + strictEqual(signal, null); + }); + it('permits declaration of CommonJS module variables', async () => { const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [ '--experimental-detect-module',