From 808a77bbecb345f82e8e8cfe1c00a07eded4dbdf Mon Sep 17 00:00:00 2001 From: Ali Hassan Date: Sun, 31 Mar 2024 00:37:30 +0500 Subject: [PATCH 1/4] url: implement parse method for safer URL parsing Implement the static parse method as per the WHATWG URL specification. Unlike the URL constructor, URL.parse does not throw on invalid input, instead returning null. This behavior allows safer parsing of URLs without the need for try-catch blocks around constructor calls. The implementation follows the steps outlined in the WHATWG URL standard, ensuring compatibility and consistency with web platform URL parsing APIs. Fixes: https://github.com/nodejs/node/issues/52208 Refs: https://github.com/whatwg/url/pull/825 --- lib/internal/url.js | 10 ++++++++++ test/fixtures/wpt/interfaces/url.idl | 1 + 2 files changed, 11 insertions(+) diff --git a/lib/internal/url.js b/lib/internal/url.js index 0e69ff52b5edef..f8152e998a9799 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -804,6 +804,16 @@ class URL { this.#updateContext(bindingUrl.parse(input, base)); } + static parse(input, base = undefined) { + try { + const url = new URL(input, base); + return url; + /* eslint-disable-next-line no-unused-vars */ + } catch (_) { + return null; + } + } + [inspect.custom](depth, opts) { if (typeof depth === 'number' && depth < 0) return this; diff --git a/test/fixtures/wpt/interfaces/url.idl b/test/fixtures/wpt/interfaces/url.idl index a5e4d1eb492e82..cd18a66e31b339 100644 --- a/test/fixtures/wpt/interfaces/url.idl +++ b/test/fixtures/wpt/interfaces/url.idl @@ -8,6 +8,7 @@ interface URL { constructor(USVString url, optional USVString base); + static URL? parse(USVString url, optional USVString base); static boolean canParse(USVString url, optional USVString base); stringifier attribute USVString href; From b28fc6e8459af2f3cbf9b9c6af95de8701348553 Mon Sep 17 00:00:00 2001 From: Ali Hassan Date: Sun, 31 Mar 2024 16:25:47 +0500 Subject: [PATCH 2/4] url: change approach for parse method --- lib/internal/url.js | 19 +- src/node_url.cc | 13 +- test/fixtures/wpt/README.md | 2 +- test/fixtures/wpt/interfaces/url.idl | 1 - .../wpt/url/resources/urltestdata.json | 185 ++++++++++++++++++ .../fixtures/wpt/url/url-statics-parse.any.js | 50 +++++ test/fixtures/wpt/versions.json | 2 +- 7 files changed, 259 insertions(+), 13 deletions(-) create mode 100644 test/fixtures/wpt/url/url-statics-parse.any.js diff --git a/lib/internal/url.js b/lib/internal/url.js index f8152e998a9799..6e52a8079eceff 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -773,6 +773,7 @@ class URL { #context = new URLContext(); #searchParams; #searchParamsModified; + static #kParseURLSymbol = Symbol('kParseURL'); static { setURLSearchParamsModified = (obj) => { @@ -787,7 +788,7 @@ class URL { }; } - constructor(input, base = undefined) { + constructor(input, base = undefined, parseSymbol = undefined) { markTransferMode(this, false, false); if (arguments.length === 0) { @@ -801,17 +802,19 @@ class URL { base = `${base}`; } - this.#updateContext(bindingUrl.parse(input, base)); + const raiseException = parseSymbol !== URL.#kParseURLSymbol; + const href = bindingUrl.parse(input, base, raiseException); + if(href){ + this.#updateContext(href); + } } static parse(input, base = undefined) { - try { - const url = new URL(input, base); - return url; - /* eslint-disable-next-line no-unused-vars */ - } catch (_) { - return null; + if (arguments.length === 0) { + throw new ERR_MISSING_ARGS('url'); } + const parsedURLObject = new URL(input, base, URL.#kParseURLSymbol) + return parsedURLObject.href ? parsedURLObject : null; } [inspect.custom](depth, opts) { diff --git a/src/node_url.cc b/src/node_url.cc index 95d15c78407359..abeaeee1ffd5a8 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -233,6 +233,9 @@ void BindingData::Parse(const FunctionCallbackInfo& args) { CHECK_GE(args.Length(), 1); CHECK(args[0]->IsString()); // input // args[1] // base url + // args[2] // raise Exception + + const bool raise_exception = args[2]->BooleanValue(args.GetIsolate()); Realm* realm = Realm::GetCurrent(args); BindingData* binding_data = realm->GetBindingData(); @@ -245,8 +248,10 @@ void BindingData::Parse(const FunctionCallbackInfo& args) { if (args[1]->IsString()) { base_ = Utf8Value(isolate, args[1]).ToString(); base = ada::parse(*base_); - if (!base) { + if (!base && raise_exception) { return ThrowInvalidURL(realm->env(), input.ToStringView(), base_); + } else if (!base) { + return args.GetReturnValue().SetNull(); } base_pointer = &base.value(); } @@ -254,7 +259,11 @@ void BindingData::Parse(const FunctionCallbackInfo& args) { ada::parse(input.ToStringView(), base_pointer); if (!out) { - return ThrowInvalidURL(realm->env(), input.ToStringView(), base_); + if (raise_exception) { + return ThrowInvalidURL(realm->env(), input.ToStringView(), base_); + } else { + return args.GetReturnValue().SetNull(); + } } binding_data->UpdateComponents(out->get_components(), out->type); diff --git a/test/fixtures/wpt/README.md b/test/fixtures/wpt/README.md index a4ca98dc7ffa4f..3a2bb782480ef6 100644 --- a/test/fixtures/wpt/README.md +++ b/test/fixtures/wpt/README.md @@ -28,7 +28,7 @@ Last update: - resource-timing: https://github.com/web-platform-tests/wpt/tree/22d38586d0/resource-timing - resources: https://github.com/web-platform-tests/wpt/tree/1e140d63ec/resources - streams: https://github.com/web-platform-tests/wpt/tree/3df6d94318/streams -- url: https://github.com/web-platform-tests/wpt/tree/c2d7e70b52/url +- url: https://github.com/web-platform-tests/wpt/tree/0f550ab9f5/url - user-timing: https://github.com/web-platform-tests/wpt/tree/5ae85bf826/user-timing - wasm/jsapi: https://github.com/web-platform-tests/wpt/tree/cde25e7e3c/wasm/jsapi - wasm/webapi: https://github.com/web-platform-tests/wpt/tree/fd1b23eeaa/wasm/webapi diff --git a/test/fixtures/wpt/interfaces/url.idl b/test/fixtures/wpt/interfaces/url.idl index cd18a66e31b339..a5e4d1eb492e82 100644 --- a/test/fixtures/wpt/interfaces/url.idl +++ b/test/fixtures/wpt/interfaces/url.idl @@ -8,7 +8,6 @@ interface URL { constructor(USVString url, optional USVString base); - static URL? parse(USVString url, optional USVString base); static boolean canParse(USVString url, optional USVString base); stringifier attribute USVString href; diff --git a/test/fixtures/wpt/url/resources/urltestdata.json b/test/fixtures/wpt/url/resources/urltestdata.json index 287a84b467a48b..9f1be0449c63d3 100644 --- a/test/fixtures/wpt/url/resources/urltestdata.json +++ b/test/fixtures/wpt/url/resources/urltestdata.json @@ -734,6 +734,36 @@ "search": "", "hash": "" }, + { + "input": "http://a:b@c\\", + "base": null, + "href": "http://a:b@c/", + "origin": "http://c", + "protocol": "http:", + "username": "a", + "password": "b", + "host": "c", + "hostname": "c", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ws://a@b\\c", + "base": null, + "href": "ws://a@b/c", + "origin": "ws://b", + "protocol": "ws:", + "username": "a", + "password": "", + "host": "b", + "hostname": "b", + "port": "", + "pathname": "/c", + "search": "", + "hash": "" + }, { "input": "foo:/", "base": "http://example.org/foo/bar", @@ -9529,5 +9559,160 @@ "pathname": "", "search": "", "hash": "" + }, + "Scheme relative path starting with multiple slashes", + { + "input": "///test", + "base": "http://example.org/", + "href": "http://test/", + "protocol": "http:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///\\//\\//test", + "base": "http://example.org/", + "href": "http://test/", + "protocol": "http:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///example.org/path", + "base": "http://example.org/", + "href": "http://example.org/path", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + { + "input": "///example.org/../path", + "base": "http://example.org/", + "href": "http://example.org/path", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + { + "input": "///example.org/../../", + "base": "http://example.org/", + "href": "http://example.org/", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///example.org/../path/../../", + "base": "http://example.org/", + "href": "http://example.org/", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///example.org/../path/../../path", + "base": "http://example.org/", + "href": "http://example.org/path", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + { + "input": "/\\/\\//example.org/../path", + "base": "http://example.org/", + "href": "http://example.org/path", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + { + "input": "///abcdef/../", + "base": "file:///", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "/\\//\\/a/../", + "base": "file:///", + "href": "file://////", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "////", + "search": "", + "hash": "" + }, + { + "input": "//a/../", + "base": "file:///", + "href": "file://a/", + "protocol": "file:", + "username": "", + "password": "", + "host": "a", + "hostname": "a", + "port": "", + "pathname": "/", + "search": "", + "hash": "" } ] diff --git a/test/fixtures/wpt/url/url-statics-parse.any.js b/test/fixtures/wpt/url/url-statics-parse.any.js new file mode 100644 index 00000000000000..0822e9da07af6a --- /dev/null +++ b/test/fixtures/wpt/url/url-statics-parse.any.js @@ -0,0 +1,50 @@ +// This intentionally does not use resources/urltestdata.json to preserve resources. +[ + { + "url": undefined, + "base": undefined, + "expected": false + }, + { + "url": "aaa:b", + "base": undefined, + "expected": true + }, + { + "url": undefined, + "base": "aaa:b", + "expected": false + }, + { + "url": "aaa:/b", + "base": undefined, + "expected": true + }, + { + "url": undefined, + "base": "aaa:/b", + "expected": true + }, + { + "url": "https://test:test", + "base": undefined, + "expected": false + }, + { + "url": "a", + "base": "https://b/", + "expected": true + } +].forEach(({ url, base, expected }) => { + test(() => { + if (expected == false) { + assert_equals(URL.parse(url, base), null); + } else { + assert_equals(URL.parse(url, base).href, new URL(url, base).href); + } + }, `URL.parse(${url}, ${base})`); +}); + +test(() => { + assert_not_equals(URL.parse("https://example/"), URL.parse("https://example/")); +}, `URL.parse() should return a unique object`); diff --git a/test/fixtures/wpt/versions.json b/test/fixtures/wpt/versions.json index a7c655125b5c22..5f65dd1a829c58 100644 --- a/test/fixtures/wpt/versions.json +++ b/test/fixtures/wpt/versions.json @@ -72,7 +72,7 @@ "path": "streams" }, "url": { - "commit": "c2d7e70b52cbd9a5b938aa32f37078d7a71e0b21", + "commit": "0f550ab9f5a07ed293926a306e914866164b346b", "path": "url" }, "user-timing": { From 48e3b9ae2db57cd589d1a8747448c23870f98440 Mon Sep 17 00:00:00 2001 From: Ali Hassan Date: Sun, 31 Mar 2024 18:58:40 +0500 Subject: [PATCH 3/4] url: fix lint --- lib/internal/url.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/internal/url.js b/lib/internal/url.js index 6e52a8079eceff..d8bf15563adfb9 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -804,7 +804,7 @@ class URL { const raiseException = parseSymbol !== URL.#kParseURLSymbol; const href = bindingUrl.parse(input, base, raiseException); - if(href){ + if (href) { this.#updateContext(href); } } @@ -813,7 +813,7 @@ class URL { if (arguments.length === 0) { throw new ERR_MISSING_ARGS('url'); } - const parsedURLObject = new URL(input, base, URL.#kParseURLSymbol) + const parsedURLObject = new URL(input, base, URL.#kParseURLSymbol); return parsedURLObject.href ? parsedURLObject : null; } From c3042525d716b3c39e98dbc888d3bc5fc1dd3b1c Mon Sep 17 00:00:00 2001 From: Ali Hassan Date: Fri, 12 Apr 2024 02:01:59 +0500 Subject: [PATCH 4/4] url: resolve feedback --- lib/internal/url.js | 13 ++++++++++--- src/node_url.cc | 14 ++++++-------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/lib/internal/url.js b/lib/internal/url.js index d8bf15563adfb9..39d79392bd86bc 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -769,11 +769,18 @@ function isURL(self) { return Boolean(self?.href && self.protocol && self.auth === undefined && self.path === undefined); } +/** + * A unique symbol used as a private identifier to safely invoke the URL constructor + * with a special parsing behavior. When passed as the third argument to the URL + * constructor, it signals that the constructor should not throw an exception + * for invalid URL inputs. + */ +const kParseURLSymbol = Symbol('kParseURL'); + class URL { #context = new URLContext(); #searchParams; #searchParamsModified; - static #kParseURLSymbol = Symbol('kParseURL'); static { setURLSearchParamsModified = (obj) => { @@ -802,7 +809,7 @@ class URL { base = `${base}`; } - const raiseException = parseSymbol !== URL.#kParseURLSymbol; + const raiseException = parseSymbol !== kParseURLSymbol; const href = bindingUrl.parse(input, base, raiseException); if (href) { this.#updateContext(href); @@ -813,7 +820,7 @@ class URL { if (arguments.length === 0) { throw new ERR_MISSING_ARGS('url'); } - const parsedURLObject = new URL(input, base, URL.#kParseURLSymbol); + const parsedURLObject = new URL(input, base, kParseURLSymbol); return parsedURLObject.href ? parsedURLObject : null; } diff --git a/src/node_url.cc b/src/node_url.cc index abeaeee1ffd5a8..74b639c23084b5 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -235,7 +235,7 @@ void BindingData::Parse(const FunctionCallbackInfo& args) { // args[1] // base url // args[2] // raise Exception - const bool raise_exception = args[2]->BooleanValue(args.GetIsolate()); + const bool raise_exception = args.Length() > 2 && args[2]->IsTrue(); Realm* realm = Realm::GetCurrent(args); BindingData* binding_data = realm->GetBindingData(); @@ -251,19 +251,17 @@ void BindingData::Parse(const FunctionCallbackInfo& args) { if (!base && raise_exception) { return ThrowInvalidURL(realm->env(), input.ToStringView(), base_); } else if (!base) { - return args.GetReturnValue().SetNull(); + return; } base_pointer = &base.value(); } auto out = ada::parse(input.ToStringView(), base_pointer); - if (!out) { - if (raise_exception) { - return ThrowInvalidURL(realm->env(), input.ToStringView(), base_); - } else { - return args.GetReturnValue().SetNull(); - } + if (!out && raise_exception) { + return ThrowInvalidURL(realm->env(), input.ToStringView(), base_); + } else if (!out) { + return; } binding_data->UpdateComponents(out->get_components(), out->type);