From f882f4759a8fa340580295f2fe6640b49ba03c31 Mon Sep 17 00:00:00 2001 From: Ben Noordhuis Date: Fri, 14 Apr 2017 13:44:45 +0200 Subject: [PATCH 01/30] deps: cherry-pick 79aee39 from upstream v8 This is a cherry-pick if you consider reducing the context to -C2 a cherry-pick; WordIsSmi has been renamed to TaggedIsSmi upstream. Original commit message: [builtins] Fix pointer comparison in ToString builtin. This fixes the bogus {Word32Equal} comparison in the ToString builtin implementing Object.prototype.toString to be a pointer-size {WordEqual} comparison instead. Comparing just the lower half-word is insufficient on 64-bit architectures. R=jgruber@chromium.org TEST=mjsunit/regress/regress-crbug-664506 BUG=chromium:664506 Review-Url: https://codereview.chromium.org/2496043003 Cr-Commit-Position: refs/heads/master@{#40963} Fixes: https://github.com/nodejs/node/issues/12411 PR-URL: https://github.com/nodejs/node/pull/12412 Reviewed-By: Anna Henningsen Reviewed-By: James M Snell Reviewed-By: Sakthipriyan Vairamani --- deps/v8/src/builtins/builtins-object.cc | 4 ++-- deps/v8/test/mjsunit/regress/regress-crbug-664506.js | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 deps/v8/test/mjsunit/regress/regress-crbug-664506.js diff --git a/deps/v8/src/builtins/builtins-object.cc b/deps/v8/src/builtins/builtins-object.cc index 671397d9eacd11..53de00338ae4db 100644 --- a/deps/v8/src/builtins/builtins-object.cc +++ b/deps/v8/src/builtins/builtins-object.cc @@ -300,10 +300,10 @@ void Builtins::Generate_ObjectProtoToString(CodeStubAssembler* assembler) { Node* context = assembler->Parameter(3); assembler->GotoIf( - assembler->Word32Equal(receiver, assembler->UndefinedConstant()), + assembler->WordEqual(receiver, assembler->UndefinedConstant()), &return_undefined); - assembler->GotoIf(assembler->Word32Equal(receiver, assembler->NullConstant()), + assembler->GotoIf(assembler->WordEqual(receiver, assembler->NullConstant()), &return_null); 
assembler->GotoIf(assembler->WordIsSmi(receiver), &return_number); diff --git a/deps/v8/test/mjsunit/regress/regress-crbug-664506.js b/deps/v8/test/mjsunit/regress/regress-crbug-664506.js new file mode 100644 index 00000000000000..b0bf5e7591d4f4 --- /dev/null +++ b/deps/v8/test/mjsunit/regress/regress-crbug-664506.js @@ -0,0 +1,11 @@ +// Copyright 2016 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Flags: --expose-gc --predictable --random-seed=-1109634722 + +gc(); +gc(); +assertEquals("[object Object]", Object.prototype.toString.call({})); +gc(); +assertEquals("[object Array]", Object.prototype.toString.call([])); From 2bbee49e6f170a5d6628444a7c9a2235fe0dd929 Mon Sep 17 00:00:00 2001 From: Ben Noordhuis Date: Thu, 13 Apr 2017 13:48:01 +0200 Subject: [PATCH 02/30] v8: fix build errors with g++ 7 This is a local patch because upstream fixed it differently by moving large chunks of code out of objects.h. We cannot easily back-port those changes due to their size and invasiveness. 
Fixes: https://github.com/nodejs/node/issues/10388 PR-URL: https://github.com/nodejs/node/pull/12392 Reviewed-By: Anna Henningsen Reviewed-By: Benjamin Gruenbaum Reviewed-By: Daniel Bevenius Reviewed-By: James M Snell --- deps/v8/src/objects-body-descriptors.h | 2 +- deps/v8/src/objects-inl.h | 21 +++++++++++++++++++++ deps/v8/src/objects.h | 20 ++++---------------- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/deps/v8/src/objects-body-descriptors.h b/deps/v8/src/objects-body-descriptors.h index 91cb8883be8873..a1c3634bd762d7 100644 --- a/deps/v8/src/objects-body-descriptors.h +++ b/deps/v8/src/objects-body-descriptors.h @@ -99,7 +99,7 @@ class FixedBodyDescriptor final : public BodyDescriptorBase { template static inline void IterateBody(HeapObject* obj, int object_size) { - IterateBody(obj); + IterateBody(obj); } }; diff --git a/deps/v8/src/objects-inl.h b/deps/v8/src/objects-inl.h index af1261538e2b55..2549a2ac244c18 100644 --- a/deps/v8/src/objects-inl.h +++ b/deps/v8/src/objects-inl.h @@ -39,6 +39,27 @@ namespace v8 { namespace internal { +template +uint32_t HashTable::Hash(Key key) { + if (Shape::UsesSeed) { + return Shape::SeededHash(key, GetHeap()->HashSeed()); + } else { + return Shape::Hash(key); + } +} + + +template +uint32_t HashTable::HashForObject(Key key, + Object* object) { + if (Shape::UsesSeed) { + return Shape::SeededHashForObject(key, GetHeap()->HashSeed(), object); + } else { + return Shape::HashForObject(key, object); + } +} + + PropertyDetails::PropertyDetails(Smi* smi) { value_ = smi->value(); } diff --git a/deps/v8/src/objects.h b/deps/v8/src/objects.h index 1709cef15e8cd1..a3f9523e8ae9d0 100644 --- a/deps/v8/src/objects.h +++ b/deps/v8/src/objects.h @@ -3352,22 +3352,10 @@ class HashTable : public HashTableBase { public: typedef Shape ShapeT; - // Wrapper methods - inline uint32_t Hash(Key key) { - if (Shape::UsesSeed) { - return Shape::SeededHash(key, GetHeap()->HashSeed()); - } else { - return Shape::Hash(key); - } - } 
- - inline uint32_t HashForObject(Key key, Object* object) { - if (Shape::UsesSeed) { - return Shape::SeededHashForObject(key, GetHeap()->HashSeed(), object); - } else { - return Shape::HashForObject(key, object); - } - } + // Wrapper methods. Defined in src/objects-inl.h + // to break a cycle with src/heap/heap.h. + inline uint32_t Hash(Key key); + inline uint32_t HashForObject(Key key, Object* object); // Returns a new HashTable object. MUST_USE_RESULT static Handle New( From 2c69ab7f61f455e5c2fdebd4d31e05247e4f2d53 Mon Sep 17 00:00:00 2001 From: Rich Trott Date: Fri, 31 Mar 2017 22:48:35 -0700 Subject: [PATCH 03/30] buffer,util: refactor for performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit internal/util.js defined toInteger() and toLength() but they were only used by buffer.js. Inlining these small functions results in a small but statistically-significant performance gain. PR-URL: https://github.com/nodejs/node/pull/12153 Reviewed-By: Joyee Cheung Reviewed-By: Michaël Zasso Reviewed-By: James M Snell --- lib/buffer.js | 10 ++++-- lib/internal/util.js | 18 ---------- test/parallel/test-internal-util-toInteger.js | 32 ----------------- test/parallel/test-internal-util-toLength.js | 35 ------------------- 4 files changed, 8 insertions(+), 87 deletions(-) delete mode 100644 test/parallel/test-internal-util-toInteger.js delete mode 100644 test/parallel/test-internal-util-toLength.js diff --git a/lib/buffer.js b/lib/buffer.js index f15de7d8accc4f..808e0d3810298e 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -228,7 +228,9 @@ function fromArrayLike(obj) { } function fromArrayBuffer(obj, byteOffset, length) { - byteOffset = internalUtil.toInteger(byteOffset); + // convert byteOffset to integer + byteOffset = +byteOffset; + byteOffset = byteOffset ? 
Math.trunc(byteOffset) : 0; const maxLength = obj.byteLength - byteOffset; @@ -238,7 +240,11 @@ function fromArrayBuffer(obj, byteOffset, length) { if (length === undefined) { length = maxLength; } else { - length = internalUtil.toLength(length); + // convert length to non-negative integer + length = +length; + length = length ? Math.trunc(length) : 0; + length = length <= 0 ? 0 : Math.min(length, Number.MAX_SAFE_INTEGER); + if (length > maxLength) throw new RangeError("'length' is out of bounds"); } diff --git a/lib/internal/util.js b/lib/internal/util.js index b4a938f9c22a89..ae1f667cf1b1a7 100644 --- a/lib/internal/util.js +++ b/lib/internal/util.js @@ -139,21 +139,3 @@ exports.cachedResult = function cachedResult(fn) { return result.slice(); }; }; - -/* - * Implementation of ToInteger as per ECMAScript Specification - * Refer: http://www.ecma-international.org/ecma-262/6.0/#sec-tointeger - */ -const toInteger = exports.toInteger = function toInteger(argument) { - const number = +argument; - return Number.isNaN(number) ? 0 : Math.trunc(number); -}; - -/* - * Implementation of ToLength as per ECMAScript Specification - * Refer: http://www.ecma-international.org/ecma-262/6.0/#sec-tolength - */ -exports.toLength = function toLength(argument) { - const len = toInteger(argument); - return len <= 0 ? 
0 : Math.min(len, Number.MAX_SAFE_INTEGER); -}; diff --git a/test/parallel/test-internal-util-toInteger.js b/test/parallel/test-internal-util-toInteger.js deleted file mode 100644 index 57a411964da90f..00000000000000 --- a/test/parallel/test-internal-util-toInteger.js +++ /dev/null @@ -1,32 +0,0 @@ -// Flags: --expose-internals -'use strict'; - -require('../common'); -const assert = require('assert'); -const {toInteger} = require('internal/util'); - -const expectZero = [ - '0', '-0', NaN, {}, [], {'a': 'b'}, [1, 2], '0x', '0o', '0b', false, - '', ' ', undefined, null -]; -expectZero.forEach(function(value) { - assert.strictEqual(toInteger(value), 0); -}); - -assert.strictEqual(toInteger(Infinity), Infinity); -assert.strictEqual(toInteger(-Infinity), -Infinity); - -const expectSame = [ - '0x100', '0o100', '0b100', 0x100, -0x100, 0o100, -0o100, 0b100, -0b100, true -]; -expectSame.forEach(function(value) { - assert.strictEqual(toInteger(value), +value, `${value} is not an Integer`); -}); - -const expectIntegers = new Map([ - [[1], 1], [[-1], -1], [['1'], 1], [['-1'], -1], - [3.14, 3], [-3.14, -3], ['3.14', 3], ['-3.14', -3], -]); -expectIntegers.forEach(function(expected, value) { - assert.strictEqual(toInteger(value), expected); -}); diff --git a/test/parallel/test-internal-util-toLength.js b/test/parallel/test-internal-util-toLength.js deleted file mode 100644 index ce594c47c1db19..00000000000000 --- a/test/parallel/test-internal-util-toLength.js +++ /dev/null @@ -1,35 +0,0 @@ -// Flags: --expose-internals -'use strict'; - -require('../common'); -const assert = require('assert'); -const {toLength} = require('internal/util'); -const maxValue = Number.MAX_SAFE_INTEGER; - -const expectZero = [ - '0', '-0', NaN, {}, [], {'a': 'b'}, [1, 2], '0x', '0o', '0b', false, - '', ' ', undefined, null, -1, -1.25, -1.1, -1.9, -Infinity -]; -expectZero.forEach(function(value) { - assert.strictEqual(toLength(value), 0); -}); - -assert.strictEqual(toLength(maxValue - 1), maxValue - 
1); -assert.strictEqual(maxValue, maxValue); -assert.strictEqual(toLength(Infinity), maxValue); -assert.strictEqual(toLength(maxValue + 1), maxValue); - - -[ - '0x100', '0o100', '0b100', 0x100, -0x100, 0o100, -0o100, 0b100, -0b100, true -].forEach(function(value) { - assert.strictEqual(toLength(value), +value > 0 ? +value : 0); -}); - -const expectIntegers = new Map([ - [[1], 1], [[-1], 0], [['1'], 1], [['-1'], 0], - [3.14, 3], [-3.14, 0], ['3.14', 3], ['-3.14', 0], -]); -expectIntegers.forEach(function(expected, value) { - assert.strictEqual(toLength(value), expected); -}); From b869abdf87c40cce767597f10d5bc0b2d9235219 Mon Sep 17 00:00:00 2001 From: Rich Trott Date: Sat, 1 Apr 2017 23:06:31 -0700 Subject: [PATCH 04/30] tools: replace custom ESLint timers rule MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ESLint 3.19.0 allows the specification of selectors that represent disallowed syntax. Replace our custom rule for timer arguments with a pair of `no-restricted-syntax` option objects. PR-URL: https://github.com/nodejs/node/pull/12162 Reviewed-By: Teddy Katz Reviewed-By: Michaël Zasso Reviewed-By: Yuta Hiroto Reviewed-By: James M Snell Reviewed-By: Colin Ihrig --- .eslintrc.yaml | 8 +++++++- tools/eslint-rules/timer-arguments.js | 25 ------------------------- 2 files changed, 7 insertions(+), 26 deletions(-) delete mode 100644 tools/eslint-rules/timer-arguments.js diff --git a/.eslintrc.yaml b/.eslintrc.yaml index 1362c9cb29e387..0ecd781d0e9137 100644 --- a/.eslintrc.yaml +++ b/.eslintrc.yaml @@ -101,6 +101,13 @@ rules: new-parens: 2 no-mixed-spaces-and-tabs: 2 no-multiple-empty-lines: [2, {max: 2, maxEOF: 0, maxBOF: 0}] + no-restricted-syntax: [2, { + selector: "CallExpression[callee.name='setTimeout'][arguments.length<2]", + message: "setTimeout() must be invoked with at least two arguments." 
+ }, { + selector: "CallExpression[callee.name='setInterval'][arguments.length<2]", + message: "setInterval() must be invoked with at least 2 arguments" + }] no-tabs: 2 no-trailing-spaces: 2 one-var-declaration-per-line: 2 @@ -135,7 +142,6 @@ rules: assert-fail-single-argument: 2 assert-throws-arguments: [2, { requireTwo: false }] new-with-error: [2, Error, RangeError, TypeError, SyntaxError, ReferenceError] - timer-arguments: 2 # Global scoped method and vars globals: diff --git a/tools/eslint-rules/timer-arguments.js b/tools/eslint-rules/timer-arguments.js deleted file mode 100644 index 4dd7816ff82ff2..00000000000000 --- a/tools/eslint-rules/timer-arguments.js +++ /dev/null @@ -1,25 +0,0 @@ -/** - * @fileoverview Require at least two arguments when calling setTimeout() or - * setInterval(). - * @author Rich Trott - */ -'use strict'; - -//------------------------------------------------------------------------------ -// Rule Definition -//------------------------------------------------------------------------------ - -function isTimer(name) { - return ['setTimeout', 'setInterval'].includes(name); -} - -module.exports = function(context) { - return { - 'CallExpression': function(node) { - const name = node.callee.name; - if (isTimer(name) && node.arguments.length < 2) { - context.report(node, `${name} must have at least 2 arguments`); - } - } - }; -}; From 71d3f94e695c458a68ce545212df0c8ca20ca171 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Sat, 28 Jan 2017 10:37:45 -0800 Subject: [PATCH 05/30] url: extend URLSearchParams constructor PR-URL: https://github.com/nodejs/node/pull/12507 Fixes: https://github.com/nodejs/node/issues/10635 Reviewed-By: James M Snell --- lib/internal/url.js | 50 ++++++++++- ...est-whatwg-url-searchparams-constructor.js | 87 +++++++++++-------- 2 files changed, 96 insertions(+), 41 deletions(-) diff --git a/lib/internal/url.js b/lib/internal/url.js index a474ed30b4d6e0..c646a757024f34 100644 --- a/lib/internal/url.js +++ 
b/lib/internal/url.js @@ -614,11 +614,53 @@ function defineIDLClass(proto, classStr, obj) { } class URLSearchParams { - constructor(init = '') { - if (init instanceof URLSearchParams) { - const childParams = init[searchParams]; - this[searchParams] = childParams.slice(); + // URL Standard says the default value is '', but as undefined and '' have + // the same result, undefined is used to prevent unnecessary parsing. + // Default parameter is necessary to keep URLSearchParams.length === 0 in + // accordance with Web IDL spec. + constructor(init = undefined) { + if (init === null || init === undefined) { + this[searchParams] = []; + } else if (typeof init === 'object') { + const method = init[Symbol.iterator]; + if (method === this[Symbol.iterator]) { + // While the spec does not have this branch, we can use it as a + // shortcut to avoid having to go through the costly generic iterator. + const childParams = init[searchParams]; + this[searchParams] = childParams.slice(); + } else if (method !== null && method !== undefined) { + if (typeof method !== 'function') { + throw new TypeError('Query pairs must be iterable'); + } + + // sequence> + // Note: per spec we have to first exhaust the lists then process them + const pairs = []; + for (const pair of init) { + if (typeof pair !== 'object' || + typeof pair[Symbol.iterator] !== 'function') { + throw new TypeError('Each query pair must be iterable'); + } + pairs.push(Array.from(pair)); + } + + this[searchParams] = []; + for (const pair of pairs) { + if (pair.length !== 2) { + throw new TypeError('Each query pair must be a name/value tuple'); + } + this[searchParams].push(String(pair[0]), String(pair[1])); + } + } else { + // record + this[searchParams] = []; + for (const key of Object.keys(init)) { + const value = String(init[key]); + this[searchParams].push(key, value); + } + } } else { + // USVString init = String(init); if (init[0] === '?') init = init.slice(1); initSearchParams(this, init); diff --git 
a/test/parallel/test-whatwg-url-searchparams-constructor.js b/test/parallel/test-whatwg-url-searchparams-constructor.js index 4e177ce59ccf20..d57373e727ac51 100644 --- a/test/parallel/test-whatwg-url-searchparams-constructor.js +++ b/test/parallel/test-whatwg-url-searchparams-constructor.js @@ -4,7 +4,8 @@ const common = require('../common'); const assert = require('assert'); const URLSearchParams = require('url').URLSearchParams; const { - test, assert_equals, assert_true, assert_false + test, assert_equals, assert_true, + assert_false, assert_throws, assert_array_equals } = common.WPT; /* eslint-disable */ @@ -40,10 +41,10 @@ test(() => { assert_equals(params.__proto__, URLSearchParams.prototype, 'expected URLSearchParams.prototype as prototype.'); }, "URLSearchParams constructor, empty string as argument") -// test(() => { -// params = new URLSearchParams({}); -// assert_equals(params + '', ""); -// }, 'URLSearchParams constructor, {} as argument'); +test(() => { + params = new URLSearchParams({}); + assert_equals(params + '', ""); +}, 'URLSearchParams constructor, {} as argument'); test(function() { var params = new URLSearchParams('a=b'); @@ -142,39 +143,39 @@ test(function() { assert_equals(params.get('a\uD83D\uDCA9b'), 'c'); }, 'Parse %f0%9f%92%a9'); // Unicode Character 'PILE OF POO' (U+1F4A9) -// test(function() { -// var params = new URLSearchParams([]); -// assert_true(params != null, 'constructor returned non-null value.'); -// params = new URLSearchParams([['a', 'b'], ['c', 'd']]); -// assert_equals(params.get("a"), "b"); -// assert_equals(params.get("c"), "d"); -// assert_throws(new TypeError(), function() { new URLSearchParams([[1]]); }); -// assert_throws(new TypeError(), function() { new URLSearchParams([[1,2,3]]); }); -// }, "Constructor with sequence of sequences of strings"); - -// [ +test(function() { + var params = new URLSearchParams([]); + assert_true(params != null, 'constructor returned non-null value.'); + params = new 
URLSearchParams([['a', 'b'], ['c', 'd']]); + assert_equals(params.get("a"), "b"); + assert_equals(params.get("c"), "d"); + assert_throws(new TypeError(), function() { new URLSearchParams([[1]]); }); + assert_throws(new TypeError(), function() { new URLSearchParams([[1,2,3]]); }); +}, "Constructor with sequence of sequences of strings"); + +[ // { "input": {"+": "%C2"}, "output": [[" ", "\uFFFD"]], "name": "object with +" }, -// { "input": {c: "x", a: "?"}, "output": [["c", "x"], ["a", "?"]], "name": "object with two keys" }, -// { "input": [["c", "x"], ["a", "?"]], "output": [["c", "x"], ["a", "?"]], "name": "array with two keys" } -// ].forEach((val) => { -// test(() => { -// let params = new URLSearchParams(val.input), -// i = 0 -// for (let param of params) { -// assert_array_equals(param, val.output[i]) -// i++ -// } -// }, "Construct with " + val.name) -// }) + { "input": {c: "x", a: "?"}, "output": [["c", "x"], ["a", "?"]], "name": "object with two keys" }, + { "input": [["c", "x"], ["a", "?"]], "output": [["c", "x"], ["a", "?"]], "name": "array with two keys" } +].forEach((val) => { + test(() => { + let params = new URLSearchParams(val.input), + i = 0 + for (let param of params) { + assert_array_equals(param, val.output[i]) + i++ + } + }, "Construct with " + val.name) +}) -// test(() => { -// params = new URLSearchParams() -// params[Symbol.iterator] = function *() { -// yield ["a", "b"] -// } -// let params2 = new URLSearchParams(params) -// assert_equals(params2.get("a"), "b") -// }, "Custom [Symbol.iterator]") +test(() => { + params = new URLSearchParams() + params[Symbol.iterator] = function *() { + yield ["a", "b"] + } + let params2 = new URLSearchParams(params) + assert_equals(params2.get("a"), "b") +}, "Custom [Symbol.iterator]") /* eslint-enable */ // Tests below are not from WPT. 
@@ -192,5 +193,17 @@ test(function() { params = new URLSearchParams(undefined); assert.strictEqual(params.toString(), ''); params = new URLSearchParams(null); - assert.strictEqual(params.toString(), 'null='); + assert.strictEqual(params.toString(), ''); + assert.throws(() => new URLSearchParams([[1]]), + /^TypeError: Each query pair must be a name\/value tuple$/); + assert.throws(() => new URLSearchParams([[1, 2, 3]]), + /^TypeError: Each query pair must be a name\/value tuple$/); + assert.throws(() => new URLSearchParams({ [Symbol.iterator]: 42 }), + /^TypeError: Query pairs must be iterable$/); + assert.throws(() => new URLSearchParams([{}]), + /^TypeError: Each query pair must be iterable$/); + assert.throws(() => new URLSearchParams(['a']), + /^TypeError: Each query pair must be iterable$/); + assert.throws(() => new URLSearchParams([{ [Symbol.iterator]: 42 }]), + /^TypeError: Each query pair must be iterable$/); } From c40a45fd3bbfd9613ea98bf8b083973d2e24f3a3 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Sat, 28 Jan 2017 12:02:35 -0800 Subject: [PATCH 06/30] doc: document URLSearchParams constructor PR-URL: https://github.com/nodejs/node/pull/12507 Ref: https://github.com/whatwg/url/pull/175 Reviewed-By: James M Snell --- doc/api/url.md | 126 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 122 insertions(+), 4 deletions(-) diff --git a/doc/api/url.md b/doc/api/url.md index aa33b554042ff6..088ea95d23f22b 100644 --- a/doc/api/url.md +++ b/doc/api/url.md @@ -693,7 +693,8 @@ console.log(JSON.stringify(myURLs)); ### Class: URLSearchParams The `URLSearchParams` API provides read and write access to the query of a -`URL`. +`URL`. The `URLSearchParams` class can also be used standalone with one of the +four following constructors. 
The WHATWG `URLSearchParams` interface and the [`querystring`][] module have similar purpose, but the purpose of the [`querystring`][] module is more @@ -701,7 +702,8 @@ general, as it allows the customization of delimiter characters (`&` and `=`). On the other hand, this API is designed purely for URL query strings. ```js -const URL = require('url').URL; +const { URL, URLSearchParams } = require('url'); + const myURL = new URL('https://example.org/?abc=123'); console.log(myURL.searchParams.get('abc')); // Prints 123 @@ -714,11 +716,125 @@ myURL.searchParams.delete('abc'); myURL.searchParams.set('a', 'b'); console.log(myURL.href); // Prints https://example.org/?a=b + +const newSearchParams = new URLSearchParams(myURL.searchParams); +// The above is equivalent to +// const newSearchParams = new URLSearchParams(myURL.search); + +newSearchParams.append('a', 'c'); +console.log(myURL.href); + // Prints https://example.org/?a=b +console.log(newSearchParams.toString()); + // Prints a=b&a=c + +// newSearchParams.toString() is implicitly called +myURL.search = newSearchParams; +console.log(myURL.href); + // Prints https://example.org/?a=b&a=c +newSearchParams.delete('a'); +console.log(myURL.href); + // Prints https://example.org/?a=b&a=c ``` -#### Constructor: new URLSearchParams([init]) +#### Constructor: new URLSearchParams() + +Instantiate a new empty `URLSearchParams` object. + +#### Constructor: new URLSearchParams(string) + +* `string` {string} A query string + +Parse the `string` as a query string, and use it to instantiate a new +`URLSearchParams` object. A leading `'?'`, if present, is ignored. 
+ +```js +const { URLSearchParams } = require('url'); +let params; + +params = new URLSearchParams('user=abc&query=xyz'); +console.log(params.get('user')); + // Prints 'abc' +console.log(params.toString()); + // Prints 'user=abc&query=xyz' + +params = new URLSearchParams('?user=abc&query=xyz'); +console.log(params.toString()); + // Prints 'user=abc&query=xyz' +``` + +#### Constructor: new URLSearchParams(obj) + +* `obj` {Object} An object representing a collection of key-value pairs -* `init` {String} The URL query +Instantiate a new `URLSearchParams` object with a query hash map. The key and +value of each property of `obj` are always coerced to strings. + +*Note*: Unlike [`querystring`][] module, duplicate keys in the form of array +values are not allowed. Arrays are stringified using [`array.toString()`][], +which simply joins all array elements with commas. + +```js +const { URLSearchParams } = require('url'); +const params = new URLSearchParams({ + user: 'abc', + query: ['first', 'second'] +}); +console.log(params.getAll('query')); + // Prints ['first,second'] +console.log(params.toString()); + // Prints 'user=abc&query=first%2Csecond' +``` + +#### Constructor: new URLSearchParams(iterable) + +* `iterable` {Iterable} An iterable object whose elements are key-value pairs + +Instantiate a new `URLSearchParams` object with an iterable map in a way that +is similar to [`Map`][]'s constructor. `iterable` can be an Array or any +iterable object. That means `iterable` can be another `URLSearchParams`, in +which case the constructor will simply create a clone of the provided +`URLSearchParams`. Elements of `iterable` are key-value pairs, and can +themselves be any iterable object. + +Duplicate keys are allowed. 
+ +```js +const { URLSearchParams } = require('url'); +let params; + +// Using an array +params = new URLSearchParams([ + ['user', 'abc'], + ['query', 'first'], + ['query', 'second'] +]); +console.log(params.toString()); + // Prints 'user=abc&query=first&query=second' + +// Using a Map object +const map = new Map(); +map.set('user', 'abc'); +map.set('query', 'xyz'); +params = new URLSearchParams(map); +console.log(params.toString()); + // Prints 'user=abc&query=xyz' + +// Using a generator function +function* getQueryPairs() { + yield ['user', 'abc']; + yield ['query', 'first']; + yield ['query', 'second']; +} +params = new URLSearchParams(getQueryPairs()); +console.log(params.toString()); + // Prints 'user=abc&query=first&query=second' + +// Each key-value pair must have exactly two elements +new URLSearchParams([ + ['user', 'abc', 'error'] +]); + // Throws TypeError: Each query pair must be a name/value tuple +``` #### urlSearchParams.append(name, value) @@ -975,6 +1091,8 @@ console.log(myURL.origin); [`require('url').format()`]: #url_url_format_url_options [`url.toString()`]: #url_url_tostring [Punycode]: https://tools.ietf.org/html/rfc5891#section-4.4 +[`Map`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map +[`array.toString()`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/toString [WHATWG URL]: #url_the_whatwg_url_api [`new URL()`]: #url_constructor_new_url_input_base [`url.href`]: #url_url_href From b0fecbe9807d2470d73cc5954f1f8f35cd9a9e78 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Fri, 3 Feb 2017 21:53:27 -0800 Subject: [PATCH 07/30] url: enforce valid UTF-8 in WHATWG parser This commit implements the Web IDL USVString conversion, which mandates all unpaired Unicode surrogates be turned into U+FFFD REPLACEMENT CHARACTER. It also disallows Symbols to be used as USVString per spec. 
Certain functions call into C++ methods in the binding that use the Utf8Value class to access string arguments. Utf8Value already does the normalization using V8's String::Write, so in those cases, instead of doing the full USVString normalization, only a symbol check is done (`'' + val`, which uses ES's ToString, versus `String()` which has special provisions for symbols). PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- lib/internal/url.js | 98 +++++--- src/node_url.cc | 53 ++++ test/fixtures/url-setter-tests-additional.js | 237 ++++++++++++++++++ test/fixtures/url-tests-additional.js | 30 +++ .../test-whatwg-url-searchparams-append.js | 9 + ...est-whatwg-url-searchparams-constructor.js | 16 ++ .../test-whatwg-url-searchparams-delete.js | 6 + .../test-whatwg-url-searchparams-get.js | 6 + .../test-whatwg-url-searchparams-getall.js | 6 + .../test-whatwg-url-searchparams-has.js | 6 + .../test-whatwg-url-searchparams-set.js | 9 + test/parallel/test-whatwg-url-searchparams.js | 32 ++- test/parallel/test-whatwg-url-setters.js | 45 ++++ 13 files changed, 509 insertions(+), 44 deletions(-) create mode 100644 test/fixtures/url-setter-tests-additional.js diff --git a/lib/internal/url.js b/lib/internal/url.js index c646a757024f34..a2c255f765cdc6 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -23,6 +23,18 @@ const IteratorPrototype = Object.getPrototypeOf( Object.getPrototypeOf([][Symbol.iterator]()) ); +const unpairedSurrogateRe = + /([^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])/; +function toUSVString(val) { + const str = '' + val; + // As of V8 5.5, `str.search()` (and `unpairedSurrogateRe[@@search]()`) are + // slower than `unpairedSurrogateRe.exec()`. 
+ const match = unpairedSurrogateRe.exec(str); + if (!match) + return str; + return binding.toUSVString(str, match.index); +} + class OpaqueOrigin { toString() { return 'null'; @@ -108,7 +120,6 @@ function onParseError(flags, input) { // Reused by URL constructor and URL#href setter. function parse(url, input, base) { - input = String(input); const base_context = base ? base[context] : undefined; url[context] = new StorageObject(); binding.parse(input.trim(), -1, @@ -203,8 +214,10 @@ function onParseHashComplete(flags, protocol, username, password, class URL { constructor(input, base) { + // toUSVString is not needed. + input = '' + input; if (base !== undefined && !(base instanceof URL)) - base = new URL(String(base)); + base = new URL(base); parse(this, input, base); } @@ -312,6 +325,8 @@ Object.defineProperties(URL.prototype, { return this[kFormat]({}); }, set(input) { + // toUSVString is not needed. + input = '' + input; parse(this, input); } }, @@ -329,7 +344,8 @@ Object.defineProperties(URL.prototype, { return this[context].scheme; }, set(scheme) { - scheme = String(scheme); + // toUSVString is not needed. + scheme = '' + scheme; if (scheme.length === 0) return; binding.parse(scheme, binding.kSchemeStart, null, this[context], @@ -343,7 +359,8 @@ Object.defineProperties(URL.prototype, { return this[context].username || ''; }, set(username) { - username = String(username); + // toUSVString is not needed. + username = '' + username; if (!this.hostname) return; const ctx = this[context]; @@ -363,7 +380,8 @@ Object.defineProperties(URL.prototype, { return this[context].password || ''; }, set(password) { - password = String(password); + // toUSVString is not needed. + password = '' + password; if (!this.hostname) return; const ctx = this[context]; @@ -388,7 +406,8 @@ Object.defineProperties(URL.prototype, { }, set(host) { const ctx = this[context]; - host = String(host); + // toUSVString is not needed. 
+ host = '' + host; if (this[cannotBeBase] || (this[special] && host.length === 0)) { // Cannot set the host if cannot-be-base is set or @@ -412,7 +431,8 @@ Object.defineProperties(URL.prototype, { }, set(host) { const ctx = this[context]; - host = String(host); + // toUSVString is not needed. + host = '' + host; if (this[cannotBeBase] || (this[special] && host.length === 0)) { // Cannot set the host if cannot-be-base is set or @@ -436,11 +456,12 @@ Object.defineProperties(URL.prototype, { return port === undefined ? '' : String(port); }, set(port) { + // toUSVString is not needed. + port = '' + port; const ctx = this[context]; if (!ctx.host || this[cannotBeBase] || this.protocol === 'file:') return; - port = String(port); if (port === '') { ctx.port = undefined; return; @@ -459,9 +480,11 @@ Object.defineProperties(URL.prototype, { return ctx.path !== undefined ? `/${ctx.path.join('/')}` : ''; }, set(path) { + // toUSVString is not needed. + path = '' + path; if (this[cannotBeBase]) return; - binding.parse(String(path), binding.kPathStart, null, this[context], + binding.parse(path, binding.kPathStart, null, this[context], onParsePathComplete.bind(this)); } }, @@ -474,7 +497,7 @@ Object.defineProperties(URL.prototype, { }, set(search) { const ctx = this[context]; - search = String(search); + search = toUSVString(search); if (!search) { ctx.query = null; ctx.flags &= ~binding.URL_FLAGS_HAS_QUERY; @@ -506,7 +529,8 @@ Object.defineProperties(URL.prototype, { }, set(hash) { const ctx = this[context]; - hash = String(hash); + // toUSVString is not needed. 
+ hash = '' + hash; if (this.protocol === 'javascript:') return; if (!hash) { @@ -649,19 +673,22 @@ class URLSearchParams { if (pair.length !== 2) { throw new TypeError('Each query pair must be a name/value tuple'); } - this[searchParams].push(String(pair[0]), String(pair[1])); + const key = toUSVString(pair[0]); + const value = toUSVString(pair[1]); + this[searchParams].push(key, value); } } else { // record this[searchParams] = []; - for (const key of Object.keys(init)) { - const value = String(init[key]); + for (var key of Object.keys(init)) { + key = toUSVString(key); + const value = toUSVString(init[key]); this[searchParams].push(key, value); } } } else { // USVString - init = String(init); + init = toUSVString(init); if (init[0] === '?') init = init.slice(1); initSearchParams(this, init); } @@ -740,8 +767,8 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { throw new TypeError('"name" and "value" arguments must be specified'); } - name = String(name); - value = String(value); + name = toUSVString(name); + value = toUSVString(value); this[searchParams].push(name, value); update(this[context], this); }, @@ -755,7 +782,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { } const list = this[searchParams]; - name = String(name); + name = toUSVString(name); for (var i = 0; i < list.length;) { const cur = list[i]; if (cur === name) { @@ -776,7 +803,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { } const list = this[searchParams]; - name = String(name); + name = toUSVString(name); for (var i = 0; i < list.length; i += 2) { if (list[i] === name) { return list[i + 1]; @@ -795,7 +822,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { const list = this[searchParams]; const values = []; - name = String(name); + name = toUSVString(name); for (var i = 0; i < list.length; i += 2) { if (list[i] === name) { values.push(list[i + 1]); @@ -813,7 +840,7 @@ defineIDLClass(URLSearchParams.prototype, 
'URLSearchParams', { } const list = this[searchParams]; - name = String(name); + name = toUSVString(name); for (var i = 0; i < list.length; i += 2) { if (list[i] === name) { return true; @@ -831,8 +858,8 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { } const list = this[searchParams]; - name = String(name); - value = String(value); + name = toUSVString(name); + value = toUSVString(value); // If there are any name-value pairs whose name is `name`, in `list`, set // the value of the first such name-value pair to `value` and remove the @@ -1094,11 +1121,13 @@ function originFor(url, base) { } function domainToASCII(domain) { - return binding.domainToASCII(String(domain)); + // toUSVString is not needed. + return binding.domainToASCII('' + domain); } function domainToUnicode(domain) { - return binding.domainToUnicode(String(domain)); + // toUSVString is not needed. + return binding.domainToUnicode('' + domain); } // Utility function that converts a URL object into an ordinary @@ -1184,11 +1213,14 @@ function getPathFromURL(path) { return isWindows ? getPathFromURLWin32(path) : getPathFromURLPosix(path); } -exports.getPathFromURL = getPathFromURL; -exports.URL = URL; -exports.URLSearchParams = URLSearchParams; -exports.domainToASCII = domainToASCII; -exports.domainToUnicode = domainToUnicode; -exports.urlToOptions = urlToOptions; -exports.formatSymbol = kFormat; -exports.searchParamsSymbol = searchParams; +module.exports = { + toUSVString, + getPathFromURL, + URL, + URLSearchParams, + domainToASCII, + domainToUnicode, + urlToOptions, + formatSymbol: kFormat, + searchParamsSymbol: searchParams +}; diff --git a/src/node_url.cc b/src/node_url.cc index a013380b75839e..d9213738e7f894 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -20,6 +20,8 @@ #include #endif +#define UNICODE_REPLACEMENT_CHARACTER 0xFFFD + namespace node { using v8::Array; @@ -104,6 +106,21 @@ namespace url { } #endif + // If a UTF-16 character is a low/trailing surrogate. 
+ static inline bool IsUnicodeTrail(uint16_t c) { + return (c & 0xFC00) == 0xDC00; + } + + // If a UTF-16 character is a surrogate. + static inline bool IsUnicodeSurrogate(uint16_t c) { + return (c & 0xF800) == 0xD800; + } + + // If a UTF-16 surrogate is a low/trailing one. + static inline bool IsUnicodeSurrogateTrail(uint16_t c) { + return (c & 0x400) != 0; + } + static url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) { @@ -1356,6 +1373,41 @@ namespace url { v8::NewStringType::kNormal).ToLocalChecked()); } + static void ToUSVString(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 2); + CHECK(args[0]->IsString()); + CHECK(args[1]->IsNumber()); + + TwoByteValue value(env->isolate(), args[0]); + const size_t n = value.length(); + + const int64_t start = args[1]->IntegerValue(env->context()).FromJust(); + CHECK_GE(start, 0); + + for (size_t i = start; i < n; i++) { + uint16_t c = value[i]; + if (!IsUnicodeSurrogate(c)) { + continue; + } else if (IsUnicodeSurrogateTrail(c) || i == n - 1) { + value[i] = UNICODE_REPLACEMENT_CHARACTER; + } else { + uint16_t d = value[i + 1]; + if (IsUnicodeTrail(d)) { + i++; + } else { + value[i] = UNICODE_REPLACEMENT_CHARACTER; + } + } + } + + args.GetReturnValue().Set( + String::NewFromTwoByte(env->isolate(), + *value, + v8::NewStringType::kNormal, + n).ToLocalChecked()); + } + static void DomainToASCII(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK_GE(args.Length(), 1); @@ -1403,6 +1455,7 @@ namespace url { Environment* env = Environment::GetCurrent(context); env->SetMethod(target, "parse", Parse); env->SetMethod(target, "encodeAuth", EncodeAuthSet); + env->SetMethod(target, "toUSVString", ToUSVString); env->SetMethod(target, "domainToASCII", DomainToASCII); env->SetMethod(target, "domainToUnicode", DomainToUnicode); diff --git a/test/fixtures/url-setter-tests-additional.js 
b/test/fixtures/url-setter-tests-additional.js new file mode 100644 index 00000000000000..b27ae336a28776 --- /dev/null +++ b/test/fixtures/url-setter-tests-additional.js @@ -0,0 +1,237 @@ +module.exports = { + 'username': [ + { + 'comment': 'Surrogate pair', + 'href': 'https://github.com/', + 'new_value': '\uD83D\uDE00', + 'expected': { + 'href': 'https://%F0%9F%98%80@github.com/', + 'username': '%F0%9F%98%80' + } + }, + { + 'comment': 'Unpaired low surrogate 1', + 'href': 'https://github.com/', + 'new_value': '\uD83D', + 'expected': { + 'href': 'https://%EF%BF%BD@github.com/', + 'username': '%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired low surrogate 2', + 'href': 'https://github.com/', + 'new_value': '\uD83Dnode', + 'expected': { + 'href': 'https://%EF%BF%BDnode@github.com/', + 'username': '%EF%BF%BDnode' + } + }, + { + 'comment': 'Unpaired high surrogate 1', + 'href': 'https://github.com/', + 'new_value': '\uDE00', + 'expected': { + 'href': 'https://%EF%BF%BD@github.com/', + 'username': '%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired high surrogate 2', + 'href': 'https://github.com/', + 'new_value': '\uDE00node', + 'expected': { + 'href': 'https://%EF%BF%BDnode@github.com/', + 'username': '%EF%BF%BDnode' + } + } + ], + 'password': [ + { + 'comment': 'Surrogate pair', + 'href': 'https://github.com/', + 'new_value': '\uD83D\uDE00', + 'expected': { + 'href': 'https://:%F0%9F%98%80@github.com/', + 'password': '%F0%9F%98%80' + } + }, + { + 'comment': 'Unpaired low surrogate 1', + 'href': 'https://github.com/', + 'new_value': '\uD83D', + 'expected': { + 'href': 'https://:%EF%BF%BD@github.com/', + 'password': '%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired low surrogate 2', + 'href': 'https://github.com/', + 'new_value': '\uD83Dnode', + 'expected': { + 'href': 'https://:%EF%BF%BDnode@github.com/', + 'password': '%EF%BF%BDnode' + } + }, + { + 'comment': 'Unpaired high surrogate 1', + 'href': 'https://github.com/', + 'new_value': '\uDE00', + 'expected': { + 'href': 
'https://:%EF%BF%BD@github.com/', + 'password': '%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired high surrogate 2', + 'href': 'https://github.com/', + 'new_value': '\uDE00node', + 'expected': { + 'href': 'https://:%EF%BF%BDnode@github.com/', + 'password': '%EF%BF%BDnode' + } + } + ], + 'pathname': [ + { + 'comment': 'Surrogate pair', + 'href': 'https://github.com/', + 'new_value': '/\uD83D\uDE00', + 'expected': { + 'href': 'https://github.com/%F0%9F%98%80', + 'pathname': '/%F0%9F%98%80' + } + }, + { + 'comment': 'Unpaired low surrogate 1', + 'href': 'https://github.com/', + 'new_value': '/\uD83D', + 'expected': { + 'href': 'https://github.com/%EF%BF%BD', + 'pathname': '/%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired low surrogate 2', + 'href': 'https://github.com/', + 'new_value': '/\uD83Dnode', + 'expected': { + 'href': 'https://github.com/%EF%BF%BDnode', + 'pathname': '/%EF%BF%BDnode' + } + }, + { + 'comment': 'Unpaired high surrogate 1', + 'href': 'https://github.com/', + 'new_value': '/\uDE00', + 'expected': { + 'href': 'https://github.com/%EF%BF%BD', + 'pathname': '/%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired high surrogate 2', + 'href': 'https://github.com/', + 'new_value': '/\uDE00node', + 'expected': { + 'href': 'https://github.com/%EF%BF%BDnode', + 'pathname': '/%EF%BF%BDnode' + } + } + ], + 'search': [ + { + 'comment': 'Surrogate pair', + 'href': 'https://github.com/', + 'new_value': '\uD83D\uDE00', + 'expected': { + 'href': 'https://github.com/?%F0%9F%98%80', + 'search': '?%F0%9F%98%80' + } + }, + { + 'comment': 'Unpaired low surrogate 1', + 'href': 'https://github.com/', + 'new_value': '\uD83D', + 'expected': { + 'href': 'https://github.com/?%EF%BF%BD', + 'search': '?%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired low surrogate 2', + 'href': 'https://github.com/', + 'new_value': '\uD83Dnode', + 'expected': { + 'href': 'https://github.com/?%EF%BF%BDnode', + 'search': '?%EF%BF%BDnode' + } + }, + { + 'comment': 'Unpaired high surrogate 1', + 'href': 
'https://github.com/', + 'new_value': '\uDE00', + 'expected': { + 'href': 'https://github.com/?%EF%BF%BD', + 'search': '?%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired high surrogate 2', + 'href': 'https://github.com/', + 'new_value': '\uDE00node', + 'expected': { + 'href': 'https://github.com/?%EF%BF%BDnode', + 'search': '?%EF%BF%BDnode' + } + } + ], + 'hash': [ + { + 'comment': 'Surrogate pair', + 'href': 'https://github.com/', + 'new_value': '\uD83D\uDE00', + 'expected': { + 'href': 'https://github.com/#%F0%9F%98%80', + 'hash': '#%F0%9F%98%80' + } + }, + { + 'comment': 'Unpaired low surrogate 1', + 'href': 'https://github.com/', + 'new_value': '\uD83D', + 'expected': { + 'href': 'https://github.com/#%EF%BF%BD', + 'hash': '#%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired low surrogate 2', + 'href': 'https://github.com/', + 'new_value': '\uD83Dnode', + 'expected': { + 'href': 'https://github.com/#%EF%BF%BDnode', + 'hash': '#%EF%BF%BDnode' + } + }, + { + 'comment': 'Unpaired high surrogate 1', + 'href': 'https://github.com/', + 'new_value': '\uDE00', + 'expected': { + 'href': 'https://github.com/#%EF%BF%BD', + 'hash': '#%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired high surrogate 2', + 'href': 'https://github.com/', + 'new_value': '\uDE00node', + 'expected': { + 'href': 'https://github.com/#%EF%BF%BDnode', + 'hash': '#%EF%BF%BDnode' + } + } + ] +}; diff --git a/test/fixtures/url-tests-additional.js b/test/fixtures/url-tests-additional.js index ffe47fb639dcba..c1c640f4bb4b7d 100644 --- a/test/fixtures/url-tests-additional.js +++ b/test/fixtures/url-tests-additional.js @@ -3,4 +3,34 @@ // This file contains test cases not part of the WPT module.exports = [ + { + // surrogate pair + 'url': 'https://github.com/nodejs/\uD83D\uDE00node', + 'protocol': 'https:', + 'pathname': '/nodejs/%F0%9F%98%80node' + }, + { + // unpaired low surrogate + 'url': 'https://github.com/nodejs/\uD83D', + 'protocol': 'https:', + 'pathname': '/nodejs/%EF%BF%BD' + }, + { + // unpaired low 
surrogate + 'url': 'https://github.com/nodejs/\uD83Dnode', + 'protocol': 'https:', + 'pathname': '/nodejs/%EF%BF%BDnode' + }, + { + // unmatched high surrogate + 'url': 'https://github.com/nodejs/\uDE00', + 'protocol': 'https:', + 'pathname': '/nodejs/%EF%BF%BD' + }, + { + // unmatched high surrogate + 'url': 'https://github.com/nodejs/\uDE00node', + 'protocol': 'https:', + 'pathname': '/nodejs/%EF%BF%BDnode' + } ]; diff --git a/test/parallel/test-whatwg-url-searchparams-append.js b/test/parallel/test-whatwg-url-searchparams-append.js index 2e3a33b26307c3..67eddbcc503e1e 100644 --- a/test/parallel/test-whatwg-url-searchparams-append.js +++ b/test/parallel/test-whatwg-url-searchparams-append.js @@ -57,4 +57,13 @@ test(function() { assert.throws(() => { params.set('a'); }, /^TypeError: "name" and "value" arguments must be specified$/); + + const obj = { toString() { throw new Error('toString'); } }; + const sym = Symbol(); + assert.throws(() => params.set(obj, 'b'), /^Error: toString$/); + assert.throws(() => params.set('a', obj), /^Error: toString$/); + assert.throws(() => params.set(sym, 'b'), + /^TypeError: Cannot convert a Symbol value to a string$/); + assert.throws(() => params.set('a', sym), + /^TypeError: Cannot convert a Symbol value to a string$/); } diff --git a/test/parallel/test-whatwg-url-searchparams-constructor.js b/test/parallel/test-whatwg-url-searchparams-constructor.js index d57373e727ac51..8ccd8f9427f160 100644 --- a/test/parallel/test-whatwg-url-searchparams-constructor.js +++ b/test/parallel/test-whatwg-url-searchparams-constructor.js @@ -207,3 +207,19 @@ test(() => { assert.throws(() => new URLSearchParams([{ [Symbol.iterator]: 42 }]), /^TypeError: Each query pair must be iterable$/); } + +{ + const obj = { toString() { throw new Error('toString'); } }; + const sym = Symbol(); + + assert.throws(() => new URLSearchParams({ a: obj }), /^Error: toString$/); + assert.throws(() => new URLSearchParams([['a', obj]]), /^Error: toString$/); + 
assert.throws(() => new URLSearchParams(sym), + /^TypeError: Cannot convert a Symbol value to a string$/); + assert.throws(() => new URLSearchParams({ a: sym }), + /^TypeError: Cannot convert a Symbol value to a string$/); + assert.throws(() => new URLSearchParams([[sym, 'a']]), + /^TypeError: Cannot convert a Symbol value to a string$/); + assert.throws(() => new URLSearchParams([['a', sym]]), + /^TypeError: Cannot convert a Symbol value to a string$/); +} diff --git a/test/parallel/test-whatwg-url-searchparams-delete.js b/test/parallel/test-whatwg-url-searchparams-delete.js index c6235263f22bad..d0bae75b4718a8 100644 --- a/test/parallel/test-whatwg-url-searchparams-delete.js +++ b/test/parallel/test-whatwg-url-searchparams-delete.js @@ -51,6 +51,12 @@ test(function() { assert.throws(() => { params.delete(); }, /^TypeError: "name" argument must be specified$/); + + const obj = { toString() { throw new Error('toString'); } }; + const sym = Symbol(); + assert.throws(() => params.delete(obj), /^Error: toString$/); + assert.throws(() => params.delete(sym), + /^TypeError: Cannot convert a Symbol value to a string$/); } // https://github.com/nodejs/node/issues/10480 diff --git a/test/parallel/test-whatwg-url-searchparams-get.js b/test/parallel/test-whatwg-url-searchparams-get.js index 3a46993214a997..2244fc28612755 100644 --- a/test/parallel/test-whatwg-url-searchparams-get.js +++ b/test/parallel/test-whatwg-url-searchparams-get.js @@ -42,4 +42,10 @@ test(function() { assert.throws(() => { params.get(); }, /^TypeError: "name" argument must be specified$/); + + const obj = { toString() { throw new Error('toString'); } }; + const sym = Symbol(); + assert.throws(() => params.get(obj), /^Error: toString$/); + assert.throws(() => params.get(sym), + /^TypeError: Cannot convert a Symbol value to a string$/); } diff --git a/test/parallel/test-whatwg-url-searchparams-getall.js b/test/parallel/test-whatwg-url-searchparams-getall.js index df055e009e7e4d..921a6c9bc66da2 100644 --- 
a/test/parallel/test-whatwg-url-searchparams-getall.js +++ b/test/parallel/test-whatwg-url-searchparams-getall.js @@ -46,4 +46,10 @@ test(function() { assert.throws(() => { params.getAll(); }, /^TypeError: "name" argument must be specified$/); + + const obj = { toString() { throw new Error('toString'); } }; + const sym = Symbol(); + assert.throws(() => params.getAll(obj), /^Error: toString$/); + assert.throws(() => params.getAll(sym), + /^TypeError: Cannot convert a Symbol value to a string$/); } diff --git a/test/parallel/test-whatwg-url-searchparams-has.js b/test/parallel/test-whatwg-url-searchparams-has.js index 1be9cf6121593e..9d7272f999c653 100644 --- a/test/parallel/test-whatwg-url-searchparams-has.js +++ b/test/parallel/test-whatwg-url-searchparams-has.js @@ -45,4 +45,10 @@ test(function() { assert.throws(() => { params.has(); }, /^TypeError: "name" argument must be specified$/); + + const obj = { toString() { throw new Error('toString'); } }; + const sym = Symbol(); + assert.throws(() => params.has(obj), /^Error: toString$/); + assert.throws(() => params.has(sym), + /^TypeError: Cannot convert a Symbol value to a string$/); } diff --git a/test/parallel/test-whatwg-url-searchparams-set.js b/test/parallel/test-whatwg-url-searchparams-set.js index e78ce4763158b5..0eee7b5c9a0130 100644 --- a/test/parallel/test-whatwg-url-searchparams-set.js +++ b/test/parallel/test-whatwg-url-searchparams-set.js @@ -43,4 +43,13 @@ test(function() { assert.throws(() => { params.set('a'); }, /^TypeError: "name" and "value" arguments must be specified$/); + + const obj = { toString() { throw new Error('toString'); } }; + const sym = Symbol(); + assert.throws(() => params.append(obj, 'b'), /^Error: toString$/); + assert.throws(() => params.append('a', obj), /^Error: toString$/); + assert.throws(() => params.append(sym, 'b'), + /^TypeError: Cannot convert a Symbol value to a string$/); + assert.throws(() => params.append('a', sym), + /^TypeError: Cannot convert a Symbol value to a 
string$/); } diff --git a/test/parallel/test-whatwg-url-searchparams.js b/test/parallel/test-whatwg-url-searchparams.js index 36fac3a2307ecc..e0d1826596704c 100644 --- a/test/parallel/test-whatwg-url-searchparams.js +++ b/test/parallel/test-whatwg-url-searchparams.js @@ -5,8 +5,14 @@ const assert = require('assert'); const URL = require('url').URL; // Tests below are not from WPT. -const serialized = 'a=a&a=1&a=true&a=undefined&a=null&a=%5Bobject%20Object%5D'; -const values = ['a', 1, true, undefined, null, {}]; +const serialized = 'a=a&a=1&a=true&a=undefined&a=null&a=%EF%BF%BD' + + '&a=%EF%BF%BD&a=%F0%9F%98%80&a=%EF%BF%BD%EF%BF%BD' + + '&a=%5Bobject%20Object%5D'; +const values = ['a', 1, true, undefined, null, '\uD83D', '\uDE00', + '\uD83D\uDE00', '\uDE00\uD83D', {}]; +const normalizedValues = ['a', '1', 'true', 'undefined', 'null', '\uFFFD', + '\uFFFD', '\uD83D\uDE00', '\uFFFD\uFFFD', + '[object Object]']; const m = new URL('http://example.org'); const sp = m.searchParams; @@ -27,7 +33,7 @@ assert.strictEqual(sp.toString(), ''); values.forEach((i) => sp.append('a', i)); assert(sp.has('a')); -assert.strictEqual(sp.getAll('a').length, 6); +assert.strictEqual(sp.getAll('a').length, values.length); assert.strictEqual(sp.get('a'), 'a'); assert.strictEqual(sp.toString(), serialized); @@ -39,23 +45,27 @@ assert.strictEqual(sp[Symbol.iterator], sp.entries); let key, val; let n = 0; for ([key, val] of sp) { - assert.strictEqual(key, 'a'); - assert.strictEqual(val, String(values[n++])); + assert.strictEqual(key, 'a', n); + assert.strictEqual(val, normalizedValues[n], n); + n++; } n = 0; for (key of sp.keys()) { - assert.strictEqual(key, 'a'); + assert.strictEqual(key, 'a', n); + n++; } n = 0; for (val of sp.values()) { - assert.strictEqual(val, String(values[n++])); + assert.strictEqual(val, normalizedValues[n], n); + n++; } n = 0; sp.forEach(function(val, key, obj) { - assert.strictEqual(this, undefined); - assert.strictEqual(key, 'a'); - assert.strictEqual(val, 
String(values[n++])); - assert.strictEqual(obj, sp); + assert.strictEqual(this, undefined, n); + assert.strictEqual(key, 'a', n); + assert.strictEqual(val, normalizedValues[n], n); + assert.strictEqual(obj, sp, n); + n++; }); sp.forEach(function() { assert.strictEqual(this, m); diff --git a/test/parallel/test-whatwg-url-setters.js b/test/parallel/test-whatwg-url-setters.js index 63ebba84918945..6e1f4bccbd2314 100644 --- a/test/parallel/test-whatwg-url-setters.js +++ b/test/parallel/test-whatwg-url-setters.js @@ -1,9 +1,12 @@ 'use strict'; const common = require('../common'); +const assert = require('assert'); const path = require('path'); const URL = require('url').URL; const { test, assert_equals } = common.WPT; +const additionalTestCases = require( + path.join(common.fixturesDir, 'url-setter-tests-additional.js')); if (!common.hasIntl) { // A handful of the tests fail when ICU is not included. @@ -76,3 +79,45 @@ function runURLSettersTests(all_test_cases) { startURLSettersTests() /* eslint-enable */ + +// Tests below are not from WPT. 
+ +{ + for (const attributeToBeSet in additionalTestCases) { + if (attributeToBeSet === 'comment') { + continue; + } + const testCases = additionalTestCases[attributeToBeSet]; + for (const testCase of testCases) { + let name = `Setting <${testCase.href}>.${attributeToBeSet}` + + ` = "${testCase.new_value}"`; + if ('comment' in testCase) { + name += ' ' + testCase.comment; + } + test(function() { + const url = new URL(testCase.href); + url[attributeToBeSet] = testCase.new_value; + for (const attribute in testCase.expected) { + assert_equals(url[attribute], testCase.expected[attribute]); + } + }, 'URL: ' + name); + } + } +} + +{ + const url = new URL('http://example.com/'); + const obj = { toString() { throw new Error('toString'); } }; + const sym = Symbol(); + const props = Object.getOwnPropertyDescriptors(Object.getPrototypeOf(url)); + for (const [name, { set }] of Object.entries(props)) { + if (set) { + assert.throws(() => url[name] = obj, + /^Error: toString$/, + `url.${name} = { toString() { throw ... } }`); + assert.throws(() => url[name] = sym, + /^TypeError: Cannot convert a Symbol value to a string$/, + `url.${name} = ${String(sym)}`); + } + } +} From c3366a592bd6de0df8fbc5d3655e1f36aa41c3e2 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Tue, 7 Mar 2017 19:31:29 -0800 Subject: [PATCH 08/30] url: prioritize toString when stringifying The ES addition operator calls the ToPrimitive() abstract operation without hint String, leading a subsequent OrdinaryToPrimitive() to call valueOf() first on an object rather than the desired toString(). Instead, use template literals which directly call ToString() abstract operation, per Web IDL spec. 
PR-URL: https://github.com/nodejs/node/pull/12507 Fixes: b610a4db1c2919f887119 "url: enforce valid UTF-8 in WHATWG parser" Refs: https://github.com/nodejs/node/commit/b610a4db1c2919f88711962f5797f25ecb1cd36b#commitcomment-21200056 Refs: https://tc39.github.io/ecma262/#sec-addition-operator-plus-runtime-semantics-evaluation Refs: https://tc39.github.io/ecma262/#sec-template-literals-runtime-semantics-evaluation Reviewed-By: James M Snell --- lib/internal/url.js | 26 +++++++++---------- .../test-whatwg-url-searchparams-append.js | 5 +++- ...est-whatwg-url-searchparams-constructor.js | 5 +++- .../test-whatwg-url-searchparams-delete.js | 5 +++- .../test-whatwg-url-searchparams-get.js | 5 +++- .../test-whatwg-url-searchparams-getall.js | 5 +++- .../test-whatwg-url-searchparams-has.js | 5 +++- .../test-whatwg-url-searchparams-set.js | 5 +++- test/parallel/test-whatwg-url-setters.js | 5 +++- 9 files changed, 45 insertions(+), 21 deletions(-) diff --git a/lib/internal/url.js b/lib/internal/url.js index a2c255f765cdc6..56fbfd27070863 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -26,7 +26,7 @@ const IteratorPrototype = Object.getPrototypeOf( const unpairedSurrogateRe = /([^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])/; function toUSVString(val) { - const str = '' + val; + const str = `${val}`; // As of V8 5.5, `str.search()` (and `unpairedSurrogateRe[@@search]()`) are // slower than `unpairedSurrogateRe.exec()`. const match = unpairedSurrogateRe.exec(str); @@ -215,7 +215,7 @@ function onParseHashComplete(flags, protocol, username, password, class URL { constructor(input, base) { // toUSVString is not needed. - input = '' + input; + input = `${input}`; if (base !== undefined && !(base instanceof URL)) base = new URL(base); parse(this, input, base); @@ -326,7 +326,7 @@ Object.defineProperties(URL.prototype, { }, set(input) { // toUSVString is not needed. 
- input = '' + input; + input = `${input}`; parse(this, input); } }, @@ -345,7 +345,7 @@ Object.defineProperties(URL.prototype, { }, set(scheme) { // toUSVString is not needed. - scheme = '' + scheme; + scheme = `${scheme}`; if (scheme.length === 0) return; binding.parse(scheme, binding.kSchemeStart, null, this[context], @@ -360,7 +360,7 @@ Object.defineProperties(URL.prototype, { }, set(username) { // toUSVString is not needed. - username = '' + username; + username = `${username}`; if (!this.hostname) return; const ctx = this[context]; @@ -381,7 +381,7 @@ Object.defineProperties(URL.prototype, { }, set(password) { // toUSVString is not needed. - password = '' + password; + password = `${password}`; if (!this.hostname) return; const ctx = this[context]; @@ -407,7 +407,7 @@ Object.defineProperties(URL.prototype, { set(host) { const ctx = this[context]; // toUSVString is not needed. - host = '' + host; + host = `${host}`; if (this[cannotBeBase] || (this[special] && host.length === 0)) { // Cannot set the host if cannot-be-base is set or @@ -432,7 +432,7 @@ Object.defineProperties(URL.prototype, { set(host) { const ctx = this[context]; // toUSVString is not needed. - host = '' + host; + host = `${host}`; if (this[cannotBeBase] || (this[special] && host.length === 0)) { // Cannot set the host if cannot-be-base is set or @@ -457,7 +457,7 @@ Object.defineProperties(URL.prototype, { }, set(port) { // toUSVString is not needed. - port = '' + port; + port = `${port}`; const ctx = this[context]; if (!ctx.host || this[cannotBeBase] || this.protocol === 'file:') @@ -481,7 +481,7 @@ Object.defineProperties(URL.prototype, { }, set(path) { // toUSVString is not needed. - path = '' + path; + path = `${path}`; if (this[cannotBeBase]) return; binding.parse(path, binding.kPathStart, null, this[context], @@ -530,7 +530,7 @@ Object.defineProperties(URL.prototype, { set(hash) { const ctx = this[context]; // toUSVString is not needed. 
- hash = '' + hash; + hash = `${hash}`; if (this.protocol === 'javascript:') return; if (!hash) { @@ -1122,12 +1122,12 @@ function originFor(url, base) { function domainToASCII(domain) { // toUSVString is not needed. - return binding.domainToASCII('' + domain); + return binding.domainToASCII(`${domain}`); } function domainToUnicode(domain) { // toUSVString is not needed. - return binding.domainToUnicode('' + domain); + return binding.domainToUnicode(`${domain}`); } // Utility function that converts a URL object into an ordinary diff --git a/test/parallel/test-whatwg-url-searchparams-append.js b/test/parallel/test-whatwg-url-searchparams-append.js index 67eddbcc503e1e..ff4a568c303668 100644 --- a/test/parallel/test-whatwg-url-searchparams-append.js +++ b/test/parallel/test-whatwg-url-searchparams-append.js @@ -58,7 +58,10 @@ test(function() { params.set('a'); }, /^TypeError: "name" and "value" arguments must be specified$/); - const obj = { toString() { throw new Error('toString'); } }; + const obj = { + toString() { throw new Error('toString'); }, + valueOf() { throw new Error('valueOf'); } + }; const sym = Symbol(); assert.throws(() => params.set(obj, 'b'), /^Error: toString$/); assert.throws(() => params.set('a', obj), /^Error: toString$/); diff --git a/test/parallel/test-whatwg-url-searchparams-constructor.js b/test/parallel/test-whatwg-url-searchparams-constructor.js index 8ccd8f9427f160..236d01396095f1 100644 --- a/test/parallel/test-whatwg-url-searchparams-constructor.js +++ b/test/parallel/test-whatwg-url-searchparams-constructor.js @@ -209,7 +209,10 @@ test(() => { } { - const obj = { toString() { throw new Error('toString'); } }; + const obj = { + toString() { throw new Error('toString'); }, + valueOf() { throw new Error('valueOf'); } + }; const sym = Symbol(); assert.throws(() => new URLSearchParams({ a: obj }), /^Error: toString$/); diff --git a/test/parallel/test-whatwg-url-searchparams-delete.js b/test/parallel/test-whatwg-url-searchparams-delete.js 
index d0bae75b4718a8..589fbc2f8698b5 100644 --- a/test/parallel/test-whatwg-url-searchparams-delete.js +++ b/test/parallel/test-whatwg-url-searchparams-delete.js @@ -52,7 +52,10 @@ test(function() { params.delete(); }, /^TypeError: "name" argument must be specified$/); - const obj = { toString() { throw new Error('toString'); } }; + const obj = { + toString() { throw new Error('toString'); }, + valueOf() { throw new Error('valueOf'); } + }; const sym = Symbol(); assert.throws(() => params.delete(obj), /^Error: toString$/); assert.throws(() => params.delete(sym), diff --git a/test/parallel/test-whatwg-url-searchparams-get.js b/test/parallel/test-whatwg-url-searchparams-get.js index 2244fc28612755..5e81be4f32cc1d 100644 --- a/test/parallel/test-whatwg-url-searchparams-get.js +++ b/test/parallel/test-whatwg-url-searchparams-get.js @@ -43,7 +43,10 @@ test(function() { params.get(); }, /^TypeError: "name" argument must be specified$/); - const obj = { toString() { throw new Error('toString'); } }; + const obj = { + toString() { throw new Error('toString'); }, + valueOf() { throw new Error('valueOf'); } + }; const sym = Symbol(); assert.throws(() => params.get(obj), /^Error: toString$/); assert.throws(() => params.get(sym), diff --git a/test/parallel/test-whatwg-url-searchparams-getall.js b/test/parallel/test-whatwg-url-searchparams-getall.js index 921a6c9bc66da2..f80f45d5427e77 100644 --- a/test/parallel/test-whatwg-url-searchparams-getall.js +++ b/test/parallel/test-whatwg-url-searchparams-getall.js @@ -47,7 +47,10 @@ test(function() { params.getAll(); }, /^TypeError: "name" argument must be specified$/); - const obj = { toString() { throw new Error('toString'); } }; + const obj = { + toString() { throw new Error('toString'); }, + valueOf() { throw new Error('valueOf'); } + }; const sym = Symbol(); assert.throws(() => params.getAll(obj), /^Error: toString$/); assert.throws(() => params.getAll(sym), diff --git a/test/parallel/test-whatwg-url-searchparams-has.js 
b/test/parallel/test-whatwg-url-searchparams-has.js index 9d7272f999c653..f2696063b998a1 100644 --- a/test/parallel/test-whatwg-url-searchparams-has.js +++ b/test/parallel/test-whatwg-url-searchparams-has.js @@ -46,7 +46,10 @@ test(function() { params.has(); }, /^TypeError: "name" argument must be specified$/); - const obj = { toString() { throw new Error('toString'); } }; + const obj = { + toString() { throw new Error('toString'); }, + valueOf() { throw new Error('valueOf'); } + }; const sym = Symbol(); assert.throws(() => params.has(obj), /^Error: toString$/); assert.throws(() => params.has(sym), diff --git a/test/parallel/test-whatwg-url-searchparams-set.js b/test/parallel/test-whatwg-url-searchparams-set.js index 0eee7b5c9a0130..acd62955d22a44 100644 --- a/test/parallel/test-whatwg-url-searchparams-set.js +++ b/test/parallel/test-whatwg-url-searchparams-set.js @@ -44,7 +44,10 @@ test(function() { params.set('a'); }, /^TypeError: "name" and "value" arguments must be specified$/); - const obj = { toString() { throw new Error('toString'); } }; + const obj = { + toString() { throw new Error('toString'); }, + valueOf() { throw new Error('valueOf'); } + }; const sym = Symbol(); assert.throws(() => params.append(obj, 'b'), /^Error: toString$/); assert.throws(() => params.append('a', obj), /^Error: toString$/); diff --git a/test/parallel/test-whatwg-url-setters.js b/test/parallel/test-whatwg-url-setters.js index 6e1f4bccbd2314..253415dad6e5a9 100644 --- a/test/parallel/test-whatwg-url-setters.js +++ b/test/parallel/test-whatwg-url-setters.js @@ -107,7 +107,10 @@ startURLSettersTests() { const url = new URL('http://example.com/'); - const obj = { toString() { throw new Error('toString'); } }; + const obj = { + toString() { throw new Error('toString'); }, + valueOf() { throw new Error('valueOf'); } + }; const sym = Symbol(); const props = Object.getOwnPropertyDescriptors(Object.getPrototypeOf(url)); for (const [name, { set }] of Object.entries(props)) { From 
6b2cb6dd2efcca42a194383c532967b2dde4619f Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Fri, 3 Feb 2017 17:34:47 -0800 Subject: [PATCH 09/30] url: spec-compliant URLSearchParams serializer PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- ...cy-vs-whatwg-url-searchparams-serialize.js | 2 +- lib/internal/url.js | 105 ++++++++++++++++-- test/fixtures/url-tests.js | 2 +- test/parallel/test-whatwg-url-constructor.js | 6 +- ...est-whatwg-url-searchparams-constructor.js | 4 +- ...est-whatwg-url-searchparams-stringifier.js | 24 ++-- test/parallel/test-whatwg-url-searchparams.js | 2 +- 7 files changed, 113 insertions(+), 32 deletions(-) diff --git a/benchmark/url/legacy-vs-whatwg-url-searchparams-serialize.js b/benchmark/url/legacy-vs-whatwg-url-searchparams-serialize.js index 7e56b5fba6e4f8..2b8d2c36a810b3 100644 --- a/benchmark/url/legacy-vs-whatwg-url-searchparams-serialize.js +++ b/benchmark/url/legacy-vs-whatwg-url-searchparams-serialize.js @@ -7,7 +7,7 @@ const inputs = require('../fixtures/url-inputs.js').searchParams; const bench = common.createBenchmark(main, { type: Object.keys(inputs), method: ['legacy', 'whatwg'], - n: [1e5] + n: [1e6] }); function useLegacy(n, input, prop) { diff --git a/lib/internal/url.js b/lib/internal/url.js index 56fbfd27070863..0e43364a792ae1 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -1,7 +1,7 @@ 'use strict'; const util = require('util'); -const { StorageObject } = require('internal/querystring'); +const { hexTable, StorageObject } = require('internal/querystring'); const binding = process.binding('url'); const context = Symbol('context'); const cannotBeBase = Symbol('cannot-be-base'); @@ -594,18 +594,99 @@ function getParamsFromObject(obj) { return values; } -function getObjectFromParams(array) { - const obj = new StorageObject(); - for (var i = 0; i < array.length; i += 2) { - const name = array[i]; - const value = array[i + 1]; - if (obj[name]) { - obj[name].push(value); - } else { 
- obj[name] = [value]; +// Adapted from querystring's implementation. +// Ref: https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer +const noEscape = [ +//0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, // 0x20 - 0x2F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 - 0x5F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 // 0x70 - 0x7F +]; + +// Special version of hexTable that uses `+` for U+0020 SPACE. +const paramHexTable = hexTable.slice(); +paramHexTable[0x20] = '+'; + +function escapeParam(str) { + const len = str.length; + if (len === 0) + return ''; + + var out = ''; + var lastPos = 0; + + for (var i = 0; i < len; i++) { + var c = str.charCodeAt(i); + + // ASCII + if (c < 0x80) { + if (noEscape[c] === 1) + continue; + if (lastPos < i) + out += str.slice(lastPos, i); + lastPos = i + 1; + out += paramHexTable[c]; + continue; + } + + if (lastPos < i) + out += str.slice(lastPos, i); + + // Multi-byte characters ... + if (c < 0x800) { + lastPos = i + 1; + out += paramHexTable[0xC0 | (c >> 6)] + + paramHexTable[0x80 | (c & 0x3F)]; + continue; + } + if (c < 0xD800 || c >= 0xE000) { + lastPos = i + 1; + out += paramHexTable[0xE0 | (c >> 12)] + + paramHexTable[0x80 | ((c >> 6) & 0x3F)] + + paramHexTable[0x80 | (c & 0x3F)]; + continue; } + // Surrogate pair + ++i; + var c2; + if (i < len) + c2 = str.charCodeAt(i) & 0x3FF; + else { + // This branch should never happen because all URLSearchParams entries + // should already be converted to USVString. But, included for + // completion's sake anyway. 
+ c2 = 0; + } + lastPos = i + 1; + c = 0x10000 + (((c & 0x3FF) << 10) | c2); + out += paramHexTable[0xF0 | (c >> 18)] + + paramHexTable[0x80 | ((c >> 12) & 0x3F)] + + paramHexTable[0x80 | ((c >> 6) & 0x3F)] + + paramHexTable[0x80 | (c & 0x3F)]; } - return obj; + if (lastPos === 0) + return str; + if (lastPos < len) + return out + str.slice(lastPos); + return out; +} + +// application/x-www-form-urlencoded serializer +// Ref: https://url.spec.whatwg.org/#concept-urlencoded-serializer +function serializeParams(array) { + const len = array.length; + if (len === 0) + return ''; + + var output = `${escapeParam(array[0])}=${escapeParam(array[1])}`; + for (var i = 2; i < len; i += 2) + output += `&${escapeParam(array[i])}=${escapeParam(array[i + 1])}`; + return output; } // Mainly to mitigate func-name-matching ESLint rule @@ -990,7 +1071,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { throw new TypeError('Value of `this` is not a URLSearchParams'); } - return querystring.stringify(getObjectFromParams(this[searchParams])); + return serializeParams(this[searchParams]); } }); diff --git a/test/fixtures/url-tests.js b/test/fixtures/url-tests.js index 0e510eb366d0f2..a4e7de9f26b199 100644 --- a/test/fixtures/url-tests.js +++ b/test/fixtures/url-tests.js @@ -4639,7 +4639,7 @@ module.exports = "port": "", "pathname": "/foo/bar", "search": "??a=b&c=d", - // "searchParams": "%3Fa=b&c=d", + "searchParams": "%3Fa=b&c=d", "hash": "" }, "# Scheme only", diff --git a/test/parallel/test-whatwg-url-constructor.js b/test/parallel/test-whatwg-url-constructor.js index c5d70b3f4c1544..c2773b9af105fb 100644 --- a/test/parallel/test-whatwg-url-constructor.js +++ b/test/parallel/test-whatwg-url-constructor.js @@ -120,12 +120,12 @@ function runURLSearchParamTests() { // And in the other direction, altering searchParams propagates // back to 'search'. 
searchParams.append('i', ' j ') - // assert_equals(url.search, '?e=f&g=h&i=+j+') - // assert_equals(url.searchParams.toString(), 'e=f&g=h&i=+j+') + assert_equals(url.search, '?e=f&g=h&i=+j+') + assert_equals(url.searchParams.toString(), 'e=f&g=h&i=+j+') assert_equals(searchParams.get('i'), ' j ') searchParams.set('e', 'updated') - // assert_equals(url.search, '?e=updated&g=h&i=+j+') + assert_equals(url.search, '?e=updated&g=h&i=+j+') assert_equals(searchParams.get('e'), 'updated') var url2 = bURL('http://example.org/file??a=b&c=d') diff --git a/test/parallel/test-whatwg-url-searchparams-constructor.js b/test/parallel/test-whatwg-url-searchparams-constructor.js index 236d01396095f1..da459fe99c7fb8 100644 --- a/test/parallel/test-whatwg-url-searchparams-constructor.js +++ b/test/parallel/test-whatwg-url-searchparams-constructor.js @@ -11,7 +11,7 @@ const { /* eslint-disable */ var params; // Strict mode fix for WPT. /* WPT Refs: - https://github.com/w3c/web-platform-tests/blob/405394a/url/urlsearchparams-constructor.html + https://github.com/w3c/web-platform-tests/blob/e94c604916/url/urlsearchparams-constructor.html License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html */ test(function() { @@ -154,7 +154,7 @@ test(function() { }, "Constructor with sequence of sequences of strings"); [ -// { "input": {"+": "%C2"}, "output": [[" ", "\uFFFD"]], "name": "object with +" }, + { "input": {"+": "%C2"}, "output": [["+", "%C2"]], "name": "object with +" }, { "input": {c: "x", a: "?"}, "output": [["c", "x"], ["a", "?"]], "name": "object with two keys" }, { "input": [["c", "x"], ["a", "?"]], "output": [["c", "x"], ["a", "?"]], "name": "array with two keys" } ].forEach((val) => { diff --git a/test/parallel/test-whatwg-url-searchparams-stringifier.js b/test/parallel/test-whatwg-url-searchparams-stringifier.js index 7e85b9726167ad..ac09979e027b7c 100644 --- a/test/parallel/test-whatwg-url-searchparams-stringifier.js +++ 
b/test/parallel/test-whatwg-url-searchparams-stringifier.js @@ -10,14 +10,14 @@ const { test, assert_equals } = common.WPT; https://github.com/w3c/web-platform-tests/blob/8791bed/url/urlsearchparams-stringifier.html License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html */ -// test(function() { -// var params = new URLSearchParams(); -// params.append('a', 'b c'); -// assert_equals(params + '', 'a=b+c'); -// params.delete('a'); -// params.append('a b', 'c'); -// assert_equals(params + '', 'a+b=c'); -// }, 'Serialize space'); +test(function() { + var params = new URLSearchParams(); + params.append('a', 'b c'); + assert_equals(params + '', 'a=b+c'); + params.delete('a'); + params.append('a b', 'c'); + assert_equals(params + '', 'a+b=c'); +}, 'Serialize space'); test(function() { var params = new URLSearchParams(); @@ -112,10 +112,10 @@ test(function() { test(function() { var params; - // params = new URLSearchParams('a=b&c=d&&e&&'); - // assert_equals(params.toString(), 'a=b&c=d&e='); - // params = new URLSearchParams('a = b &a=b&c=d%20'); - // assert_equals(params.toString(), 'a+=+b+&a=b&c=d+'); + params = new URLSearchParams('a=b&c=d&&e&&'); + assert_equals(params.toString(), 'a=b&c=d&e='); + params = new URLSearchParams('a = b &a=b&c=d%20'); + assert_equals(params.toString(), 'a+=+b+&a=b&c=d+'); // The lone '=' _does_ survive the roundtrip. params = new URLSearchParams('a=&a=b'); assert_equals(params.toString(), 'a=&a=b'); diff --git a/test/parallel/test-whatwg-url-searchparams.js b/test/parallel/test-whatwg-url-searchparams.js index e0d1826596704c..7d6df646407269 100644 --- a/test/parallel/test-whatwg-url-searchparams.js +++ b/test/parallel/test-whatwg-url-searchparams.js @@ -7,7 +7,7 @@ const URL = require('url').URL; // Tests below are not from WPT. 
const serialized = 'a=a&a=1&a=true&a=undefined&a=null&a=%EF%BF%BD' + '&a=%EF%BF%BD&a=%F0%9F%98%80&a=%EF%BF%BD%EF%BF%BD' + - '&a=%5Bobject%20Object%5D'; + '&a=%5Bobject+Object%5D'; const values = ['a', 1, true, undefined, null, '\uD83D', '\uDE00', '\uD83D\uDE00', '\uDE00\uD83D', {}]; const normalizedValues = ['a', '1', 'true', 'undefined', 'null', '\uFFFD', From 7e7fd662fb94e8f31230a304b9d06fd016dcc047 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Tue, 14 Mar 2017 23:41:57 -0700 Subject: [PATCH 10/30] src: remove explicit UTF-8 validity check in url This step was never part of the URL Standard's host parser algorithm, and is rendered unnecessary after IDNA errors are no longer ignored. PR-URL: https://github.com/nodejs/node/pull/12507 Refs: c2a302c50b3787666339371 "src: do not ignore IDNA conversion error" Reviewed-By: James M Snell --- src/node_url.cc | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index d9213738e7f894..6cd78c2c6c04c8 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -15,11 +15,6 @@ #include #include -#if defined(NODE_HAVE_I18N_SUPPORT) -#include -#include -#endif - #define UNICODE_REPLACEMENT_CHARACTER 0xFFFD namespace node { @@ -74,21 +69,6 @@ namespace url { output->assign(*buf, buf.length()); return true; } - - // Unfortunately there's not really a better way to do this. - // Iterate through each encoded codepoint and verify that - // it is a valid unicode codepoint. - static bool IsValidUTF8(std::string* input) { - const char* p = input->c_str(); - int32_t len = input->length(); - for (int32_t i = 0; i < len;) { - UChar32 c; - U8_NEXT_UNSAFE(p, i, c); - if (!U_IS_UNICODE_CHAR(c)) - return false; - } - return true; - } #else // Intentional non-ops if ICU is not present. 
static inline bool ToUnicode(std::string* input, std::string* output) { @@ -100,10 +80,6 @@ namespace url { *output = *input; return true; } - - static bool IsValidUTF8(std::string* input) { - return true; - } #endif // If a UTF-16 character is a low/trailing surrogate. @@ -355,12 +331,6 @@ namespace url { // First, we have to percent decode PercentDecode(input, length, &decoded); - // If there are any invalid UTF8 byte sequences, we have to fail. - // Unfortunately this means iterating through the string and checking - // each decoded codepoint. - if (!IsValidUTF8(&decoded)) - goto end; - // Then we have to punycode toASCII if (!ToASCII(&decoded, &decoded)) goto end; From 4a94c2d6203af6e8169475fbd8034322a86d134a Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Tue, 14 Mar 2017 20:54:13 -0700 Subject: [PATCH 11/30] querystring: move isHexTable to internal PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- lib/internal/querystring.js | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/lib/internal/querystring.js b/lib/internal/querystring.js index 2f8d77d3e9d2e7..c5dc0f63c7b30b 100644 --- a/lib/internal/querystring.js +++ b/lib/internal/querystring.js @@ -4,6 +4,25 @@ const hexTable = new Array(256); for (var i = 0; i < 256; ++i) hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase(); +const isHexTable = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0 - 15 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16 - 31 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32 - 47 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 48 - 63 + 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64 - 79 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80 - 95 + 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 96 - 111 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 112 - 127 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128 ... 
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // ... 256 +]; + // Instantiating this is faster than explicitly calling `Object.create(null)` // to get a "clean" empty object (tested with v8 v4.9). function StorageObject() {} @@ -11,5 +30,6 @@ StorageObject.prototype = Object.create(null); module.exports = { hexTable, + isHexTable, StorageObject }; From d86f0d702afa8277e6f6bf1f7d4cd786420a05d3 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Tue, 14 Mar 2017 21:01:04 -0700 Subject: [PATCH 12/30] url: spec-compliant URLSearchParams parser The entire `URLSearchParams` class is now fully spec-compliant. PR-URL: https://github.com/nodejs/node/pull/12507 Fixes: https://github.com/nodejs/node/issues/10821 Reviewed-By: James M Snell --- ...legacy-vs-whatwg-url-searchparams-parse.js | 2 +- lib/internal/url.js | 115 +++++++++++++++--- test/fixtures/url-searchparams.js | 68 +++++++++++ test/parallel/test-whatwg-url-searchparams.js | 29 ++++- 4 files changed, 197 insertions(+), 17 deletions(-) create mode 100644 test/fixtures/url-searchparams.js diff --git a/benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js b/benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js index 86714df6c196a7..b4a80af4e5eabd 100644 --- a/benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js +++ b/benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js @@ -7,7 +7,7 @@ const inputs = require('../fixtures/url-inputs.js').searchParams; const bench = common.createBenchmark(main, { type: Object.keys(inputs), method: ['legacy', 'whatwg'], - n: [1e5] + n: [1e6] }); function useLegacy(n, input) { diff --git a/lib/internal/url.js b/lib/internal/url.js index 0e43364a792ae1..7a6ff227ed4191 100644 --- 
a/lib/internal/url.js +++ b/lib/internal/url.js @@ -1,7 +1,11 @@ 'use strict'; const util = require('util'); -const { hexTable, StorageObject } = require('internal/querystring'); +const { + hexTable, + isHexTable, + StorageObject +} = require('internal/querystring'); const binding = process.binding('url'); const context = Symbol('context'); const cannotBeBase = Symbol('cannot-be-base'); @@ -575,23 +579,106 @@ function initSearchParams(url, init) { url[searchParams] = []; return; } - url[searchParams] = getParamsFromObject(querystring.parse(init)); + url[searchParams] = parseParams(init); } -function getParamsFromObject(obj) { - const keys = Object.keys(obj); - const values = []; - for (var i = 0; i < keys.length; i++) { - const name = keys[i]; - const value = obj[name]; - if (Array.isArray(value)) { - for (const item of value) - values.push(name, item); - } else { - values.push(name, value); +// application/x-www-form-urlencoded parser +// Ref: https://url.spec.whatwg.org/#concept-urlencoded-parser +function parseParams(qs) { + const out = []; + var pairStart = 0; + var lastPos = 0; + var seenSep = false; + var buf = ''; + var encoded = false; + var encodeCheck = 0; + var i; + for (i = 0; i < qs.length; ++i) { + const code = qs.charCodeAt(i); + + // Try matching key/value pair separator + if (code === 38/*&*/) { + if (pairStart === i) { + // We saw an empty substring between pair separators + lastPos = pairStart = i + 1; + continue; + } + + if (lastPos < i) + buf += qs.slice(lastPos, i); + if (encoded) + buf = querystring.unescape(buf); + out.push(buf); + + // If `buf` is the key, add an empty value. + if (!seenSep) + out.push(''); + + seenSep = false; + buf = ''; + encoded = false; + encodeCheck = 0; + lastPos = pairStart = i + 1; + continue; + } + + // Try matching key/value separator (e.g. '=') if we haven't already + if (!seenSep && code === 61/*=*/) { + // Key/value separator match! 
+ if (lastPos < i) + buf += qs.slice(lastPos, i); + if (encoded) + buf = querystring.unescape(buf); + out.push(buf); + + seenSep = true; + buf = ''; + encoded = false; + encodeCheck = 0; + lastPos = i + 1; + continue; + } + + // Handle + and percent decoding. + if (code === 43/*+*/) { + if (lastPos < i) + buf += qs.slice(lastPos, i); + buf += ' '; + lastPos = i + 1; + } else if (!encoded) { + // Try to match an (valid) encoded byte (once) to minimize unnecessary + // calls to string decoding functions + if (code === 37/*%*/) { + encodeCheck = 1; + } else if (encodeCheck > 0) { + // eslint-disable-next-line no-extra-boolean-cast + if (!!isHexTable[code]) { + if (++encodeCheck === 3) + encoded = true; + } else { + encodeCheck = 0; + } + } } } - return values; + + // Deal with any leftover key or value data + + // There is a trailing &. No more processing is needed. + if (pairStart === i) + return out; + + if (lastPos < i) + buf += qs.slice(lastPos, i); + if (encoded) + buf = querystring.unescape(buf); + out.push(buf); + + // If `buf` is the key, add an empty value. + if (!seenSep) + out.push(''); + + return out; } // Adapted from querystring's implementation. 
diff --git a/test/fixtures/url-searchparams.js b/test/fixtures/url-searchparams.js new file mode 100644 index 00000000000000..3b186fc97bc38b --- /dev/null +++ b/test/fixtures/url-searchparams.js @@ -0,0 +1,68 @@ +module.exports = [ + ['', '', []], + [ + 'foo=918854443121279438895193', + 'foo=918854443121279438895193', + [['foo', '918854443121279438895193']] + ], + ['foo=bar', 'foo=bar', [['foo', 'bar']]], + ['foo=bar&foo=quux', 'foo=bar&foo=quux', [['foo', 'bar'], ['foo', 'quux']]], + ['foo=1&bar=2', 'foo=1&bar=2', [['foo', '1'], ['bar', '2']]], + [ + "my%20weird%20field=q1!2%22'w%245%267%2Fz8)%3F", + 'my+weird+field=q1%212%22%27w%245%267%2Fz8%29%3F', + [['my weird field', 'q1!2"\'w$5&7/z8)?']] + ], + ['foo%3Dbaz=bar', 'foo%3Dbaz=bar', [['foo=baz', 'bar']]], + ['foo=baz=bar', 'foo=baz%3Dbar', [['foo', 'baz=bar']]], + [ + 'str=foo&arr=1&somenull&arr=2&undef=&arr=3', + 'str=foo&arr=1&somenull=&arr=2&undef=&arr=3', + [ + ['str', 'foo'], + ['arr', '1'], + ['somenull', ''], + ['arr', '2'], + ['undef', ''], + ['arr', '3'] + ] + ], + [' foo = bar ', '+foo+=+bar+', [[' foo ', ' bar ']]], + ['foo=%zx', 'foo=%25zx', [['foo', '%zx']]], + ['foo=%EF%BF%BD', 'foo=%EF%BF%BD', [['foo', '\ufffd']]], + // See: https://github.com/joyent/node/issues/3058 + ['foo&bar=baz', 'foo=&bar=baz', [['foo', ''], ['bar', 'baz']]], + ['a=b&c&d=e', 'a=b&c=&d=e', [['a', 'b'], ['c', ''], ['d', 'e']]], + ['a=b&c=&d=e', 'a=b&c=&d=e', [['a', 'b'], ['c', ''], ['d', 'e']]], + ['a=b&=c&d=e', 'a=b&=c&d=e', [['a', 'b'], ['', 'c'], ['d', 'e']]], + ['a=b&=&d=e', 'a=b&=&d=e', [['a', 'b'], ['', ''], ['d', 'e']]], + ['&&foo=bar&&', 'foo=bar', [['foo', 'bar']]], + ['&', '', []], + ['&&&&', '', []], + ['&=&', '=', [['', '']]], + ['&=&=', '=&=', [['', ''], ['', '']]], + ['=', '=', [['', '']]], + ['+', '+=', [[' ', '']]], + ['+=', '+=', [[' ', '']]], + ['=+', '=+', [['', ' ']]], + ['+=&', '+=', [[' ', '']]], + ['a&&b', 'a=&b=', [['a', ''], ['b', '']]], + ['a=a&&b=b', 'a=a&b=b', [['a', 'a'], ['b', 'b']]], + ['&a', 
'a=', [['a', '']]], + ['&=', '=', [['', '']]], + ['a&a&', 'a=&a=', [['a', ''], ['a', '']]], + ['a&a&a&', 'a=&a=&a=', [['a', ''], ['a', ''], ['a', '']]], + ['a&a&a&a&', 'a=&a=&a=&a=', [['a', ''], ['a', ''], ['a', ''], ['a', '']]], + ['a=&a=value&a=', 'a=&a=value&a=', [['a', ''], ['a', 'value'], ['a', '']]], + ['foo%20bar=baz%20quux', 'foo+bar=baz+quux', [['foo bar', 'baz quux']]], + ['+foo=+bar', '+foo=+bar', [[' foo', ' bar']]], + [ + // fake percent encoding + 'foo=%©ar&baz=%A©uux&xyzzy=%©ud', + 'foo=%25%C2%A9ar&baz=%25A%C2%A9uux&xyzzy=%25%C2%A9ud', + [['foo', '%©ar'], ['baz', '%A©uux'], ['xyzzy', '%©ud']] + ], + // always preserve order of key-value pairs + ['a=1&b=2&a=3', 'a=1&b=2&a=3', [['a', '1'], ['b', '2'], ['a', '3']]], + ['?a', '%3Fa=', [['?a', '']]] +]; diff --git a/test/parallel/test-whatwg-url-searchparams.js b/test/parallel/test-whatwg-url-searchparams.js index 7d6df646407269..c7acb7d909d98c 100644 --- a/test/parallel/test-whatwg-url-searchparams.js +++ b/test/parallel/test-whatwg-url-searchparams.js @@ -1,8 +1,9 @@ 'use strict'; -require('../common'); +const common = require('../common'); const assert = require('assert'); -const URL = require('url').URL; +const path = require('path'); +const { URL, URLSearchParams } = require('url'); // Tests below are not from WPT. 
const serialized = 'a=a&a=1&a=true&a=undefined&a=null&a=%EF%BF%BD' + @@ -77,3 +78,27 @@ assert.throws(() => sp.forEach(1), m.search = '?a=a&b=b'; assert.strictEqual(sp.toString(), 'a=a&b=b'); + +const tests = require(path.join(common.fixturesDir, 'url-searchparams.js')); + +for (const [input, expected, parsed] of tests) { + if (input[0] !== '?') { + const sp = new URLSearchParams(input); + assert.strictEqual(String(sp), expected); + assert.deepStrictEqual(Array.from(sp), parsed); + + m.search = input; + assert.strictEqual(String(m.searchParams), expected); + assert.deepStrictEqual(Array.from(m.searchParams), parsed); + } + + { + const sp = new URLSearchParams(`?${input}`); + assert.strictEqual(String(sp), expected); + assert.deepStrictEqual(Array.from(sp), parsed); + + m.search = `?${input}`; + assert.strictEqual(String(m.searchParams), expected); + assert.deepStrictEqual(Array.from(m.searchParams), parsed); + } +} From a2a3d6ce4fd00458140b28fb7443637ae6126ded Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Wed, 22 Mar 2017 11:39:13 -0700 Subject: [PATCH 13/30] url: use a class for WHATWG url[context] The object is used as a structure, not as a map, which `StorageObject` was designed for. PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- lib/internal/url.js | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/lib/internal/url.js b/lib/internal/url.js index 7a6ff227ed4191..64156803d8d30e 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -3,8 +3,7 @@ const util = require('util'); const { hexTable, - isHexTable, - StorageObject + isHexTable } = require('internal/querystring'); const binding = process.binding('url'); const context = Symbol('context'); @@ -97,6 +96,26 @@ class TupleOrigin { } } +// This class provides the internal state of a URL object. An instance of this +// class is stored in every URL object and is accessed internally by setters +// and getters. 
It roughly corresponds to the concept of a URL record in the +// URL Standard, with a few differences. It is also the object transported to +// the C++ binding. +// Refs: https://url.spec.whatwg.org/#concept-url +class URLContext { + constructor() { + this.flags = 0; + this.scheme = undefined; + this.username = undefined; + this.password = undefined; + this.host = undefined; + this.port = undefined; + this.path = []; + this.query = undefined; + this.fragment = undefined; + } +} + function onParseComplete(flags, protocol, username, password, host, port, path, query, fragment) { var ctx = this[context]; @@ -125,7 +144,7 @@ function onParseError(flags, input) { // Reused by URL constructor and URL#href setter. function parse(url, input, base) { const base_context = base ? base[context] : undefined; - url[context] = new StorageObject(); + url[context] = new URLContext(); binding.parse(input.trim(), -1, base_context, undefined, onParseComplete.bind(url), onParseError); From 75ef213b22770465ac36c962d6621c592713c100 Mon Sep 17 00:00:00 2001 From: James M Snell Date: Sun, 26 Mar 2017 19:49:33 -0700 Subject: [PATCH 14/30] url: add ToObject method to native URL class Provides a factory method to convert a native URL class into a JS URL object. ```c++ Environment* env = ... 
URL url("http://example.org/a/b/c?query#fragment"); MaybeLocal val = url.ToObject(env); ``` PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- lib/internal/bootstrap_node.js | 4 ++ lib/internal/url.js | 25 +++++++++ src/env.h | 1 + src/node_url.cc | 96 ++++++++++++++++++++++++++++------ src/node_url.h | 9 ++++ 5 files changed, 118 insertions(+), 17 deletions(-) diff --git a/lib/internal/bootstrap_node.js b/lib/internal/bootstrap_node.js index 1aa2f8c23b442a..3e53d6829bc1a3 100644 --- a/lib/internal/bootstrap_node.js +++ b/lib/internal/bootstrap_node.js @@ -54,6 +54,10 @@ _process.setupRawDebug(); + // Ensure setURLConstructor() is called before the native + // URL::ToObject() method is used. + NativeModule.require('internal/url'); + Object.defineProperty(process, 'argv0', { enumerable: true, configurable: false, diff --git a/lib/internal/url.js b/lib/internal/url.js index 64156803d8d30e..5fcabb803ef473 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -1400,6 +1400,31 @@ function getPathFromURL(path) { return isWindows ? 
getPathFromURLWin32(path) : getPathFromURLPosix(path); } +function NativeURL(ctx) { + this[context] = ctx; +} +NativeURL.prototype = URL.prototype; + +function constructUrl(flags, protocol, username, password, + host, port, path, query, fragment) { + var ctx = new URLContext(); + ctx.flags = flags; + ctx.scheme = protocol; + ctx.username = username; + ctx.password = password; + ctx.port = port; + ctx.path = path; + ctx.query = query; + ctx.fragment = fragment; + ctx.host = host; + const url = new NativeURL(ctx); + url[searchParams] = new URLSearchParams(); + url[searchParams][context] = url; + initSearchParams(url[searchParams], query); + return url; +} +binding.setURLConstructor(constructUrl); + module.exports = { toUSVString, getPathFromURL, diff --git a/src/env.h b/src/env.h index 28f9e0c1728fd9..2e3337c44fe30f 100644 --- a/src/env.h +++ b/src/env.h @@ -249,6 +249,7 @@ namespace node { V(tls_wrap_constructor_template, v8::FunctionTemplate) \ V(tty_constructor_template, v8::FunctionTemplate) \ V(udp_constructor_function, v8::Function) \ + V(url_constructor_function, v8::Function) \ V(write_wrap_constructor_function, v8::Function) \ class Environment; diff --git a/src/node_url.cc b/src/node_url.cc index 6cd78c2c6c04c8..4f3525332ebd94 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -27,9 +27,11 @@ using v8::HandleScope; using v8::Integer; using v8::Isolate; using v8::Local; +using v8::MaybeLocal; using v8::Null; using v8::Object; using v8::String; +using v8::TryCatch; using v8::Undefined; using v8::Value; @@ -1226,6 +1228,29 @@ namespace url { } } + static inline void SetArgs(Environment* env, + Local argv[], + const struct url_data* url) { + Isolate* isolate = env->isolate(); + argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags); + if (url->flags & URL_FLAGS_HAS_SCHEME) + argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str()); + if (url->flags & URL_FLAGS_HAS_USERNAME) + argv[ARG_USERNAME] = UTF8STRING(isolate, url->username); + if 
(url->flags & URL_FLAGS_HAS_PASSWORD) + argv[ARG_PASSWORD] = UTF8STRING(isolate, url->password); + if (url->flags & URL_FLAGS_HAS_HOST) + argv[ARG_HOST] = UTF8STRING(isolate, url->host); + if (url->flags & URL_FLAGS_HAS_QUERY) + argv[ARG_QUERY] = UTF8STRING(isolate, url->query); + if (url->flags & URL_FLAGS_HAS_FRAGMENT) + argv[ARG_FRAGMENT] = UTF8STRING(isolate, url->fragment); + if (url->port > -1) + argv[ARG_PORT] = Integer::New(isolate, url->port); + if (url->flags & URL_FLAGS_HAS_PATH) + argv[ARG_PATH] = Copy(env, url->path); + } + static void Parse(Environment* env, Local recv, const char* input, @@ -1267,23 +1292,7 @@ namespace url { undef, undef, }; - argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags); - if (url.flags & URL_FLAGS_HAS_SCHEME) - argv[ARG_PROTOCOL] = OneByteString(isolate, url.scheme.c_str()); - if (url.flags & URL_FLAGS_HAS_USERNAME) - argv[ARG_USERNAME] = UTF8STRING(isolate, url.username); - if (url.flags & URL_FLAGS_HAS_PASSWORD) - argv[ARG_PASSWORD] = UTF8STRING(isolate, url.password); - if (url.flags & URL_FLAGS_HAS_HOST) - argv[ARG_HOST] = UTF8STRING(isolate, url.host); - if (url.flags & URL_FLAGS_HAS_QUERY) - argv[ARG_QUERY] = UTF8STRING(isolate, url.query); - if (url.flags & URL_FLAGS_HAS_FRAGMENT) - argv[ARG_FRAGMENT] = UTF8STRING(isolate, url.fragment); - if (url.port > -1) - argv[ARG_PORT] = Integer::New(isolate, url.port); - if (url.flags & URL_FLAGS_HAS_PATH) - argv[ARG_PATH] = Copy(env, url.path); + SetArgs(env, argv, &url); (void)cb->Call(context, recv, arraysize(argv), argv); } else if (error_cb->IsFunction()) { Local argv[2] = { undef, undef }; @@ -1418,6 +1427,58 @@ namespace url { v8::NewStringType::kNormal).ToLocalChecked()); } + // This function works by calling out to a JS function that creates and + // returns the JS URL object. Be mindful of the JS<->Native boundary + // crossing that is required. 
+ const Local URL::ToObject(Environment* env) const { + Isolate* isolate = env->isolate(); + Local context = env->context(); + HandleScope handle_scope(isolate); + Context::Scope context_scope(context); + + const Local undef = Undefined(isolate); + + if (context_.flags & URL_FLAGS_FAILED) + return Local(); + + Local argv[9] = { + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + }; + SetArgs(env, argv, &context_); + + TryCatch try_catch(isolate); + + // The SetURLConstructor method must have been called already to + // set the constructor function used below. SetURLConstructor is + // called automatically when the internal/url.js module is loaded + // during the internal/bootstrap_node.js processing. + MaybeLocal ret = + env->url_constructor_function() + ->Call(env->context(), undef, 9, argv); + + if (ret.IsEmpty()) { + ClearFatalExceptionHandlers(env); + FatalException(isolate, try_catch); + } + + return ret.ToLocalChecked(); + } + + static void SetURLConstructor(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_EQ(args.Length(), 1); + CHECK(args[0]->IsFunction()); + env->set_url_constructor_function(args[0].As()); + } + static void Init(Local target, Local unused, Local context, @@ -1428,6 +1489,7 @@ namespace url { env->SetMethod(target, "toUSVString", ToUSVString); env->SetMethod(target, "domainToASCII", DomainToASCII); env->SetMethod(target, "domainToUnicode", DomainToUnicode); + env->SetMethod(target, "setURLConstructor", SetURLConstructor); #define XX(name, _) NODE_DEFINE_CONSTANT(target, name); FLAGS(XX) diff --git a/src/node_url.h b/src/node_url.h index b9d91782be9e59..4d18eb6f0a910d 100644 --- a/src/node_url.h +++ b/src/node_url.h @@ -4,11 +4,18 @@ #if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS #include "node.h" +#include "env.h" +#include "env-inl.h" + #include namespace node { namespace url { +using v8::Local; +using v8::Value; + + #define BIT_AT(a, i) \ (!!((unsigned 
int) (a)[(unsigned int) (i) >> 3] & \ (1 << ((unsigned int) (i) & 7)))) @@ -619,6 +626,8 @@ class URL { return ret; } + const Local ToObject(Environment* env) const; + private: struct url_data context_; }; From 5b7b775e54a541a03b93cbb14679f7f5ad2d5358 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Wed, 15 Mar 2017 19:33:08 -0700 Subject: [PATCH 15/30] src: WHATWG URL C++ parser cleanup - Clarify port state - Remove scheme flag - Clarify URL_FLAGS_TERMINATED PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- src/node_url.cc | 54 ++++++++++++++++++++++++++++--------------------- src/node_url.h | 13 ++++++------ 2 files changed, 37 insertions(+), 30 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 4f3525332ebd94..54a2944588071c 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -494,7 +494,9 @@ namespace url { if (flags->IsInt32()) base->flags = flags->Int32Value(context).FromJust(); - GET_AND_SET(env, base_obj, scheme, base, URL_FLAGS_HAS_SCHEME); + Local scheme = GET(env, base_obj, "scheme"); + base->scheme = Utf8Value(env->isolate(), scheme).out(); + GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME); GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD); GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST); @@ -644,7 +646,7 @@ namespace url { state = kNoScheme; continue; } else { - url->flags |= URL_FLAGS_TERMINATED; + url->flags |= URL_FLAGS_FAILED; return; } break; @@ -654,10 +656,12 @@ namespace url { p++; continue; } else if (ch == ':' || (has_state_override && ch == kEOL)) { - buffer += ':'; if (buffer.size() > 0) { - url->flags |= URL_FLAGS_HAS_SCHEME; + buffer += ':'; url->scheme = buffer; + } else if (has_state_override) { + url->flags |= URL_FLAGS_TERMINATED; + return; } if (IsSpecial(url->scheme)) { url->flags |= URL_FLAGS_SPECIAL; @@ -672,7 +676,6 @@ namespace url { state = kFile; } else if (special && has_base && - base->flags & URL_FLAGS_HAS_SCHEME && url->scheme == 
base->scheme) { state = kSpecialRelativeOrAuthority; } else if (special) { @@ -692,7 +695,7 @@ namespace url { p = input; continue; } else { - url->flags |= URL_FLAGS_TERMINATED; + url->flags |= URL_FLAGS_FAILED; return; } break; @@ -702,7 +705,6 @@ namespace url { url->flags |= URL_FLAGS_FAILED; return; } else if (cannot_be_base && ch == '#') { - url->flags |= URL_FLAGS_HAS_SCHEME; url->scheme = base->scheme; if (IsSpecial(url->scheme)) { url->flags |= URL_FLAGS_SPECIAL; @@ -725,12 +727,10 @@ namespace url { url->flags |= URL_FLAGS_CANNOT_BE_BASE; state = kFragment; } else if (has_base && - base->flags & URL_FLAGS_HAS_SCHEME && base->scheme != "file:") { state = kRelative; continue; } else { - url->flags |= URL_FLAGS_HAS_SCHEME; url->scheme = "file:"; url->flags |= URL_FLAGS_SPECIAL; special = true; @@ -756,7 +756,6 @@ namespace url { } break; case kRelative: - url->flags |= URL_FLAGS_HAS_SCHEME; url->scheme = base->scheme; if (IsSpecial(url->scheme)) { url->flags |= URL_FLAGS_SPECIAL; @@ -951,7 +950,6 @@ namespace url { buffer.clear(); state = kPort; if (state_override == kHostname) { - url->flags |= URL_FLAGS_TERMINATED; return; } } else if (ch == kEOL || @@ -972,7 +970,6 @@ namespace url { buffer.clear(); state = kPathStart; if (has_state_override) { - url->flags |= URL_FLAGS_TERMINATED; return; } } else { @@ -996,13 +993,26 @@ namespace url { int port = 0; for (size_t i = 0; i < buffer.size(); i++) port = port * 10 + buffer[i] - '0'; - if (port >= 0 && port <= 0xffff) { - url->port = NormalizePort(url->scheme, port); - } else if (!has_state_override) { - url->flags |= URL_FLAGS_FAILED; + if (port < 0 || port > 0xffff) { + // TODO(TimothyGu): This hack is currently needed for the host + // setter since it needs access to hostname if it is valid, and + // if the FAILED flag is set the entire response to JS layer + // will be empty. 
+ if (state_override == kHost) + url->port = -1; + else + url->flags |= URL_FLAGS_FAILED; return; } + url->port = NormalizePort(url->scheme, port); buffer.clear(); + } else if (has_state_override) { + // TODO(TimothyGu): Similar case as above. + if (state_override == kHost) + url->port = -1; + else + url->flags |= URL_FLAGS_TERMINATED; + return; } state = kPathStart; continue; @@ -1014,7 +1024,6 @@ namespace url { case kFile: base_is_file = ( has_base && - base->flags & URL_FLAGS_HAS_SCHEME && base->scheme == "file:"); switch (ch) { case kEOL: @@ -1097,7 +1106,6 @@ namespace url { state = kFileHost; } else { if (has_base && - base->flags & URL_FLAGS_HAS_SCHEME && base->scheme == "file:" && base->flags & URL_FLAGS_HAS_PATH && base->path.size() > 0 && @@ -1158,8 +1166,7 @@ namespace url { url->path.push_back(""); } } else { - if (url->flags & URL_FLAGS_HAS_SCHEME && - url->scheme == "file:" && + if (url->scheme == "file:" && url->path.empty() && buffer.size() == 2 && WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) { @@ -1233,8 +1240,7 @@ namespace url { const struct url_data* url) { Isolate* isolate = env->isolate(); argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags); - if (url->flags & URL_FLAGS_HAS_SCHEME) - argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str()); + argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str()); if (url->flags & URL_FLAGS_HAS_USERNAME) argv[ARG_USERNAME] = UTF8STRING(isolate, url->username); if (url->flags & URL_FLAGS_HAS_PASSWORD) @@ -1275,7 +1281,9 @@ namespace url { HarvestBase(env, &base, base_obj.As()); URL::Parse(input, len, state_override, &url, &base, has_base); - if (url.flags & URL_FLAGS_INVALID_PARSE_STATE) + if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) || + ((state_override != kUnknownState) && + (url.flags & URL_FLAGS_TERMINATED))) return; // Define the return value placeholders diff --git a/src/node_url.h b/src/node_url.h index 4d18eb6f0a910d..5b5b65b7c27e87 100644 --- a/src/node_url.h +++ 
b/src/node_url.h @@ -451,13 +451,12 @@ static inline void PercentDecode(const char* input, XX(URL_FLAGS_INVALID_PARSE_STATE, 0x04) \ XX(URL_FLAGS_TERMINATED, 0x08) \ XX(URL_FLAGS_SPECIAL, 0x10) \ - XX(URL_FLAGS_HAS_SCHEME, 0x20) \ - XX(URL_FLAGS_HAS_USERNAME, 0x40) \ - XX(URL_FLAGS_HAS_PASSWORD, 0x80) \ - XX(URL_FLAGS_HAS_HOST, 0x100) \ - XX(URL_FLAGS_HAS_PATH, 0x200) \ - XX(URL_FLAGS_HAS_QUERY, 0x400) \ - XX(URL_FLAGS_HAS_FRAGMENT, 0x800) + XX(URL_FLAGS_HAS_USERNAME, 0x20) \ + XX(URL_FLAGS_HAS_PASSWORD, 0x40) \ + XX(URL_FLAGS_HAS_HOST, 0x80) \ + XX(URL_FLAGS_HAS_PATH, 0x100) \ + XX(URL_FLAGS_HAS_QUERY, 0x200) \ + XX(URL_FLAGS_HAS_FRAGMENT, 0x400) #define ARGS(XX) \ XX(ARG_FLAGS) \ From d912e28370ead511f3121376ca3c1b3b8c50de0a Mon Sep 17 00:00:00 2001 From: Daijiro Wachi Date: Mon, 3 Apr 2017 17:44:43 +0900 Subject: [PATCH 16/30] url: change path parsing for non-special URLs This changes the way paths are parsed for non-special URLs. It allows paths to be empty for non-special URLs and also takes that into account when serializing.
PR-URL: https://github.com/nodejs/node/pull/12507 Fixes: https://github.com/nodejs/node/issues/11962 Refs: https://github.com/whatwg/url/pull/213 Reviewed-By: James M Snell --- src/node_url.cc | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 54a2944588071c..f9965d537b9abf 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -862,8 +862,10 @@ namespace url { } break; case kRelativeSlash: - if (ch == '/' || special_back_slash) { + if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) { state = kSpecialAuthorityIgnoreSlashes; + } else if (ch == '/') { + state = kAuthority; } else { if (base->flags & URL_FLAGS_HAS_USERNAME) { url->flags |= URL_FLAGS_HAS_USERNAME; @@ -1145,9 +1147,25 @@ namespace url { } break; case kPathStart: - state = kPath; - if (ch != '/' && !special_back_slash) - continue; + if (IsSpecial(url->scheme)) { + state = kPath; + if (ch != '/' && ch != '\\') { + continue; + } + } else if (!has_state_override && ch == '?') { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query.clear(); + state = kQuery; + } else if (!has_state_override && ch == '#') { + url->flags |= URL_FLAGS_HAS_FRAGMENT; + url->fragment.clear(); + state = kFragment; + } else if (ch != kEOL) { + state = kPath; + if (ch != '/') { + continue; + } + } break; case kPath: if (ch == kEOL || @@ -1165,7 +1183,7 @@ namespace url { url->flags |= URL_FLAGS_HAS_PATH; url->path.push_back(""); } - } else { + } else if (!IsSingleDotSegment(buffer)) { if (url->scheme == "file:" && url->path.empty() && buffer.size() == 2 && From dceb12e1b1a431ad6c93b113b51190149e2eb1d9 Mon Sep 17 00:00:00 2001 From: Daijiro Wachi Date: Mon, 3 Apr 2017 17:47:46 +0900 Subject: [PATCH 17/30] test: synchronize WPT url test data PR-URL: https://github.com/nodejs/node/pull/12507 Refs: https://github.com/w3c/web-platform-tests/pull/4586 Refs: https://github.com/nodejs/node/pull/11887 Reviewed-By: James M Snell --- 
test/fixtures/url-setter-tests.js | 59 ++++++++++++++++----- test/fixtures/url-tests.js | 86 +++++++++++++++---------------- 2 files changed, 90 insertions(+), 55 deletions(-) diff --git a/test/fixtures/url-setter-tests.js b/test/fixtures/url-setter-tests.js index 9b39d0bed67bfc..8c15a3cc5ac885 100644 --- a/test/fixtures/url-setter-tests.js +++ b/test/fixtures/url-setter-tests.js @@ -34,7 +34,7 @@ module.exports = "href": "a://example.net", "new_value": "", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -42,7 +42,7 @@ module.exports = "href": "a://example.net", "new_value": "b", "expected": { - "href": "b://example.net/", + "href": "b://example.net", "protocol": "b:" } }, @@ -59,7 +59,7 @@ module.exports = "href": "a://example.net", "new_value": "B", "expected": { - "href": "b://example.net/", + "href": "b://example.net", "protocol": "b:" } }, @@ -68,7 +68,7 @@ module.exports = "href": "a://example.net", "new_value": "é", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -77,7 +77,7 @@ module.exports = "href": "a://example.net", "new_value": "0b", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -86,7 +86,7 @@ module.exports = "href": "a://example.net", "new_value": "+b", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -94,7 +94,7 @@ module.exports = "href": "a://example.net", "new_value": "bC0+-.", "expected": { - "href": "bc0+-.://example.net/", + "href": "bc0+-.://example.net", "protocol": "bc0+-.:" } }, @@ -103,7 +103,7 @@ module.exports = "href": "a://example.net", "new_value": "b,c", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -112,7 +112,7 @@ module.exports = "href": "a://example.net", "new_value": "bé", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -213,7 
+213,7 @@ module.exports = "href": "ssh://me@example.net", "new_value": "http", "expected": { - "href": "ssh://me@example.net/", + "href": "ssh://me@example.net", "protocol": "ssh:" } }, @@ -221,7 +221,7 @@ module.exports = "href": "ssh://me@example.net", "new_value": "gopher", "expected": { - "href": "ssh://me@example.net/", + "href": "ssh://me@example.net", "protocol": "ssh:" } }, @@ -229,7 +229,15 @@ module.exports = "href": "ssh://me@example.net", "new_value": "file", "expected": { - "href": "ssh://me@example.net/", + "href": "ssh://me@example.net", + "protocol": "ssh:" + } + }, + { + "href": "ssh://example.net", + "new_value": "file", + "expected": { + "href": "ssh://example.net", "protocol": "ssh:" } }, @@ -1585,6 +1593,33 @@ module.exports = "href": "http://example.net/%3F", "pathname": "/%3F" } + }, + { + "comment": "# needs to be encoded", + "href": "http://example.net", + "new_value": "#", + "expected": { + "href": "http://example.net/%23", + "pathname": "/%23" + } + }, + { + "comment": "? 
needs to be encoded, non-special scheme", + "href": "sc://example.net", + "new_value": "?", + "expected": { + "href": "sc://example.net/%3F", + "pathname": "/%3F" + } + }, + { + "comment": "# needs to be encoded, non-special scheme", + "href": "sc://example.net", + "new_value": "#", + "expected": { + "href": "sc://example.net/%23", + "pathname": "/%23" + } } ], "search": [ diff --git a/test/fixtures/url-tests.js b/test/fixtures/url-tests.js index a4e7de9f26b199..c7e63f50331c3b 100644 --- a/test/fixtures/url-tests.js +++ b/test/fixtures/url-tests.js @@ -571,21 +571,21 @@ module.exports = "search": "", "hash": "" }, - // { - // "input": "foo://", - // "base": "http://example.org/foo/bar", - // "href": "foo://", - // "origin": "null", - // "protocol": "foo:", - // "username": "", - // "password": "", - // "host": "", - // "hostname": "", - // "port": "", - // "pathname": "", - // "search": "", - // "hash": "" - // }, + { + "input": "foo://", + "base": "http://example.org/foo/bar", + "href": "foo://", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, { "input": "http://a:b@c:29/d", "base": "http://example.org/foo/bar", @@ -5338,34 +5338,34 @@ module.exports = "search": "", "hash": "" }, - // { - // "input": "////", - // "base": "sc://x/", - // "href": "sc:////", - // "protocol": "sc:", - // "username": "", - // "password": "", - // "host": "", - // "hostname": "", - // "port": "", - // "pathname": "//", - // "search": "", - // "hash": "" - // }, - // { - // "input": "////x/", - // "base": "sc://x/", - // "href": "sc:////x/", - // "protocol": "sc:", - // "username": "", - // "password": "", - // "host": "", - // "hostname": "", - // "port": "", - // "pathname": "//x/", - // "search": "", - // "hash": "" - // }, + { + "input": "////", + "base": "sc://x/", + "href": "sc:////", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + 
"hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "////x/", + "base": "sc://x/", + "href": "sc:////x/", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//x/", + "search": "", + "hash": "" + }, { "input": "tftp://foobar.com/someconfig;mode=netascii", "base": "about:blank", From 43faf56f7b35e3872d1a0fb047f1405996065933 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Mon, 20 Mar 2017 14:29:54 -0700 Subject: [PATCH 18/30] url: error when domainTo*() is called w/o argument PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- lib/internal/url.js | 6 ++++++ test/parallel/test-whatwg-url-domainto.js | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/lib/internal/url.js b/lib/internal/url.js index 5fcabb803ef473..7fafc783dba4ae 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -1308,11 +1308,17 @@ function originFor(url, base) { } function domainToASCII(domain) { + if (arguments.length < 1) + throw new TypeError('"domain" argument must be specified'); + // toUSVString is not needed. return binding.domainToASCII(`${domain}`); } function domainToUnicode(domain) { + if (arguments.length < 1) + throw new TypeError('"domain" argument must be specified'); + // toUSVString is not needed. return binding.domainToUnicode(`${domain}`); } diff --git a/test/parallel/test-whatwg-url-domainto.js b/test/parallel/test-whatwg-url-domainto.js index f891f95a19cd3b..70b32c8dce279c 100644 --- a/test/parallel/test-whatwg-url-domainto.js +++ b/test/parallel/test-whatwg-url-domainto.js @@ -12,6 +12,15 @@ const { domainToASCII, domainToUnicode } = require('url'); // Tests below are not from WPT. 
const tests = require('../fixtures/url-idna.js'); +{ + assert.throws(() => domainToASCII(), + /^TypeError: "domain" argument must be specified$/); + assert.throws(() => domainToUnicode(), + /^TypeError: "domain" argument must be specified$/); + assert.strictEqual(domainToASCII(undefined), 'undefined'); + assert.strictEqual(domainToUnicode(undefined), 'undefined'); +} + { for (const [i, { ascii, unicode }] of tests.valid.entries()) { assert.strictEqual(ascii, domainToASCII(unicode), From dafa6008d143ef3d049e34c36f5c148dc2dce9c7 Mon Sep 17 00:00:00 2001 From: Brian White Date: Sun, 5 Mar 2017 05:29:35 -0500 Subject: [PATCH 19/30] url: avoid instanceof for WHATWG URL PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- benchmark/url/url-searchparams-read.js | 2 +- benchmark/url/whatwg-url-properties.js | 2 +- lib/internal/url.js | 36 +++++++++++++++----------- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/benchmark/url/url-searchparams-read.js b/benchmark/url/url-searchparams-read.js index 94ddaf1cfa4072..762ffcca03d69d 100644 --- a/benchmark/url/url-searchparams-read.js +++ b/benchmark/url/url-searchparams-read.js @@ -5,7 +5,7 @@ const { URLSearchParams } = require('url'); const bench = common.createBenchmark(main, { method: ['get', 'getAll', 'has'], param: ['one', 'two', 'three', 'nonexistent'], - n: [1e6] + n: [2e7] }); const str = 'one=single&two=first&three=first&two=2nd&three=2nd&three=3rd'; diff --git a/benchmark/url/whatwg-url-properties.js b/benchmark/url/whatwg-url-properties.js index 9bdc9778a8c922..3a865d2335ab3c 100644 --- a/benchmark/url/whatwg-url-properties.js +++ b/benchmark/url/whatwg-url-properties.js @@ -8,7 +8,7 @@ const bench = common.createBenchmark(main, { prop: ['href', 'origin', 'protocol', 'username', 'password', 'host', 'hostname', 'port', 'pathname', 'search', 'searchParams', 'hash'], - n: [1e4] + n: [3e5] }); function setAndGet(n, url, prop, alternative) { diff --git a/lib/internal/url.js 
b/lib/internal/url.js index 7fafc783dba4ae..9a70838c30d4a1 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -239,8 +239,10 @@ class URL { constructor(input, base) { // toUSVString is not needed. input = `${input}`; - if (base !== undefined && !(base instanceof URL)) + if (base !== undefined && + (!base[searchParams] || !base[searchParams][searchParams])) { base = new URL(base); + } parse(this, input, base); } @@ -885,7 +887,7 @@ class URLSearchParams { } [util.inspect.custom](recurseTimes, ctx) { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } @@ -947,7 +949,7 @@ function merge(out, start, mid, end, lBuffer, rBuffer) { defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { append(name, value) { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } if (arguments.length < 2) { @@ -961,7 +963,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { }, delete(name) { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } if (arguments.length < 1) { @@ -982,7 +984,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { }, get(name) { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } if (arguments.length < 1) { @@ -1000,7 +1002,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { }, getAll(name) { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of 
`this` is not a URLSearchParams'); } if (arguments.length < 1) { @@ -1019,7 +1021,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { }, has(name) { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } if (arguments.length < 1) { @@ -1037,7 +1039,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { }, set(name, value) { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } if (arguments.length < 2) { @@ -1125,7 +1127,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { // Define entries here rather than [Symbol.iterator] as the function name // must be set to `entries`. entries() { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } @@ -1133,7 +1135,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { }, forEach(callback, thisArg = undefined) { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } if (typeof callback !== 'function') { @@ -1155,7 +1157,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { // https://heycam.github.io/webidl/#es-iterable keys() { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } @@ -1163,7 +1165,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { }, values() { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || 
this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } @@ -1173,7 +1175,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { // https://heycam.github.io/webidl/#es-stringifier // https://url.spec.whatwg.org/#urlsearchparams-stringification-behavior toString() { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } @@ -1275,8 +1277,10 @@ defineIDLClass(URLSearchParamsIteratorPrototype, 'URLSearchParamsIterator', { }); function originFor(url, base) { - if (!(url instanceof URL)) + if (url != undefined && + (!url[searchParams] || !url[searchParams][searchParams])) { url = new URL(url, base); + } var origin; const protocol = url.protocol; switch (protocol) { @@ -1399,8 +1403,10 @@ function getPathFromURLPosix(url) { } function getPathFromURL(path) { - if (!(path instanceof URL)) + if (path == undefined || !path[searchParams] || + !path[searchParams][searchParams]) { return path; + } if (path.protocol !== 'file:') return new TypeError('Only `file:` URLs are supported'); return isWindows ? getPathFromURLWin32(path) : getPathFromURLPosix(path); From 68cf850bc43901ba4cfa82cb1fff87cd1e4c34bf Mon Sep 17 00:00:00 2001 From: Daijiro Wachi Date: Mon, 10 Apr 2017 18:09:06 +0200 Subject: [PATCH 20/30] url: trim leading slashes of file URL paths For file URLs, the slashes after the colon should be trimmed down to three.
PR-URL: https://github.com/nodejs/node/pull/12507 Refs: https://github.com/w3c/web-platform-tests/pull/5195 Fixes: https://github.com/nodejs/node/issues/11188 Reviewed-By: James M Snell --- src/node_url.cc | 22 +- test/fixtures/url-setter-tests.js | 29 ++- test/fixtures/url-tests.js | 352 +++++++++++++++++++++++++++++- 3 files changed, 393 insertions(+), 10 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index f9965d537b9abf..16a4cdd45b54b4 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -1108,12 +1108,14 @@ namespace url { state = kFileHost; } else { if (has_base && - base->scheme == "file:" && - base->flags & URL_FLAGS_HAS_PATH && - base->path.size() > 0 && - NORMALIZED_WINDOWS_DRIVE_LETTER(base->path[0])) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path.push_back(base->path[0]); + base->scheme == "file:") { + if (NORMALIZED_WINDOWS_DRIVE_LETTER(base->path[0])) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path.push_back(base->path[0]); + } else { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; + } } state = kPath; continue; @@ -1196,6 +1198,14 @@ namespace url { url->path.push_back(segment); } buffer.clear(); + if (url->scheme == "file:" && + (ch == kEOL || + ch == '?' 
|| + ch == '#')) { + while (url->path.size() > 1 && url->path[0].length() == 0) { + url->path.erase(url->path.begin()); + } + } if (ch == '?') { url->flags |= URL_FLAGS_HAS_QUERY; state = kQuery; diff --git a/test/fixtures/url-setter-tests.js b/test/fixtures/url-setter-tests.js index 8c15a3cc5ac885..d0688611a01c0f 100644 --- a/test/fixtures/url-setter-tests.js +++ b/test/fixtures/url-setter-tests.js @@ -1,7 +1,7 @@ 'use strict'; /* WPT Refs: - https://github.com/w3c/web-platform-tests/blob/e48dd15/url/setters_tests.json + https://github.com/w3c/web-platform-tests/blob/3eff1bd/url/setters_tests.json License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html */ module.exports = @@ -1620,6 +1620,33 @@ module.exports = "href": "sc://example.net/%23", "pathname": "/%23" } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file://monkey/", + "new_value": "\\\\", + "expected": { + "href": "file://monkey/", + "pathname": "/" + } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file:///unicorn", + "new_value": "//\\/", + "expected": { + "href": "file:///", + "pathname": "/" + } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file:///unicorn", + "new_value": "//monkey/..//", + "expected": { + "href": "file:///", + "pathname": "/" + } } ], "search": [ diff --git a/test/fixtures/url-tests.js b/test/fixtures/url-tests.js index c7e63f50331c3b..3858f12db55e0d 100644 --- a/test/fixtures/url-tests.js +++ b/test/fixtures/url-tests.js @@ -1,7 +1,7 @@ 'use strict'; /* WPT Refs: - https://github.com/w3c/web-platform-tests/blob/b207902/url/urltestdata.json + https://github.com/w3c/web-platform-tests/blob/3eff1bd/url/urltestdata.json License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html */ module.exports = @@ -281,6 +281,11 @@ module.exports = "base": "http://example.org/foo/bar", "failure": true }, + { + "input": "non-special://f:999999/c", + "base": "http://example.org/foo/bar", + "failure": true + }, 
{ "input": "http://f: 21 / b ? d # e ", "base": "http://example.org/foo/bar", @@ -3669,6 +3674,35 @@ module.exports = "search": "", "hash": "" }, + // { + // "input": "https://faß.ExAmPlE/", + // "base": "about:blank", + // "href": "https://xn--fa-hia.example/", + // "origin": "https://faß.example", + // "protocol": "https:", + // "username": "", + // "password": "", + // "host": "xn--fa-hia.example", + // "hostname": "xn--fa-hia.example", + // "port": "", + // "pathname": "/", + // "search": "", + // "hash": "" + // }, + // { + // "input": "sc://faß.ExAmPlE/", + // "base": "about:blank", + // "href": "sc://fa%C3%9F.ExAmPlE/", + // "protocol": "sc:", + // "username": "", + // "password": "", + // "host": "fa%C3%9F.ExAmPlE", + // "hostname": "fa%C3%9F.ExAmPlE", + // "port": "", + // "pathname": "/", + // "search": "", + // "hash": "" + // }, "Invalid escaped characters should fail and the percents should be escaped. https://www.w3.org/Bugs/Public/show_bug.cgi?id=24191", { "input": "http://%zz%66%a.com", @@ -5110,6 +5144,318 @@ module.exports = "search": "?test", "hash": "#x" }, + "# File URLs and many (back)slashes", + { + "input": "file:\\\\//", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\\\\\", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\\\\\?fox", + "base": "about:blank", + "href": "file:///?fox", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "?fox", + "hash": "" + }, + { + "input": "file:\\\\\\\\#guppy", + "base": "about:blank", + "href": "file:///#guppy", + "protocol": "file:", + "username": "", + 
"password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "#guppy" + }, + { + "input": "file://spider///", + "base": "about:blank", + "href": "file://spider/", + "protocol": "file:", + "username": "", + "password": "", + "host": "spider", + "hostname": "spider", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\localhost//", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:///localhost//cat", + "base": "about:blank", + "href": "file:///localhost//cat", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/localhost//cat", + "search": "", + "hash": "" + }, + { + "input": "file://\\/localhost//cat", + "base": "about:blank", + "href": "file:///localhost//cat", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/localhost//cat", + "search": "", + "hash": "" + }, + { + "input": "file://localhost//a//../..//", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "/////mouse", + "base": "file:///elephant", + "href": "file:///mouse", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/mouse", + "search": "", + "hash": "" + }, + { + "input": "\\//pig", + "base": "file://lion/", + "href": "file:///pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pig", + "search": "", + "hash": "" + }, + { + "input": "\\/localhost//pig", + "base": "file://lion/", + "href": "file:///pig", 
+ "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pig", + "search": "", + "hash": "" + }, + { + "input": "//localhost//pig", + "base": "file://lion/", + "href": "file:///pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pig", + "search": "", + "hash": "" + }, + // { + // "input": "/..//localhost//pig", + // "base": "file://lion/", + // "href": "file://lion/localhost//pig", + // "protocol": "file:", + // "username": "", + // "password": "", + // "host": "lion", + // "hostname": "lion", + // "port": "", + // "pathname": "/localhost//pig", + // "search": "", + // "hash": "" + // }, + { + "input": "file://", + "base": "file://ape/", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# File URLs with non-empty hosts", + // { + // "input": "/rooibos", + // "base": "file://tea/", + // "href": "file://tea/rooibos", + // "protocol": "file:", + // "username": "", + // "password": "", + // "host": "tea", + // "hostname": "tea", + // "port": "", + // "pathname": "/rooibos", + // "search": "", + // "hash": "" + // }, + // { + // "input": "/?chai", + // "base": "file://tea/", + // "href": "file://tea/?chai", + // "protocol": "file:", + // "username": "", + // "password": "", + // "host": "tea", + // "hostname": "tea", + // "port": "", + // "pathname": "/", + // "search": "?chai", + // "hash": "" + // }, + "# Windows drive letter quirk with not empty host", + { + "input": "file://example.net/C:/", + "base": "about:blank", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file://1.2.3.4/C:/", + "base": "about:blank", + "href": "file:///C:/", + "protocol": 
"file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file://[1::8]/C:/", + "base": "about:blank", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + "# Windows drive letter quirk (no host)", + { + "input": "file:/C|/", + "base": "about:blank", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file://C|/", + "base": "about:blank", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, "# file URLs without base URL by Rimas Misevičius", { "input": "file:", @@ -5213,12 +5559,12 @@ module.exports = { "input": "http://?", "base": "about:blank", - "failure": "true" + "failure": true }, { "input": "http://#", "base": "about:blank", - "failure": "true" + "failure": true }, "# Non-special-URL path tests", // { From 752097c277959c280909f71e89e4b34bfb183e64 Mon Sep 17 00:00:00 2001 From: Daijiro Wachi Date: Wed, 12 Apr 2017 20:43:22 +0200 Subject: [PATCH 21/30] url: remove javascript URL special case PR-URL: https://github.com/nodejs/node/pull/12507 Fixes: https://github.com/nodejs/node/issues/11485 Reviewed-By: James M Snell --- lib/internal/url.js | 2 -- test/fixtures/url-setter-tests.js | 16 ++++++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/lib/internal/url.js b/lib/internal/url.js index 9a70838c30d4a1..74d8de63d90310 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -556,8 +556,6 @@ Object.defineProperties(URL.prototype, { const ctx = this[context]; // toUSVString is not needed. 
hash = `${hash}`; - if (this.protocol === 'javascript:') - return; if (!hash) { ctx.fragment = null; ctx.flags &= ~binding.URL_FLAGS_HAS_FRAGMENT; diff --git a/test/fixtures/url-setter-tests.js b/test/fixtures/url-setter-tests.js index d0688611a01c0f..4c39cb7311791b 100644 --- a/test/fixtures/url-setter-tests.js +++ b/test/fixtures/url-setter-tests.js @@ -1800,13 +1800,13 @@ module.exports = "hash": "#%c3%89t%C3%A9" } }, - // { - // "href": "javascript:alert(1)", - // "new_value": "castle", - // "expected": { - // "href": "javascript:alert(1)#castle", - // "hash": "#castle" - // } - // } + { + "href": "javascript:alert(1)", + "new_value": "castle", + "expected": { + "href": "javascript:alert(1)#castle", + "hash": "#castle" + } + } ] } From f484cfdf29318a27e317b48d50e67d978b5d7214 Mon Sep 17 00:00:00 2001 From: Daijiro Wachi Date: Fri, 14 Apr 2017 18:12:16 +0200 Subject: [PATCH 22/30] url: disallow invalid IPv4 in IPv6 parser PR-URL: https://github.com/nodejs/node/pull/12507 Fixes: https://github.com/nodejs/node/issues/10655 Reviewed-By: James M Snell --- src/node_url.cc | 26 +++--- test/fixtures/url-setter-tests.js | 144 +++++++++++++++--------------- test/fixtures/url-tests.js | 40 ++++----- 3 files changed, 106 insertions(+), 104 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 16a4cdd45b54b4..39f56ece679005 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -110,7 +110,7 @@ namespace url { uint16_t* compress_pointer = nullptr; const char* pointer = input; const char* end = pointer + length; - unsigned value, len, swaps, dots; + unsigned value, len, swaps, numbers_seen; char ch = pointer < end ? pointer[0] : kEOL; if (ch == ':') { if (length < 2 || pointer[1] != ':') @@ -148,9 +148,17 @@ namespace url { ch = pointer < end ? pointer[0] : kEOL; if (piece_pointer > last_piece - 2) goto end; - dots = 0; + numbers_seen = 0; while (ch != kEOL) { value = 0xffffffff; + if (numbers_seen > 0) { + if (ch == '.' 
&& numbers_seen < 4) { + pointer++; + ch = pointer < end ? pointer[0] : kEOL; + } else { + goto end; + } + } if (!ASCII_DIGIT(ch)) goto end; while (ASCII_DIGIT(ch)) { @@ -167,19 +175,13 @@ namespace url { pointer++; ch = pointer < end ? pointer[0] : kEOL; } - if (dots < 3 && ch != '.') - goto end; *piece_pointer = *piece_pointer * 0x100 + value; - if (dots & 0x1) + numbers_seen++; + if (numbers_seen == 2 || numbers_seen == 4) piece_pointer++; - if (ch != kEOL) { - pointer++; - ch = pointer < end ? pointer[0] : kEOL; - } - if (dots == 3 && ch != kEOL) - goto end; - dots++; } + if (numbers_seen != 4) + goto end; continue; case ':': pointer++; diff --git a/test/fixtures/url-setter-tests.js b/test/fixtures/url-setter-tests.js index 4c39cb7311791b..f537075674b77a 100644 --- a/test/fixtures/url-setter-tests.js +++ b/test/fixtures/url-setter-tests.js @@ -880,42 +880,42 @@ module.exports = "hostname": "example.net" } }, - // { - // "href": "http://example.net/", - // "new_value": "[::1.2.3.4x]", - // "expected": { - // "href": "http://example.net/", - // "host": "example.net", - // "hostname": "example.net" - // } - // }, - // { - // "href": "http://example.net/", - // "new_value": "[::1.2.3.]", - // "expected": { - // "href": "http://example.net/", - // "host": "example.net", - // "hostname": "example.net" - // } - // }, - // { - // "href": "http://example.net/", - // "new_value": "[::1.2.]", - // "expected": { - // "href": "http://example.net/", - // "host": "example.net", - // "hostname": "example.net" - // } - // }, - // { - // "href": "http://example.net/", - // "new_value": "[::1.]", - // "expected": { - // "href": "http://example.net/", - // "host": "example.net", - // "hostname": "example.net" - // } - // }, + { + "href": "http://example.net/", + "new_value": "[::1.2.3.4x]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.3.]", + "expected": { 
+ "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, // { // "href": "file://y/", // "new_value": "x:123", @@ -1214,42 +1214,42 @@ module.exports = "hostname": "example.net" } }, - // { - // "href": "http://example.net/", - // "new_value": "[::1.2.3.4x]", - // "expected": { - // "href": "http://example.net/", - // "host": "example.net", - // "hostname": "example.net" - // } - // }, - // { - // "href": "http://example.net/", - // "new_value": "[::1.2.3.]", - // "expected": { - // "href": "http://example.net/", - // "host": "example.net", - // "hostname": "example.net" - // } - // }, - // { - // "href": "http://example.net/", - // "new_value": "[::1.2.]", - // "expected": { - // "href": "http://example.net/", - // "host": "example.net", - // "hostname": "example.net" - // } - // }, - // { - // "href": "http://example.net/", - // "new_value": "[::1.]", - // "expected": { - // "href": "http://example.net/", - // "host": "example.net", - // "hostname": "example.net" - // } - // }, + { + "href": "http://example.net/", + "new_value": "[::1.2.3.4x]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.3.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.]", + "expected": { + "href": 
"http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, // { // "href": "file://y/", // "new_value": "x:123", diff --git a/test/fixtures/url-tests.js b/test/fixtures/url-tests.js index 3858f12db55e0d..d44a36bcfe7e13 100644 --- a/test/fixtures/url-tests.js +++ b/test/fixtures/url-tests.js @@ -3800,26 +3800,26 @@ module.exports = "base": "http://other.com/", "failure": true }, - // { - // "input": "http://[::1.2.3.4x]", - // "base": "http://other.com/", - // "failure": true - // }, - // { - // "input": "http://[::1.2.3.]", - // "base": "http://other.com/", - // "failure": true - // }, - // { - // "input": "http://[::1.2.]", - // "base": "http://other.com/", - // "failure": true - // }, - // { - // "input": "http://[::1.]", - // "base": "http://other.com/", - // "failure": true - // }, + { + "input": "http://[::1.2.3.4x]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.2.3.]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.2.]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.]", + "base": "http://other.com/", + "failure": true + }, "Misc Unicode", { "input": "http://foo:💩@example.com/bar", From 9288b735d8a0c15c240123669b6c6a15b01d7d5d Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Tue, 4 Apr 2017 19:13:37 -0700 Subject: [PATCH 23/30] url: clean up WHATWG URL origin generation - Use ordinary properties instead of symbols/getter redirection for internal object - Use template string literals - Remove unneeded custom inspection for internal objects - Remove unneeded OpaqueOrigin class - Remove unneeded type checks PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- lib/internal/url.js | 124 +++++++++++--------------------------------- 1 file changed, 29 insertions(+), 95 deletions(-) diff --git a/lib/internal/url.js b/lib/internal/url.js index 74d8de63d90310..629bbb2ecedd02 100644 --- a/lib/internal/url.js +++ 
b/lib/internal/url.js @@ -15,10 +15,6 @@ const os = require('os'); const isWindows = process.platform === 'win32'; -const kScheme = Symbol('scheme'); -const kHost = Symbol('host'); -const kPort = Symbol('port'); -const kDomain = Symbol('domain'); const kFormat = Symbol('format'); // https://tc39.github.io/ecma262/#sec-%iteratorprototype%-object @@ -38,62 +34,15 @@ function toUSVString(val) { return binding.toUSVString(str, match.index); } -class OpaqueOrigin { - toString() { - return 'null'; - } +// Refs: https://html.spec.whatwg.org/multipage/browsers.html#concept-origin-opaque +const kOpaqueOrigin = 'null'; - get effectiveDomain() { - return this; - } -} - -class TupleOrigin { - constructor(scheme, host, port, domain) { - this[kScheme] = scheme; - this[kHost] = host; - this[kPort] = port; - this[kDomain] = domain; - } - - get scheme() { - return this[kScheme]; - } - - get host() { - return this[kHost]; - } - - get port() { - return this[kPort]; - } - - get domain() { - return this[kDomain]; - } - - get effectiveDomain() { - return this[kDomain] || this[kHost]; - } - - // https://url.spec.whatwg.org/#dom-url-origin - toString(unicode = true) { - var result = this[kScheme]; - result += '://'; - result += unicode ? domainToUnicode(this[kHost]) : this[kHost]; - if (this[kPort] !== undefined && this[kPort] !== null) - result += `:${this[kPort]}`; - return result; - } - - [util.inspect.custom]() { - return `TupleOrigin { - scheme: ${this[kScheme]}, - host: ${this[kHost]}, - port: ${this[kPort]}, - domain: ${this[kDomain]} - }`; - } +// Refs: +// - https://html.spec.whatwg.org/multipage/browsers.html#unicode-serialisation-of-an-origin +// - https://html.spec.whatwg.org/multipage/browsers.html#ascii-serialisation-of-an-origin +function serializeTupleOrigin(scheme, host, port, unicode = true) { + const unicodeHost = unicode ? domainToUnicode(host) : host; + return `${scheme}//${unicodeHost}${port == null ? 
'' : `:${port}`}`; } // This class provides the internal state of a URL object. An instance of this @@ -359,7 +308,27 @@ Object.defineProperties(URL.prototype, { enumerable: true, configurable: true, get() { - return originFor(this).toString(); + // Refs: https://url.spec.whatwg.org/#concept-url-origin + const ctx = this[context]; + switch (ctx.scheme) { + case 'blob:': + if (ctx.path.length > 0) { + try { + return (new URL(ctx.path[0])).origin; + } catch (err) { + // fall through... do nothing + } + } + return kOpaqueOrigin; + case 'ftp:': + case 'gopher:': + case 'http:': + case 'https:': + case 'ws:': + case 'wss:': + return serializeTupleOrigin(ctx.scheme, ctx.host, ctx.port); + } + return kOpaqueOrigin; } }, protocol: { @@ -1274,41 +1243,6 @@ defineIDLClass(URLSearchParamsIteratorPrototype, 'URLSearchParamsIterator', { } }); -function originFor(url, base) { - if (url != undefined && - (!url[searchParams] || !url[searchParams][searchParams])) { - url = new URL(url, base); - } - var origin; - const protocol = url.protocol; - switch (protocol) { - case 'blob:': - if (url[context].path && url[context].path.length > 0) { - try { - return (new URL(url[context].path[0])).origin; - } catch (err) { - // fall through... 
do nothing - } - } - origin = new OpaqueOrigin(); - break; - case 'ftp:': - case 'gopher:': - case 'http:': - case 'https:': - case 'ws:': - case 'wss:': - origin = new TupleOrigin(protocol.slice(0, -1), - url[context].host, - url[context].port, - null); - break; - default: - origin = new OpaqueOrigin(); - } - return origin; -} - function domainToASCII(domain) { if (arguments.length < 1) throw new TypeError('"domain" argument must be specified'); From 8f702ef13503cbd9ee7ffcdfca1dc25b4f81bcde Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Tue, 4 Apr 2017 21:03:14 -0700 Subject: [PATCH 24/30] url: improve WHATWG URL inspection PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- lib/internal/url.js | 78 +++++++++++----- test/parallel/test-whatwg-url-inspect.js | 114 ++++++++++------------- 2 files changed, 103 insertions(+), 89 deletions(-) diff --git a/lib/internal/url.js b/lib/internal/url.js index 629bbb2ecedd02..771a916d704bac 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -184,6 +184,17 @@ function onParseHashComplete(flags, protocol, username, password, } } +function getEligibleConstructor(obj) { + while (obj !== null) { + if (Object.prototype.hasOwnProperty.call(obj, 'constructor') && + typeof obj.constructor === 'function') { + return obj.constructor; + } + obj = Object.getPrototypeOf(obj); + } + return null; +} + class URL { constructor(input, base) { // toUSVString is not needed. @@ -204,33 +215,43 @@ class URL { } [util.inspect.custom](depth, opts) { + if (this == null || + Object.getPrototypeOf(this[context]) !== URLContext.prototype) { + throw new TypeError('Value of `this` is not a URL'); + } + const ctx = this[context]; - var ret = 'URL {\n'; - ret += ` href: ${this.href}\n`; - if (ctx.scheme !== undefined) - ret += ` protocol: ${this.protocol}\n`; - if (ctx.username !== undefined) - ret += ` username: ${this.username}\n`; - if (ctx.password !== undefined) { - const pwd = opts.showHidden ? 
ctx.password : '--------'; - ret += ` password: ${pwd}\n`; - } - if (ctx.host !== undefined) - ret += ` hostname: ${this.hostname}\n`; - if (ctx.port !== undefined) - ret += ` port: ${this.port}\n`; - if (ctx.path !== undefined) - ret += ` pathname: ${this.pathname}\n`; - if (ctx.query !== undefined) - ret += ` search: ${this.search}\n`; - if (ctx.fragment !== undefined) - ret += ` hash: ${this.hash}\n`; + + if (typeof depth === 'number' && depth < 0) + return opts.stylize('[Object]', 'special'); + + const ctor = getEligibleConstructor(this); + + const obj = Object.create({ + constructor: ctor === null ? URL : ctor + }); + + obj.href = this.href; + obj.origin = this.origin; + obj.protocol = this.protocol; + obj.username = this.username; + obj.password = (opts.showHidden || ctx.password == null) ? + this.password : '--------'; + obj.host = this.host; + obj.hostname = this.hostname; + obj.port = this.port; + obj.pathname = this.pathname; + obj.search = this.search; + obj.searchParams = this.searchParams; + obj.hash = this.hash; + if (opts.showHidden) { - ret += ` cannot-be-base: ${this[cannotBeBase]}\n`; - ret += ` special: ${this[special]}\n`; + obj.cannotBeBase = this[cannotBeBase]; + obj.special = this[special]; + obj[context] = this[context]; } - ret += '}'; - return ret; + + return util.inspect(obj, opts); } } @@ -858,6 +879,9 @@ class URLSearchParams { throw new TypeError('Value of `this` is not a URLSearchParams'); } + if (typeof recurseTimes === 'number' && recurseTimes < 0) + return ctx.stylize('[Object]', 'special'); + const separator = ', '; const innerOpts = Object.assign({}, ctx); if (recurseTimes !== null) { @@ -1211,6 +1235,12 @@ defineIDLClass(URLSearchParamsIteratorPrototype, 'URLSearchParamsIterator', { }; }, [util.inspect.custom](recurseTimes, ctx) { + if (this == null || this[context] == null || this[context].target == null) + throw new TypeError('Value of `this` is not a URLSearchParamsIterator'); + + if (typeof recurseTimes === 'number' && 
recurseTimes < 0) + return ctx.stylize('[Object]', 'special'); + const innerOpts = Object.assign({}, ctx); if (recurseTimes !== null) { innerOpts.depth = recurseTimes - 1; diff --git a/test/parallel/test-whatwg-url-inspect.js b/test/parallel/test-whatwg-url-inspect.js index 4afbbc13102905..a8a59b77873f12 100644 --- a/test/parallel/test-whatwg-url-inspect.js +++ b/test/parallel/test-whatwg-url-inspect.js @@ -3,7 +3,6 @@ const common = require('../common'); const util = require('util'); const URL = require('url').URL; -const path = require('path'); const assert = require('assert'); if (!common.hasIntl) { @@ -13,71 +12,56 @@ if (!common.hasIntl) { } // Tests below are not from WPT. -const tests = require(path.join(common.fixturesDir, 'url-tests')); -const additional_tests = require( - path.join(common.fixturesDir, 'url-tests-additional')); +const url = new URL('https://username:password@host.name:8080/path/name/?que=ry#hash'); -const allTests = additional_tests.slice(); -for (const test of tests) { - if (test.failure || typeof test === 'string') continue; - allTests.push(test); -} - -for (const test of allTests) { - const url = test.url ? 
new URL(test.url) : new URL(test.input, test.base); - - for (const showHidden of [true, false]) { - const res = util.inspect(url, { - showHidden - }); - - const lines = res.split('\n'); +assert.strictEqual( + util.inspect(url), + `URL { + href: 'https://username:password@host.name:8080/path/name/?que=ry#hash', + origin: 'https://host.name:8080', + protocol: 'https:', + username: 'username', + password: '--------', + host: 'host.name:8080', + hostname: 'host.name', + port: '8080', + pathname: '/path/name/', + search: '?que=ry', + searchParams: URLSearchParams { 'que' => 'ry' }, + hash: '#hash' }`); - const firstLine = lines[0]; - assert.strictEqual(firstLine, 'URL {'); +assert.strictEqual( + util.inspect(url, { showHidden: true }), + `URL { + href: 'https://username:password@host.name:8080/path/name/?que=ry#hash', + origin: 'https://host.name:8080', + protocol: 'https:', + username: 'username', + password: 'password', + host: 'host.name:8080', + hostname: 'host.name', + port: '8080', + pathname: '/path/name/', + search: '?que=ry', + searchParams: URLSearchParams { 'que' => 'ry' }, + hash: '#hash', + cannotBeBase: false, + special: true, + [Symbol(context)]:\x20 + URLContext { + flags: 2032, + scheme: 'https:', + username: 'username', + password: 'password', + host: 'host.name', + port: 8080, + path: [ 'path', 'name', '', [length]: 3 ], + query: 'que=ry', + fragment: 'hash' } }`); - const lastLine = lines[lines.length - 1]; - assert.strictEqual(lastLine, '}'); +assert.strictEqual( + util.inspect({ a: url }, { depth: 0 }), + '{ a: [Object] }'); - const innerLines = lines.slice(1, lines.length - 1); - const keys = new Set(); - for (const line of innerLines) { - const i = line.indexOf(': '); - const k = line.slice(0, i).trim(); - const v = line.slice(i + 2); - assert.strictEqual(keys.has(k), false, 'duplicate key found: ' + k); - keys.add(k); - - const hidden = new Set([ - 'password', - 'cannot-be-base', - 'special' - ]); - if (showHidden) { - if (!hidden.has(k)) { - 
assert.strictEqual(v, url[k], k); - continue; - } - - if (k === 'password') { - assert.strictEqual(v, url[k], k); - } - if (k === 'cannot-be-base') { - assert.ok(v.match(/^true$|^false$/), k + ' is Boolean'); - } - if (k === 'special') { - assert.ok(v.match(/^true$|^false$/), k + ' is Boolean'); - } - continue; - } - - // showHidden is false - if (k === 'password') { - assert.strictEqual(v, '--------', k); - continue; - } - assert.strictEqual(hidden.has(k), false, 'no hidden keys: ' + k); - assert.strictEqual(v, url[k], k); - } - } -} +class MyURL extends URL {} +assert(util.inspect(new MyURL(url.href)).startsWith('MyURL {')); From 473bd5e64783a93b7598c43024c71137743c6ee7 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Wed, 5 Apr 2017 21:22:53 -0700 Subject: [PATCH 25/30] src: clean up WHATWG WG parser * reduce indentation * refactor URL inlined methods * prefer templates over macros * do not export ARG_* flags in url binding PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- src/node_url.cc | 3113 +++++++++++++++++++++++++++-------------------- src/node_url.h | 467 ------- 2 files changed, 1797 insertions(+), 1783 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 39f56ece679005..7df9461fdd25b1 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -15,8 +15,6 @@ #include #include -#define UNICODE_REPLACEMENT_CHARACTER 0xFFFD - namespace node { using v8::Array; @@ -55,709 +53,1142 @@ using v8::Value; namespace url { -#if defined(NODE_HAVE_I18N_SUPPORT) - static inline bool ToUnicode(std::string* input, std::string* output) { - MaybeStackBuffer buf; - if (i18n::ToUnicode(&buf, input->c_str(), input->length()) < 0) - return false; - output->assign(*buf, buf.length()); - return true; - } +// https://url.spec.whatwg.org/#eof-code-point +static const char kEOL = -1; + +// Used in ToUSVString(). 
+static const char16_t kUnicodeReplacementCharacter = 0xFFFD; + +union url_host_value { + std::string domain; + uint32_t ipv4; + uint16_t ipv6[8]; + ~url_host_value() {} +}; + +enum url_host_type { + HOST_TYPE_FAILED = -1, + HOST_TYPE_DOMAIN = 0, + HOST_TYPE_IPV4 = 1, + HOST_TYPE_IPV6 = 2 +}; + +struct url_host { + url_host_value value; + enum url_host_type type; +}; + +#define ARGS(XX) \ + XX(ARG_FLAGS) \ + XX(ARG_PROTOCOL) \ + XX(ARG_USERNAME) \ + XX(ARG_PASSWORD) \ + XX(ARG_HOST) \ + XX(ARG_PORT) \ + XX(ARG_PATH) \ + XX(ARG_QUERY) \ + XX(ARG_FRAGMENT) + +#define ERR_ARGS(XX) \ + XX(ERR_ARG_FLAGS) \ + XX(ERR_ARG_INPUT) \ + +enum url_cb_args { +#define XX(name) name, + ARGS(XX) +#undef XX +}; - static inline bool ToASCII(std::string* input, std::string* output) { - MaybeStackBuffer buf; - if (i18n::ToASCII(&buf, input->c_str(), input->length()) < 0) - return false; - output->assign(*buf, buf.length()); - return true; - } -#else - // Intentional non-ops if ICU is not present. - static inline bool ToUnicode(std::string* input, std::string* output) { - *output = *input; - return true; +enum url_error_cb_args { +#define XX(name) name, + ERR_ARGS(XX) +#undef XX +}; + +#define CHAR_TEST(bits, name, expr) \ + template \ + static inline bool name(const T ch) { \ + static_assert(sizeof(ch) >= (bits) / 8, \ + "Character must be wider than " #bits " bits"); \ + return (expr); \ } - static inline bool ToASCII(std::string* input, std::string* output) { - *output = *input; - return true; +#define TWO_CHAR_STRING_TEST(bits, name, expr) \ + template \ + static inline bool name(const T ch1, const T ch2) { \ + static_assert(sizeof(ch1) >= (bits) / 8, \ + "Character must be wider than " #bits " bits"); \ + return (expr); \ + } \ + template \ + static inline bool name(const std::basic_string& str) { \ + static_assert(sizeof(str[0]) >= (bits) / 8, \ + "Character must be wider than " #bits " bits"); \ + return str.length() >= 2 && name(str[0], str[1]); \ } -#endif - // If a UTF-16 
character is a low/trailing surrogate. - static inline bool IsUnicodeTrail(uint16_t c) { - return (c & 0xFC00) == 0xDC00; +// https://infra.spec.whatwg.org/#ascii-tab-or-newline +CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r')) + +// https://infra.spec.whatwg.org/#ascii-digit +CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9')) + +// https://infra.spec.whatwg.org/#ascii-hex-digit +CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) || + (ch >= 'A' && ch <= 'F') || + (ch >= 'a' && ch <= 'f'))) + +// https://infra.spec.whatwg.org/#ascii-alpha +CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') || + (ch >= 'a' && ch <= 'z'))) + +// https://infra.spec.whatwg.org/#ascii-alphanumeric +CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch))) + +// https://infra.spec.whatwg.org/#ascii-lowercase +template +static inline T ASCIILowercase(T ch) { + return IsASCIIAlpha(ch) ? (ch | 0x20) : ch; +} + +// https://url.spec.whatwg.org/#windows-drive-letter +TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter, + (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|'))) + +// https://url.spec.whatwg.org/#normalized-windows-drive-letter +TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter, + (IsASCIIAlpha(ch1) && ch2 == ':')) + +// If a UTF-16 character is a low/trailing surrogate. +CHAR_TEST(16, IsUnicodeTrail, (ch & 0xFC00) == 0xDC00) + +// If a UTF-16 character is a surrogate. +CHAR_TEST(16, IsUnicodeSurrogate, (ch & 0xF800) == 0xD800) + +// If a UTF-16 surrogate is a low/trailing one. 
+CHAR_TEST(16, IsUnicodeSurrogateTrail, (ch & 0x400) != 0) + +#undef CHAR_TEST +#undef TWO_CHAR_STRING_TEST + +static const char* hex[256] = { + "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", + "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", + "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", + "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", + "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27", + "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F", + "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37", + "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F", + "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47", + "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F", + "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57", + "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F", + "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67", + "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F", + "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77", + "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F", + "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", + "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", + "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", + "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", + "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", + "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", + "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", + "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", + "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", + "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", + "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", + "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", + "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", + "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", + "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", + "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" 
+}; + +static const uint8_t SIMPLE_ENCODE_SET[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 
0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 +}; + +static const uint8_t DEFAULT_ENCODE_SET[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x80, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 
6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x08 | 0x00 | 0x20 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 +}; + +static const uint8_t USERINFO_ENCODE_SET[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 
0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 40 41 42 43 44 45 46 47 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x40 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 
D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 +}; + +static const uint8_t QUERY_ENCODE_SET[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 
| 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 +}; + +static inline bool BitAt(const uint8_t a[], const uint8_t i) { + return !!(a[i >> 3] & (1 << (i & 7))); +} + +// Appends ch to str. If ch position in encode_set is set, the ch will +// be percent-encoded then appended. 
+static inline void AppendOrEscape(std::string* str, + const unsigned char ch, + const uint8_t encode_set[]) { + if (BitAt(encode_set, ch)) + *str += hex[ch]; + else + *str += ch; +} + +template +static inline unsigned hex2bin(const T ch) { + if (ch >= '0' && ch <= '9') + return ch - '0'; + if (ch >= 'A' && ch <= 'F') + return 10 + (ch - 'A'); + if (ch >= 'a' && ch <= 'f') + return 10 + (ch - 'a'); + return static_cast(-1); +} + +static inline void PercentDecode(const char* input, + size_t len, + std::string* dest) { + if (len == 0) + return; + dest->reserve(len); + const char* pointer = input; + const char* end = input + len; + size_t remaining = pointer - end - 1; + while (pointer < end) { + const char ch = pointer[0]; + remaining = (end - pointer) + 1; + if (ch != '%' || remaining < 2 || + (ch == '%' && + (!IsASCIIHexDigit(pointer[1]) || + !IsASCIIHexDigit(pointer[2])))) { + *dest += ch; + pointer++; + continue; + } else { + unsigned a = hex2bin(pointer[1]); + unsigned b = hex2bin(pointer[2]); + char c = static_cast(a * 16 + b); + *dest += c; + pointer += 3; + } } +} + +#define SPECIALS(XX) \ + XX("ftp:", 21) \ + XX("file:", -1) \ + XX("gopher:", 70) \ + XX("http:", 80) \ + XX("https:", 443) \ + XX("ws:", 80) \ + XX("wss:", 443) + +static inline bool IsSpecial(std::string scheme) { +#define XX(name, _) if (scheme == name) return true; + SPECIALS(XX); +#undef XX + return false; +} - // If a UTF-16 character is a surrogate. - static inline bool IsUnicodeSurrogate(uint16_t c) { - return (c & 0xF800) == 0xD800; - } +static inline int NormalizePort(std::string scheme, int p) { +#define XX(name, port) if (scheme == name && p == port) return -1; + SPECIALS(XX); +#undef XX + return p; +} - // If a UTF-16 surrogate is a low/trailing one. 
- static inline bool IsUnicodeSurrogateTrail(uint16_t c) { - return (c & 0x400) != 0; - } +#if defined(NODE_HAVE_I18N_SUPPORT) +static inline bool ToUnicode(std::string* input, std::string* output) { + MaybeStackBuffer buf; + if (i18n::ToUnicode(&buf, input->c_str(), input->length()) < 0) + return false; + output->assign(*buf, buf.length()); + return true; +} + +static inline bool ToASCII(std::string* input, std::string* output) { + MaybeStackBuffer buf; + if (i18n::ToASCII(&buf, input->c_str(), input->length()) < 0) + return false; + output->assign(*buf, buf.length()); + return true; +} +#else +// Intentional non-ops if ICU is not present. +static inline bool ToUnicode(std::string* input, std::string* output) { + *output = *input; + return true; +} + +static inline bool ToASCII(std::string* input, std::string* output) { + *output = *input; + return true; +} +#endif - static url_host_type ParseIPv6Host(url_host* host, - const char* input, - size_t length) { - url_host_type type = HOST_TYPE_FAILED; - for (unsigned n = 0; n < 8; n++) - host->value.ipv6[n] = 0; - uint16_t* piece_pointer = &host->value.ipv6[0]; - uint16_t* last_piece = piece_pointer + 8; - uint16_t* compress_pointer = nullptr; - const char* pointer = input; - const char* end = pointer + length; - unsigned value, len, swaps, numbers_seen; - char ch = pointer < end ? pointer[0] : kEOL; +static url_host_type ParseIPv6Host(url_host* host, + const char* input, + size_t length) { + url_host_type type = HOST_TYPE_FAILED; + for (unsigned n = 0; n < 8; n++) + host->value.ipv6[n] = 0; + uint16_t* piece_pointer = &host->value.ipv6[0]; + uint16_t* last_piece = piece_pointer + 8; + uint16_t* compress_pointer = nullptr; + const char* pointer = input; + const char* end = pointer + length; + unsigned value, len, swaps, numbers_seen; + char ch = pointer < end ? pointer[0] : kEOL; + if (ch == ':') { + if (length < 2 || pointer[1] != ':') + goto end; + pointer += 2; + ch = pointer < end ? 
pointer[0] : kEOL; + piece_pointer++; + compress_pointer = piece_pointer; + } + while (ch != kEOL) { + if (piece_pointer > last_piece) + goto end; if (ch == ':') { - if (length < 2 || pointer[1] != ':') + if (compress_pointer != nullptr) goto end; - pointer += 2; + pointer++; ch = pointer < end ? pointer[0] : kEOL; piece_pointer++; compress_pointer = piece_pointer; + continue; } - while (ch != kEOL) { - if (piece_pointer > last_piece) - goto end; - if (ch == ':') { - if (compress_pointer != nullptr) + value = 0; + len = 0; + while (len < 4 && IsASCIIHexDigit(ch)) { + value = value * 0x10 + hex2bin(ch); + pointer++; + ch = pointer < end ? pointer[0] : kEOL; + len++; + } + switch (ch) { + case '.': + if (len == 0) goto end; - pointer++; + pointer -= len; ch = pointer < end ? pointer[0] : kEOL; - piece_pointer++; - compress_pointer = piece_pointer; - continue; - } - value = 0; - len = 0; - while (len < 4 && ASCII_HEX_DIGIT(ch)) { - value = value * 0x10 + hex2bin(ch); - pointer++; - ch = pointer < end ? pointer[0] : kEOL; - len++; - } - switch (ch) { - case '.': - if (len == 0) - goto end; - pointer -= len; - ch = pointer < end ? pointer[0] : kEOL; - if (piece_pointer > last_piece - 2) - goto end; - numbers_seen = 0; - while (ch != kEOL) { - value = 0xffffffff; - if (numbers_seen > 0) { - if (ch == '.' && numbers_seen < 4) { - pointer++; - ch = pointer < end ? pointer[0] : kEOL; - } else { - goto end; - } - } - if (!ASCII_DIGIT(ch)) - goto end; - while (ASCII_DIGIT(ch)) { - unsigned number = ch - '0'; - if (value == 0xffffffff) { - value = number; - } else if (value == 0) { - goto end; - } else { - value = value * 10 + number; - } - if (value > 255) - goto end; + if (piece_pointer > last_piece - 2) + goto end; + numbers_seen = 0; + while (ch != kEOL) { + value = 0xffffffff; + if (numbers_seen > 0) { + if (ch == '.' && numbers_seen < 4) { pointer++; ch = pointer < end ? 
pointer[0] : kEOL; + } else { + goto end; } - *piece_pointer = *piece_pointer * 0x100 + value; - numbers_seen++; - if (numbers_seen == 2 || numbers_seen == 4) - piece_pointer++; } - if (numbers_seen != 4) - goto end; - continue; - case ':': - pointer++; - ch = pointer < end ? pointer[0] : kEOL; - if (ch == kEOL) + if (!IsASCIIDigit(ch)) goto end; - break; - case kEOL: - break; - default: + while (IsASCIIDigit(ch)) { + unsigned number = ch - '0'; + if (value == 0xffffffff) { + value = number; + } else if (value == 0) { + goto end; + } else { + value = value * 10 + number; + } + if (value > 255) + goto end; + pointer++; + ch = pointer < end ? pointer[0] : kEOL; + } + *piece_pointer = *piece_pointer * 0x100 + value; + numbers_seen++; + if (numbers_seen == 2 || numbers_seen == 4) + piece_pointer++; + } + if (numbers_seen != 4) goto end; - } - *piece_pointer = value; - piece_pointer++; - } - - if (compress_pointer != nullptr) { - swaps = piece_pointer - compress_pointer; - piece_pointer = last_piece - 1; - while (piece_pointer != &host->value.ipv6[0] && swaps > 0) { - uint16_t temp = *piece_pointer; - uint16_t* swap_piece = compress_pointer + swaps - 1; - *piece_pointer = *swap_piece; - *swap_piece = temp; - piece_pointer--; - swaps--; - } - } else if (compress_pointer == nullptr && - piece_pointer != last_piece) { - goto end; + continue; + case ':': + pointer++; + ch = pointer < end ? 
pointer[0] : kEOL; + if (ch == kEOL) + goto end; + break; + case kEOL: + break; + default: + goto end; } - type = HOST_TYPE_IPV6; - end: - host->type = type; - return type; + *piece_pointer = value; + piece_pointer++; } - static inline int64_t ParseNumber(const char* start, const char* end) { - unsigned R = 10; - if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') { - start += 2; - R = 16; + if (compress_pointer != nullptr) { + swaps = piece_pointer - compress_pointer; + piece_pointer = last_piece - 1; + while (piece_pointer != &host->value.ipv6[0] && swaps > 0) { + uint16_t temp = *piece_pointer; + uint16_t* swap_piece = compress_pointer + swaps - 1; + *piece_pointer = *swap_piece; + *swap_piece = temp; + piece_pointer--; + swaps--; } - if (end - start == 0) { - return 0; - } else if (R == 10 && end - start > 1 && start[0] == '0') { - start++; - R = 8; - } - const char* p = start; - - while (p < end) { - const char ch = p[0]; - switch (R) { - case 8: - if (ch < '0' || ch > '7') - return -1; - break; - case 10: - if (!ASCII_DIGIT(ch)) - return -1; - break; - case 16: - if (!ASCII_HEX_DIGIT(ch)) - return -1; - break; - } - p++; + } else if (compress_pointer == nullptr && + piece_pointer != last_piece) { + goto end; + } + type = HOST_TYPE_IPV6; + end: + host->type = type; + return type; +} + +static inline int64_t ParseNumber(const char* start, const char* end) { + unsigned R = 10; + if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') { + start += 2; + R = 16; + } + if (end - start == 0) { + return 0; + } else if (R == 10 && end - start > 1 && start[0] == '0') { + start++; + R = 8; + } + const char* p = start; + + while (p < end) { + const char ch = p[0]; + switch (R) { + case 8: + if (ch < '0' || ch > '7') + return -1; + break; + case 10: + if (!IsASCIIDigit(ch)) + return -1; + break; + case 16: + if (!IsASCIIHexDigit(ch)) + return -1; + break; } - return strtoll(start, NULL, R); + p++; } + return strtoll(start, NULL, R); +} + 
+static url_host_type ParseIPv4Host(url_host* host, + const char* input, + size_t length) { + url_host_type type = HOST_TYPE_DOMAIN; + const char* pointer = input; + const char* mark = input; + const char* end = pointer + length; + int parts = 0; + uint32_t val = 0; + uint64_t numbers[4]; + int tooBigNumbers = 0; + if (length == 0) + goto end; + + while (pointer <= end) { + const char ch = pointer < end ? pointer[0] : kEOL; + const int remaining = end - pointer - 1; + if (ch == '.' || ch == kEOL) { + if (++parts > 4) + goto end; + if (pointer - mark == 0) + break; + int64_t n = ParseNumber(mark, pointer); + if (n < 0) + goto end; - static url_host_type ParseIPv4Host(url_host* host, - const char* input, - size_t length) { - url_host_type type = HOST_TYPE_DOMAIN; - const char* pointer = input; - const char* mark = input; - const char* end = pointer + length; - int parts = 0; - uint32_t val = 0; - uint64_t numbers[4]; - int tooBigNumbers = 0; - if (length == 0) - goto end; - - while (pointer <= end) { - const char ch = pointer < end ? pointer[0] : kEOL; - const int remaining = end - pointer - 1; - if (ch == '.' || ch == kEOL) { - if (++parts > 4) - goto end; - if (pointer - mark == 0) - break; - int64_t n = ParseNumber(mark, pointer); - if (n < 0) - goto end; - - if (n > 255) { - tooBigNumbers++; - } - numbers[parts - 1] = n; - mark = pointer + 1; - if (ch == '.' && remaining == 0) - break; + if (n > 255) { + tooBigNumbers++; } - pointer++; - } - CHECK_GT(parts, 0); - - // If any but the last item in numbers is greater than 255, return failure. - // If the last item in numbers is greater than or equal to - // 256^(5 - the number of items in numbers), return failure. 
- if (tooBigNumbers > 1 || - (tooBigNumbers == 1 && numbers[parts - 1] <= 255) || - numbers[parts - 1] >= pow(256, static_cast(5 - parts))) { - type = HOST_TYPE_FAILED; - goto end; - } - - type = HOST_TYPE_IPV4; - val = numbers[parts - 1]; - for (int n = 0; n < parts - 1; n++) { - double b = 3 - n; - val += numbers[n] * pow(256, b); + numbers[parts - 1] = n; + mark = pointer + 1; + if (ch == '.' && remaining == 0) + break; } - - host->value.ipv4 = val; - end: - host->type = type; - return type; + pointer++; + } + CHECK_GT(parts, 0); + + // If any but the last item in numbers is greater than 255, return failure. + // If the last item in numbers is greater than or equal to + // 256^(5 - the number of items in numbers), return failure. + if (tooBigNumbers > 1 || + (tooBigNumbers == 1 && numbers[parts - 1] <= 255) || + numbers[parts - 1] >= pow(256, static_cast(5 - parts))) { + type = HOST_TYPE_FAILED; + goto end; } - static url_host_type ParseHost(url_host* host, - const char* input, - size_t length, - bool unicode = false) { - url_host_type type = HOST_TYPE_FAILED; - const char* pointer = input; - std::string decoded; + type = HOST_TYPE_IPV4; + val = numbers[parts - 1]; + for (int n = 0; n < parts - 1; n++) { + double b = 3 - n; + val += numbers[n] * pow(256, b); + } - if (length == 0) + host->value.ipv4 = val; + end: + host->type = type; + return type; +} + +static url_host_type ParseHost(url_host* host, + const char* input, + size_t length, + bool unicode = false) { + url_host_type type = HOST_TYPE_FAILED; + const char* pointer = input; + std::string decoded; + + if (length == 0) + goto end; + + if (pointer[0] == '[') { + if (pointer[length - 1] != ']') goto end; + return ParseIPv6Host(host, ++pointer, length - 2); + } - if (pointer[0] == '[') { - if (pointer[length - 1] != ']') - goto end; - return ParseIPv6Host(host, ++pointer, length - 2); - } + // First, we have to percent decode + PercentDecode(input, length, &decoded); - // First, we have to percent decode - 
PercentDecode(input, length, &decoded); + // Then we have to punycode toASCII + if (!ToASCII(&decoded, &decoded)) + goto end; - // Then we have to punycode toASCII - if (!ToASCII(&decoded, &decoded)) + // If any of the following characters are still present, we have to fail + for (size_t n = 0; n < decoded.size(); n++) { + const char ch = decoded[n]; + if (ch == 0x00 || ch == 0x09 || ch == 0x0a || ch == 0x0d || + ch == 0x20 || ch == '#' || ch == '%' || ch == '/' || + ch == '?' || ch == '@' || ch == '[' || ch == '\\' || + ch == ']') { goto end; - - // If any of the following characters are still present, we have to fail - for (size_t n = 0; n < decoded.size(); n++) { - const char ch = decoded[n]; - if (ch == 0x00 || ch == 0x09 || ch == 0x0a || ch == 0x0d || - ch == 0x20 || ch == '#' || ch == '%' || ch == '/' || - ch == '?' || ch == '@' || ch == '[' || ch == '\\' || - ch == ']') { - goto end; - } } - - // Check to see if it's an IPv4 IP address - type = ParseIPv4Host(host, decoded.c_str(), decoded.length()); - if (type == HOST_TYPE_IPV4 || type == HOST_TYPE_FAILED) - goto end; - - // If the unicode flag is set, run the result through punycode ToUnicode - if (unicode && !ToUnicode(&decoded, &decoded)) - goto end; - - // It's not an IPv4 or IPv6 address, it must be a domain - type = HOST_TYPE_DOMAIN; - host->value.domain = decoded; - - end: - host->type = type; - return type; } - // Locates the longest sequence of 0 segments in an IPv6 address - // in order to use the :: compression when serializing - static inline uint16_t* FindLongestZeroSequence(uint16_t* values, - size_t len) { - uint16_t* start = values; - uint16_t* end = start + len; - uint16_t* result = nullptr; - - uint16_t* current = nullptr; - unsigned counter = 0, longest = 1; - - while (start < end) { - if (*start == 0) { - if (current == nullptr) - current = start; - counter++; - } else { - if (counter > longest) { - longest = counter; - result = current; - } - counter = 0; - current = nullptr; + // Check 
to see if it's an IPv4 IP address + type = ParseIPv4Host(host, decoded.c_str(), decoded.length()); + if (type == HOST_TYPE_IPV4 || type == HOST_TYPE_FAILED) + goto end; + + // If the unicode flag is set, run the result through punycode ToUnicode + if (unicode && !ToUnicode(&decoded, &decoded)) + goto end; + + // It's not an IPv4 or IPv6 address, it must be a domain + type = HOST_TYPE_DOMAIN; + host->value.domain = decoded; + + end: + host->type = type; + return type; +} + +// Locates the longest sequence of 0 segments in an IPv6 address +// in order to use the :: compression when serializing +static inline uint16_t* FindLongestZeroSequence(uint16_t* values, + size_t len) { + uint16_t* start = values; + uint16_t* end = start + len; + uint16_t* result = nullptr; + + uint16_t* current = nullptr; + unsigned counter = 0, longest = 1; + + while (start < end) { + if (*start == 0) { + if (current == nullptr) + current = start; + counter++; + } else { + if (counter > longest) { + longest = counter; + result = current; } - start++; + counter = 0; + current = nullptr; } - if (counter > longest) - result = current; - return result; + start++; } - - static url_host_type WriteHost(url_host* host, std::string* dest) { - dest->clear(); - switch (host->type) { - case HOST_TYPE_DOMAIN: - *dest = host->value.domain; - break; - case HOST_TYPE_IPV4: { - dest->reserve(15); - uint32_t value = host->value.ipv4; - for (int n = 0; n < 4; n++) { - char buf[4]; - char* buffer = buf; - snprintf(buffer, sizeof(buf), "%d", value % 256); - dest->insert(0, buf); - if (n < 3) - dest->insert(0, 1, '.'); - value /= 256; - } - break; + if (counter > longest) + result = current; + return result; +} + +static url_host_type WriteHost(url_host* host, std::string* dest) { + dest->clear(); + switch (host->type) { + case HOST_TYPE_DOMAIN: + *dest = host->value.domain; + break; + case HOST_TYPE_IPV4: { + dest->reserve(15); + uint32_t value = host->value.ipv4; + for (int n = 0; n < 4; n++) { + char buf[4]; + 
char* buffer = buf; + snprintf(buffer, sizeof(buf), "%d", value % 256); + dest->insert(0, buf); + if (n < 3) + dest->insert(0, 1, '.'); + value /= 256; } - case HOST_TYPE_IPV6: { - dest->reserve(41); - *dest+= '['; - uint16_t* start = &host->value.ipv6[0]; - uint16_t* compress_pointer = - FindLongestZeroSequence(start, 8); - for (int n = 0; n <= 7; n++) { - uint16_t* piece = &host->value.ipv6[n]; - if (compress_pointer == piece) { - *dest += n == 0 ? "::" : ":"; - while (*piece == 0 && ++n < 8) - piece = &host->value.ipv6[n]; - if (n == 8) - break; - } - char buf[5]; - char* buffer = buf; - snprintf(buffer, sizeof(buf), "%x", *piece); - *dest += buf; - if (n < 7) - *dest += ':'; + break; + } + case HOST_TYPE_IPV6: { + dest->reserve(41); + *dest+= '['; + uint16_t* start = &host->value.ipv6[0]; + uint16_t* compress_pointer = + FindLongestZeroSequence(start, 8); + for (int n = 0; n <= 7; n++) { + uint16_t* piece = &host->value.ipv6[n]; + if (compress_pointer == piece) { + *dest += n == 0 ? 
"::" : ":"; + while (*piece == 0 && ++n < 8) + piece = &host->value.ipv6[n]; + if (n == 8) + break; } - *dest += ']'; - break; + char buf[5]; + char* buffer = buf; + snprintf(buffer, sizeof(buf), "%x", *piece); + *dest += buf; + if (n < 7) + *dest += ':'; } - case HOST_TYPE_FAILED: - break; + *dest += ']'; + break; } - return host->type; + case HOST_TYPE_FAILED: + break; } + return host->type; +} - static bool ParseHost(std::string* input, - std::string* output, - bool unicode = false) { - if (input->length() == 0) - return true; - url_host host{{""}, HOST_TYPE_DOMAIN}; - ParseHost(&host, input->c_str(), input->length(), unicode); - if (host.type == HOST_TYPE_FAILED) - return false; - WriteHost(&host, output); +static bool ParseHost(std::string* input, + std::string* output, + bool unicode = false) { + if (input->length() == 0) return true; - } - - static inline void Copy(Environment* env, - Local ary, - std::vector* vec) { - const int32_t len = ary->Length(); - if (len == 0) - return; // nothing to copy - vec->reserve(len); - for (int32_t n = 0; n < len; n++) { - Local val = ary->Get(env->context(), n).ToLocalChecked(); - if (val->IsString()) { - Utf8Value value(env->isolate(), val.As()); - vec->push_back(std::string(*value, value.length())); - } + url_host host{{""}, HOST_TYPE_DOMAIN}; + ParseHost(&host, input->c_str(), input->length(), unicode); + if (host.type == HOST_TYPE_FAILED) + return false; + WriteHost(&host, output); + return true; +} + +static inline void Copy(Environment* env, + Local ary, + std::vector* vec) { + const int32_t len = ary->Length(); + if (len == 0) + return; // nothing to copy + vec->reserve(len); + for (int32_t n = 0; n < len; n++) { + Local val = ary->Get(env->context(), n).ToLocalChecked(); + if (val->IsString()) { + Utf8Value value(env->isolate(), val.As()); + vec->push_back(std::string(*value, value.length())); } } - - static inline Local Copy(Environment* env, - std::vector vec) { - Isolate* isolate = env->isolate(); - Local ary = 
Array::New(isolate, vec.size()); - for (size_t n = 0; n < vec.size(); n++) - ary->Set(env->context(), n, UTF8STRING(isolate, vec[n])).FromJust(); - return ary; +} + +static inline Local Copy(Environment* env, + std::vector vec) { + Isolate* isolate = env->isolate(); + Local ary = Array::New(isolate, vec.size()); + for (size_t n = 0; n < vec.size(); n++) + ary->Set(env->context(), n, UTF8STRING(isolate, vec[n])).FromJust(); + return ary; +} + +static inline void HarvestBase(Environment* env, + struct url_data* base, + Local base_obj) { + Local context = env->context(); + Local flags = GET(env, base_obj, "flags"); + if (flags->IsInt32()) + base->flags = flags->Int32Value(context).FromJust(); + + Local scheme = GET(env, base_obj, "scheme"); + base->scheme = Utf8Value(env->isolate(), scheme).out(); + + GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME); + GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD); + GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST); + GET_AND_SET(env, base_obj, query, base, URL_FLAGS_HAS_QUERY); + GET_AND_SET(env, base_obj, fragment, base, URL_FLAGS_HAS_FRAGMENT); + Local port = GET(env, base_obj, "port"); + if (port->IsInt32()) + base->port = port->Int32Value(context).FromJust(); + Local path = GET(env, base_obj, "path"); + if (path->IsArray()) { + base->flags |= URL_FLAGS_HAS_PATH; + Copy(env, path.As(), &(base->path)); } - - static inline void HarvestBase(Environment* env, - struct url_data* base, - Local base_obj) { - Local context = env->context(); - Local flags = GET(env, base_obj, "flags"); - if (flags->IsInt32()) - base->flags = flags->Int32Value(context).FromJust(); - - Local scheme = GET(env, base_obj, "scheme"); - base->scheme = Utf8Value(env->isolate(), scheme).out(); - - GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME); - GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD); - GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST); - GET_AND_SET(env, 
base_obj, query, base, URL_FLAGS_HAS_QUERY); - GET_AND_SET(env, base_obj, fragment, base, URL_FLAGS_HAS_FRAGMENT); - Local port = GET(env, base_obj, "port"); - if (port->IsInt32()) - base->port = port->Int32Value(context).FromJust(); - Local path = GET(env, base_obj, "path"); - if (path->IsArray()) { - base->flags |= URL_FLAGS_HAS_PATH; - Copy(env, path.As(), &(base->path)); - } +} + +static inline void HarvestContext(Environment* env, + struct url_data* context, + Local context_obj) { + Local flags = GET(env, context_obj, "flags"); + if (flags->IsInt32()) { + int32_t _flags = flags->Int32Value(env->context()).FromJust(); + if (_flags & URL_FLAGS_SPECIAL) + context->flags |= URL_FLAGS_SPECIAL; + if (_flags & URL_FLAGS_CANNOT_BE_BASE) + context->flags |= URL_FLAGS_CANNOT_BE_BASE; } - - static inline void HarvestContext(Environment* env, - struct url_data* context, - Local context_obj) { - Local flags = GET(env, context_obj, "flags"); - if (flags->IsInt32()) { - int32_t _flags = flags->Int32Value(env->context()).FromJust(); - if (_flags & URL_FLAGS_SPECIAL) - context->flags |= URL_FLAGS_SPECIAL; - if (_flags & URL_FLAGS_CANNOT_BE_BASE) - context->flags |= URL_FLAGS_CANNOT_BE_BASE; - } - Local scheme = GET(env, context_obj, "scheme"); - if (scheme->IsString()) { - Utf8Value value(env->isolate(), scheme); - context->scheme.assign(*value, value.length()); - } - Local port = GET(env, context_obj, "port"); - if (port->IsInt32()) - context->port = port->Int32Value(env->context()).FromJust(); + Local scheme = GET(env, context_obj, "scheme"); + if (scheme->IsString()) { + Utf8Value value(env->isolate(), scheme); + context->scheme.assign(*value, value.length()); } - - // Single dot segment can be ".", "%2e", or "%2E" - static inline bool IsSingleDotSegment(std::string str) { - switch (str.size()) { - case 1: - return str == "."; - case 3: - return str[0] == '%' && - str[1] == '2' && - TO_LOWER(str[2]) == 'e'; - default: - return false; - } + Local port = GET(env, context_obj, 
"port"); + if (port->IsInt32()) + context->port = port->Int32Value(env->context()).FromJust(); +} + +// Single dot segment can be ".", "%2e", or "%2E" +static inline bool IsSingleDotSegment(std::string str) { + switch (str.size()) { + case 1: + return str == "."; + case 3: + return str[0] == '%' && + str[1] == '2' && + ASCIILowercase(str[2]) == 'e'; + default: + return false; } - - // Double dot segment can be: - // "..", ".%2e", ".%2E", "%2e.", "%2E.", - // "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e" - static inline bool IsDoubleDotSegment(std::string str) { - switch (str.size()) { - case 2: - return str == ".."; - case 4: - if (str[0] != '.' && str[0] != '%') - return false; - return ((str[0] == '.' && - str[1] == '%' && - str[2] == '2' && - TO_LOWER(str[3]) == 'e') || - (str[0] == '%' && - str[1] == '2' && - TO_LOWER(str[2]) == 'e' && - str[3] == '.')); - case 6: - return (str[0] == '%' && - str[1] == '2' && - TO_LOWER(str[2]) == 'e' && - str[3] == '%' && - str[4] == '2' && - TO_LOWER(str[5]) == 'e'); - default: +} + +// Double dot segment can be: +// "..", ".%2e", ".%2E", "%2e.", "%2E.", +// "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e" +static inline bool IsDoubleDotSegment(std::string str) { + switch (str.size()) { + case 2: + return str == ".."; + case 4: + if (str[0] != '.' && str[0] != '%') return false; - } + return ((str[0] == '.' 
&& + str[1] == '%' && + str[2] == '2' && + ASCIILowercase(str[3]) == 'e') || + (str[0] == '%' && + str[1] == '2' && + ASCIILowercase(str[2]) == 'e' && + str[3] == '.')); + case 6: + return (str[0] == '%' && + str[1] == '2' && + ASCIILowercase(str[2]) == 'e' && + str[3] == '%' && + str[4] == '2' && + ASCIILowercase(str[5]) == 'e'); + default: + return false; } - - static inline void ShortenUrlPath(struct url_data* url) { - if (url->path.empty()) return; - if (url->path.size() == 1 && url->scheme == "file:" && - NORMALIZED_WINDOWS_DRIVE_LETTER(url->path[0])) return; - url->path.pop_back(); +} + +static inline void ShortenUrlPath(struct url_data* url) { + if (url->path.empty()) return; + if (url->path.size() == 1 && url->scheme == "file:" && + IsNormalizedWindowsDriveLetter(url->path[0])) return; + url->path.pop_back(); +} + +void URL::Parse(const char* input, + const size_t len, + enum url_parse_state state_override, + struct url_data* url, + const struct url_data* base, + bool has_base) { + bool atflag = false; + bool sbflag = false; + bool uflag = false; + bool base_is_file = false; + int wskip = 0; + + std::string buffer; + url->scheme.reserve(len); + url->username.reserve(len); + url->password.reserve(len); + url->host.reserve(len); + url->path.reserve(len); + url->query.reserve(len); + url->fragment.reserve(len); + buffer.reserve(len); + + // Set the initial parse state. + const bool has_state_override = state_override != kUnknownState; + enum url_parse_state state = has_state_override ? 
state_override : + kSchemeStart; + + const char* p = input; + const char* end = input + len; + + if (state < kSchemeStart || state > kFragment) { + url->flags |= URL_FLAGS_INVALID_PARSE_STATE; + return; } - void URL::Parse(const char* input, - const size_t len, - enum url_parse_state state_override, - struct url_data* url, - const struct url_data* base, - bool has_base) { - bool atflag = false; - bool sbflag = false; - bool uflag = false; - bool base_is_file = false; - int wskip = 0; - - std::string buffer; - url->scheme.reserve(len); - url->username.reserve(len); - url->password.reserve(len); - url->host.reserve(len); - url->path.reserve(len); - url->query.reserve(len); - url->fragment.reserve(len); - buffer.reserve(len); - - // Set the initial parse state. - const bool has_state_override = state_override != kUnknownState; - enum url_parse_state state = has_state_override ? state_override : - kSchemeStart; - - const char* p = input; - const char* end = input + len; - - if (state < kSchemeStart || state > kFragment) { - url->flags |= URL_FLAGS_INVALID_PARSE_STATE; - return; - } - - while (p <= end) { - const char ch = p < end ? p[0] : kEOL; + while (p <= end) { + const char ch = p < end ? p[0] : kEOL; - if (TAB_AND_NEWLINE(ch)) { - if (state == kAuthority) { - // It's necessary to keep track of how much whitespace - // is being ignored when in kAuthority state because of - // how the buffer is managed. TODO: See if there's a better - // way - wskip++; - } - p++; - continue; + if (IsASCIITabOrNewline(ch)) { + if (state == kAuthority) { + // It's necessary to keep track of how much whitespace + // is being ignored when in kAuthority state because of + // how the buffer is managed. 
TODO: See if there's a better + // way + wskip++; } + p++; + continue; + } - bool special = (url->flags & URL_FLAGS_SPECIAL); - bool cannot_be_base; - const bool special_back_slash = (special && ch == '\\'); - switch (state) { - case kSchemeStart: - if (ASCII_ALPHA(ch)) { - buffer += TO_LOWER(ch); - state = kScheme; - } else if (!has_state_override) { - state = kNoScheme; - continue; - } else { - url->flags |= URL_FLAGS_FAILED; + bool special = (url->flags & URL_FLAGS_SPECIAL); + bool cannot_be_base; + const bool special_back_slash = (special && ch == '\\'); + switch (state) { + case kSchemeStart: + if (IsASCIIAlpha(ch)) { + buffer += ASCIILowercase(ch); + state = kScheme; + } else if (!has_state_override) { + state = kNoScheme; + continue; + } else { + url->flags |= URL_FLAGS_FAILED; + return; + } + break; + case kScheme: + if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') { + buffer += ASCIILowercase(ch); + p++; + continue; + } else if (ch == ':' || (has_state_override && ch == kEOL)) { + if (buffer.size() > 0) { + buffer += ':'; + url->scheme = buffer; + } else if (has_state_override) { + url->flags |= URL_FLAGS_TERMINATED; return; } - break; - case kScheme: - if (SCHEME_CHAR(ch)) { - buffer += TO_LOWER(ch); - p++; - continue; - } else if (ch == ':' || (has_state_override && ch == kEOL)) { - if (buffer.size() > 0) { - buffer += ':'; - url->scheme = buffer; - } else if (has_state_override) { - url->flags |= URL_FLAGS_TERMINATED; - return; - } - if (IsSpecial(url->scheme)) { - url->flags |= URL_FLAGS_SPECIAL; - special = true; - } else { - url->flags &= ~URL_FLAGS_SPECIAL; - } - if (has_state_override) - return; - buffer.clear(); - if (url->scheme == "file:") { - state = kFile; - } else if (special && - has_base && - url->scheme == base->scheme) { - state = kSpecialRelativeOrAuthority; - } else if (special) { - state = kSpecialAuthoritySlashes; - } else if (p[1] == '/') { - state = kPathOrAuthority; - p++; - } else { - url->flags |= 
URL_FLAGS_CANNOT_BE_BASE; - url->flags |= URL_FLAGS_HAS_PATH; - url->path.push_back(""); - state = kCannotBeBase; - } - } else if (!has_state_override) { - buffer.clear(); - state = kNoScheme; - p = input; - continue; + if (IsSpecial(url->scheme)) { + url->flags |= URL_FLAGS_SPECIAL; + special = true; } else { - url->flags |= URL_FLAGS_FAILED; - return; + url->flags &= ~URL_FLAGS_SPECIAL; } - break; - case kNoScheme: - cannot_be_base = base->flags & URL_FLAGS_CANNOT_BE_BASE; - if (!has_base || (cannot_be_base && ch != '#')) { - url->flags |= URL_FLAGS_FAILED; + if (has_state_override) return; - } else if (cannot_be_base && ch == '#') { - url->scheme = base->scheme; - if (IsSpecial(url->scheme)) { - url->flags |= URL_FLAGS_SPECIAL; - special = true; - } else { - url->flags &= ~URL_FLAGS_SPECIAL; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - if (base->flags & URL_FLAGS_HAS_FRAGMENT) { - url->flags |= URL_FLAGS_HAS_FRAGMENT; - url->fragment = base->fragment; - } - url->flags |= URL_FLAGS_CANNOT_BE_BASE; - state = kFragment; - } else if (has_base && - base->scheme != "file:") { - state = kRelative; - continue; - } else { - url->scheme = "file:"; - url->flags |= URL_FLAGS_SPECIAL; - special = true; + buffer.clear(); + if (url->scheme == "file:") { state = kFile; - continue; - } - break; - case kSpecialRelativeOrAuthority: - if (ch == '/' && p[1] == '/') { - state = kSpecialAuthorityIgnoreSlashes; + } else if (special && + has_base && + url->scheme == base->scheme) { + state = kSpecialRelativeOrAuthority; + } else if (special) { + state = kSpecialAuthoritySlashes; + } else if (p[1] == '/') { + state = kPathOrAuthority; p++; } else { - state = kRelative; - continue; - } - break; - case kPathOrAuthority: - if (ch == '/') { - state = kAuthority; - } else { - state = kPath; - continue; + url->flags 
|= URL_FLAGS_CANNOT_BE_BASE; + url->flags |= URL_FLAGS_HAS_PATH; + url->path.push_back(""); + state = kCannotBeBase; } - break; - case kRelative: + } else if (!has_state_override) { + buffer.clear(); + state = kNoScheme; + p = input; + continue; + } else { + url->flags |= URL_FLAGS_FAILED; + return; + } + break; + case kNoScheme: + cannot_be_base = base->flags & URL_FLAGS_CANNOT_BE_BASE; + if (!has_base || (cannot_be_base && ch != '#')) { + url->flags |= URL_FLAGS_FAILED; + return; + } else if (cannot_be_base && ch == '#') { url->scheme = base->scheme; if (IsSpecial(url->scheme)) { url->flags |= URL_FLAGS_SPECIAL; @@ -765,110 +1196,59 @@ namespace url { } else { url->flags &= ~URL_FLAGS_SPECIAL; } - switch (ch) { - case kEOL: - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - url->port = base->port; - break; - case '/': - state = kRelativeSlash; - break; - case '?': - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - url->port = base->port; - state = kQuery; - break; - case '#': - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= 
URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - url->port = base->port; - state = kFragment; - break; - default: - if (special_back_slash) { - state = kRelativeSlash; - } else { - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - ShortenUrlPath(url); - } - url->port = base->port; - state = kPath; - continue; - } + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; } - break; - case kRelativeSlash: - if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) { - state = kSpecialAuthorityIgnoreSlashes; - } else if (ch == '/') { - state = kAuthority; - } else { + if (base->flags & URL_FLAGS_HAS_QUERY) { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query = base->query; + } + if (base->flags & URL_FLAGS_HAS_FRAGMENT) { + url->flags |= URL_FLAGS_HAS_FRAGMENT; + url->fragment = base->fragment; + } + url->flags |= URL_FLAGS_CANNOT_BE_BASE; + state = kFragment; + } else if (has_base && + base->scheme != "file:") { + state = kRelative; + continue; + } else { + url->scheme = "file:"; + url->flags |= URL_FLAGS_SPECIAL; + special = true; + state = kFile; + continue; + } 
+ break; + case kSpecialRelativeOrAuthority: + if (ch == '/' && p[1] == '/') { + state = kSpecialAuthorityIgnoreSlashes; + p++; + } else { + state = kRelative; + continue; + } + break; + case kPathOrAuthority: + if (ch == '/') { + state = kAuthority; + } else { + state = kPath; + continue; + } + break; + case kRelative: + url->scheme = base->scheme; + if (IsSpecial(url->scheme)) { + url->flags |= URL_FLAGS_SPECIAL; + special = true; + } else { + url->flags &= ~URL_FLAGS_SPECIAL; + } + switch (ch) { + case kEOL: if (base->flags & URL_FLAGS_HAS_USERNAME) { url->flags |= URL_FLAGS_HAS_USERNAME; url->username = base->username; @@ -881,604 +1261,546 @@ namespace url { url->flags |= URL_FLAGS_HAS_HOST; url->host = base->host; } + if (base->flags & URL_FLAGS_HAS_QUERY) { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query = base->query; + } + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; + } url->port = base->port; - state = kPath; - continue; - } - break; - case kSpecialAuthoritySlashes: - state = kSpecialAuthorityIgnoreSlashes; - if (ch == '/' && p[1] == '/') { - p++; - } else { - continue; - } - break; - case kSpecialAuthorityIgnoreSlashes: - if (ch != '/' && ch != '\\') { - state = kAuthority; - continue; - } - break; - case kAuthority: - if (ch == '@') { - if (atflag) { - buffer.reserve(buffer.size() + 3); - buffer.insert(0, "%40"); + break; + case '/': + state = kRelativeSlash; + break; + case '?': + if (base->flags & URL_FLAGS_HAS_USERNAME) { + url->flags |= URL_FLAGS_HAS_USERNAME; + url->username = base->username; + } + if (base->flags & URL_FLAGS_HAS_PASSWORD) { + url->flags |= URL_FLAGS_HAS_PASSWORD; + url->password = base->password; } - atflag = true; - const size_t blen = buffer.size(); - if (blen > 0 && buffer[0] != ':') { + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; + } + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; 
+ url->path = base->path; + } + url->port = base->port; + state = kQuery; + break; + case '#': + if (base->flags & URL_FLAGS_HAS_USERNAME) { url->flags |= URL_FLAGS_HAS_USERNAME; + url->username = base->username; } - for (size_t n = 0; n < blen; n++) { - const char bch = buffer[n]; - if (bch == ':') { - url->flags |= URL_FLAGS_HAS_PASSWORD; - if (!uflag) { - uflag = true; - continue; - } - } - if (uflag) { - AppendOrEscape(&url->password, bch, UserinfoEncodeSet); - } else { - AppendOrEscape(&url->username, bch, UserinfoEncodeSet); - } + if (base->flags & URL_FLAGS_HAS_PASSWORD) { + url->flags |= URL_FLAGS_HAS_PASSWORD; + url->password = base->password; } - buffer.clear(); - } else if (ch == kEOL || - ch == '/' || - ch == '?' || - ch == '#' || - special_back_slash) { - p -= buffer.size() + 1 + wskip; - buffer.clear(); - state = kHost; - } else { - buffer += ch; - } - break; - case kHost: - case kHostname: - if (ch == ':' && !sbflag) { - if (special && buffer.size() == 0) { - url->flags |= URL_FLAGS_FAILED; - return; + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; } - url->flags |= URL_FLAGS_HAS_HOST; - if (!ParseHost(&buffer, &url->host)) { - url->flags |= URL_FLAGS_FAILED; - return; + if (base->flags & URL_FLAGS_HAS_QUERY) { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query = base->query; } - buffer.clear(); - state = kPort; - if (state_override == kHostname) { - return; + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; } - } else if (ch == kEOL || - ch == '/' || - ch == '?' 
|| - ch == '#' || - special_back_slash) { - p--; - if (special && buffer.size() == 0) { - url->flags |= URL_FLAGS_FAILED; - return; + url->port = base->port; + state = kFragment; + break; + default: + if (special_back_slash) { + state = kRelativeSlash; + } else { + if (base->flags & URL_FLAGS_HAS_USERNAME) { + url->flags |= URL_FLAGS_HAS_USERNAME; + url->username = base->username; + } + if (base->flags & URL_FLAGS_HAS_PASSWORD) { + url->flags |= URL_FLAGS_HAS_PASSWORD; + url->password = base->password; + } + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; + } + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; + ShortenUrlPath(url); + } + url->port = base->port; + state = kPath; + continue; } + } + break; + case kRelativeSlash: + if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) { + state = kSpecialAuthorityIgnoreSlashes; + } else if (ch == '/') { + state = kAuthority; + } else { + if (base->flags & URL_FLAGS_HAS_USERNAME) { + url->flags |= URL_FLAGS_HAS_USERNAME; + url->username = base->username; + } + if (base->flags & URL_FLAGS_HAS_PASSWORD) { + url->flags |= URL_FLAGS_HAS_PASSWORD; + url->password = base->password; + } + if (base->flags & URL_FLAGS_HAS_HOST) { url->flags |= URL_FLAGS_HAS_HOST; - if (!ParseHost(&buffer, &url->host)) { - url->flags |= URL_FLAGS_FAILED; - return; + url->host = base->host; + } + url->port = base->port; + state = kPath; + continue; + } + break; + case kSpecialAuthoritySlashes: + state = kSpecialAuthorityIgnoreSlashes; + if (ch == '/' && p[1] == '/') { + p++; + } else { + continue; + } + break; + case kSpecialAuthorityIgnoreSlashes: + if (ch != '/' && ch != '\\') { + state = kAuthority; + continue; + } + break; + case kAuthority: + if (ch == '@') { + if (atflag) { + buffer.reserve(buffer.size() + 3); + buffer.insert(0, "%40"); + } + atflag = true; + const size_t blen = buffer.size(); + if (blen > 0 && buffer[0] != ':') { + 
url->flags |= URL_FLAGS_HAS_USERNAME; + } + for (size_t n = 0; n < blen; n++) { + const char bch = buffer[n]; + if (bch == ':') { + url->flags |= URL_FLAGS_HAS_PASSWORD; + if (!uflag) { + uflag = true; + continue; + } } - buffer.clear(); - state = kPathStart; - if (has_state_override) { - return; + if (uflag) { + AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET); + } else { + AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET); } - } else { - if (ch == '[') - sbflag = true; - if (ch == ']') - sbflag = false; - buffer += TO_LOWER(ch); } - break; - case kPort: - if (ASCII_DIGIT(ch)) { - buffer += ch; - } else if (has_state_override || - ch == kEOL || - ch == '/' || - ch == '?' || - ch == '#' || - special_back_slash) { - if (buffer.size() > 0) { - int port = 0; - for (size_t i = 0; i < buffer.size(); i++) - port = port * 10 + buffer[i] - '0'; - if (port < 0 || port > 0xffff) { - // TODO(TimothyGu): This hack is currently needed for the host - // setter since it needs access to hostname if it is valid, and - // if the FAILED flag is set the entire response to JS layer - // will be empty. - if (state_override == kHost) - url->port = -1; - else - url->flags |= URL_FLAGS_FAILED; - return; - } - url->port = NormalizePort(url->scheme, port); - buffer.clear(); - } else if (has_state_override) { - // TODO(TimothyGu): Similar case as above. + buffer.clear(); + } else if (ch == kEOL || + ch == '/' || + ch == '?' || + ch == '#' || + special_back_slash) { + p -= buffer.size() + 1 + wskip; + buffer.clear(); + state = kHost; + } else { + buffer += ch; + } + break; + case kHost: + case kHostname: + if (ch == ':' && !sbflag) { + if (special && buffer.size() == 0) { + url->flags |= URL_FLAGS_FAILED; + return; + } + url->flags |= URL_FLAGS_HAS_HOST; + if (!ParseHost(&buffer, &url->host)) { + url->flags |= URL_FLAGS_FAILED; + return; + } + buffer.clear(); + state = kPort; + if (state_override == kHostname) { + return; + } + } else if (ch == kEOL || + ch == '/' || + ch == '?' 
|| + ch == '#' || + special_back_slash) { + p--; + if (special && buffer.size() == 0) { + url->flags |= URL_FLAGS_FAILED; + return; + } + url->flags |= URL_FLAGS_HAS_HOST; + if (!ParseHost(&buffer, &url->host)) { + url->flags |= URL_FLAGS_FAILED; + return; + } + buffer.clear(); + state = kPathStart; + if (has_state_override) { + return; + } + } else { + if (ch == '[') + sbflag = true; + if (ch == ']') + sbflag = false; + buffer += ASCIILowercase(ch); + } + break; + case kPort: + if (IsASCIIDigit(ch)) { + buffer += ch; + } else if (has_state_override || + ch == kEOL || + ch == '/' || + ch == '?' || + ch == '#' || + special_back_slash) { + if (buffer.size() > 0) { + int port = 0; + for (size_t i = 0; i < buffer.size(); i++) + port = port * 10 + buffer[i] - '0'; + if (port < 0 || port > 0xffff) { + // TODO(TimothyGu): This hack is currently needed for the host + // setter since it needs access to hostname if it is valid, and + // if the FAILED flag is set the entire response to JS layer + // will be empty. if (state_override == kHost) url->port = -1; else - url->flags |= URL_FLAGS_TERMINATED; + url->flags |= URL_FLAGS_FAILED; return; } - state = kPathStart; - continue; - } else { - url->flags |= URL_FLAGS_FAILED; + url->port = NormalizePort(url->scheme, port); + buffer.clear(); + } else if (has_state_override) { + // TODO(TimothyGu): Similar case as above. 
+ if (state_override == kHost) + url->port = -1; + else + url->flags |= URL_FLAGS_TERMINATED; return; } - break; - case kFile: - base_is_file = ( - has_base && - base->scheme == "file:"); - switch (ch) { - case kEOL: - if (base_is_file) { - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - break; + state = kPathStart; + continue; + } else { + url->flags |= URL_FLAGS_FAILED; + return; + } + break; + case kFile: + base_is_file = ( + has_base && + base->scheme == "file:"); + switch (ch) { + case kEOL: + if (base_is_file) { + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; } - state = kPath; - continue; - case '\\': - case '/': - state = kFileSlash; - break; - case '?': - if (base_is_file) { - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; + } + if (base->flags & URL_FLAGS_HAS_QUERY) { url->flags |= URL_FLAGS_HAS_QUERY; - state = kQuery; - break; + url->query = base->query; } - case '#': - if (base_is_file) { - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - state = kFragment; - break; + break; + } + state = kPath; + continue; + case '\\': + case '/': + state = kFileSlash; + break; + case '?': + if 
(base_is_file) { + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; } - default: - if (base_is_file && - (!WINDOWS_DRIVE_LETTER(ch, p[1]) || - end - p == 1 || - (p[2] != '/' && - p[2] != '\\' && - p[2] != '?' && - p[2] != '#'))) { - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - ShortenUrlPath(url); + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; } - state = kPath; - continue; - } - break; - case kFileSlash: - if (ch == '/' || ch == '\\') { - state = kFileHost; - } else { - if (has_base && - base->scheme == "file:") { - if (NORMALIZED_WINDOWS_DRIVE_LETTER(base->path[0])) { + url->flags |= URL_FLAGS_HAS_QUERY; + state = kQuery; + break; + } + case '#': + if (base_is_file) { + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; + } + if (base->flags & URL_FLAGS_HAS_PATH) { url->flags |= URL_FLAGS_HAS_PATH; - url->path.push_back(base->path[0]); - } else { + url->path = base->path; + } + if (base->flags & URL_FLAGS_HAS_QUERY) { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query = base->query; + } + state = kFragment; + break; + } + default: + if (base_is_file && + (!IsWindowsDriveLetter(ch, p[1]) || + end - p == 1 || + (p[2] != '/' && + p[2] != '\\' && + p[2] != '?' && + p[2] != '#'))) { + if (base->flags & URL_FLAGS_HAS_HOST) { url->flags |= URL_FLAGS_HAS_HOST; url->host = base->host; } + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; + } + ShortenUrlPath(url); } state = kPath; continue; - } - break; - case kFileHost: - if (ch == kEOL || - ch == '/' || - ch == '\\' || - ch == '?' 
|| - ch == '#') { - if (buffer.size() == 2 && - WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) { - state = kPath; - } else if (buffer.size() == 0) { - state = kPathStart; + } + break; + case kFileSlash: + if (ch == '/' || ch == '\\') { + state = kFileHost; + } else { + if (has_base && + base->scheme == "file:") { + if (IsNormalizedWindowsDriveLetter(base->path[0])) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path.push_back(base->path[0]); } else { - if (buffer != "localhost") { - url->flags |= URL_FLAGS_HAS_HOST; - if (!ParseHost(&buffer, &url->host)) { - url->flags |= URL_FLAGS_FAILED; - return; - } - } - buffer.clear(); - state = kPathStart; + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; } - continue; - } else { - buffer += ch; } - break; - case kPathStart: - if (IsSpecial(url->scheme)) { - state = kPath; - if (ch != '/' && ch != '\\') { - continue; - } - } else if (!has_state_override && ch == '?') { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query.clear(); - state = kQuery; - } else if (!has_state_override && ch == '#') { - url->flags |= URL_FLAGS_HAS_FRAGMENT; - url->fragment.clear(); - state = kFragment; - } else if (ch != kEOL) { + state = kPath; + continue; + } + break; + case kFileHost: + if (ch == kEOL || + ch == '/' || + ch == '\\' || + ch == '?' || + ch == '#') { + if (buffer.size() == 2 && + IsWindowsDriveLetter(buffer)) { state = kPath; - if (ch != '/') { - continue; + } else if (buffer.size() == 0) { + state = kPathStart; + } else { + if (buffer != "localhost") { + url->flags |= URL_FLAGS_HAS_HOST; + if (!ParseHost(&buffer, &url->host)) { + url->flags |= URL_FLAGS_FAILED; + return; + } } + buffer.clear(); + state = kPathStart; } - break; - case kPath: - if (ch == kEOL || - ch == '/' || - special_back_slash || - (!has_state_override && (ch == '?' 
|| ch == '#'))) { - if (IsDoubleDotSegment(buffer)) { - ShortenUrlPath(url); - if (ch != '/' && !special_back_slash) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path.push_back(""); - } - } else if (IsSingleDotSegment(buffer)) { - if (ch != '/' && !special_back_slash) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path.push_back(""); - } - } else if (!IsSingleDotSegment(buffer)) { - if (url->scheme == "file:" && - url->path.empty() && - buffer.size() == 2 && - WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) { - url->flags &= ~URL_FLAGS_HAS_HOST; - buffer[1] = ':'; - } + continue; + } else { + buffer += ch; + } + break; + case kPathStart: + if (IsSpecial(url->scheme)) { + state = kPath; + if (ch != '/' && ch != '\\') { + continue; + } + } else if (!has_state_override && ch == '?') { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query.clear(); + state = kQuery; + } else if (!has_state_override && ch == '#') { + url->flags |= URL_FLAGS_HAS_FRAGMENT; + url->fragment.clear(); + state = kFragment; + } else if (ch != kEOL) { + state = kPath; + if (ch != '/') { + continue; + } + } + break; + case kPath: + if (ch == kEOL || + ch == '/' || + special_back_slash || + (!has_state_override && (ch == '?' || ch == '#'))) { + if (IsDoubleDotSegment(buffer)) { + ShortenUrlPath(url); + if (ch != '/' && !special_back_slash) { url->flags |= URL_FLAGS_HAS_PATH; - std::string segment(buffer.c_str(), buffer.size()); - url->path.push_back(segment); + url->path.push_back(""); } - buffer.clear(); - if (url->scheme == "file:" && - (ch == kEOL || - ch == '?' 
|| - ch == '#')) { - while (url->path.size() > 1 && url->path[0].length() == 0) { - url->path.erase(url->path.begin()); - } + } else if (IsSingleDotSegment(buffer)) { + if (ch != '/' && !special_back_slash) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path.push_back(""); } - if (ch == '?') { - url->flags |= URL_FLAGS_HAS_QUERY; - state = kQuery; - } else if (ch == '#') { - state = kFragment; + } else if (!IsSingleDotSegment(buffer)) { + if (url->scheme == "file:" && + url->path.empty() && + buffer.size() == 2 && + IsWindowsDriveLetter(buffer)) { + url->flags &= ~URL_FLAGS_HAS_HOST; + buffer[1] = ':'; } - } else { - AppendOrEscape(&buffer, ch, DefaultEncodeSet); + url->flags |= URL_FLAGS_HAS_PATH; + std::string segment(buffer.c_str(), buffer.size()); + url->path.push_back(segment); } - break; - case kCannotBeBase: - switch (ch) { - case '?': - state = kQuery; - break; - case '#': - state = kFragment; - break; - default: - if (url->path.size() == 0) - url->path.push_back(""); - if (url->path.size() > 0 && ch != kEOL) - AppendOrEscape(&url->path[0], ch, SimpleEncodeSet); + buffer.clear(); + if (url->scheme == "file:" && + (ch == kEOL || + ch == '?' 
|| + ch == '#')) { + while (url->path.size() > 1 && url->path[0].length() == 0) { + url->path.erase(url->path.begin()); + } } - break; - case kQuery: - if (ch == kEOL || (!has_state_override && ch == '#')) { + if (ch == '?') { url->flags |= URL_FLAGS_HAS_QUERY; - url->query = buffer; - buffer.clear(); - if (ch == '#') - state = kFragment; - } else { - AppendOrEscape(&buffer, ch, QueryEncodeSet); - } - break; - case kFragment: - switch (ch) { - case kEOL: - url->flags |= URL_FLAGS_HAS_FRAGMENT; - url->fragment = buffer; - break; - case 0: - break; - default: - AppendOrEscape(&buffer, ch, SimpleEncodeSet); + state = kQuery; + } else if (ch == '#') { + state = kFragment; } - break; - default: - url->flags |= URL_FLAGS_INVALID_PARSE_STATE; - return; - } - - p++; - } - } - - static inline void SetArgs(Environment* env, - Local argv[], - const struct url_data* url) { - Isolate* isolate = env->isolate(); - argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags); - argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str()); - if (url->flags & URL_FLAGS_HAS_USERNAME) - argv[ARG_USERNAME] = UTF8STRING(isolate, url->username); - if (url->flags & URL_FLAGS_HAS_PASSWORD) - argv[ARG_PASSWORD] = UTF8STRING(isolate, url->password); - if (url->flags & URL_FLAGS_HAS_HOST) - argv[ARG_HOST] = UTF8STRING(isolate, url->host); - if (url->flags & URL_FLAGS_HAS_QUERY) - argv[ARG_QUERY] = UTF8STRING(isolate, url->query); - if (url->flags & URL_FLAGS_HAS_FRAGMENT) - argv[ARG_FRAGMENT] = UTF8STRING(isolate, url->fragment); - if (url->port > -1) - argv[ARG_PORT] = Integer::New(isolate, url->port); - if (url->flags & URL_FLAGS_HAS_PATH) - argv[ARG_PATH] = Copy(env, url->path); - } - - static void Parse(Environment* env, - Local recv, - const char* input, - const size_t len, - enum url_parse_state state_override, - Local base_obj, - Local context_obj, - Local cb, - Local error_cb) { - Isolate* isolate = env->isolate(); - Local context = env->context(); - HandleScope 
handle_scope(isolate); - Context::Scope context_scope(context); - - const bool has_base = base_obj->IsObject(); - - struct url_data base; - struct url_data url; - if (context_obj->IsObject()) - HarvestContext(env, &url, context_obj.As()); - if (has_base) - HarvestBase(env, &base, base_obj.As()); - - URL::Parse(input, len, state_override, &url, &base, has_base); - if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) || - ((state_override != kUnknownState) && - (url.flags & URL_FLAGS_TERMINATED))) - return; - - // Define the return value placeholders - const Local undef = Undefined(isolate); - if (!(url.flags & URL_FLAGS_FAILED)) { - Local argv[9] = { - undef, - undef, - undef, - undef, - undef, - undef, - undef, - undef, - undef, - }; - SetArgs(env, argv, &url); - (void)cb->Call(context, recv, arraysize(argv), argv); - } else if (error_cb->IsFunction()) { - Local argv[2] = { undef, undef }; - argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags); - argv[ERR_ARG_INPUT] = - String::NewFromUtf8(env->isolate(), - input, - v8::NewStringType::kNormal).ToLocalChecked(); - (void)error_cb.As()->Call(context, recv, arraysize(argv), argv); - } - } - - static void Parse(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 5); - CHECK(args[0]->IsString()); // input - CHECK(args[2]->IsUndefined() || // base context - args[2]->IsNull() || - args[2]->IsObject()); - CHECK(args[3]->IsUndefined() || // context - args[3]->IsNull() || - args[3]->IsObject()); - CHECK(args[4]->IsFunction()); // complete callback - CHECK(args[5]->IsUndefined() || args[5]->IsFunction()); // error callback - - Utf8Value input(env->isolate(), args[0]); - enum url_parse_state state_override = kUnknownState; - if (args[1]->IsNumber()) { - state_override = static_cast( - args[1]->Uint32Value(env->context()).FromJust()); - } - - Parse(env, args.This(), - *input, input.length(), - state_override, - args[2], - args[3], - args[4].As(), - args[5]); 
- } - - static void EncodeAuthSet(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 1); - CHECK(args[0]->IsString()); - Utf8Value value(env->isolate(), args[0]); - std::string output; - const size_t len = value.length(); - output.reserve(len); - for (size_t n = 0; n < len; n++) { - const char ch = (*value)[n]; - AppendOrEscape(&output, ch, UserinfoEncodeSet); - } - args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), - output.c_str(), - v8::NewStringType::kNormal).ToLocalChecked()); - } - - static void ToUSVString(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 2); - CHECK(args[0]->IsString()); - CHECK(args[1]->IsNumber()); - - TwoByteValue value(env->isolate(), args[0]); - const size_t n = value.length(); - - const int64_t start = args[1]->IntegerValue(env->context()).FromJust(); - CHECK_GE(start, 0); - - for (size_t i = start; i < n; i++) { - uint16_t c = value[i]; - if (!IsUnicodeSurrogate(c)) { - continue; - } else if (IsUnicodeSurrogateTrail(c) || i == n - 1) { - value[i] = UNICODE_REPLACEMENT_CHARACTER; - } else { - uint16_t d = value[i + 1]; - if (IsUnicodeTrail(d)) { - i++; } else { - value[i] = UNICODE_REPLACEMENT_CHARACTER; + AppendOrEscape(&buffer, ch, DEFAULT_ENCODE_SET); } - } - } - - args.GetReturnValue().Set( - String::NewFromTwoByte(env->isolate(), - *value, - v8::NewStringType::kNormal, - n).ToLocalChecked()); - } - - static void DomainToASCII(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 1); - CHECK(args[0]->IsString()); - Utf8Value value(env->isolate(), args[0]); - - url_host host{{""}, HOST_TYPE_DOMAIN}; - ParseHost(&host, *value, value.length()); - if (host.type == HOST_TYPE_FAILED) { - args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); - return; + break; + case kCannotBeBase: + switch (ch) { + case '?': + state = 
kQuery; + break; + case '#': + state = kFragment; + break; + default: + if (url->path.size() == 0) + url->path.push_back(""); + if (url->path.size() > 0 && ch != kEOL) + AppendOrEscape(&url->path[0], ch, SIMPLE_ENCODE_SET); + } + break; + case kQuery: + if (ch == kEOL || (!has_state_override && ch == '#')) { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query = buffer; + buffer.clear(); + if (ch == '#') + state = kFragment; + } else { + AppendOrEscape(&buffer, ch, QUERY_ENCODE_SET); + } + break; + case kFragment: + switch (ch) { + case kEOL: + url->flags |= URL_FLAGS_HAS_FRAGMENT; + url->fragment = buffer; + break; + case 0: + break; + default: + AppendOrEscape(&buffer, ch, SIMPLE_ENCODE_SET); + } + break; + default: + url->flags |= URL_FLAGS_INVALID_PARSE_STATE; + return; } - std::string out; - WriteHost(&host, &out); - args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), - out.c_str(), - v8::NewStringType::kNormal).ToLocalChecked()); - } - static void DomainToUnicode(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 1); - CHECK(args[0]->IsString()); - Utf8Value value(env->isolate(), args[0]); - - url_host host{{""}, HOST_TYPE_DOMAIN}; - ParseHost(&host, *value, value.length(), true); - if (host.type == HOST_TYPE_FAILED) { - args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); - return; - } - std::string out; - WriteHost(&host, &out); - args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), - out.c_str(), - v8::NewStringType::kNormal).ToLocalChecked()); + p++; } - - // This function works by calling out to a JS function that creates and - // returns the JS URL object. Be mindful of the JS<->Native boundary - // crossing that is required. 
- const Local URL::ToObject(Environment* env) const { - Isolate* isolate = env->isolate(); - Local context = env->context(); - HandleScope handle_scope(isolate); - Context::Scope context_scope(context); - - const Local undef = Undefined(isolate); - - if (context_.flags & URL_FLAGS_FAILED) - return Local(); - +} // NOLINT(readability/fn_size) + +static inline void SetArgs(Environment* env, + Local argv[], + const struct url_data* url) { + Isolate* isolate = env->isolate(); + argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags); + argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str()); + if (url->flags & URL_FLAGS_HAS_USERNAME) + argv[ARG_USERNAME] = UTF8STRING(isolate, url->username); + if (url->flags & URL_FLAGS_HAS_PASSWORD) + argv[ARG_PASSWORD] = UTF8STRING(isolate, url->password); + if (url->flags & URL_FLAGS_HAS_HOST) + argv[ARG_HOST] = UTF8STRING(isolate, url->host); + if (url->flags & URL_FLAGS_HAS_QUERY) + argv[ARG_QUERY] = UTF8STRING(isolate, url->query); + if (url->flags & URL_FLAGS_HAS_FRAGMENT) + argv[ARG_FRAGMENT] = UTF8STRING(isolate, url->fragment); + if (url->port > -1) + argv[ARG_PORT] = Integer::New(isolate, url->port); + if (url->flags & URL_FLAGS_HAS_PATH) + argv[ARG_PATH] = Copy(env, url->path); +} + +static void Parse(Environment* env, + Local recv, + const char* input, + const size_t len, + enum url_parse_state state_override, + Local base_obj, + Local context_obj, + Local cb, + Local error_cb) { + Isolate* isolate = env->isolate(); + Local context = env->context(); + HandleScope handle_scope(isolate); + Context::Scope context_scope(context); + + const bool has_base = base_obj->IsObject(); + + struct url_data base; + struct url_data url; + if (context_obj->IsObject()) + HarvestContext(env, &url, context_obj.As()); + if (has_base) + HarvestBase(env, &base, base_obj.As()); + + URL::Parse(input, len, state_override, &url, &base, has_base); + if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) || + ((state_override != 
kUnknownState) && + (url.flags & URL_FLAGS_TERMINATED))) + return; + + // Define the return value placeholders + const Local undef = Undefined(isolate); + if (!(url.flags & URL_FLAGS_FAILED)) { Local argv[9] = { undef, undef, @@ -1490,54 +1812,213 @@ namespace url { undef, undef, }; - SetArgs(env, argv, &context_); - - TryCatch try_catch(isolate); - - // The SetURLConstructor method must have been called already to - // set the constructor function used below. SetURLConstructor is - // called automatically when the internal/url.js module is loaded - // during the internal/bootstrap_node.js processing. - MaybeLocal ret = - env->url_constructor_function() - ->Call(env->context(), undef, 9, argv); + SetArgs(env, argv, &url); + (void)cb->Call(context, recv, arraysize(argv), argv); + } else if (error_cb->IsFunction()) { + Local argv[2] = { undef, undef }; + argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags); + argv[ERR_ARG_INPUT] = + String::NewFromUtf8(env->isolate(), + input, + v8::NewStringType::kNormal).ToLocalChecked(); + (void)error_cb.As()->Call(context, recv, arraysize(argv), argv); + } +} + +static void Parse(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 5); + CHECK(args[0]->IsString()); // input + CHECK(args[2]->IsUndefined() || // base context + args[2]->IsNull() || + args[2]->IsObject()); + CHECK(args[3]->IsUndefined() || // context + args[3]->IsNull() || + args[3]->IsObject()); + CHECK(args[4]->IsFunction()); // complete callback + CHECK(args[5]->IsUndefined() || args[5]->IsFunction()); // error callback + + Utf8Value input(env->isolate(), args[0]); + enum url_parse_state state_override = kUnknownState; + if (args[1]->IsNumber()) { + state_override = static_cast( + args[1]->Uint32Value(env->context()).FromJust()); + } - if (ret.IsEmpty()) { - ClearFatalExceptionHandlers(env); - FatalException(isolate, try_catch); + Parse(env, args.This(), + *input, input.length(), + 
state_override, + args[2], + args[3], + args[4].As(), + args[5]); +} + +static void EncodeAuthSet(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); + Utf8Value value(env->isolate(), args[0]); + std::string output; + const size_t len = value.length(); + output.reserve(len); + for (size_t n = 0; n < len; n++) { + const char ch = (*value)[n]; + AppendOrEscape(&output, ch, USERINFO_ENCODE_SET); + } + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), + output.c_str(), + v8::NewStringType::kNormal).ToLocalChecked()); +} + +static void ToUSVString(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 2); + CHECK(args[0]->IsString()); + CHECK(args[1]->IsNumber()); + + TwoByteValue value(env->isolate(), args[0]); + const size_t n = value.length(); + + const int64_t start = args[1]->IntegerValue(env->context()).FromJust(); + CHECK_GE(start, 0); + + for (size_t i = start; i < n; i++) { + char16_t c = value[i]; + if (!IsUnicodeSurrogate(c)) { + continue; + } else if (IsUnicodeSurrogateTrail(c) || i == n - 1) { + value[i] = kUnicodeReplacementCharacter; + } else { + char16_t d = value[i + 1]; + if (IsUnicodeTrail(d)) { + i++; + } else { + value[i] = kUnicodeReplacementCharacter; + } } - - return ret.ToLocalChecked(); } - static void SetURLConstructor(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_EQ(args.Length(), 1); - CHECK(args[0]->IsFunction()); - env->set_url_constructor_function(args[0].As()); + args.GetReturnValue().Set( + String::NewFromTwoByte(env->isolate(), + *value, + v8::NewStringType::kNormal, + n).ToLocalChecked()); +} + +static void DomainToASCII(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); + Utf8Value value(env->isolate(), args[0]); + + 
url_host host{{""}, HOST_TYPE_DOMAIN}; + ParseHost(&host, *value, value.length()); + if (host.type == HOST_TYPE_FAILED) { + args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); + return; + } + std::string out; + WriteHost(&host, &out); + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), + out.c_str(), + v8::NewStringType::kNormal).ToLocalChecked()); +} + +static void DomainToUnicode(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); + Utf8Value value(env->isolate(), args[0]); + + url_host host{{""}, HOST_TYPE_DOMAIN}; + ParseHost(&host, *value, value.length(), true); + if (host.type == HOST_TYPE_FAILED) { + args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); + return; + } + std::string out; + WriteHost(&host, &out); + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), + out.c_str(), + v8::NewStringType::kNormal).ToLocalChecked()); +} + +// This function works by calling out to a JS function that creates and +// returns the JS URL object. Be mindful of the JS<->Native boundary +// crossing that is required. +const Local URL::ToObject(Environment* env) const { + Isolate* isolate = env->isolate(); + Local context = env->context(); + HandleScope handle_scope(isolate); + Context::Scope context_scope(context); + + const Local undef = Undefined(isolate); + + if (context_.flags & URL_FLAGS_FAILED) + return Local(); + + Local argv[9] = { + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + }; + SetArgs(env, argv, &context_); + + TryCatch try_catch(isolate); + + // The SetURLConstructor method must have been called already to + // set the constructor function used below. SetURLConstructor is + // called automatically when the internal/url.js module is loaded + // during the internal/bootstrap_node.js processing. 
+ MaybeLocal ret = + env->url_constructor_function() + ->Call(env->context(), undef, 9, argv); + + if (ret.IsEmpty()) { + ClearFatalExceptionHandlers(env); + FatalException(isolate, try_catch); } - static void Init(Local target, - Local unused, - Local context, - void* priv) { - Environment* env = Environment::GetCurrent(context); - env->SetMethod(target, "parse", Parse); - env->SetMethod(target, "encodeAuth", EncodeAuthSet); - env->SetMethod(target, "toUSVString", ToUSVString); - env->SetMethod(target, "domainToASCII", DomainToASCII); - env->SetMethod(target, "domainToUnicode", DomainToUnicode); - env->SetMethod(target, "setURLConstructor", SetURLConstructor); + return ret.ToLocalChecked(); +} + +static void SetURLConstructor(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_EQ(args.Length(), 1); + CHECK(args[0]->IsFunction()); + env->set_url_constructor_function(args[0].As()); +} + +static void Init(Local target, + Local unused, + Local context, + void* priv) { + Environment* env = Environment::GetCurrent(context); + env->SetMethod(target, "parse", Parse); + env->SetMethod(target, "encodeAuth", EncodeAuthSet); + env->SetMethod(target, "toUSVString", ToUSVString); + env->SetMethod(target, "domainToASCII", DomainToASCII); + env->SetMethod(target, "domainToUnicode", DomainToUnicode); + env->SetMethod(target, "setURLConstructor", SetURLConstructor); #define XX(name, _) NODE_DEFINE_CONSTANT(target, name); - FLAGS(XX) + FLAGS(XX) #undef XX #define XX(name) NODE_DEFINE_CONSTANT(target, name); - ARGS(XX) - PARSESTATES(XX) + PARSESTATES(XX) #undef XX - } +} } // namespace url } // namespace node diff --git a/src/node_url.h b/src/node_url.h index 5b5b65b7c27e87..49bfb264e8d987 100644 --- a/src/node_url.h +++ b/src/node_url.h @@ -16,411 +16,6 @@ using v8::Local; using v8::Value; -#define BIT_AT(a, i) \ - (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ - (1 << ((unsigned int) (i) & 7)))) -#define TAB_AND_NEWLINE(ch) \ - (ch 
== 0x09 || ch == 0x0a || ch == 0x0d) -#define ASCII_DIGIT(ch) \ - (ch >= 0x30 && ch <= 0x39) -#define ASCII_HEX_DIGIT(ch) \ - (ASCII_DIGIT(ch) || (ch >= 0x41 && ch <= 0x46) || (ch >= 0x61 && ch <= 0x66)) -#define ASCII_ALPHA(ch) \ - ((ch >= 0x41 && ch <= 0x5a) || (ch >= 0x61 && ch <= 0x7a)) -#define ASCII_ALPHANUMERIC(ch) \ - (ASCII_DIGIT(ch) || ASCII_ALPHA(ch)) -#define TO_LOWER(ch) \ - (ASCII_ALPHA(ch) ? (ch | 0x20) : ch) -#define SCHEME_CHAR(ch) \ - (ASCII_ALPHANUMERIC(ch) || ch == '+' || ch == '-' || ch == '.') -#define WINDOWS_DRIVE_LETTER(ch, next) \ - (ASCII_ALPHA(ch) && (next == ':' || next == '|')) -#define NORMALIZED_WINDOWS_DRIVE_LETTER(str) \ - (str.length() == 2 && \ - ASCII_ALPHA(str[0]) && \ - str[1] == ':') - -static const char* hex[256] = { - "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", - "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", - "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", - "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", - "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27", - "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F", - "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37", - "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F", - "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47", - "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F", - "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57", - "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F", - "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67", - "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F", - "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77", - "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F", - "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", - "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", - "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", - "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", - "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", - "%A8", 
"%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", - "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", - "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", - "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", - "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", - "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", - "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", - "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", - "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", - "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", - "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" -}; - -static const uint8_t SIMPLE_ENCODE_SET[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 40 41 42 43 44 45 46 47 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 
7F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -static const uint8_t DEFAULT_ENCODE_SET[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 
0x00, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x80, - // 40 41 42 43 44 45 46 47 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 | 0x00 | 0x00 | 0x08 | 0x00 | 0x20 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA 
EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -static const uint8_t USERINFO_ENCODE_SET[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 40 41 42 43 44 45 46 47 - 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x40 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 
0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -static const uint8_t QUERY_ENCODE_SET[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, - // 40 41 42 43 44 45 46 47 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 
| 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -// Must return true if the character is to be percent-encoded -typedef bool (*must_escape_cb)(const unsigned char ch); - -// Appends ch 
to str. If test(ch) returns true, the ch will -// be percent-encoded then appended. -static inline void AppendOrEscape(std::string* str, - const unsigned char ch, - must_escape_cb test) { - if (test(ch)) - *str += hex[ch]; - else - *str += ch; -} - -static inline bool SimpleEncodeSet(const unsigned char ch) { - return BIT_AT(SIMPLE_ENCODE_SET, ch); -} - -static inline bool DefaultEncodeSet(const unsigned char ch) { - return BIT_AT(DEFAULT_ENCODE_SET, ch); -} - -static inline bool UserinfoEncodeSet(const unsigned char ch) { - return BIT_AT(USERINFO_ENCODE_SET, ch); -} - -static inline bool QueryEncodeSet(const unsigned char ch) { - return BIT_AT(QUERY_ENCODE_SET, ch); -} - -static inline unsigned hex2bin(const char ch) { - if (ch >= '0' && ch <= '9') - return ch - '0'; - if (ch >= 'A' && ch <= 'F') - return 10 + (ch - 'A'); - if (ch >= 'a' && ch <= 'f') - return 10 + (ch - 'a'); - return static_cast(-1); -} - -static inline void PercentDecode(const char* input, - size_t len, - std::string* dest) { - if (len == 0) - return; - dest->reserve(len); - const char* pointer = input; - const char* end = input + len; - size_t remaining = pointer - end - 1; - while (pointer < end) { - const char ch = pointer[0]; - remaining = (end - pointer) + 1; - if (ch != '%' || remaining < 2 || - (ch == '%' && - (!ASCII_HEX_DIGIT(pointer[1]) || - !ASCII_HEX_DIGIT(pointer[2])))) { - *dest += ch; - pointer++; - continue; - } else { - unsigned a = hex2bin(pointer[1]); - unsigned b = hex2bin(pointer[2]); - char c = static_cast(a * 16 + b); - *dest += c; - pointer += 3; - } - } -} - -#define SPECIALS(XX) \ - XX("ftp:", 21) \ - XX("file:", -1) \ - XX("gopher:", 70) \ - XX("http:", 80) \ - XX("https:", 443) \ - XX("ws:", 80) \ - XX("wss:", 443) - #define PARSESTATES(XX) \ XX(kSchemeStart) \ XX(kScheme) \ @@ -458,23 +53,6 @@ static inline void PercentDecode(const char* input, XX(URL_FLAGS_HAS_QUERY, 0x200) \ XX(URL_FLAGS_HAS_FRAGMENT, 0x400) -#define ARGS(XX) \ - XX(ARG_FLAGS) \ - XX(ARG_PROTOCOL) 
\ - XX(ARG_USERNAME) \ - XX(ARG_PASSWORD) \ - XX(ARG_HOST) \ - XX(ARG_PORT) \ - XX(ARG_PATH) \ - XX(ARG_QUERY) \ - XX(ARG_FRAGMENT) - -#define ERR_ARGS(XX) \ - XX(ERR_ARG_FLAGS) \ - XX(ERR_ARG_INPUT) \ - -static const char kEOL = -1; - enum url_parse_state { kUnknownState = -1, #define XX(name) name, @@ -488,32 +66,6 @@ enum url_flags { #undef XX }; -enum url_cb_args { -#define XX(name) name, - ARGS(XX) -#undef XX -}; - -enum url_error_cb_args { -#define XX(name) name, - ERR_ARGS(XX) -#undef XX -} url_error_cb_args; - -static inline bool IsSpecial(std::string scheme) { -#define XX(name, _) if (scheme == name) return true; - SPECIALS(XX); -#undef XX - return false; -} - -static inline int NormalizePort(std::string scheme, int p) { -#define XX(name, port) if (scheme == name && p == port) return -1; - SPECIALS(XX); -#undef XX - return p; -} - struct url_data { int32_t flags = URL_FLAGS_NONE; int port = -1; @@ -526,25 +78,6 @@ struct url_data { std::vector path; }; -union url_host_value { - std::string domain; - uint32_t ipv4; - uint16_t ipv6[8]; - ~url_host_value() {} -}; - -enum url_host_type { - HOST_TYPE_FAILED = -1, - HOST_TYPE_DOMAIN = 0, - HOST_TYPE_IPV4 = 1, - HOST_TYPE_IPV6 = 2 -}; - -struct url_host { - url_host_value value; - enum url_host_type type; -}; - class URL { public: static void Parse(const char* input, From 7db0af6fb18c95776adb73743878ce090de36313 Mon Sep 17 00:00:00 2001 From: "Italo A. 
Casas" Date: Tue, 11 Apr 2017 11:22:45 -0400 Subject: [PATCH 26/30] 2017-04-11, Version 7.9.0 (Current) Notable changes: * util: console is now closer to what is supported in all major browsers (Roman Reiss) [#10308](https://github.com/nodejs/node/pull/10308) PR-URL: https://github.com/nodejs/node/pull/12319 --- CHANGELOG.md | 3 +- doc/changelogs/CHANGELOG_V7.md | 66 +++++++++++++++++++++++++++++++++- src/node_version.h | 6 ++-- 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ff24baf55faa72..3bf1747aa7b37c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,8 @@ release. - 7.8.0
+ 7.9.0
+ 7.8.0
7.7.4
7.7.3
7.7.2
diff --git a/doc/changelogs/CHANGELOG_V7.md b/doc/changelogs/CHANGELOG_V7.md index e9b1ab03ff630d..d725a43c5d7b96 100644 --- a/doc/changelogs/CHANGELOG_V7.md +++ b/doc/changelogs/CHANGELOG_V7.md @@ -6,6 +6,7 @@ +7.9.0
7.8.0
7.7.4
7.7.3
@@ -33,6 +34,69 @@ * [io.js](CHANGELOG_IOJS.md) * [Archive](CHANGELOG_ARCHIVE.md) + +## 2017-04-11, Version 7.9.0 (Current), @italoacasas + +### Notable Changes + +* **util**: console is now closer to what is supported in all major browsers (Roman Reiss) [#10308](https://github.com/nodejs/node/pull/10308) + +### Commits + +* [[`9f73df5910`](https://github.com/nodejs/node/commit/9f73df5910)] - **deps**: cherry-pick 22858cb from V8 upstream (Ali Ijaz Sheikh) [#11998](https://github.com/nodejs/node/pull/11998) +* [[`b997e62692`](https://github.com/nodejs/node/commit/b997e62692)] - **test**: add internal/socket_list tests (DavidCai) [#12109](https://github.com/nodejs/node/pull/12109) +* [[`c11c23b22b`](https://github.com/nodejs/node/commit/c11c23b22b)] - **doc**: make the heading consistent (Sakthipriyan Vairamani (thefourtheye)) [#11569](https://github.com/nodejs/node/pull/11569) +* [[`67d21149a2`](https://github.com/nodejs/node/commit/67d21149a2)] - **crypto**: handle exceptions in hmac/hash.digest (Tobias Nießen) [#12164](https://github.com/nodejs/node/pull/12164) +* [[`3b765f5366`](https://github.com/nodejs/node/commit/3b765f5366)] - **doc**: fix confusing example in process.md (Vse Mozhet Byt) [#12282](https://github.com/nodejs/node/pull/12282) +* [[`37568c093a`](https://github.com/nodejs/node/commit/37568c093a)] - **src**: use std::list for at_exit_functions (Daniel Bevenius) [#12255](https://github.com/nodejs/node/pull/12255) +* [[`2f9e2fcf3e`](https://github.com/nodejs/node/commit/2f9e2fcf3e)] - **doc**: update information on test/known_issues (Jan Krems) [#12262](https://github.com/nodejs/node/pull/12262) +* [[`0f4319a14a`](https://github.com/nodejs/node/commit/0f4319a14a)] - **src**: use std::string for trace enabled_categories (Sam Roberts) [#12242](https://github.com/nodejs/node/pull/12242) +* [[`6826637f11`](https://github.com/nodejs/node/commit/6826637f11)] - **doc**: fix missing argument for dns.resolvePtr() (Uppinder Chugh) 
[#12256](https://github.com/nodejs/node/pull/12256) +* [[`4a6bb378d4`](https://github.com/nodejs/node/commit/4a6bb378d4)] - **doc**: fix confusing reference in net.md (Vse Mozhet Byt) [#12247](https://github.com/nodejs/node/pull/12247) +* [[`3e8991cc56`](https://github.com/nodejs/node/commit/3e8991cc56)] - **doc**: modernize and fix code examples in modules.md (Vse Mozhet Byt) [#12224](https://github.com/nodejs/node/pull/12224) +* [[`376f5ef1ee`](https://github.com/nodejs/node/commit/376f5ef1ee)] - **doc**: document the performance team (Gibson Fahnestock) [#12213](https://github.com/nodejs/node/pull/12213) +* [[`c0b7c075da`](https://github.com/nodejs/node/commit/c0b7c075da)] - **doc**: add refack to collaborators (Refael Ackermann) [#12277](https://github.com/nodejs/node/pull/12277) +* [[`83f855d505`](https://github.com/nodejs/node/commit/83f855d505)] - **doc**: add aqrln to collaborators (Alexey Orlenko) [#12273](https://github.com/nodejs/node/pull/12273) +* [[`2fb2289177`](https://github.com/nodejs/node/commit/2fb2289177)] - **doc**: add sub domain to host in url (Steven) [#12233](https://github.com/nodejs/node/pull/12233) +* [[`ac200a6122`](https://github.com/nodejs/node/commit/ac200a6122)] - **test**: add a second argument to assert.throws() (dave-k) [#12139](https://github.com/nodejs/node/pull/12139) +* [[`3cdd04b1c0`](https://github.com/nodejs/node/commit/3cdd04b1c0)] - **test**: skip irrelevant test on Windows (Rich Trott) [#12261](https://github.com/nodejs/node/pull/12261) +* [[`d4d6986551`](https://github.com/nodejs/node/commit/d4d6986551)] - **build**: fix path voodoo in icu-generic.gyp (Refael Ackermann) [#11217](https://github.com/nodejs/node/pull/11217) +* [[`a735c16d52`](https://github.com/nodejs/node/commit/a735c16d52)] - **deps**: backport ec1ffe3 from upstream V8 (Daniel Bevenius) [#12061](https://github.com/nodejs/node/pull/12061) +* [[`d641164d09`](https://github.com/nodejs/node/commit/d641164d09)] - **doc**: update pull request template URL 
layout (Rich Trott) [#12216](https://github.com/nodejs/node/pull/12216) +* [[`6feea08587`](https://github.com/nodejs/node/commit/6feea08587)] - **buffer**: preallocate array with buffer length (alejandro) [#11733](https://github.com/nodejs/node/pull/11733) +* [[`a703bdecc4`](https://github.com/nodejs/node/commit/a703bdecc4)] - **build**: add checks for openssl configure options (Daniel Bevenius) [#12175](https://github.com/nodejs/node/pull/12175) +* [[`b495b6acdf`](https://github.com/nodejs/node/commit/b495b6acdf)] - **build**: make configure print statements consistent (Daniel Bevenius) [#12176](https://github.com/nodejs/node/pull/12176) +* [[`f60b4553f3`](https://github.com/nodejs/node/commit/f60b4553f3)] - **doc**: modernize and fix code examples in https.md (Vse Mozhet Byt) [#12171](https://github.com/nodejs/node/pull/12171) +* [[`74d0266694`](https://github.com/nodejs/node/commit/74d0266694)] - **doc**: fix string interpolation in Stream 'finish' (Vinay Hiremath) [#12221](https://github.com/nodejs/node/pull/12221) +* [[`4b54520a4a`](https://github.com/nodejs/node/commit/4b54520a4a)] - **test**: refactor mkdtemp test and added async (Luca Maraschi) [#12080](https://github.com/nodejs/node/pull/12080) +* [[`8caf6fd58a`](https://github.com/nodejs/node/commit/8caf6fd58a)] - **test**: add Unicode characters regression test (Alexey Orlenko) [#11423](https://github.com/nodejs/node/pull/11423) +* [[`961c89cc61`](https://github.com/nodejs/node/commit/961c89cc61)] - **doc**: add table of contents to README.md (Jason Marsh) [#11635](https://github.com/nodejs/node/pull/11635) +* [[`a11ed6a0b3`](https://github.com/nodejs/node/commit/a11ed6a0b3)] - **test**: more robust check for location of `node.exe` (Refael Ackermann) [#12120](https://github.com/nodejs/node/pull/12120) +* [[`6083e7aa7b`](https://github.com/nodejs/node/commit/6083e7aa7b)] - **benchmark**: avoid TurboFan deopt in arrays bench (Michaël Zasso) [#11894](https://github.com/nodejs/node/pull/11894) +* 
[[`cf1117bc13`](https://github.com/nodejs/node/commit/cf1117bc13)] - **doc**: fix the timing of setImmediate's execution (Daiki Arai) [#12034](https://github.com/nodejs/node/pull/12034) +* [[`806c4f3c0c`](https://github.com/nodejs/node/commit/806c4f3c0c)] - **doc**: fix fs.read arg type (Daiki Arai) [#12034](https://github.com/nodejs/node/pull/12034) +* [[`c814c7e9ea`](https://github.com/nodejs/node/commit/c814c7e9ea)] - **events**: do not keep arrays with a single listener (Luigi Pinca) [#12043](https://github.com/nodejs/node/pull/12043) +* [[`36617fd5b8`](https://github.com/nodejs/node/commit/36617fd5b8)] - **doc**: add notes to http.get options (Raphael Okon) [#12124](https://github.com/nodejs/node/pull/12124) +* [[`9e6b0a4604`](https://github.com/nodejs/node/commit/9e6b0a4604)] - **test**: performance, remove Popen(shell=True) on Win (Refael Ackermann) [#12138](https://github.com/nodejs/node/pull/12138) +* [[`805ebef8b1`](https://github.com/nodejs/node/commit/805ebef8b1)] - **buffer**: optimize decoding wrapped base64 data (Alexey Orlenko) [#12146](https://github.com/nodejs/node/pull/12146) +* [[`fb34d9c210`](https://github.com/nodejs/node/commit/fb34d9c210)] - **test**: increase querystring coverage (DavidCai) [#12163](https://github.com/nodejs/node/pull/12163) +* [[`d6e9cf7c22`](https://github.com/nodejs/node/commit/d6e9cf7c22)] - **doc**: fix and update examples in http.md (Vse Mozhet Byt) [#12169](https://github.com/nodejs/node/pull/12169) +* [[`f057cc3d84`](https://github.com/nodejs/node/commit/f057cc3d84)] - **benchmark**: replace \[\].join() with ''.repeat() (Vse Mozhet Byt) [#12170](https://github.com/nodejs/node/pull/12170) +* [[`b15dc95848`](https://github.com/nodejs/node/commit/b15dc95848)] - **test**: fix flaky test-child-process-exec-timeout (Santiago Gimeno) [#12159](https://github.com/nodejs/node/pull/12159) +* [[`72a27b3eb5`](https://github.com/nodejs/node/commit/72a27b3eb5)] - **build**: use $(RM) in Makefile for consistency (Gibson Fahnestock) 
[#12157](https://github.com/nodejs/node/pull/12157) +* [[`3af9101d20`](https://github.com/nodejs/node/commit/3af9101d20)] - **doc, inspector**: note that the host is optional (Gibson Fahnestock) [#12149](https://github.com/nodejs/node/pull/12149) +* [[`b52b3f6710`](https://github.com/nodejs/node/commit/b52b3f6710)] - **test**: reduce buffer size in buffer-creation test (Sakthipriyan Vairamani (thefourtheye)) [#11177](https://github.com/nodejs/node/pull/11177) +* [[`b5283f9d4b`](https://github.com/nodejs/node/commit/b5283f9d4b)] - **doc**: add logo to README (Roman Reiss) [#12148](https://github.com/nodejs/node/pull/12148) +* [[`305f822a36`](https://github.com/nodejs/node/commit/305f822a36)] - **net**: rename internal functions for readability (Joyee Cheung) [#11796](https://github.com/nodejs/node/pull/11796) +* [[`2f88de1ce3`](https://github.com/nodejs/node/commit/2f88de1ce3)] - **vm**: use SetterCallback to set func declarations (AnnaMag) [#12051](https://github.com/nodejs/node/pull/12051) +* [[`ffbcfdfe32`](https://github.com/nodejs/node/commit/ffbcfdfe32)] - **src**: fix base64 decoding (Nikolai Vavilov) [#11995](https://github.com/nodejs/node/pull/11995) +* [[`8823861d9d`](https://github.com/nodejs/node/commit/8823861d9d)] - **tools**: update dotfile whitelist in .gitignore (Michaël Zasso) [#12116](https://github.com/nodejs/node/pull/12116) +* [[`87ca9a6ffe`](https://github.com/nodejs/node/commit/87ca9a6ffe)] - **test**: fix flaky child-process-exec-kill-throws (Rich Trott) [#12111](https://github.com/nodejs/node/pull/12111) +* [[`fdf76d5aa0`](https://github.com/nodejs/node/commit/fdf76d5aa0)] - **tools**: add missing #include "unicode/putil.h" (Steven R. 
Loomis) [#12078](https://github.com/nodejs/node/pull/12078) +* [[`6130d547a0`](https://github.com/nodejs/node/commit/6130d547a0)] - **deps**: backport 8dde6ac from upstream V8 (Daniel Bevenius) [#12060](https://github.com/nodejs/node/pull/12060) +* [[`1ee38eb874`](https://github.com/nodejs/node/commit/1ee38eb874)] - **(SEMVER-MINOR)** **util**: add %i and %f formatting specifiers (Roman Reiss) [#10308](https://github.com/nodejs/node/pull/10308) +* [[`5ac719d0d2`](https://github.com/nodejs/node/commit/5ac719d0d2)] - **doc**: add deprecations page to docs toc (Michaël Zasso) [#12268](https://github.com/nodejs/node/pull/12268) + ## 2017-03-28, Version 7.8.0 (Current), @MylesBorins @@ -40,7 +104,7 @@ * **buffer**: - do not segfault on out-of-range index (Timothy Gu) [#11927](https://github.com/nodejs/node/pull/11927) -* **crypto**: +* **crypto**: - Fix memory leak if certificate is revoked (Tom Atkinson) [#12089](https://github.com/nodejs/node/pull/12089) * **deps**: * upgrade npm to 4.2.0 (Kat Marchán) [#11389](https://github.com/nodejs/node/pull/11389) diff --git a/src/node_version.h b/src/node_version.h index f8a63a76fed259..8c97d759b72295 100644 --- a/src/node_version.h +++ b/src/node_version.h @@ -2,10 +2,10 @@ #define SRC_NODE_VERSION_H_ #define NODE_MAJOR_VERSION 7 -#define NODE_MINOR_VERSION 8 -#define NODE_PATCH_VERSION 1 +#define NODE_MINOR_VERSION 9 +#define NODE_PATCH_VERSION 0 -#define NODE_VERSION_IS_RELEASE 0 +#define NODE_VERSION_IS_RELEASE 1 #ifndef NODE_STRINGIFY #define NODE_STRINGIFY(n) NODE_STRINGIFY_HELPER(n) From da50af6521e00cb47f6dfd11cf2db5f06144e1e5 Mon Sep 17 00:00:00 2001 From: "Italo A. 
Casas" Date: Tue, 11 Apr 2017 13:33:56 -0400 Subject: [PATCH 27/30] Working on v7.9.1 PR-URL: https://github.com/nodejs/node/pull/12319 --- src/node_version.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/node_version.h b/src/node_version.h index 8c97d759b72295..47af78e0821029 100644 --- a/src/node_version.h +++ b/src/node_version.h @@ -3,9 +3,9 @@ #define NODE_MAJOR_VERSION 7 #define NODE_MINOR_VERSION 9 -#define NODE_PATCH_VERSION 0 +#define NODE_PATCH_VERSION 1 -#define NODE_VERSION_IS_RELEASE 1 +#define NODE_VERSION_IS_RELEASE 0 #ifndef NODE_STRINGIFY #define NODE_STRINGIFY(n) NODE_STRINGIFY_HELPER(n) From abc5749138ae1c400588d89f370a6d8ad8733ec4 Mon Sep 17 00:00:00 2001 From: JR McEntee Date: Tue, 25 Apr 2017 15:18:40 -0400 Subject: [PATCH 28/30] doc: change Mac OS X to macOS This update changes references to "Mac OS X", "OS X", and "OSX" in markdown files to "macOS". Fixes: https://github.com/nodejs/node/issues/12086 --- BUILDING.md | 12 ++++++------ CONTRIBUTING.md | 2 +- doc/STYLE_GUIDE.md | 2 +- doc/api/child_process.md | 4 ++-- doc/api/documentation.md | 4 ++-- doc/api/errors.md | 2 +- doc/api/fs.md | 14 +++++++------- doc/api/os.md | 2 +- doc/api/process.md | 2 +- test/README.md | 2 +- 10 files changed, 23 insertions(+), 23 deletions(-) diff --git a/BUILDING.md b/BUILDING.md index 2dd5c142779ead..61b177c12328e8 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -9,7 +9,7 @@ If you consistently can reproduce a test failure, search for it in the file a new issue. -### Unix / OS X +### Unix / macOS Prerequisites: @@ -18,7 +18,7 @@ Prerequisites: * Python 2.6 or 2.7 * GNU Make 3.81 or newer -On OS X, you will also need: +On macOS, you will also need: * [Xcode](https://developer.apple.com/xcode/download/) * You also need to install the `Command Line Tools` via Xcode. 
You can find this under the menu `Xcode -> Preferences -> Downloads` @@ -171,7 +171,7 @@ With the `--download=all`, this may download ICU if you don't have an ICU in `deps/icu`. (The embedded `small-icu` included in the default Node.js source does not include all locales.) -##### Unix / OS X: +##### Unix / macOS: ```console $ ./configure --with-intl=full-icu --download=all @@ -188,7 +188,7 @@ $ ./configure --with-intl=full-icu --download=all The `Intl` object will not be available, nor some other APIs such as `String.normalize`. -##### Unix / OS X: +##### Unix / macOS: ```console $ ./configure --without-intl @@ -200,7 +200,7 @@ $ ./configure --without-intl > .\vcbuild nosign without-intl ``` -#### Use existing installed ICU (Unix / OS X only): +#### Use existing installed ICU (Unix / macOS only): ```console $ pkg-config --modversion icu-i18n && ./configure --with-intl=system-icu @@ -216,7 +216,7 @@ You can find other ICU releases at Download the file named something like `icu4c-**##.#**-src.tgz` (or `.zip`). -##### Unix / OS X +##### Unix / macOS From an already-unpacked ICU: ```console diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4daf126bd16053..9480944c3146b9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -157,7 +157,7 @@ Bug fixes and features **should come with tests**. Add your tests in the project, see this [guide](./doc/guides/writing-tests.md). Looking at other tests to see how they should be structured can also help. -To run the tests on Unix / OS X: +To run the tests on Unix / macOS: ```text $ ./configure && make -j4 test diff --git a/doc/STYLE_GUIDE.md b/doc/STYLE_GUIDE.md index f087718a6754fd..0d3fc001073fcb 100644 --- a/doc/STYLE_GUIDE.md +++ b/doc/STYLE_GUIDE.md @@ -39,7 +39,7 @@ * When documenting APIs, note the version the API was introduced in at the end of the section. If an API has been deprecated, also note the first version that the API appeared deprecated in. 
-* When using dashes, use emdashes ("—", Ctrl+Alt+"-" on OSX) surrounded by +* When using dashes, use emdashes ("—", Ctrl+Alt+"-" on macOS) surrounded by spaces, per the New York Times usage. * Including assets: * If you wish to add an illustration or full program, add it to the diff --git a/doc/api/child_process.md b/doc/api/child_process.md index 4560e5d555a207..7f0a79dd573bd5 100644 --- a/doc/api/child_process.md +++ b/doc/api/child_process.md @@ -75,7 +75,7 @@ when the child process terminates. The importance of the distinction between [`child_process.exec()`][] and [`child_process.execFile()`][] can vary based on platform. On Unix-type operating -systems (Unix, Linux, OSX) [`child_process.execFile()`][] can be more efficient +systems (Unix, Linux, macOS) [`child_process.execFile()`][] can be more efficient because it does not spawn a shell. On Windows, however, `.bat` and `.cmd` files are not executable on their own without a terminal, and therefore cannot be launched using [`child_process.execFile()`][]. When running on Windows, `.bat` @@ -430,7 +430,7 @@ child.on('error', (err) => { }); ``` -*Note: Certain platforms (OS X, Linux) will use the value of `argv[0]` for the +*Note: Certain platforms (macOS, Linux) will use the value of `argv[0]` for the process title while others (Windows, SunOS) will use `command`.* *Note: Node.js currently overwrites `argv[0]` with `process.execPath` on diff --git a/doc/api/documentation.md b/doc/api/documentation.md index 5f45c9b56ed387..450a250ea9b0d4 100644 --- a/doc/api/documentation.md +++ b/doc/api/documentation.md @@ -73,11 +73,11 @@ like `fs.open()`, will document that. The docs link to the corresponding man pages (short for manual pages) which describe how the syscalls work. **Caveat:** some syscalls, like lchown(2), are BSD-specific. 
That means, for -example, that `fs.lchown()` only works on Mac OS X and other BSD-derived systems, +example, that `fs.lchown()` only works on macOS and other BSD-derived systems, and is not available on Linux. Most Unix syscalls have Windows equivalents, but behavior may differ on Windows -relative to Linux and OS X. For an example of the subtle ways in which it's +relative to Linux and macOS. For an example of the subtle ways in which it's sometimes impossible to replace Unix syscall semantics on Windows, see [Node issue 4760](https://github.com/nodejs/node/issues/4760). diff --git a/doc/api/errors.md b/doc/api/errors.md index 96409a53677689..05758cf20b7dd7 100644 --- a/doc/api/errors.md +++ b/doc/api/errors.md @@ -523,7 +523,7 @@ found [here][online]. [file descriptors][] allowable on the system has been reached, and requests for another descriptor cannot be fulfilled until at least one has been closed. This is encountered when opening many files at once in - parallel, especially on systems (in particular, OS X) where there is a low + parallel, especially on systems (in particular, macOS) where there is a low file descriptor limit for processes. To remedy a low limit, run `ulimit -n 2048` in the same shell that will run the Node.js process. diff --git a/doc/api/fs.md b/doc/api/fs.md index 1b2a3c791532c0..33bc2c7fe1377e 100644 --- a/doc/api/fs.md +++ b/doc/api/fs.md @@ -1099,7 +1099,7 @@ changes: Asynchronous lchmod(2). No arguments other than a possible exception are given to the completion callback. -Only available on Mac OS X. +Only available on macOS. ## fs.lchmodSync(path, mode) -On Linux and OS X systems, `fs.watch()` resolves the path to an [inode][] and +On Linux and macOS systems, `fs.watch()` resolves the path to an [inode][] and watches the inode. If the watched path is deleted and recreated, it is assigned a new inode. The watch will emit an event for the delete but will continue watching the *original* inode. 
Events for the new inode will not be emitted. @@ -1983,7 +1983,7 @@ In AIX, save and close of a file being watched causes two notifications - one for adding new content, and one for truncation. Moreover, save and close operations on some platforms cause inode changes that force watch operations to become invalid and ineffective. AIX retains inode for the -lifetime of a file, that way though this is different from Linux / OS X, +lifetime of a file, that way though this is different from Linux / macOS, this improves the usability of file watching. This is expected behavior. #### Filename Argument diff --git a/doc/api/os.md b/doc/api/os.md index 06779dbb37cf70..480305a7e1fafa 100644 --- a/doc/api/os.md +++ b/doc/api/os.md @@ -363,7 +363,7 @@ added: v0.3.3 * Returns: {string} The `os.type()` method returns a string identifying the operating system name -as returned by uname(3). For example `'Linux'` on Linux, `'Darwin'` on OS X and +as returned by uname(3). For example `'Linux'` on Linux, `'Darwin'` on macOS and `'Windows_NT'` on Windows. Please see https://en.wikipedia.org/wiki/Uname#Examples for additional diff --git a/doc/api/process.md b/doc/api/process.md index e10271060ff5ff..a833761693e1bb 100644 --- a/doc/api/process.md +++ b/doc/api/process.md @@ -1621,7 +1621,7 @@ the current value of `ps`. *Note*: When a new value is assigned, different platforms will impose different maximum length restrictions on the title. Usually such restrictions are quite -limited. For instance, on Linux and OS X, `process.title` is limited to the size +limited. For instance, on Linux and macOS, `process.title` is limited to the size of the binary name plus the length of the command line arguments because setting the `process.title` overwrites the `argv` memory of the process. 
Node.js v0.8 allowed for longer process title strings by also overwriting the `environ` diff --git a/test/README.md b/test/README.md index b40b15ad030c89..743a32496dc1dc 100644 --- a/test/README.md +++ b/test/README.md @@ -296,7 +296,7 @@ Platform check for Linux on PowerPC. ### isOSX * return [<Boolean>](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Data_structures#Boolean_type) -Platform check for OS X. +Platform check for macOS. ### isSunOS * return [<Boolean>](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Data_structures#Boolean_type) From 89d073bdc280a777945ca302138a3e714f6fcda1 Mon Sep 17 00:00:00 2001 From: JR McEntee Date: Tue, 28 Mar 2017 21:12:04 -0400 Subject: [PATCH 29/30] doc: correct markdown file line lengths This commit updates two paragraphs that exceeded the 80-character line limit after updating to macOS. Fixes: https://github.com/nodejs/node/issues/12086 --- doc/api/fs.md | 6 +++--- doc/api/process.md | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/api/fs.md b/doc/api/fs.md index 33bc2c7fe1377e..687ad455277dab 100644 --- a/doc/api/fs.md +++ b/doc/api/fs.md @@ -1374,9 +1374,9 @@ The kernel ignores the position argument and always appends the data to the end of the file. _Note: The behavior of `fs.open()` is platform specific for some flags. As such, -opening a directory on macOS and Linux with the `'a+'` flag - see example below - -will return an error. In contrast, on Windows and FreeBSD, a file descriptor -will be returned._ +opening a directory on macOS and Linux with the `'a+'` flag - see example +below - will return an error. In contrast, on Windows and FreeBSD, a file +descriptor will be returned._ ```js // macOS and Linux diff --git a/doc/api/process.md b/doc/api/process.md index a833761693e1bb..c2c56b90b90049 100644 --- a/doc/api/process.md +++ b/doc/api/process.md @@ -1621,12 +1621,12 @@ the current value of `ps`.
*Note*: When a new value is assigned, different platforms will impose different maximum length restrictions on the title. Usually such restrictions are quite -limited. For instance, on Linux and macOS, `process.title` is limited to the size -of the binary name plus the length of the command line arguments because setting -the `process.title` overwrites the `argv` memory of the process. Node.js v0.8 -allowed for longer process title strings by also overwriting the `environ` -memory but that was potentially insecure and confusing in some (rather obscure) -cases. +limited. For instance, on Linux and macOS, `process.title` is limited to the +size of the binary name plus the length of the command line arguments because +setting the `process.title` overwrites the `argv` memory of the process. +Node.js v0.8 allowed for longer process title strings by also overwriting the +`environ` memory but that was potentially insecure and confusing in some +(rather obscure) cases. ## process.umask([mask])