diff --git a/doc/api/url.md b/doc/api/url.md index c6eb2f9876cef9..9b121b7f15c5d0 100644 --- a/doc/api/url.md +++ b/doc/api/url.md @@ -992,14 +992,27 @@ for (const [name, value] of params) { // xyz baz ``` -### require('url').domainToAscii(domain) +### require('url').domainToASCII(domain) * `domain` {String} * Returns: {String} -Returns the [Punycode][] ASCII serialization of the `domain`. +Returns the [Punycode][] ASCII serialization of the `domain`. If `domain` is an +invalid domain, the empty string is returned. -*Note*: The `require('url').domainToAscii()` method is introduced as part of +It performs the inverse operation to [`require('url').domainToUnicode()`][]. + +```js +const url = require('url'); +console.log(url.domainToASCII('español.com')); + // Prints xn--espaol-zwa.com +console.log(url.domainToASCII('中文.com')); + // Prints xn--fiq228c.com +console.log(url.domainToASCII('xn--iñvalid.com')); + // Prints an empty string +``` + +*Note*: The `require('url').domainToASCII()` method is introduced as part of the new `URL` implementation but is not part of the WHATWG URL standard. ### require('url').domainToUnicode(domain) @@ -1007,7 +1020,20 @@ the new `URL` implementation but is not part of the WHATWG URL standard. * `domain` {String} * Returns: {String} -Returns the Unicode serialization of the `domain`. +Returns the Unicode serialization of the `domain`. If `domain` is an invalid +domain, the empty string is returned. + +It performs the inverse operation to [`require('url').domainToASCII()`][]. + +```js +const url = require('url'); +console.log(url.domainToUnicode('xn--espaol-zwa.com')); + // Prints español.com +console.log(url.domainToUnicode('xn--fiq228c.com')); + // Prints 中文.com +console.log(url.domainToUnicode('xn--iñvalid.com')); + // Prints an empty string +``` *Note*: The `require('url').domainToUnicode()` API is introduced as part of the new `URL` implementation but is not part of the WHATWG URL standard. 
@@ -1074,6 +1100,8 @@ console.log(myURL.origin); [`URLSearchParams`]: #url_class_urlsearchparams [`urlSearchParams.entries()`]: #url_urlsearchparams_entries [`urlSearchParams@@iterator()`]: #url_urlsearchparams_iterator +[`require('url').domainToASCII()`]: #url_require_url_domaintoascii_domain +[`require('url').domainToUnicode()`]: #url_require_url_domaintounicode_domain [stable sorting algorithm]: https://en.wikipedia.org/wiki/Sorting_algorithm#Stability [`JSON.stringify()`]: https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/JSON/stringify [`url.toJSON()`]: #url_url_tojson diff --git a/lib/url.js b/lib/url.js index 5d59d3c10f6d67..2db780c7eb069d 100644 --- a/lib/url.js +++ b/lib/url.js @@ -319,7 +319,10 @@ Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) { // It only converts parts of the domain name that // have non-ASCII characters, i.e. it doesn't matter if // you call it with a domain that already is ASCII-only. - this.hostname = toASCII(this.hostname); + + // Use lenient mode (`true`) to try to support even non-compliant + // URLs. + this.hostname = toASCII(this.hostname, true); } var p = this.port ? 
':' + this.port : ''; diff --git a/src/node_i18n.cc b/src/node_i18n.cc index ae14aed7c6b4c2..b337456c639318 100644 --- a/src/node_i18n.cc +++ b/src/node_i18n.cc @@ -410,7 +410,8 @@ bool InitializeICUDirectory(const std::string& path) { int32_t ToUnicode(MaybeStackBuffer* buf, const char* input, - size_t length) { + size_t length, + bool lenient) { UErrorCode status = U_ZERO_ERROR; uint32_t options = UIDNA_DEFAULT; options |= UIDNA_NONTRANSITIONAL_TO_UNICODE; @@ -435,7 +436,7 @@ int32_t ToUnicode(MaybeStackBuffer* buf, &status); } - if (U_FAILURE(status)) { + if (U_FAILURE(status) || (!lenient && info.errors != 0)) { len = -1; buf->SetLength(0); } else { @@ -448,7 +449,8 @@ int32_t ToUnicode(MaybeStackBuffer* buf, int32_t ToASCII(MaybeStackBuffer* buf, const char* input, - size_t length) { + size_t length, + bool lenient) { UErrorCode status = U_ZERO_ERROR; uint32_t options = UIDNA_DEFAULT; options |= UIDNA_NONTRANSITIONAL_TO_ASCII; @@ -473,7 +475,7 @@ int32_t ToASCII(MaybeStackBuffer* buf, &status); } - if (U_FAILURE(status)) { + if (U_FAILURE(status) || (!lenient && info.errors != 0)) { len = -1; buf->SetLength(0); } else { @@ -489,8 +491,11 @@ static void ToUnicode(const FunctionCallbackInfo& args) { CHECK_GE(args.Length(), 1); CHECK(args[0]->IsString()); Utf8Value val(env->isolate(), args[0]); + // optional arg + bool lenient = args[1]->BooleanValue(env->context()).FromJust(); + MaybeStackBuffer buf; - int32_t len = ToUnicode(&buf, *val, val.length()); + int32_t len = ToUnicode(&buf, *val, val.length(), lenient); if (len < 0) { return env->ThrowError("Cannot convert name to Unicode"); @@ -508,8 +513,11 @@ static void ToASCII(const FunctionCallbackInfo& args) { CHECK_GE(args.Length(), 1); CHECK(args[0]->IsString()); Utf8Value val(env->isolate(), args[0]); + // optional arg + bool lenient = args[1]->BooleanValue(env->context()).FromJust(); + MaybeStackBuffer buf; - int32_t len = ToASCII(&buf, *val, val.length()); + int32_t len = ToASCII(&buf, *val, val.length(), 
lenient); if (len < 0) { return env->ThrowError("Cannot convert name to ASCII"); diff --git a/src/node_i18n.h b/src/node_i18n.h index 21567eeb3ec38f..270eb1f3d1bc46 100644 --- a/src/node_i18n.h +++ b/src/node_i18n.h @@ -18,10 +18,12 @@ bool InitializeICUDirectory(const std::string& path); int32_t ToASCII(MaybeStackBuffer* buf, const char* input, - size_t length); + size_t length, + bool lenient = false); int32_t ToUnicode(MaybeStackBuffer* buf, const char* input, - size_t length); + size_t length, + bool lenient = false); } // namespace i18n } // namespace node diff --git a/test/fixtures/url-idna.js b/test/fixtures/url-idna.js new file mode 100644 index 00000000000000..af169bb04a8736 --- /dev/null +++ b/test/fixtures/url-idna.js @@ -0,0 +1,217 @@ +'use strict'; + +// Credit for list: http://www.i18nguy.com/markup/idna-examples.html +module.exports = { + valid: [ + { ascii: 'xn--mgbaal8b0b9b2b.icom.museum', + unicode: 'افغانستا.icom.museum' + }, + { + ascii: 'xn--lgbbat1ad8j.icom.museum', + unicode: 'الجزائر.icom.museum' + }, + { + ascii: 'xn--sterreich-z7a.icom.museum', + unicode: 'österreich.icom.museum' + }, + { + ascii: 'xn--54b6eqazv8bc7e.icom.museum', + unicode: 'বাংলাদেশ.icom.museum' + }, + { + ascii: 'xn--80abmy0agn7e.icom.museum', + unicode: 'беларусь.icom.museum' + }, + { + ascii: 'xn--belgi-rsa.icom.museum', + unicode: 'belgië.icom.museum' + }, + { + ascii: 'xn--80abgvm6a7d2b.icom.museum', + unicode: 'българия.icom.museum' + }, + { + ascii: 'xn--mgbfqim.icom.museum', + unicode: 'تشادر.icom.museum' + }, + { + ascii: 'xn--fiqs8s.icom.museum', + unicode: '中国.icom.museum' + }, + { + ascii: 'xn--mgbu4chg.icom.museum', + unicode: 'القمر.icom.museum' + }, + { + ascii: 'xn--vxakcego.icom.museum', + unicode: 'κυπρος.icom.museum' + }, + { + ascii: 'xn--eskrepublika-ebb62d.icom.museum', + unicode: 'českárepublika.icom.museum' + }, + { + ascii: 'xn--wgbh1c.icom.museum', + unicode: 'مصر.icom.museum' + }, + { + ascii: 'xn--hxakic4aa.icom.museum', + unicode: 
'ελλάδα.icom.museum' + }, + { + ascii: 'xn--magyarorszg-t7a.icom.museum', + unicode: 'magyarország.icom.museum' + }, + { + ascii: 'xn--sland-ysa.icom.museum', + unicode: 'ísland.icom.museum' + }, + { + ascii: 'xn--h2brj9c.icom.museum', + unicode: 'भारत.icom.museum' + }, + { + ascii: 'xn--mgba3a4fra.icom.museum', + unicode: 'ايران.icom.museum' + }, + { + ascii: 'xn--ire-9la.icom.museum', + unicode: 'éire.icom.museum' + }, + { + ascii: 'xn--4dbklr2c8d.xn--4dbrk0ce.museum', + unicode: 'איקו״ם.ישראל.museum' + }, + { + ascii: 'xn--wgv71a.icom.museum', + unicode: '日本.icom.museum' + }, + { + ascii: 'xn--igbhzh7gpa.icom.museum', + unicode: 'الأردن.icom.museum' + }, + { + ascii: 'xn--80aaa0a6awh12ed.icom.museum', + unicode: 'қазақстан.icom.museum' + }, + { + ascii: 'xn--3e0b707e.icom.museum', + unicode: '한국.icom.museum' + }, + { + ascii: 'xn--80afmksoji0fc.icom.museum', + unicode: 'кыргызстан.icom.museum' + }, + { + ascii: 'xn--q7ce6a.icom.museum', + unicode: 'ລາວ.icom.museum' + }, + { + ascii: 'xn--mgbb7fjb.icom.museum', + unicode: 'لبنان.icom.museum' + }, + { + ascii: 'xn--80aaldqjmmi6x.icom.museum', + unicode: 'македонија.icom.museum' + }, + { + ascii: 'xn--mgbah1a3hjkrd.icom.museum', + unicode: 'موريتانيا.icom.museum' + }, + { + ascii: 'xn--mxico-bsa.icom.museum', + unicode: 'méxico.icom.museum' + }, + { + ascii: 'xn--c1aqabffc0aq.icom.museum', + unicode: 'монголулс.icom.museum' + }, + { + ascii: 'xn--mgbc0a9azcg.icom.museum', + unicode: 'المغرب.icom.museum' + }, + { + ascii: 'xn--l2bey1c2b.icom.museum', + unicode: 'नेपाल.icom.museum' + }, + { + ascii: 'xn--mgb9awbf.icom.museum', + unicode: 'عمان.icom.museum' + }, + { + ascii: 'xn--wgbl6a.icom.museum', + unicode: 'قطر.icom.museum' + }, + { + ascii: 'xn--romnia-yta.icom.museum', + unicode: 'românia.icom.museum' + }, + { + ascii: 'xn--h1alffa9f.xn--h1aegh.museum', + unicode: 'россия.иком.museum' + }, + { + ascii: 'xn--80aaabm1ab4blmeec9e7n.xn--h1aegh.museum', + unicode: 'србијаицрнагора.иком.museum' + }, + { + ascii: 
'xn--xkc2al3hye2a.icom.museum', + unicode: 'இலங்கை.icom.museum' + }, + { + ascii: 'xn--espaa-rta.icom.museum', + unicode: 'españa.icom.museum' + }, + { + ascii: 'xn--o3cw4h.icom.museum', + unicode: 'ไทย.icom.museum' + }, + { + ascii: 'xn--pgbs0dh.icom.museum', + unicode: 'تونس.icom.museum' + }, + { + ascii: 'xn--trkiye-3ya.icom.museum', + unicode: 'türkiye.icom.museum' + }, + { + ascii: 'xn--80aaxgrpt.icom.museum', + unicode: 'украина.icom.museum' + }, + { + ascii: 'xn--vitnam-jk8b.icom.museum', + unicode: 'việtnam.icom.museum' + }, + // long URL + { + ascii: `${`${'a'.repeat(63)}.`.repeat(3)}com`, + unicode: `${`${'a'.repeat(63)}.`.repeat(3)}com` + } + ], + invalid: [ + // long label + { + url: `${'a'.repeat(64)}.com`, + mode: 'ascii' + }, + // long URL + { + url: `${`${'a'.repeat(63)}.`.repeat(4)}com`, + mode: 'ascii' + }, + // invalid character + { + url: '\ufffd.com', + mode: 'ascii' + }, + { + url: '\ufffd.com', + mode: 'unicode' + }, + // invalid Punycode + { + url: 'xn---abc.com', + mode: 'unicode' + } + ] +} diff --git a/test/fixtures/url-tests.js b/test/fixtures/url-tests.js index 3b162391cd2ef5..3e303910ca6e19 100644 --- a/test/fixtures/url-tests.js +++ b/test/fixtures/url-tests.js @@ -3589,17 +3589,17 @@ module.exports = "base": "http://other.com/", "failure": true }, - // "U+FFFD", - // { - // "input": "https://\ufffd", - // "base": "about:blank", - // "failure": true - // }, - // { - // "input": "https://%EF%BF%BD", - // "base": "about:blank", - // "failure": true - // }, + "U+FFFD", + { + "input": "https://\ufffd", + "base": "about:blank", + "failure": true + }, + { + "input": "https://%EF%BF%BD", + "base": "about:blank", + "failure": true + }, { "input": "https://x/\ufffd?\ufffd#\ufffd", "base": "about:blank", @@ -4497,26 +4497,26 @@ module.exports = "hash": "" }, "# Hosts and percent-encoding", - // { - // "input": "ftp://example.com%80/", - // "base": "about:blank", - // "failure": true - // }, - // { - // "input": "ftp://example.com%A0/", - // 
"base": "about:blank", - // "failure": true - // }, - // { - // "input": "https://example.com%80/", - // "base": "about:blank", - // "failure": true - // }, - // { - // "input": "https://example.com%A0/", - // "base": "about:blank", - // "failure": true - // }, + { + "input": "ftp://example.com%80/", + "base": "about:blank", + "failure": true + }, + { + "input": "ftp://example.com%A0/", + "base": "about:blank", + "failure": true + }, + { + "input": "https://example.com%80/", + "base": "about:blank", + "failure": true + }, + { + "input": "https://example.com%A0/", + "base": "about:blank", + "failure": true + }, { "input": "ftp://%e2%98%83", "base": "about:blank", diff --git a/test/parallel/test-icu-punycode.js b/test/parallel/test-icu-punycode.js index 62508bc9f78f49..411704bb8f4477 100644 --- a/test/parallel/test-icu-punycode.js +++ b/test/parallel/test-icu-punycode.js @@ -6,67 +6,36 @@ if (!common.hasIntl) { return; } -const icu = getPunycode(); +const icu = process.binding('icu'); const assert = require('assert'); -function getPunycode() { - try { - return process.binding('icu'); - } catch (err) { - return undefined; +const tests = require('../fixtures/url-idna.js'); + +{ + for (const [i, { ascii, unicode }] of tests.valid.entries()) { + assert.strictEqual(ascii, icu.toASCII(unicode), `toASCII(${i + 1})`); + assert.strictEqual(unicode, icu.toUnicode(ascii), `toUnicode(${i + 1})`); + assert.strictEqual(ascii, icu.toASCII(icu.toUnicode(ascii)), + `toASCII(toUnicode(${i + 1}))`); + assert.strictEqual(unicode, icu.toUnicode(icu.toASCII(unicode)), + `toUnicode(toASCII(${i + 1}))`); } } -// Credit for list: http://www.i18nguy.com/markup/idna-examples.html -const tests = [ - 'افغانستا.icom.museum', - 'الجزائر.icom.museum', - 'österreich.icom.museum', - 'বাংলাদেশ.icom.museum', - 'беларусь.icom.museum', - 'belgië.icom.museum', - 'българия.icom.museum', - 'تشادر.icom.museum', - '中国.icom.museum', - 'القمر.icom.museum', - 'κυπρος.icom.museum', - 'českárepublika.icom.museum', 
- 'مصر.icom.museum', - 'ελλάδα.icom.museum', - 'magyarország.icom.museum', - 'ísland.icom.museum', - 'भारत.icom.museum', - 'ايران.icom.museum', - 'éire.icom.museum', - 'איקו״ם.ישראל.museum', - '日本.icom.museum', - 'الأردن.icom.museum', - 'қазақстан.icom.museum', - '한국.icom.museum', - 'кыргызстан.icom.museum', - 'ລາວ.icom.museum', - 'لبنان.icom.museum', - 'македонија.icom.museum', - 'موريتانيا.icom.museum', - 'méxico.icom.museum', - 'монголулс.icom.museum', - 'المغرب.icom.museum', - 'नेपाल.icom.museum', - 'عمان.icom.museum', - 'قطر.icom.museum', - 'românia.icom.museum', - 'россия.иком.museum', - 'србијаицрнагора.иком.museum', - 'இலங்கை.icom.museum', - 'españa.icom.museum', - 'ไทย.icom.museum', - 'تونس.icom.museum', - 'türkiye.icom.museum', - 'украина.icom.museum', - 'việtnam.icom.museum' -]; +{ + const errorRe = { + ascii: /^Error: Cannot convert name to ASCII$/, + unicode: /^Error: Cannot convert name to Unicode$/ + }; + const convertFunc = { + ascii: icu.toASCII, + unicode: icu.toUnicode + }; -// Testing the roundtrip -tests.forEach((i) => { - assert.strictEqual(i, icu.toUnicode(icu.toASCII(i))); -}); + for (const [i, { url, mode }] of tests.invalid.entries()) { + assert.throws(() => convertFunc[mode](url), errorRe[mode], + `Invalid case ${i + 1}`); + assert.doesNotThrow(() => convertFunc[mode](url, true), + `Invalid case ${i + 1} in lenient mode`); + } +} diff --git a/test/parallel/test-whatwg-url-domainto.js b/test/parallel/test-whatwg-url-domainto.js new file mode 100644 index 00000000000000..f891f95a19cd3b --- /dev/null +++ b/test/parallel/test-whatwg-url-domainto.js @@ -0,0 +1,36 @@ +'use strict'; +const common = require('../common'); + +if (!common.hasIntl) { + common.skip('missing Intl'); + return; +} + +const assert = require('assert'); +const { domainToASCII, domainToUnicode } = require('url'); + +// Tests below are not from WPT. 
+const tests = require('../fixtures/url-idna.js'); + +{ + for (const [i, { ascii, unicode }] of tests.valid.entries()) { + assert.strictEqual(ascii, domainToASCII(unicode), + `domainToASCII(${i + 1})`); + assert.strictEqual(unicode, domainToUnicode(ascii), + `domainToUnicode(${i + 1})`); + assert.strictEqual(ascii, domainToASCII(domainToUnicode(ascii)), + `domainToASCII(domainToUnicode(${i + 1}))`); + assert.strictEqual(unicode, domainToUnicode(domainToASCII(unicode)), + `domainToUnicode(domainToASCII(${i + 1}))`); + } +} + +{ + const convertFunc = { + ascii: domainToASCII, + unicode: domainToUnicode + }; + + for (const [i, { url, mode }] of tests.invalid.entries()) + assert.strictEqual(convertFunc[mode](url), '', `Invalid case ${i + 1}`); +}