From 844a470fbb7ee02b21a0b38abcae37eb9d3e0234 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benno=20F=C3=BCnfst=C3=BCck?= Date: Mon, 16 Oct 2017 15:36:32 +0200 Subject: [PATCH 1/4] http: disallow two-byte characters in URL path This commit changes node's handling of two-byte characters in the path component of an http URL. Previously, node would just strip the higher byte when generating the request. So this code: ``` http.request({host: "example.com", port: "80", "/\uFF2e"}) ``` would request `http://example.com/.` (`.` is the character for the byte `0x2e`). This is not useful and can in some cases lead to filter evasion. With this change, the code generates `ERR_UNESCAPED_CHARACTERS`, just like space and control characters already did. --- lib/_http_client.js | 16 ++++----- .../parallel/test-http-client-invalid-path.js | 33 +++++++++++++++++++ 2 files changed, 41 insertions(+), 8 deletions(-) create mode 100644 test/parallel/test-http-client-invalid-path.js diff --git a/lib/_http_client.js b/lib/_http_client.js index 5b568628006d68..190216665d1079 100644 --- a/lib/_http_client.js +++ b/lib/_http_client.js @@ -52,20 +52,20 @@ const errors = require('internal/errors'); // checks can greatly outperform the equivalent regexp (tested in V8 5.4). function isInvalidPath(s) { var i = 0; - if (s.charCodeAt(0) <= 32) return true; + if (s.charCodeAt(0) <= 32 || s.charCodeAt(0) > 0xFF) return true; if (++i >= s.length) return false; - if (s.charCodeAt(1) <= 32) return true; + if (s.charCodeAt(1) <= 32 || s.charCodeAt(1) > 0xFF) return true; if (++i >= s.length) return false; - if (s.charCodeAt(2) <= 32) return true; + if (s.charCodeAt(2) <= 32 || s.charCodeAt(2) > 0xFF) return true; if (++i >= s.length) return false; - if (s.charCodeAt(3) <= 32) return true; + if (s.charCodeAt(3) <= 32 || s.charCodeAt(3) > 0xFF) return true; if (++i >= s.length) return false; - if (s.charCodeAt(4) <= 32) return true; + if (s.charCodeAt(4) <= 32 || s.charCodeAt(4) > 0xFF) return true; if (++i >= s.length) return false; - if (s.charCodeAt(5) <= 32) return true; + if (s.charCodeAt(5) <= 32 || s.charCodeAt(5) > 0xFF) return true; ++i; for (; i < s.length; ++i) - if (s.charCodeAt(i) <= 32) return true; + if (s.charCodeAt(i) <= 32 || s.charCodeAt(i) > 0xFF) return true; return false; } @@ -121,7 +121,7 @@ function ClientRequest(options, cb) { if (path.length <= 39) { // Determined experimentally in V8 5.4 invalidPath = isInvalidPath(path); } else { - invalidPath = /[\u0000-\u0020]/.test(path); + invalidPath = /[\u0000-\u0020\u0100-\uffff]/.test(path); } if (invalidPath) throw new errors.TypeError('ERR_UNESCAPED_CHARACTERS', 'Request path'); diff --git a/test/parallel/test-http-client-invalid-path.js b/test/parallel/test-http-client-invalid-path.js new file mode 100644 index 00000000000000..de2636638c6552 --- /dev/null +++ b/test/parallel/test-http-client-invalid-path.js @@ -0,0 +1,33 @@ +// Copyright Joyent, Inc. and other Node contributors. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to permit +// persons to whom the Software is furnished to do so, subject to the +// following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN +// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +// USE OR OTHER DEALINGS IN THE SOFTWARE. + +'use strict'; +const common = require('../common'); +const http = require('http'); + +common.expectsError(() => { + http.request({ + path: '/thisisinvalid\uffe2' + }).end(); +}, { + code: 'ERR_UNESCAPED_CHARACTERS', + type: TypeError +}); From eab523ce8117455bfe015a069e42725ed0716fbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benno=20F=C3=BCnfst=C3=BCck?= Date: Mon, 16 Oct 2017 18:45:04 +0200 Subject: [PATCH 2/4] fixup! http: disallow two-byte characters in URL path --- .../parallel/test-http-client-invalid-path.js | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/test/parallel/test-http-client-invalid-path.js b/test/parallel/test-http-client-invalid-path.js index de2636638c6552..c042d61eda0999 100644 --- a/test/parallel/test-http-client-invalid-path.js +++ b/test/parallel/test-http-client-invalid-path.js @@ -1,24 +1,3 @@ -// Copyright Joyent, Inc. and other Node contributors. -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the -// "Software"), to deal in the Software without restriction, including -// without limitation the rights to use, copy, modify, merge, publish, -// distribute, sublicense, and/or sell copies of the Software, and to permit -// persons to whom the Software is furnished to do so, subject to the -// following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN -// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE -// USE OR OTHER DEALINGS IN THE SOFTWARE. - 'use strict'; const common = require('../common'); const http = require('http'); From 58ac26f60ea622d97acfec61c5e017ce22e8f9d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benno=20F=C3=BCnfst=C3=BCck?= Date: Mon, 16 Oct 2017 18:47:07 +0200 Subject: [PATCH 3/4] fixup! http: disallow two-byte characters in URL path --- lib/_http_client.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/_http_client.js b/lib/_http_client.js index 190216665d1079..cad3994d381a81 100644 --- a/lib/_http_client.js +++ b/lib/_http_client.js @@ -46,7 +46,7 @@ const errors = require('internal/errors'); // with an additional rule for ignoring percentage-escaped characters, but // that's a) hard to capture in a regular expression that performs well, and // b) possibly too restrictive for real-world usage. So instead we restrict the -// filter to just control characters and spaces. +// filter to just control characters, spaces and two-byte characters. // // This function is used in the case of small paths, where manual character code // checks can greatly outperform the equivalent regexp (tested in V8 5.4). From 60b3290a50879ade234835c95c16109e19757baa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benno=20F=C3=BCnfst=C3=BCck?= Date: Mon, 16 Oct 2017 19:47:28 +0200 Subject: [PATCH 4/4] http: always use regex for invalid path check Using the "optimized" version was not significantly faster and even slower for larger n. --- lib/_http_client.js | 36 ++---------------------------------- 1 file changed, 2 insertions(+), 34 deletions(-) diff --git a/lib/_http_client.js b/lib/_http_client.js index cad3994d381a81..c511f4fdc59517 100644 --- a/lib/_http_client.js +++ b/lib/_http_client.js @@ -41,33 +41,7 @@ const { outHeadersKey } = require('internal/http'); const { nextTick } = require('internal/process/next_tick'); const errors = require('internal/errors'); -// The actual list of disallowed characters in regexp form is more like: -// /[^A-Za-z0-9\-._~!$&'()*+,;=/:@]/ -// with an additional rule for ignoring percentage-escaped characters, but -// that's a) hard to capture in a regular expression that performs well, and -// b) possibly too restrictive for real-world usage. So instead we restrict the -// filter to just control characters, spaces and two-byte characters. -// -// This function is used in the case of small paths, where manual character code -// checks can greatly outperform the equivalent regexp (tested in V8 5.4). -function isInvalidPath(s) { - var i = 0; - if (s.charCodeAt(0) <= 32 || s.charCodeAt(0) > 0xFF) return true; - if (++i >= s.length) return false; - if (s.charCodeAt(1) <= 32 || s.charCodeAt(1) > 0xFF) return true; - if (++i >= s.length) return false; - if (s.charCodeAt(2) <= 32 || s.charCodeAt(2) > 0xFF) return true; - if (++i >= s.length) return false; - if (s.charCodeAt(3) <= 32 || s.charCodeAt(3) > 0xFF) return true; - if (++i >= s.length) return false; - if (s.charCodeAt(4) <= 32 || s.charCodeAt(4) > 0xFF) return true; - if (++i >= s.length) return false; - if (s.charCodeAt(5) <= 32 || s.charCodeAt(5) > 0xFF) return true; - ++i; - for (; i < s.length; ++i) - if (s.charCodeAt(i) <= 32 || s.charCodeAt(i) > 0xFF) return true; - return false; -} +const INVALID_PATH_REGEX = /[\u0000-\u0020\u0100-\uffff]/; function validateHost(host, name) { if (host != null && typeof host !== 'string') { @@ -117,13 +91,7 @@ function ClientRequest(options, cb) { var path; if (options.path) { path = String(options.path); - var invalidPath; - if (path.length <= 39) { // Determined experimentally in V8 5.4 - invalidPath = isInvalidPath(path); - } else { - invalidPath = /[\u0000-\u0020\u0100-\uffff]/.test(path); - } - if (invalidPath) + if (INVALID_PATH_REGEX.test(path)) throw new errors.TypeError('ERR_UNESCAPED_CHARACTERS', 'Request path'); }