From dbd9d22da979dee36e533471cb14269921c22331 Mon Sep 17 00:00:00 2001
From: Taku Amano
Date: Wed, 15 Apr 2026 09:47:15 +0900
Subject: [PATCH] perf: replace Uint8Array lookup tables with regex in buildUrl

---
 src/url.ts | 115 +++++------------------------------------------------
 1 file changed, 11 insertions(+), 104 deletions(-)

diff --git a/src/url.ts b/src/url.ts
index dee2bf9..89c5850 100644
--- a/src/url.ts
+++ b/src/url.ts
@@ -1,106 +1,20 @@
 import { RequestError } from './error'
 
-const isPathDelimiter = (charCode: number): boolean =>
-  charCode === 0x2f || charCode === 0x3f || charCode === 0x23
+// Fast-path character validation for request URLs.
+// Matches: ! # $ & ' ( ) * + , - . / 0-9 : ; = ? @ A-Z [ ] _ a-z ~
+// Rejects: control chars, space, " % < > \ ^ ` { | } DEL, non-ASCII → fallback to new URL()
+const reValidRequestUrl = /^\/[!#$&-;=?-\[\]_a-z~]*$/
 
-// `/.`, `/..` (including `%2e` variants, which are handled by `%` detection) are normalized by `new URL()`.
-const hasDotSegment = (url: string, dotIndex: number): boolean => {
-  const prev = dotIndex === 0 ? 0x2f : url.charCodeAt(dotIndex - 1)
-  if (prev !== 0x2f) {
-    return false
-  }
-
-  const nextIndex = dotIndex + 1
-  if (nextIndex === url.length) {
-    return true
-  }
-
-  const next = url.charCodeAt(nextIndex)
-  if (isPathDelimiter(next)) {
-    return true
-  }
-  if (next !== 0x2e) {
-    return false
-  }
+// Dot segments: /. or /.. followed by / ? # or end-of-string
+const reDotSegment = /\/\.\.?(?:[/?#]|$)/
 
-  const nextNextIndex = dotIndex + 2
-  if (nextNextIndex === url.length) {
-    return true
-  }
-  return isPathDelimiter(url.charCodeAt(nextNextIndex))
-}
-
-const allowedRequestUrlChar = new Uint8Array(128)
-for (let c = 0x30; c <= 0x39; c++) {
-  allowedRequestUrlChar[c] = 1
-}
-for (let c = 0x41; c <= 0x5a; c++) {
-  allowedRequestUrlChar[c] = 1
-}
-for (let c = 0x61; c <= 0x7a; c++) {
-  allowedRequestUrlChar[c] = 1
-}
-;(() => {
-  const chars = "-./:?#[]@!$&'()*+,;=~_"
-  for (let i = 0; i < chars.length; i++) {
-    allowedRequestUrlChar[chars.charCodeAt(i)] = 1
-  }
-})()
-
-const safeHostChar = new Uint8Array(128)
-// 0-9
-for (let c = 0x30; c <= 0x39; c++) {
-  safeHostChar[c] = 1
-}
-// a-z
-for (let c = 0x61; c <= 0x7a; c++) {
-  safeHostChar[c] = 1
-}
-;(() => {
-  const chars = '.-_:'
-  for (let i = 0; i < chars.length; i++) {
-    safeHostChar[chars.charCodeAt(i)] = 1
-  }
-})()
+// Host validation: a-z 0-9 . - _ with optional port 1000-59999
+const reValidHost = /^[a-z0-9._-]+(?::(?:[1-5]\d{3,4}|[6-9]\d{3}))?$/
 
 export const buildUrl = (scheme: string, host: string, incomingUrl: string) => {
   const url = `${scheme}://${host}${incomingUrl}`
 
-  let needsHostValidationByURL = false
-  for (let i = 0, len = host.length; i < len; i++) {
-    const c = host.charCodeAt(i)
-    if (c > 0x7f || safeHostChar[c] === 0) {
-      needsHostValidationByURL = true
-      break
-    }
-    if (c === 0x3a) {
-      // ':'
-      i++
-      const firstDigit = host.charCodeAt(i)
-
-      // if the number starts with 1-9 and ranges from 1000-59999, then there is no need for normalization, so proceed
-      if (
-        firstDigit < 0x31 ||
-        firstDigit > 0x39 ||
-        i + 4 > len ||
-        i + (firstDigit < 0x36 ? 5 : 4) < len
-      ) {
-        needsHostValidationByURL = true
-        break
-      }
-      for (; i < len; i++) {
-        const c = host.charCodeAt(i)
-        if (c < 0x30 || c > 0x39) {
-          needsHostValidationByURL = true
-          break
-        }
-      }
-
-      // valid port number
-    }
-  }
-
-  if (needsHostValidationByURL) {
+  if (!reValidHost.test(host)) {
     const urlObj = new URL(url)
 
     // if suspicious, check by host. host header sometimes contains port.
@@ -119,15 +33,8 @@ export const buildUrl = (scheme: string, host: string, incomingUrl: string) => {
     throw new RequestError('Invalid URL')
   }
 
-  for (let i = 1, len = incomingUrl.length; i < len; i++) {
-    const c = incomingUrl.charCodeAt(i)
-    if (
-      c > 0x7f ||
-      allowedRequestUrlChar[c] === 0 ||
-      (c === 0x2e && hasDotSegment(incomingUrl, i))
-    ) {
-      return new URL(url).href
-    }
+  if (!reValidRequestUrl.test(incomingUrl) || reDotSegment.test(incomingUrl)) {
+    return new URL(url).href
   }
 
   return url