Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 11 additions & 104 deletions src/url.ts
Original file line number Diff line number Diff line change
@@ -1,106 +1,20 @@
import { RequestError } from './error'

/**
 * Reports whether a character code is one of the three delimiters
 * `/` (0x2f), `?` (0x3f), or `#` (0x23).
 *
 * @param charCode - Character code to classify.
 * @returns `true` for `/`, `?`, or `#`; `false` for anything else.
 */
const isPathDelimiter = (charCode: number): boolean => {
  switch (charCode) {
    case 0x2f: // '/'
    case 0x3f: // '?'
    case 0x23: // '#'
      return true
    default:
      return false
  }
}
// Fast-path character validation for request URLs.
// The pattern is anchored at both ends: the string must begin with `/`, and every following
// character must belong to the class below (the ranges `&-;` and `?-\[` span ASCII 0x26-0x3b
// and 0x3f-0x5b respectively).
// Matches: ! # $ & ' ( ) * + , - . / 0-9 : ; = ? @ A-Z [ ] _ a-z ~
// Rejects: control chars, space, " % < > \ ^ ` { | } DEL, non-ASCII → fallback to new URL()
const reValidRequestUrl = /^\/[!#$&-;=?-\[\]_a-z~]*$/

// `/.`, `/..` (including `%2e` variants, which are handled by `%` detection) are normalized by `new URL()`.
//
// Reports whether the `.` located at `dotIndex` in `url` begins a `.` or `..` path segment:
// the dot must directly follow a `/`, and the one or two dots must be followed by
// `/`, `?`, `#`, or the end of the string.
const hasDotSegment = (url: string, dotIndex: number): boolean => {
// A dot at index 0 is treated as if it were preceded by '/'.
const prev = dotIndex === 0 ? 0x2f : url.charCodeAt(dotIndex - 1)
if (prev !== 0x2f) {
// Not at the start of a segment (e.g. "a.b"), so this cannot be a dot segment.
return false
}

const nextIndex = dotIndex + 1
if (nextIndex === url.length) {
// "/." at the very end of the string.
return true
}

const next = url.charCodeAt(nextIndex)
if (isPathDelimiter(next)) {
// "/." immediately followed by '/', '?' or '#'.
return true
}
if (next !== 0x2e) {
// A single dot followed by an ordinary character (e.g. "/.well-known") is a normal segment.
return false
}
// NOTE(review): this text comes from a diff view; the `reDotSegment` declaration below looks like
// an added line of the new revision shown interleaved with this function's body — confirm its
// real placement against the actual file.
// Dot segments: /. or /.. followed by / ? # or end-of-string
const reDotSegment = /\/\.\.?(?:[/?#]|$)/

// Two dots seen so far: it is a ".." segment only if it ends here or at a delimiter.
const nextNextIndex = dotIndex + 2
if (nextNextIndex === url.length) {
return true
}
return isPathDelimiter(url.charCodeAt(nextNextIndex))
}

// Lookup table indexed by ASCII code (0-127): a slot holds 1 when the character is
// allowed (0-9, A-Z, a-z and the punctuation listed below) and 0 otherwise.
const allowedRequestUrlChar = new Uint8Array(128)
;(() => {
  // Inclusive code ranges for 0-9, A-Z and a-z.
  const alphanumericRanges: [number, number][] = [
    [0x30, 0x39],
    [0x41, 0x5a],
    [0x61, 0x7a],
  ]
  for (const [first, last] of alphanumericRanges) {
    // `fill` takes an exclusive end index, hence `last + 1`.
    allowedRequestUrlChar.fill(1, first, last + 1)
  }

  // Individually allowed punctuation characters.
  for (const symbol of "-./:?#[]@!$&'()*+,;=~_") {
    allowedRequestUrlChar[symbol.charCodeAt(0)] = 1
  }
})()

// Lookup table indexed by ASCII code (0-127): a slot holds 1 for digits, lowercase
// letters and the characters . - _ : and 0 for everything else (uppercase included).
const safeHostChar = new Uint8Array(128)
;(() => {
  // `fill` takes an exclusive end index.
  safeHostChar.fill(1, 0x30, 0x3a) // 0-9
  safeHostChar.fill(1, 0x61, 0x7b) // a-z

  for (const symbol of '.-_:') {
    safeHostChar[symbol.charCodeAt(0)] = 1
  }
})()
// Host validation: a-z 0-9 . - _ with optional port 1000-59999
// The host part must be non-empty and lowercase-only (uppercase letters do not match).
// Port alternatives: `[1-5]\d{3,4}` covers 1000-59999 and `[6-9]\d{3}` covers 6000-9999,
// so a port never starts with 0 and never has fewer than four digits.
const reValidHost = /^[a-z0-9._-]+(?::(?:[1-5]\d{3,4}|[6-9]\d{3}))?$/

export const buildUrl = (scheme: string, host: string, incomingUrl: string) => {
const url = `${scheme}://${host}${incomingUrl}`

let needsHostValidationByURL = false
for (let i = 0, len = host.length; i < len; i++) {
const c = host.charCodeAt(i)
if (c > 0x7f || safeHostChar[c] === 0) {
needsHostValidationByURL = true
break
}
if (c === 0x3a) {
// ':'
i++
const firstDigit = host.charCodeAt(i)

// if the number starts with 1-9 and ranges from 1000-59999, then there is no need for normalization, so proceed
if (
firstDigit < 0x31 ||
firstDigit > 0x39 ||
i + 4 > len ||
i + (firstDigit < 0x36 ? 5 : 4) < len
) {
needsHostValidationByURL = true
break
}
for (; i < len; i++) {
const c = host.charCodeAt(i)
if (c < 0x30 || c > 0x39) {
needsHostValidationByURL = true
break
}
}

// valid port number
}
}

if (needsHostValidationByURL) {
if (!reValidHost.test(host)) {
const urlObj = new URL(url)

// if suspicious, check by host. host header sometimes contains port.
Expand All @@ -119,15 +33,8 @@ export const buildUrl = (scheme: string, host: string, incomingUrl: string) => {
throw new RequestError('Invalid URL')
}

for (let i = 1, len = incomingUrl.length; i < len; i++) {
const c = incomingUrl.charCodeAt(i)
if (
c > 0x7f ||
allowedRequestUrlChar[c] === 0 ||
(c === 0x2e && hasDotSegment(incomingUrl, i))
) {
return new URL(url).href
}
if (!reValidRequestUrl.test(incomingUrl) || reDotSegment.test(incomingUrl)) {
return new URL(url).href
}

return url
Expand Down
Loading