Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@
<p>

Address this vulnerability by escaping <code>.</code>
appropriately: <code>let regex = /(www|beta|)\.example\.com/</code>.
appropriately: <code>let regex = /((www|beta)\.)?example\.com/</code>.

</p>

</example>

<references>
<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>
<li>MDN: <a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions">Regular Expressions</a></li> <li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing linebreak between the two <li>s.

<li>OWASP: <a href="https://www.owasp.org/index.php/Unvalidated_Redirects_and_Forwards_Cheat_Sheet">XSS Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
</references>
</qhelp>
24 changes: 17 additions & 7 deletions javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,27 @@
*/

import javascript
import SmallStrings

/**
* A taint tracking configuration for incomplete hostname regular expressions sources.
*/
class Configuration extends TaintTracking::Configuration {
class Configuration extends DataFlow::Configuration {
Configuration() { this = "IncompleteHostnameRegExpTracking" }

override predicate isSource(DataFlow::Node source) {
isIncompleteHostNameRegExpPattern(source.getStringValue(), _)
exists (string pattern |
isSmallRegExpPattern(source, pattern) and
isIncompleteHostNameRegExpPattern(pattern, _)
)
}

override predicate isSink(DataFlow::Node sink) { isInterpretedAsRegExp(sink) }

override predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) {
any(TaintTracking::AdditionalTaintStep dts).step(pred, succ)
}

}

/**
Expand All @@ -36,22 +45,23 @@ predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
// an unescaped single `.`
"(?<!\\\\)[.]" +
// immediately followed by a sequence of subdomains, perhaps with some regex characters mixed in, followed by a known TLD
"([():|?a-z0-9-]+(\\\\)?[.](" + RegExpPatterns::commonTLD() + "))" + ".*", 1)
"([():|?a-z0-9-]+(\\\\)?[.]" + RegExpPatterns::commonTLD() + ")" + ".*", 1)
}

from Expr e, string pattern, string hostPart
from DataFlow::Node e, string pattern, string hostPart
where
isSmallRegExpPattern(e, pattern) and
(
e.(RegExpLiteral).getValue() = pattern
e.asExpr() instanceof RegExpLiteral
or
exists(Configuration cfg |
cfg.hasFlow(e.flow(), _) and
cfg.hasFlow(e, _) and
e.mayHaveStringValue(pattern)
)
) and
isIncompleteHostNameRegExpPattern(pattern, hostPart) and
// ignore patterns with capture groups after the TLD
not pattern.regexpMatch("(?i).*[.](" + RegExpPatterns::commonTLD() + ").*[(][?]:.*[)].*")
not pattern.regexpMatch("(?i).*[.]" + RegExpPatterns::commonTLD() + ".*[(][?]:.*[)].*")
select e,
"This regular expression has an unescaped '.' before '" + hostPart +
"', so it might match more hosts than expected."
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* @name Incomplete URL substring sanitization
* @description Security checks on the substrings of an unparsed URL are often vulnerable to bypassing.
* @kind problem
* @kind path-problem
* @problem.severity warning
* @precision high
* @id js/incomplete-url-substring-sanitization
Expand All @@ -11,7 +11,34 @@
*/

import javascript
private import semmle.javascript.dataflow.InferredTypes
import DataFlow::PathGraph
import SmallStrings

/**
* A node for a string value that looks like a URL. Used as a source
* for incomplete URL substring checks.
*/
class UrlStringSource extends DataFlow::Node {
string str;

UrlStringSource() {
isSmallString(this, str) and
(
// contains a domain on a common TLD, and perhaps some other URL components
str
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::commonTLD() +
"(:[0-9]+)?/?")
or
// is a HTTP URL to a domain on any TLD
str.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
)
}

/**
* Gets the string value of this node.
*/
string getString() { result = str }
}

/**
* A check on a string for whether it contains a given substring, possibly with restrictions on the location of the substring.
Expand All @@ -31,35 +58,57 @@ class SomeSubstringCheck extends DataFlow::Node {
DataFlow::Node getSubstring() { result = substring }
}

from SomeSubstringCheck check, DataFlow::Node substring, string target, string msg
where
substring = check.getSubstring() and
substring.mayHaveStringValue(target) and
(
// target contains a domain on a common TLD, and perhaps some other URL components
target
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+(" + RegExpPatterns::commonTLD() +
")(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
) and
(
if check instanceof StringOps::StartsWith
then msg = "may be followed by an arbitrary host name"
else
if check instanceof StringOps::EndsWith
then msg = "may be preceded by an arbitrary host name"
else msg = "can be anywhere in the URL, and arbitrary hosts may come before or after it"
/**
* A taint tracking configuration for incomplete URL substring checks.
*/
class DomainUrlStringSubstringCheckConfiguration extends DataFlow::Configuration {
DomainUrlStringSubstringCheckConfiguration() {
this = "DomainUrlStringSubstringCheckConfiguration"
}

override predicate isSource(DataFlow::Node source) { source instanceof UrlStringSource }

override predicate isSink(DataFlow::Node sink) { any(SomeSubstringCheck s).getSubstring() = sink }

override predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) {
any(TaintTracking::AdditionalTaintStep dts).step(pred, succ)
}

}

/**
* Holds if `source` flows to `sink`, indicating a use of `substring`
* in an incomplete URL substring checks, explained by `msg`.
*/
predicate isIncompleteSubstringCheck(
DataFlow::PathNode source, DataFlow::PathNode sink, string substring, string msg
) {
exists(DomainUrlStringSubstringCheckConfiguration cfg |
cfg.hasFlowPath(source, sink) and
substring = source.getNode().(UrlStringSource).getString()
) and
// whitelist
not (
// the leading dot in a subdomain sequence makes the suffix-check safe (if it is performed on the host of the url)
check instanceof StringOps::EndsWith and
target.regexpMatch("(?i)\\.([a-z0-9-]+)(\\.[a-z0-9-]+)+")
or
// the trailing port or slash makes the prefix-check safe
check instanceof StringOps::StartsWith and
target.regexpMatch(".*(:[0-9]+|/)")
exists(SomeSubstringCheck check | check.getSubstring() = sink.getNode() |
(
if check instanceof StringOps::StartsWith
then msg = "may be followed by an arbitrary host name"
else
if check instanceof StringOps::EndsWith
then msg = "may be preceded by an arbitrary host name"
else msg = "can be anywhere in the URL, and arbitrary hosts may come before or after it"
) and
// whitelist
not (
// the leading dot in a subdomain sequence makes the suffix-check safe (if it is performed on the host of the url)
check instanceof StringOps::EndsWith and
substring.regexpMatch("(?i)\\.([a-z0-9-]+)(\\.[a-z0-9-]+)+")
or
// the trailing port or slash makes the prefix-check safe
check instanceof StringOps::StartsWith and
substring.regexpMatch(".*(:[0-9]+|/)")
)
)
select check, "'$@' " + msg + ".", substring, target
}

from DataFlow::PathNode source, DataFlow::PathNode sink, string sourceString, string msg
where isIncompleteSubstringCheck(source, sink, sourceString, msg)
select sink.getNode(), source, sink, "'$@' " + msg + ".", source.getNode(), sourceString
51 changes: 51 additions & 0 deletions javascript/ql/src/Security/CWE-020/SmallStrings.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/**
* Provides predicates for restricting reasoning to small strings, in the name of performance.
*/

import javascript

/**
* Holds if `n` represents a small regular expression pattern,
* either because `n` is a regular expression literal, or because `n` is
* a string that could be used to construct a regular expression.
*/
pragma[noinline]
predicate isSmallRegExpPattern(DataFlow::Node n, string pattern) {
pattern.length() < 1000 and
(
n.asExpr().(RegExpLiteral).getValue() = pattern
or
n.asExpr().(SimpleConstantString).getSimpleStringValue() = pattern
)
}

/**
* Holds if `n` represents a small string.
*/
pragma[noinline]
predicate isSmallString(DataFlow::Node n, string str) {
str.length() < 1000 and
n.asExpr().(SimpleConstantString).getSimpleStringValue() = str
}

/**
* An expression that evaluates to a constant string in one step.
*/
private class SimpleConstantString extends ConstantString {
string str;

SimpleConstantString() {
str = this.(StringLiteral).getValue()
or
exists(TemplateLiteral template | this = template |
template.getNumChildExpr() = 0 and
str = ""
or
template.getNumChildExpr() = 1 and
str = template.getElement(0).(TemplateElement).getValue()
)
}

/** Gets the constant string value this expression evaluates to. */
string getSimpleStringValue() { result = str }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ app.get('/some/path', function(req, res) {
let url = req.param('url'),
host = urlLib.parse(url).host;
// BAD: the host of `url` may be controlled by an attacker
let regex = /(www|beta|).example.com/;
let regex = /((www|beta).)?example.com/;
if (host.match(regex)) {
res.redirect(url);
}
Expand Down
4 changes: 2 additions & 2 deletions javascript/ql/src/semmle/javascript/Regexp.qll
Original file line number Diff line number Diff line change
Expand Up @@ -397,10 +397,10 @@ predicate isInterpretedAsRegExp(DataFlow::Node source) {
*/
module RegExpPatterns {
/**
* Gets a pattern that matches common top-level domain names.
* Gets a pattern that matches common top-level domain names in lower case.
*/
string commonTLD() {
// according to ranking by http://google.com/search?q=site:.<<TLD>>
result = "com|org|edu|gov|uk|net|io"
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@
| tst-IncompleteHostnameRegExp.js:5:2:5:28 | /http:\\ ... le.net/ | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:11:13:11:37 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:12:10:12:34 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:15:22:15:46 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:17:14:17:30 | test.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:19:17:19:34 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:22:27:22:44 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:28:22:28:39 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:30:30:30:47 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | This regular expression has an unescaped '.' before ')?example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:40:2:40:29 | /https? ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:41:41:41:68 | '^https ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:42:13:42:61 | 'http[s ... \\/(.+)' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:44:9:44:100 | 'protos ... ernal)' | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:46:2:46:26 | /exampl ... le.com/ | This regular expression has an unescaped '.' before 'dev\|example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:48:41:48:68 | '^https ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
Loading