diff --git a/change-notes/1.21/analysis-javascript.md b/change-notes/1.21/analysis-javascript.md index d2faa5bd44a8..8a63c8df65cc 100644 --- a/change-notes/1.21/analysis-javascript.md +++ b/change-notes/1.21/analysis-javascript.md @@ -27,6 +27,7 @@ | **Query** | **Tags** | **Purpose** | |-----------------------------------------------|------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Missing regular expression anchor (`js/regex/missing-regexp-anchor`) | correctness, security, external/cwe/cwe-20 | Highlights regular expression patterns that may be missing an anchor, indicating a possible violation of [CWE-20](https://cwe.mitre.org/data/definitions/20.html). Results are not shown on LGTM by default. | | Prototype pollution (`js/prototype-pollution`) | security, external/cwe-250, external/cwe-400 | Highlights code that allows an attacker to modify a built-in prototype object through an unsanitized recursive merge function. The results are shown on LGTM by default. | ## Changes to existing queries diff --git a/javascript/config/suites/javascript/security b/javascript/config/suites/javascript/security index 1d48d378db4d..8ff7c7530c15 100644 --- a/javascript/config/suites/javascript/security +++ b/javascript/config/suites/javascript/security @@ -3,6 +3,7 @@ + semmlecode-javascript-queries/Security/CWE-020/IncompleteHostnameRegExp.ql: /Security/CWE/CWE-020 + semmlecode-javascript-queries/Security/CWE-020/IncompleteUrlSubstringSanitization.ql: /Security/CWE/CWE-020 + semmlecode-javascript-queries/Security/CWE-020/IncorrectSuffixCheck.ql: /Security/CWE/CWE-020 ++ semmlecode-javascript-queries/Security/CWE-020/MissingRegExpAnchor.ql: /Security/CWE/CWE-020 + semmlecode-javascript-queries/Security/CWE-022/TaintedPath.ql: /Security/CWE/CWE-022 + semmlecode-javascript-queries/Security/CWE-022/ZipSlip.ql: /Security/CWE/CWE-022 + semmlecode-javascript-queries/Security/CWE-078/CommandInjection.ql: /Security/CWE/CWE-078 diff --git a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql index 70589092064e..88ebe5cf7d1e 100644 --- a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql +++ b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql @@ -12,38 +12,6 @@ import javascript -/** - * Gets a node whose value may flow (inter-procedurally) to a position where it is interpreted - * as a regular expression. - */ -DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker t) { - t.start() and - re = result and - isInterpretedAsRegExp(result) - or - exists(DataFlow::TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) | - t2 = t.smallstep(result, succ) - or - any(TaintTracking::AdditionalTaintStep dts).step(result, succ) and - t = t2 - ) -} - -DataFlow::Node regExpSource(DataFlow::Node re) { - result = regExpSource(re, DataFlow::TypeBackTracker::end()) -} - -/** Holds if `re` is a regular expression with value `pattern`. */ -predicate regexp(DataFlow::Node re, string pattern, string kind, DataFlow::Node aux) { - re.asExpr().(RegExpLiteral).getValue() = pattern and - kind = "regular expression" and - aux = re - or - re = regExpSource(aux) and - pattern = re.getStringValue() and - kind = "string, which is used as a regular expression $@," -} - /** * Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`, * and `pattern` contains a subtle mistake that allows it to match unexpected hosts. @@ -58,11 +26,21 @@ predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) { "([():|?a-z0-9-]+(\\\\)?[.]" + RegExpPatterns::commonTLD() + ")" + ".*", 1) } -from DataFlow::Node re, string pattern, string hostPart, string kind, DataFlow::Node aux -where regexp(re, pattern, kind, aux) and +from RegExpPatternSource re, string pattern, string hostPart, string kind, DataFlow::Node aux +where + pattern = re.getPattern() and isIncompleteHostNameRegExpPattern(pattern, hostPart) and + ( + if re.getAParse() != re + then ( + kind = "string, which is used as a regular expression $@," and + aux = re.getAParse() + ) else ( + kind = "regular expression" and aux = re + ) + ) and // ignore patterns with capture groups after the TLD not pattern.regexpMatch("(?i).*[.](" + RegExpPatterns::commonTLD() + ").*[(][?]:.*[)].*") select re, "This " + kind + " has an unescaped '.' before '" + hostPart + - "', so it might match more hosts than expected.", aux, "here" \ No newline at end of file + "', so it might match more hosts than expected.", aux, "here" diff --git a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp new file mode 100644 index 000000000000..6c2d6aa59f2b --- /dev/null +++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp @@ -0,0 +1,77 @@ + + + + +

+ + Sanitizing untrusted input with regular expressions is a + common technique. However, it is error-prone to match untrusted input + against regular expressions without anchors such as ^ or + $. Malicious input can bypass such security checks by + embedding one of the allowed patterns in an unexpected location. + +

+ +

+ + Even if the matching is not done in a security-critical + context, it may still cause undesirable behavior when the regular + expression accidentally matches. + +

+
+ + +

+ + Use anchors to ensure that regular expressions match at + the expected locations. + +

+
+ + + +

+ + The following example code checks that a URL redirection + will reach the example.com domain, or one of its + subdomains, and not some malicious site. + +

+ + + +

+ + The check with the regular expression match is, however, easy to bypass. For example + by embedding example.com in the path component: + http://evil-example.net/example.com, or in the query + string component: http://evil-example.net/?x=example.com. + + Address these shortcomings by using anchors in the regular expression instead: + +

+ + + +

+ + A related mistake is to write a regular expression with + multiple alternatives, but to only include an anchor for one of the + alternatives. As an example, the regular expression + /^www\\.example\\.com|beta\\.example\\.com/ will match the host + evil.beta.example.com because the regular expression is parsed + as /(^www\\.example\\.com)|(beta\\.example\\.com)/ + +

+
+ + +
  • MDN: Regular Expressions
  • +
  • OWASP: SSRF
  • +
  • OWASP: XSS Unvalidated Redirects and Forwards Cheat Sheet.
  • +
    +
    diff --git a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql new file mode 100644 index 000000000000..d0141361f4bc --- /dev/null +++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql @@ -0,0 +1,83 @@ +/** + * @name Missing regular expression anchor + * @description Regular expressions without anchors can be vulnerable to bypassing. + * @kind problem + * @problem.severity warning + * @precision medium + * @id js/regex/missing-regexp-anchor + * @tags correctness + * security + * external/cwe/cwe-20 + */ + +import javascript + +/** + * Holds if `src` is a pattern for a collection of alternatives where + * only the first or last alternative is anchored, indicating a + * precedence mistake explained by `msg`. + * + * The canonical example of such a mistake is: `^a|b|c`, which is + * parsed as `(^a)|(b)|(c)`. + */ +predicate isInterestingSemiAnchoredRegExpString(RegExpPatternSource src, string msg) { + exists(string str, string maybeGroupedStr, string regex, string anchorPart, string escapedDot | + // a dot that might be escaped in a regular expression, for example `/\./` or `new RegExp('\\.')` + escapedDot = "\\\\[.]" and + // a string that is mostly free from special reqular expression symbols + str = "(?:(?:" + escapedDot + ")|[a-z:/.?_,@0-9 -])+" and + // the string may be wrapped in parentheses + maybeGroupedStr = "(?:" + str + "|\\(" + str + "\\))" and + ( + // a problematic pattern: `^a|b|...|x` + regex = "(?i)(\\^" + maybeGroupedStr + ")(?:\\|" + maybeGroupedStr + ")+" + or + // a problematic pattern: `a|b|...|x$` + regex = "(?i)(?:" + maybeGroupedStr + "\\|)+(" + maybeGroupedStr + "\\$)" + ) and + anchorPart = src.getPattern().regexpCapture(regex, 1) and + anchorPart.regexpMatch("(?i).*[a-z].*") and + msg = "Misleading operator precedence. The subexpression '" + anchorPart + "' is anchored, but the other parts of this regular expression are not" + ) +} + +/** + * Holds if `src` is an unanchored pattern for a URL, indicating a + * mistake explained by `msg`. + */ +predicate isInterestingUnanchoredRegExpString(RegExpPatternSource src, string msg) { + exists(string pattern | pattern = src.getPattern() | + // a substring sequence of a protocol and subdomains, perhaps with some regex characters mixed in, followed by a known TLD + pattern + .regexpMatch("(?i)[():|?a-z0-9-\\\\./]+[.]" + RegExpPatterns::commonTLD() + + "([/#?():]\\S*)?") and + // without any anchors + pattern.regexpMatch("[^$^]+") and + // that is not used for capture or replace + not exists(DataFlow::MethodCallNode mcn, string name | name = mcn.getMethodName() | + name = "exec" and + mcn = src.getARegExpObject().getAMethodCall() and + exists(mcn.getAPropertyRead()) + or + exists(DataFlow::Node arg | + arg = mcn.getArgument(0) and + ( + src.getARegExpObject().flowsTo(arg) or + src.getAParse() = arg + ) + | + name = "replace" + or + name = "match" and exists(mcn.getAPropertyRead()) + ) + ) and + msg = "When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it." + ) +} + +from DataFlow::Node nd, string msg +where + isInterestingUnanchoredRegExpString(nd, msg) + or + isInterestingSemiAnchoredRegExpString(nd, msg) +select nd, msg diff --git a/javascript/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.js b/javascript/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.js index d3dfc3a867d0..11d11a97ca4a 100644 --- a/javascript/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.js +++ b/javascript/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.js @@ -2,7 +2,7 @@ app.get('/some/path', function(req, res) { let url = req.param('url'), host = urlLib.parse(url).host; // BAD: the host of `url` may be controlled by an attacker - let regex = /((www|beta).)?example.com/; + let regex = /^((www|beta).)?example.com/; if (host.match(regex)) { res.redirect(url); } diff --git a/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js new file mode 100644 index 000000000000..fbf3430c5247 --- /dev/null +++ b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js @@ -0,0 +1,7 @@ +app.get("/some/path", function(req, res) { + let url = req.param("url"); + // BAD: the host of `url` may be controlled by an attacker + if (url.match(/https?:\/\/www\.example\.com\//)) { + res.redirect(url); + } +}); diff --git a/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js new file mode 100644 index 000000000000..ebb408eec4be --- /dev/null +++ b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js @@ -0,0 +1,7 @@ +app.get("/some/path", function(req, res) { + let url = req.param("url"); + // GOOD: the host of `url` can not be controlled by an attacker + if (url.match(/^https?:\/\/www\.example\.com\//)) { + res.redirect(url); + } +}); diff --git a/javascript/ql/src/semmle/javascript/Regexp.qll b/javascript/ql/src/semmle/javascript/Regexp.qll index 58cd02adf7d5..909a52512c72 100644 --- a/javascript/ql/src/semmle/javascript/Regexp.qll +++ b/javascript/ql/src/semmle/javascript/Regexp.qll @@ -41,6 +41,9 @@ abstract class RegExpTerm extends Locatable, @regexpterm { override string toString() { regexpterm(this, _, _, _, result) } + /** Gets the raw source text of this term. */ + string getRawValue() { regexpterm(this, _, _, _, result) } + /** Holds if this regular expression term can match the empty string. */ abstract predicate isNullable(); @@ -404,3 +407,94 @@ module RegExpPatterns { result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])" } } + +/** + * Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted + * as a part of a regular expression. + */ +private DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker t) { + t.start() and + re = result and + isInterpretedAsRegExp(result) + or + exists(DataFlow::TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) | + t2 = t.smallstep(result, succ) + or + any(TaintTracking::AdditionalTaintStep dts).step(result, succ) and + t = t2 + ) +} + +/** + * Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted + * as a part of a regular expression. + */ +private DataFlow::Node regExpSource(DataFlow::Node re) { + result = regExpSource(re, DataFlow::TypeBackTracker::end()) +} + +/** + * A node whose value may flow to a position where it is interpreted + * as a part of a regular expression. + */ +abstract class RegExpPatternSource extends DataFlow::Node { + /** + * Gets a node where the pattern of this node is parsed as a part of + * a regular expression. + */ + abstract DataFlow::Node getAParse(); + + /** + * Gets the pattern of this node that is interpreted as a part of a + * regular expression. + */ + abstract string getPattern(); + + /** + * Gets a regular expression object that is constructed from the pattern + * of this node. + */ + abstract DataFlow::SourceNode getARegExpObject(); +} + +/** + * A regular expression literal, viewed as the pattern source for itself. + */ +private class RegExpLiteralPatternSource extends RegExpPatternSource { + string pattern; + + RegExpLiteralPatternSource() { + exists(string raw | raw = asExpr().(RegExpLiteral).getRoot().getRawValue() | + // hide the fact that `/` is escaped in the literal + pattern = raw.regexpReplaceAll("\\\\/", "/") + ) + } + + override DataFlow::Node getAParse() { result = this } + + override string getPattern() { result = pattern } + + override DataFlow::SourceNode getARegExpObject() { result = this } +} + +/** + * A node whose string value may flow to a position where it is interpreted + * as a part of a regular expression. + */ +private class StringRegExpPatternSource extends RegExpPatternSource { + DataFlow::Node parse; + + StringRegExpPatternSource() { this = regExpSource(parse) } + + override DataFlow::Node getAParse() { result = parse } + + override DataFlow::SourceNode getARegExpObject() { + exists(DataFlow::InvokeNode constructor | + constructor = DataFlow::globalVarRef("RegExp").getAnInvocation() and + parse = constructor.getArgument(0) and + result = constructor + ) + } + + override string getPattern() { result = getStringValue() } +} diff --git a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected index b34db52ed794..c128823024eb 100644 --- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected +++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected @@ -1,24 +1,27 @@ -| tst-IncompleteHostnameRegExp.js:3:2:3:28 | /http:\\ ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:3:2:3:28 | /http:\\ ... le.com/ | here | -| tst-IncompleteHostnameRegExp.js:5:2:5:28 | /http:\\ ... le.net/ | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:5:2:5:28 | /http:\\ ... le.net/ | here | -| tst-IncompleteHostnameRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | here | -| tst-IncompleteHostnameRegExp.js:11:13:11:37 | "http:/ ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:11:13:11:37 | "http:/ ... le.com" | here | -| tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | here | -| tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | here | +| tst-IncompleteHostnameRegExp.js:3:2:3:29 | /^http: ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:3:2:3:29 | /^http: ... le.com/ | here | +| tst-IncompleteHostnameRegExp.js:5:2:5:29 | /^http: ... le.net/ | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:5:2:5:29 | /^http: ... le.net/ | here | +| tst-IncompleteHostnameRegExp.js:6:2:6:43 | /^http: ... b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:6:2:6:43 | /^http: ... b).com/ | here | +| tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | here | +| tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | here | +| tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | here | | tst-IncompleteHostnameRegExp.js:17:14:17:30 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | here | -| tst-IncompleteHostnameRegExp.js:19:17:19:34 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:20:13:20:26 | `${hostname}$` | here | -| tst-IncompleteHostnameRegExp.js:22:27:22:44 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:23:13:23:27 | domain.hostname | here | -| tst-IncompleteHostnameRegExp.js:28:23:28:40 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:26:21:26:35 | domain.hostname | here | -| tst-IncompleteHostnameRegExp.js:30:30:30:47 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:32:21:32:35 | domain.hostname | here | +| tst-IncompleteHostnameRegExp.js:19:17:19:35 | '^test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:20:13:20:26 | `${hostname}$` | here | +| tst-IncompleteHostnameRegExp.js:22:27:22:45 | 'test.example.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:23:13:23:27 | domain.hostname | here | +| tst-IncompleteHostnameRegExp.js:28:23:28:41 | 'test.example.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:26:21:26:35 | domain.hostname | here | +| tst-IncompleteHostnameRegExp.js:30:30:30:48 | 'test.example.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:32:21:32:35 | domain.hostname | here | | tst-IncompleteHostnameRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | This regular expression has an unescaped '.' before ')?example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | here | | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | here | -| tst-IncompleteHostnameRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | here | -| tst-IncompleteHostnameRegExp.js:40:2:40:29 | /https? ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:40:2:40:29 | /https? ... le.com/ | here | -| tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | here | +| tst-IncompleteHostnameRegExp.js:39:2:39:33 | /^(http ... om\\/)/g | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:39:2:39:33 | /^(http ... om\\/)/g | here | +| tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | here | +| tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | here | | tst-IncompleteHostnameRegExp.js:41:41:41:68 | '^https ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | here | -| tst-IncompleteHostnameRegExp.js:42:13:42:61 | 'http[s ... \\/(.+)' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:42:13:42:61 | 'http[s ... \\/(.+)' | here | +| tst-IncompleteHostnameRegExp.js:42:13:42:62 | '^http[ ... \\/(.+)' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:42:13:42:62 | '^http[ ... \\/(.+)' | here | | tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | here | -| tst-IncompleteHostnameRegExp.js:44:9:44:100 | 'protos ... ernal)' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:100 | 'protos ... ernal)' | here | -| tst-IncompleteHostnameRegExp.js:46:2:46:26 | /exampl ... le.com/ | This regular expression has an unescaped '.' before 'dev\|example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:46:2:46:26 | /exampl ... le.com/ | here | -| tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here | +| tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here | +| tst-IncompleteHostnameRegExp.js:46:2:46:29 | /^(exam ... e.com)/ | This regular expression has an unescaped '.' before 'dev\|example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:46:2:46:29 | /^(exam ... e.com)/ | here | +| tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here | | tst-IncompleteHostnameRegExp.js:48:41:48:68 | '^https ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here | -| tst-IncompleteHostnameRegExp.js:53:13:53:35 | 'test.' ... le.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:35 | 'test.' ... le.com' | here | +| tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here | +| tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | This regular expression has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | here | +| tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | This regular expression has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | here | +| tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | This regular expression has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | here | diff --git a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected new file mode 100644 index 000000000000..dbdadc83eed6 --- /dev/null +++ b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected @@ -0,0 +1,49 @@ +| tst-SemiAnchoredRegExp.js:3:2:3:7 | /^a\|b/ | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:6:2:6:9 | /^a\|b\|c/ | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:12:2:12:9 | /^a\|(b)/ | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:14:2:14:11 | /^(a)\|(b)/ | Misleading operator precedence. The subexpression '^(a)' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:17:2:17:7 | /a\|b$/ | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:20:2:20:9 | /a\|b\|c$/ | Misleading operator precedence. The subexpression 'c$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:26:2:26:9 | /(a)\|b$/ | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:28:2:28:11 | /(a)\|(b)$/ | Misleading operator precedence. The subexpression '(b)$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:31:2:31:25 | /^good\\ ... r\\.com/ | Misleading operator precedence. The subexpression '^good\\.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:39:13:39:18 | "^a\|b" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:42:13:42:20 | "^a\|b\|c" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:48:13:48:20 | "^a\|(b)" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:50:13:50:22 | "^(a)\|(b)" | Misleading operator precedence. The subexpression '^(a)' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:53:13:53:18 | "a\|b$" | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:56:13:56:20 | "a\|b\|c$" | Misleading operator precedence. The subexpression 'c$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:62:13:62:20 | "(a)\|b$" | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:64:13:64:22 | "(a)\|(b)$" | Misleading operator precedence. The subexpression '(b)$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:68:13:68:38 | '^good\\ ... \\\\.com' | Misleading operator precedence. The subexpression '^good\\.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:69:13:69:40 | '^good\\ ... \\\\.com' | Misleading operator precedence. The subexpression '^good\\.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:79:2:79:27 | /(\\.xxx ... .zzz)$/ | Misleading operator precedence. The subexpression '(\\.zzz)$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:81:2:81:23 | /\\.xxx\| ... zzz$/ig | Misleading operator precedence. The subexpression '\\.zzz$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:82:2:82:19 | /\\.xxx\|\\.yyy\|zzz$/ | Misleading operator precedence. The subexpression 'zzz$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:85:2:85:28 | /^(xxx ... yyy)/i | Misleading operator precedence. The subexpression '^(xxx yyy zzz)' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:87:2:87:24 | /^(xxx: ... (zzz:)/ | Misleading operator precedence. The subexpression '^(xxx:)' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:88:2:88:23 | /^(xxx? ... zzz\\/)/ | Misleading operator precedence. The subexpression '^(xxx?:)' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:89:2:89:16 | /^@media\|@page/ | Misleading operator precedence. The subexpression '^@media' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:91:2:91:21 | /^click\|mouse\|touch/ | Misleading operator precedence. The subexpression '^click' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:92:2:92:43 | /^http: ... r\\.com/ | Misleading operator precedence. The subexpression '^http://good\\.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:93:2:93:47 | /^https ... r\\.com/ | Misleading operator precedence. The subexpression '^https?://good\\.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:94:2:94:55 | /^mouse ... ragend/ | Misleading operator precedence. The subexpression '^mouse' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:95:2:95:14 | /^xxx:\|yyy:/i | Misleading operator precedence. The subexpression '^xxx:' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:96:2:96:18 | /_xxx\|_yyy\|_zzz$/ | Misleading operator precedence. The subexpression '_zzz$' is anchored, but the other parts of this regular expression are not | +| tst-UnanchoredUrlRegExp.js:3:43:3:61 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:4:54:4:72 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:10:2:10:22 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:11:13:11:31 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:13:44:13:62 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:15:13:15:31 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:19:43:19:62 | "https?://good.com/" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:20:43:20:66 | "https? ... m:8080" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:23:3:23:21 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:24:3:24:23 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:25:14:25:32 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:35:2:35:32 | /https? ... 0-9]+)/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:49:11:49:51 | /youtub ... -_]+)/i | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:77:11:77:32 | /vimeo\\ ... 0-9]+)/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | diff --git a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.qlref b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.qlref new file mode 100644 index 000000000000..5860f4b3a82a --- /dev/null +++ b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.qlref @@ -0,0 +1 @@ +Security/CWE-020/MissingRegExpAnchor.ql \ No newline at end of file diff --git a/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js b/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js index 708fbee16f29..47b85447abcb 100644 --- a/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js +++ b/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js @@ -1,54 +1,54 @@ (function() { - /http:\/\/example.com/; // OK - /http:\/\/test.example.com/; // NOT OK - /http:\/\/test\\.example.com/; // OK - /http:\/\/test.example.net/; // NOT OK - /http:\/\/test.(example-a|example-b).com/; // NOT OK - /http:\/\/(.+)\\.example.com/; // NOT OK, but not yet supported with enough precision - /http:\/\/(\\.+)\\.example.com/; // OK - /http:\/\/(?:.+)\\.test\\.example.com/; // NOT OK, but not yet supported with enough precision - /http:\/\/test.example.com\/(?:.*)/; // OK - new RegExp("http://test.example.com"); // NOT OK + /^http:\/\/example.com/; // OK + /^http:\/\/test.example.com/; // NOT OK + /^http:\/\/test\\.example.com/; // OK + /^http:\/\/test.example.net/; // NOT OK + /^http:\/\/test.(example-a|example-b).com/; // NOT OK + /^http:\/\/(.+)\\.example.com/; // NOT OK, but not yet supported with enough precision + /^http:\/\/(\\.+)\\.example.com/; // OK + /^http:\/\/(?:.+)\\.test\\.example.com/; // NOT OK, but not yet supported with enough precision + /^http:\/\/test.example.com\/(?:.*)/; // OK + new RegExp("^http://test.example.com"); // NOT OK s.match("^http://test.example.com"); // NOT OK function id(e) { return e; } - new RegExp(id(id(id("http://test.example.com")))); // NOT OK, but not supported by type tracking + new RegExp(id(id(id("^http://test.example.com")))); // NOT OK, but not supported by type tracking new RegExp(`test.example.com$`); // NOT OK - let hostname = 'test.example.com'; // NOT OK + let hostname = '^test.example.com'; // NOT OK new RegExp(`${hostname}$`); - let domain = { hostname: 'test.example.com' }; + let domain = { hostname: 'test.example.com$' }; new RegExp(domain.hostname); function convert1(domain) { return new RegExp(domain.hostname); } - convert1({ hostname: 'test.example.com' }); // NOT OK + convert1({ hostname: 'test.example.com$' }); // NOT OK - let domains = [ { hostname: 'test.example.com' } ]; // NOT OK + let domains = [ { hostname: 'test.example.com$' } ]; // NOT OK function convert2(domain) { return new RegExp(domain.hostname); } domains.map(d => convert2(d)); - /(.+\.(?:example-a|example-b)\.com)/; // NOT OK, but not yet supported with enough precision + /^(.+\.(?:example-a|example-b)\.com)/; // NOT OK, but not yet supported with enough precision /^(https?:)?\/\/((service|www).)?example.com(?=$|\/)/; // NOT OK /^(http|https):\/\/www.example.com\/p\/f\//; // NOT OK - /\(http:\/\/sub.example.com\/\)/g; // NOT OK - /https?:\/\/api.example.com/; // NOT OK + /^(http:\/\/sub.example.com\/)/g; // NOT OK + /^https?:\/\/api.example.com/; // NOT OK new RegExp('^http://localhost:8000|' + '^https?://.+\.example\.com'); // NOT OK - new RegExp('http[s]?:\/\/?sub1\.sub2\.example\.com\/f\/(.+)'); // NOT OK + new RegExp('^http[s]?:\/\/?sub1\.sub2\.example\.com\/f\/(.+)'); // NOT OK /^https:\/\/[a-z]*.example.com$/; // NOT OK - RegExp('protos?://(localhost|.+.example.net|.+.example-a.com|.+.example-b.com|.+.example.internal)'); // NOT OK + RegExp('^protos?://(localhost|.+.example.net|.+.example-a.com|.+.example-b.com|.+.example.internal)'); // NOT OK - /example.dev|example.com/; // OK, but still flagged + /^(example.dev|example.com)/; // OK, but still flagged new RegExp('^http://localhost:8000|' + '^https?://.+\.example\.com'); // NOT OK - var primary = 'example.com'; + var primary = 'example.com$'; new RegExp('test.' + primary); // NOT OK, but not detected - new RegExp('test.' + 'example.com'); // NOT OK + new RegExp('test.' + 'example.com$'); // NOT OK }); diff --git a/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js b/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js new file mode 100644 index 000000000000..f679e8074c57 --- /dev/null +++ b/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js @@ -0,0 +1,126 @@ +(function coreRegExp() { + /^a|/; + /^a|b/; // NOT OK + /a|^b/; + /^a|^b/; + /^a|b|c/; // NOT OK + /a|^b|c/; + /a|b|^c/; + /^a|^b|c/; + + /(^a)|b/; + /^a|(b)/; // NOT OK + /^a|(^b)/; + /^(a)|(b)/; // NOT OK + + + /a|b$/; // NOT OK + /a$|b/; + /a$|b$/; + /a|b|c$/; // NOT OK + /a|b$|c/; + /a$|b|c/; + /a|b$|c$/; + + /a|(b$)/; + /(a)|b$/; // NOT OK + /(a$)|b$/; + /(a)|(b)$/; // NOT OK + + /^good.com|better.com/; // NOT OK + /^good\.com|better\.com/; // NOT OK + /^good\\.com|better\\.com/; + /^good\\\.com|better\\\.com/; + /^good\\\\.com|better\\\\.com/; +}); + +(function coreString() { + new RegExp("^a|"); + new RegExp("^a|b"); // NOT OK + new RegExp("a|^b"); + new RegExp("^a|^b"); + new RegExp("^a|b|c"); // NOT OK + new RegExp("a|^b|c"); + new RegExp("a|b|^c"); + new RegExp("^a|^b|c"); + + new RegExp("(^a)|b"); + new RegExp("^a|(b)"); // NOT OK + new RegExp("^a|(^b)"); + new RegExp("^(a)|(b)"); // NOT OK + + + new RegExp("a|b$"); // NOT OK + new RegExp("a$|b"); + new RegExp("a$|b$"); + new RegExp("a|b|c$"); // NOT OK + new RegExp("a|b$|c"); + new RegExp("a$|b|c"); + new RegExp("a|b$|c$"); + + new RegExp("a|(b$)"); + new RegExp("(a)|b$"); // NOT OK + new RegExp("(a$)|b$"); + new RegExp("(a)|(b)$"); // NOT OK + + new RegExp('^good.com|better.com'); // NOT OK + new RegExp('^good\.com|better\.com'); // NOT OK + new RegExp('^good\\.com|better\\.com'); // NOT OK + new RegExp('^good\\\.com|better\\\.com'); // NOT OK + new RegExp('^good\\\\.com|better\\\\.com'); +}); + +(function realWorld() { + // real-world examples that have been anonymized a bit + + /* + * NOT OK: flagged + */ + /(\.xxx)|(\.yyy)|(\.zzz)$/; + /(^left|right|center)\sbottom$/; // not flagged at the moment due to multiple anchors + /\.xxx|\.yyy|\.zzz$/ig; + /\.xxx|\.yyy|zzz$/; + /^(?:mouse|contextmenu)|click/; // not flagged at the moment due to nested alternatives + /^([A-Z]|xxx[XY]$)/; // not flagged at the moment due to multiple anchors + /^(xxx yyy zzz)|(xxx yyy)/i; + /^(xxx yyy zzz)|(xxx yyy)|(1st( xxx)? yyy)|xxx|1st/i; // not flagged at the moment due to nested parens + /^(xxx:)|(yyy:)|(zzz:)/; + /^(xxx?:)|(yyy:zzz\/)/; + /^@media|@page/; + /^\s*(xxx?|yyy|zzz):|xxx:yyy\//; // not flagged at the moment due to quantifiers + /^click|mouse|touch/; + /^http:\/\/good\.com|http:\/\/better\.com/; + /^https?:\/\/good\.com|https?:\/\/better\.com/; + /^mouse|touch|click|contextmenu|drop|dragover|dragend/; + /^xxx:|yyy:/i; + /_xxx|_yyy|_zzz$/; + /em|%$/; // not flagged at the moment due to the anchor not being for letters + + /* + * MAYBE OK due to apparent complexity: not flagged + */ + /(?:^[#?]?|&)([^=&]+)(?:=([^&]*))?/g; + /(^\s*|;\s*)\*.*;/m; + /(^\s*|\[)(?:xxx|yyy_(?:xxx|yyy)|xxx|yyy(?:xxx|yyy)?|xxx|yyy)\b/m; + /\s\S| \t|\t |\s$/; + /\{[^}{]*\{|\}[^}{]*\}|\{[^}]*$/g; + /^((\+|\-)\s*\d\d\d\d)|((\+|\-)\d\d\:?\d\d)/; + /^(\/\/)|([a-z]+:(\/\/)?)/; + /^[=?!#%@$]|!(?=[:}])/; + /^[\[\]!:]|[<>]/; + /^for\b|\b(?:xxx|yyy)\b/i; + /^if\b|\b(?:xxx|yyy|zzz)\b/i; + + /* + * OK: not flagged + */ + /$^|only-match/g; + /(#.+)|#$/; + /(NaN| {2}|^$)/; + /[^\n]*(?:\n|[^\n]$)/g; + /^$|\/(?:xxx|yyy)zzz/i; + /^(\/|(xxx|yyy|zzz)$)/; + /^9$|27/; + /^\+|\s*/g; + /xxx_yyy=\w+|^$/; +}); diff --git a/javascript/ql/test/query-tests/Security/CWE-020/tst-UnanchoredUrlRegExp.js b/javascript/ql/test/query-tests/Security/CWE-020/tst-UnanchoredUrlRegExp.js new file mode 100644 index 000000000000..1831f31be533 --- /dev/null +++ b/javascript/ql/test/query-tests/Security/CWE-020/tst-UnanchoredUrlRegExp.js @@ -0,0 +1,106 @@ +(function(x){ + + "http://evil.com/?http://good.com".match("https?://good.com"); // NOT OK + "http://evil.com/?http://good.com".match(new RegExp("https?://good.com")); // NOT OK + "http://evil.com/?http://good.com".match("^https?://good.com"); // OK + "http://evil.com/?http://good.com".match(/^https?:\/\/good.com/); // OK + "http://evil.com/?http://good.com".match("(^https?://good1.com)|(^https?://good2.com)"); // OK + "http://evil.com/?http://good.com".match("(https?://good.com)|(^https?://goodie.com)"); // NOT OK, but not detected + + /https?:\/\/good.com/.exec("http://evil.com/?http://good.com"); // NOT OK + new RegExp("https?://good.com").exec("http://evil.com/?http://good.com"); // NOT OK + + "http://evil.com/?http://good.com".search("https?://good.com"); // NOT OK + + new RegExp("https?://good.com").test("http://evil.com/?http://good.com"); // NOT OK + + "something".match("other"); // OK + "something".match("x.commissary"); // OK + "http://evil.com/?http://good.com".match("https?://good.com/"); // NOT OK + "http://evil.com/?http://good.com".match("https?://good.com:8080"); // NOT OK + + let trustedUrls = [ + "https?://good.com", // NOT OK, referenced below + /https?:\/\/good.com/, // NOT OK, referenced below + new RegExp("https?://good.com"), // NOT OK, referenced below + "^https?://good.com" + ]; + function isTrustedUrl(url) { + for (let trustedUrl of trustedUrls) { + if (url.match(trustedUrl)) return true; + } + return false; + } + + /https?:\/\/good.com\/([0-9]+)/.exec(url); // NOT OK + "https://verygood.com/?id=" + /https?:\/\/good.com\/([0-9]+)/.exec(url)[0]; // OK + "http" + (secure? "s": "") + "://" + "verygood.com/?id=" + /https?:\/\/good.com\/([0-9]+)/.exec(url)[0]; // OK + "http" + (secure? "s": "") + "://" + ("verygood.com/?id=" + /https?:\/\/good.com\/([0-9]+)/.exec(url)[0]); // OK + + // g or .replace? + file = file.replace( + /https:\/\/cdn\.ampproject\.org\/v0\/amp-story-0\.1\.js/g, + hostName + '/dist/v0/amp-story-1.0.max.js' + ); + + // missing context of use + const urlPatterns = [ + { + regex: /youtube.com\/embed\/([a-z0-9\?&=\-_]+)/i, + type: 'iframe', w: 560, h: 314, + url: '//www.youtube.com/embed/$1', + allowFullscreen: true + }]; + + // ditto + F.helpers.media = { + defaults : { + youtube : { + matcher : /(youtube\.com|youtu\.be)\/(watch\?v=|v\/|u\/|embed\/?)?(videoseries\?list=(.*)|[\w-]{11}|\?listType=(.*)&list=(.*)).*/i, + params : { + autoplay : 1, + autohide : 1, + fs : 1, + rel : 0, + hd : 1, + wmode : 'opaque', + enablejsapi : 1 + }, + type : 'iframe', + url : '//www.youtube.com/embed/$3' + }}} + + // ditto + var urlPatterns = [ + {regex: /youtu\.be\/([\w\-.]+)/, type: 'iframe', w: 425, h: 350, url: '//www.youtube.com/embed/$1'}, + {regex: /youtube\.com(.+)v=([^&]+)/, type: 'iframe', w: 425, h: 350, url: '//www.youtube.com/embed/$2'}, + {regex: /vimeo\.com\/([0-9]+)/, type: 'iframe', w: 425, h: 350, url: '//player.vimeo.com/video/$1?title=0&byline=0&portrait=0&color=8dc7dc'}, + ]; + + // check optional successsor to TLD + new RegExp("(Pingdom.com_bot_version_)(\\d+)\\.(\\d+)") + + // replace and spaces + error.replace(/See https:\/\/github\.com\/Squirrel\/Squirrel\.Mac\/issues\/182 for more information/, 'See [this link](https://github.com/Microsoft/vscode/issues/7426#issuecomment-425093469) for more information'); + + // not a url + var sharedScript = //; + + // replace + const repo = repoURL.replace(/http(s)?:\/\/(\d+\.)?github.com\//gi, '') + + // replace and space + cmp.replace(/\n "); + + // replace and space + const helpMsg = /For help see https:\/\/nodejs.org\/en\/docs\/inspector\s*/; + msg = msg.replace(helpMsg, ''); + + // not a url + pkg.source.match(/