From 98ae2597bb5c47c4cf7762ca1359452e0813bc9c Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Mon, 27 May 2019 13:52:18 +0200 Subject: [PATCH 01/11] JS: refactor `IncompleteHostnameRegExp::regexp` to RegExp.qll --- .../CWE-020/IncompleteHostnameRegExp.ql | 32 ----------------- .../ql/src/semmle/javascript/Regexp.qll | 36 +++++++++++++++++++ 2 files changed, 36 insertions(+), 32 deletions(-) diff --git a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql index 70589092064e..9bd04c3050a8 100644 --- a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql +++ b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql @@ -12,38 +12,6 @@ import javascript -/** - * Gets a node whose value may flow (inter-procedurally) to a position where it is interpreted - * as a regular expression. - */ -DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker t) { - t.start() and - re = result and - isInterpretedAsRegExp(result) - or - exists(DataFlow::TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) | - t2 = t.smallstep(result, succ) - or - any(TaintTracking::AdditionalTaintStep dts).step(result, succ) and - t = t2 - ) -} - -DataFlow::Node regExpSource(DataFlow::Node re) { - result = regExpSource(re, DataFlow::TypeBackTracker::end()) -} - -/** Holds if `re` is a regular expression with value `pattern`. */ -predicate regexp(DataFlow::Node re, string pattern, string kind, DataFlow::Node aux) { - re.asExpr().(RegExpLiteral).getValue() = pattern and - kind = "regular expression" and - aux = re - or - re = regExpSource(aux) and - pattern = re.getStringValue() and - kind = "string, which is used as a regular expression $@," -} - /** * Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`, * and `pattern` contains a subtle mistake that allows it to match unexpected hosts. diff --git a/javascript/ql/src/semmle/javascript/Regexp.qll b/javascript/ql/src/semmle/javascript/Regexp.qll index 58cd02adf7d5..21962a5b6626 100644 --- a/javascript/ql/src/semmle/javascript/Regexp.qll +++ b/javascript/ql/src/semmle/javascript/Regexp.qll @@ -404,3 +404,39 @@ module RegExpPatterns { result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])" } } + +/** + * Gets a node whose value may flow (inter-procedurally) to a position where it is interpreted + * as a regular expression. + */ +private DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker t) { + t.start() and + re = result and + isInterpretedAsRegExp(result) + or + exists(DataFlow::TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) | + t2 = t.smallstep(result, succ) + or + any(TaintTracking::AdditionalTaintStep dts).step(result, succ) and + t = t2 + ) +} + +/** + * Gets a node whose value may flow (inter-procedurally) to a position where it is interpreted + * as a regular expression. + */ +private DataFlow::Node regExpSource(DataFlow::Node re) { + result = regExpSource(re, DataFlow::TypeBackTracker::end()) +} + +/** Holds if `re` is a regular expression with value `pattern`. */ +predicate regexp(DataFlow::Node re, string pattern, string kind, DataFlow::Node aux) { + re.asExpr().(RegExpLiteral).getValue() = pattern and + kind = "regular expression" and + aux = re + or + re = regExpSource(aux) and + pattern = re.getStringValue() and + kind = "string, which is used as a regular expression $@," +} From 3358e49698b412a93e83519be791e58732745f76 Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Fri, 31 May 2019 07:45:02 +0200 Subject: [PATCH 02/11] JS: refactor the predicate `RegExp::regexp` to three classes. This preserves the ad hoc message formatting in IncompleteHostnameRegExp.ql --- .../CWE-020/IncompleteHostnameRegExp.ql | 16 ++++- .../ql/src/semmle/javascript/Regexp.qll | 70 ++++++++++++++++--- 2 files changed, 72 insertions(+), 14 deletions(-) diff --git a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql index 9bd04c3050a8..ad1e609b2aac 100644 --- a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql +++ b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql @@ -26,11 +26,21 @@ predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) { "([():|?a-z0-9-]+(\\\\)?[.]" + RegExpPatterns::commonTLD() + ")" + ".*", 1) } -from DataFlow::Node re, string pattern, string hostPart, string kind, DataFlow::Node aux -where regexp(re, pattern, kind, aux) and +from RegExpPatternSource re, string pattern, string hostPart, string kind, DataFlow::Node aux +where + pattern = re.getPattern() and isIncompleteHostNameRegExpPattern(pattern, hostPart) and + ( + if re instanceof StringRegExpPatternSource + then ( + kind = "string, which is used as a regular expression $@," and + aux = re.(StringRegExpPatternSource).getAUse() + ) else ( + kind = "regular expression" and aux = re + ) + ) and // ignore patterns with capture groups after the TLD not pattern.regexpMatch("(?i).*[.](" + RegExpPatterns::commonTLD() + ").*[(][?]:.*[)].*") select re, "This " + kind + " has an unescaped '.' before '" + hostPart + - "', so it might match more hosts than expected.", aux, "here" \ No newline at end of file + "', so it might match more hosts than expected.", aux, "here" diff --git a/javascript/ql/src/semmle/javascript/Regexp.qll b/javascript/ql/src/semmle/javascript/Regexp.qll index 21962a5b6626..773530ba4176 100644 --- a/javascript/ql/src/semmle/javascript/Regexp.qll +++ b/javascript/ql/src/semmle/javascript/Regexp.qll @@ -407,7 +407,7 @@ module RegExpPatterns { /** * Gets a node whose value may flow (inter-procedurally) to a position where it is interpreted - * as a regular expression. + * as a part of a regular expression. */ private DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker t) { t.start() and @@ -424,19 +424,67 @@ private DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker /** * Gets a node whose value may flow (inter-procedurally) to a position where it is interpreted - * as a regular expression. + * as a part of a regular expression. */ private DataFlow::Node regExpSource(DataFlow::Node re) { result = regExpSource(re, DataFlow::TypeBackTracker::end()) } -/** Holds if `re` is a regular expression with value `pattern`. */ -predicate regexp(DataFlow::Node re, string pattern, string kind, DataFlow::Node aux) { - re.asExpr().(RegExpLiteral).getValue() = pattern and - kind = "regular expression" and - aux = re - or - re = regExpSource(aux) and - pattern = re.getStringValue() and - kind = "string, which is used as a regular expression $@," +/** + * A node whose value may flow to a position where it is interpreted + * as a part of a regular expression. + */ +abstract class RegExpPatternSource extends DataFlow::Node { + /** + * Gets the pattern of this node. + */ + abstract string getPattern(); + + /** + * Gets a regular expression object that is built from the value of this node. + */ + abstract DataFlow::SourceNode getARegExpObject(); +} + +/** + * A regular expression literal, viewed as the pattern source for itself. + */ +class RegExpLiteralPatternSource extends RegExpPatternSource { + string pattern; + + RegExpLiteralPatternSource() { + exists(string raw | raw = asExpr().(RegExpLiteral).getRoot().toString() | + // hide the fact that `/` is escaped in the literal + pattern = raw.regexpReplaceAll("\\\\/", "/") + ) + } + + override string getPattern() { result = pattern } + + override DataFlow::SourceNode getARegExpObject() { result = this } +} + +/** + * A node whose string value may flow to a position where it is interpreted + * as a part of a regular expression. + */ +class StringRegExpPatternSource extends RegExpPatternSource { + DataFlow::Node use; + + StringRegExpPatternSource() { this = regExpSource(use) } + + /** + * Gets a node that use this source as a regular expression pattern. + */ + DataFlow::Node getAUse() { result = use } + + override DataFlow::SourceNode getARegExpObject() { + exists(DataFlow::InvokeNode constructor | + constructor = DataFlow::globalVarRef("RegExp").getAnInvocation() and + use = constructor.getArgument(0) and + result = constructor + ) + } + + override string getPattern() { result = getStringValue() } } From 69db54a03a29a384de8e0bc7296dfb04db456b97 Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Fri, 31 May 2019 08:17:41 +0200 Subject: [PATCH 03/11] JS: add anchors to js/incomplete-hostname-regexp examples --- .../examples/IncompleteHostnameRegExp.js | 2 +- .../CWE-020/IncompleteHostnameRegExp.expected | 28 +++++------ .../CWE-020/tst-IncompleteHostnameRegExp.js | 46 +++++++++---------- 3 files changed, 38 insertions(+), 38 deletions(-) diff --git a/javascript/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.js b/javascript/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.js index d3dfc3a867d0..11d11a97ca4a 100644 --- a/javascript/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.js +++ b/javascript/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.js @@ -2,7 +2,7 @@ app.get('/some/path', function(req, res) { let url = req.param('url'), host = urlLib.parse(url).host; // BAD: the host of `url` may be controlled by an attacker - let regex = /((www|beta).)?example.com/; + let regex = /^((www|beta).)?example.com/; if (host.match(regex)) { res.redirect(url); } diff --git a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected index b34db52ed794..4360e28cc83c 100644 --- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected +++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected @@ -1,24 +1,24 @@ -| tst-IncompleteHostnameRegExp.js:3:2:3:28 | /http:\\ ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:3:2:3:28 | /http:\\ ... le.com/ | here | -| tst-IncompleteHostnameRegExp.js:5:2:5:28 | /http:\\ ... le.net/ | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:5:2:5:28 | /http:\\ ... le.net/ | here | -| tst-IncompleteHostnameRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | here | -| tst-IncompleteHostnameRegExp.js:11:13:11:37 | "http:/ ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:11:13:11:37 | "http:/ ... le.com" | here | +| tst-IncompleteHostnameRegExp.js:3:2:3:29 | /^http: ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:3:2:3:29 | /^http: ... le.com/ | here | +| tst-IncompleteHostnameRegExp.js:5:2:5:29 | /^http: ... le.net/ | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:5:2:5:29 | /^http: ... le.net/ | here | +| tst-IncompleteHostnameRegExp.js:6:2:6:43 | /^http: ... b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:6:2:6:43 | /^http: ... b).com/ | here | +| tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | here | | tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | here | | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | here | | tst-IncompleteHostnameRegExp.js:17:14:17:30 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | here | -| tst-IncompleteHostnameRegExp.js:19:17:19:34 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:20:13:20:26 | `${hostname}$` | here | -| tst-IncompleteHostnameRegExp.js:22:27:22:44 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:23:13:23:27 | domain.hostname | here | -| tst-IncompleteHostnameRegExp.js:28:23:28:40 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:26:21:26:35 | domain.hostname | here | -| tst-IncompleteHostnameRegExp.js:30:30:30:47 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:32:21:32:35 | domain.hostname | here | +| tst-IncompleteHostnameRegExp.js:19:17:19:35 | '^test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:20:13:20:26 | `${hostname}$` | here | +| tst-IncompleteHostnameRegExp.js:22:27:22:45 | 'test.example.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:23:13:23:27 | domain.hostname | here | +| tst-IncompleteHostnameRegExp.js:28:23:28:41 | 'test.example.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:26:21:26:35 | domain.hostname | here | +| tst-IncompleteHostnameRegExp.js:30:30:30:48 | 'test.example.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:32:21:32:35 | domain.hostname | here | | tst-IncompleteHostnameRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | This regular expression has an unescaped '.' before ')?example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | here | | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | here | -| tst-IncompleteHostnameRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | here | -| tst-IncompleteHostnameRegExp.js:40:2:40:29 | /https? ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:40:2:40:29 | /https? ... le.com/ | here | +| tst-IncompleteHostnameRegExp.js:39:2:39:33 | /^(http ... om\\/)/g | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:39:2:39:33 | /^(http ... om\\/)/g | here | +| tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | here | | tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | here | | tst-IncompleteHostnameRegExp.js:41:41:41:68 | '^https ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | here | -| tst-IncompleteHostnameRegExp.js:42:13:42:61 | 'http[s ... \\/(.+)' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:42:13:42:61 | 'http[s ... \\/(.+)' | here | +| tst-IncompleteHostnameRegExp.js:42:13:42:62 | '^http[ ... \\/(.+)' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:42:13:42:62 | '^http[ ... \\/(.+)' | here | | tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | here | -| tst-IncompleteHostnameRegExp.js:44:9:44:100 | 'protos ... ernal)' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:100 | 'protos ... ernal)' | here | -| tst-IncompleteHostnameRegExp.js:46:2:46:26 | /exampl ... le.com/ | This regular expression has an unescaped '.' before 'dev\|example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:46:2:46:26 | /exampl ... le.com/ | here | +| tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here | +| tst-IncompleteHostnameRegExp.js:46:2:46:29 | /^(exam ... e.com)/ | This regular expression has an unescaped '.' before 'dev\|example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:46:2:46:29 | /^(exam ... e.com)/ | here | | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here | | tst-IncompleteHostnameRegExp.js:48:41:48:68 | '^https ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here | -| tst-IncompleteHostnameRegExp.js:53:13:53:35 | 'test.' ... le.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:35 | 'test.' ... le.com' | here | +| tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here | diff --git a/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js b/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js index 708fbee16f29..47b85447abcb 100644 --- a/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js +++ b/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js @@ -1,54 +1,54 @@ (function() { - /http:\/\/example.com/; // OK - /http:\/\/test.example.com/; // NOT OK - /http:\/\/test\\.example.com/; // OK - /http:\/\/test.example.net/; // NOT OK - /http:\/\/test.(example-a|example-b).com/; // NOT OK - /http:\/\/(.+)\\.example.com/; // NOT OK, but not yet supported with enough precision - /http:\/\/(\\.+)\\.example.com/; // OK - /http:\/\/(?:.+)\\.test\\.example.com/; // NOT OK, but not yet supported with enough precision - /http:\/\/test.example.com\/(?:.*)/; // OK - new RegExp("http://test.example.com"); // NOT OK + /^http:\/\/example.com/; // OK + /^http:\/\/test.example.com/; // NOT OK + /^http:\/\/test\\.example.com/; // OK + /^http:\/\/test.example.net/; // NOT OK + /^http:\/\/test.(example-a|example-b).com/; // NOT OK + /^http:\/\/(.+)\\.example.com/; // NOT OK, but not yet supported with enough precision + /^http:\/\/(\\.+)\\.example.com/; // OK + /^http:\/\/(?:.+)\\.test\\.example.com/; // NOT OK, but not yet supported with enough precision + /^http:\/\/test.example.com\/(?:.*)/; // OK + new RegExp("^http://test.example.com"); // NOT OK s.match("^http://test.example.com"); // NOT OK function id(e) { return e; } - new RegExp(id(id(id("http://test.example.com")))); // NOT OK, but not supported by type tracking + new RegExp(id(id(id("^http://test.example.com")))); // NOT OK, but not supported by type tracking new RegExp(`test.example.com$`); // NOT OK - let hostname = 'test.example.com'; // NOT OK + let hostname = '^test.example.com'; // NOT OK new RegExp(`${hostname}$`); - let domain = { hostname: 'test.example.com' }; + let domain = { hostname: 'test.example.com$' }; new RegExp(domain.hostname); function convert1(domain) { return new RegExp(domain.hostname); } - convert1({ hostname: 'test.example.com' }); // NOT OK + convert1({ hostname: 'test.example.com$' }); // NOT OK - let domains = [ { hostname: 'test.example.com' } ]; // NOT OK + let domains = [ { hostname: 'test.example.com$' } ]; // NOT OK function convert2(domain) { return new RegExp(domain.hostname); } domains.map(d => convert2(d)); - /(.+\.(?:example-a|example-b)\.com)/; // NOT OK, but not yet supported with enough precision + /^(.+\.(?:example-a|example-b)\.com)/; // NOT OK, but not yet supported with enough precision /^(https?:)?\/\/((service|www).)?example.com(?=$|\/)/; // NOT OK /^(http|https):\/\/www.example.com\/p\/f\//; // NOT OK - /\(http:\/\/sub.example.com\/\)/g; // NOT OK - /https?:\/\/api.example.com/; // NOT OK + /^(http:\/\/sub.example.com\/)/g; // NOT OK + /^https?:\/\/api.example.com/; // NOT OK new RegExp('^http://localhost:8000|' + '^https?://.+\.example\.com'); // NOT OK - new RegExp('http[s]?:\/\/?sub1\.sub2\.example\.com\/f\/(.+)'); // NOT OK + new RegExp('^http[s]?:\/\/?sub1\.sub2\.example\.com\/f\/(.+)'); // NOT OK /^https:\/\/[a-z]*.example.com$/; // NOT OK - RegExp('protos?://(localhost|.+.example.net|.+.example-a.com|.+.example-b.com|.+.example.internal)'); // NOT OK + RegExp('^protos?://(localhost|.+.example.net|.+.example-a.com|.+.example-b.com|.+.example.internal)'); // NOT OK - /example.dev|example.com/; // OK, but still flagged + /^(example.dev|example.com)/; // OK, but still flagged new RegExp('^http://localhost:8000|' + '^https?://.+\.example\.com'); // NOT OK - var primary = 'example.com'; + var primary = 'example.com$'; new RegExp('test.' + primary); // NOT OK, but not detected - new RegExp('test.' + 'example.com'); // NOT OK + new RegExp('test.' + 'example.com$'); // NOT OK }); From 0fa73b8331e1c1ade1a88b151f12517e27c58e5b Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Fri, 31 May 2019 08:45:20 +0200 Subject: [PATCH 04/11] JS: add query js/regex/missing-regexp-anchor --- change-notes/1.21/analysis-javascript.md | 1 + javascript/config/suites/javascript/security | 1 + .../CWE-020/MissingRegExpAnchor.qhelp | 77 +++++++++++ .../Security/CWE-020/MissingRegExpAnchor.ql | 86 ++++++++++++ .../examples/MissingRegExpAnchor_BAD.js | 7 + .../examples/MissingRegExpAnchor_GOOD.js | 7 + .../CWE-020/IncompleteHostnameRegExp.expected | 3 + .../CWE-020/MissingRegExpAnchor.expected | 49 +++++++ .../CWE-020/MissingRegExpAnchor.qlref | 1 + .../CWE-020/tst-SemiAnchoredRegExp.js | 122 ++++++++++++++++++ .../CWE-020/tst-UnanchoredUrlRegExp.js | 106 +++++++++++++++ 11 files changed, 460 insertions(+) create mode 100644 javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp create mode 100644 javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql create mode 100644 javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js create mode 100644 javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js create mode 100644 javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected create mode 100644 javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.qlref create mode 100644 javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js create mode 100644 javascript/ql/test/query-tests/Security/CWE-020/tst-UnanchoredUrlRegExp.js diff --git a/change-notes/1.21/analysis-javascript.md b/change-notes/1.21/analysis-javascript.md index d2faa5bd44a8..8a63c8df65cc 100644 --- a/change-notes/1.21/analysis-javascript.md +++ b/change-notes/1.21/analysis-javascript.md @@ -27,6 +27,7 @@ | **Query** | **Tags** | **Purpose** | |-----------------------------------------------|------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Missing regular expression anchor (`js/regex/missing-regexp-anchor`) | correctness, security, external/cwe/cwe-20 | Highlights regular expression patterns that may be missing an anchor, indicating a possible violation of [CWE-20](https://cwe.mitre.org/data/definitions/20.html). Results are not shown on LGTM by default. | | Prototype pollution (`js/prototype-pollution`) | security, external/cwe-250, external/cwe-400 | Highlights code that allows an attacker to modify a built-in prototype object through an unsanitized recursive merge function. The results are shown on LGTM by default. | ## Changes to existing queries diff --git a/javascript/config/suites/javascript/security b/javascript/config/suites/javascript/security index 1d48d378db4d..8ff7c7530c15 100644 --- a/javascript/config/suites/javascript/security +++ b/javascript/config/suites/javascript/security @@ -3,6 +3,7 @@ + semmlecode-javascript-queries/Security/CWE-020/IncompleteHostnameRegExp.ql: /Security/CWE/CWE-020 + semmlecode-javascript-queries/Security/CWE-020/IncompleteUrlSubstringSanitization.ql: /Security/CWE/CWE-020 + semmlecode-javascript-queries/Security/CWE-020/IncorrectSuffixCheck.ql: /Security/CWE/CWE-020 ++ semmlecode-javascript-queries/Security/CWE-020/MissingRegExpAnchor.ql: /Security/CWE/CWE-020 + semmlecode-javascript-queries/Security/CWE-022/TaintedPath.ql: /Security/CWE/CWE-022 + semmlecode-javascript-queries/Security/CWE-022/ZipSlip.ql: /Security/CWE/CWE-022 + semmlecode-javascript-queries/Security/CWE-078/CommandInjection.ql: /Security/CWE/CWE-078 diff --git a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp new file mode 100644 index 000000000000..0e93e80b70ee --- /dev/null +++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp @@ -0,0 +1,77 @@ + + + + +

+ + Sanitizing untrusted input with regular expressions is a + common technique. However, it is error prone to match untrusted input + against regular expressions without anchors such as ^ or + $. Malicious input can bypass such security checks by + embedding one of the allowed patterns in an unexpected location. + +

+ +

+ + Even if the matching is not done in a security-critical + context, it may still cause undesirable behaviors when the regular + expression matches accidentally. + +

+
+ + +

+ + Use anchors to ensure that regular expressions match at + the expected locations. + +

+
+ + + +

+ + The following example code checks that a URL redirection + will reach the example.com domain, or one of its + subdomains, and not some malicious site. + +

+ + + +

+ + The check with the regular expression match is, however, easy to bypass. For example + by embedding example.com in the path component: + http://evil-example.net/example.com, or in the query + string component: http://evil-example.net/?x=example.com. + + Address these shortcomings by using anchors in the regular expression instead: + +

+ + + +

+ + A related mistake is to write a regular expression with + multiple alternatives, but to only include an anchor for one of the + alternatives. As an example, the regular expression + /^www\\.example\\.com|beta\\.example\\.com/ will match the host + evil.beta.example.com because the regular expression is parsed + as /(^www\\.example\\.com)|(beta\\.example\\.com)/ + +

+
+ + +
  • MDN: Regular Expressions
  • +
  • OWASP: SSRF
  • +
  • OWASP: XSS Unvalidated Redirects and Forwards Cheat Sheet.
  • +
    +
    diff --git a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql new file mode 100644 index 000000000000..ffd475811870 --- /dev/null +++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql @@ -0,0 +1,86 @@ +/** + * @name Missing regular expression anchor + * @description Regular expressions without anchors can be vulnerable to bypassing. + * @kind problem + * @problem.severity warning + * @precision medium + * @id js/regex/missing-regexp-anchor + * @tags correctness + * security + * external/cwe/cwe-20 + */ + +import javascript + +/** + * Holds if `src` is a pattern for a collection of alternatives where + * only the first or last alternative is anchored, indicating a + * precedence mistake explained by `msg`. + * + * The canonical example of such a mistake is: `^a|b|c`, which is + * parsed as `(^a)|(b)|(c)`. + */ +predicate isAnInterestingSemiAnchoredRegExpString(RegExpPatternSource src, string msg) { + exists(string str, string maybeGroupedStr, string regex, string anchorPart, string posString, string escapedDot | + // a dot that might be escaped in a regular expression, for example `/\./` or new `RegExp('\\.')` + escapedDot = "\\\\\\\\?[.]" and + // a string that is mostly free from special reqular expression symbols + str = "(?:(?:" + escapedDot + ")|[a-z:/.?_,@0-9 -])+" and + // the string may be wrapped in parentheses + maybeGroupedStr = "(?:" + str + "|\\(" + str + "\\))" and + ( + // a problematic pattern: `^a|b|...|x` + regex = "(?i)(\\^" + maybeGroupedStr + ")(?:\\|" + maybeGroupedStr + ")+" and + posString = "beginning" + or + // a problematic pattern: `a|b|...|x$` + regex = "(?i)(?:" + maybeGroupedStr + "\\|)+(" + maybeGroupedStr + "\\$)" and + posString = "end" + ) and + anchorPart = src.getPattern().regexpCapture(regex, 1) and + anchorPart.regexpMatch("(?i).*[a-z].*") and + msg = "The alternative '" + anchorPart + "' uses an anchor to match from the " + posString + + " of a string, but the other alternatives of this regular expression do not use anchors." + ) +} + +/** + * Holds if `src` is an unanchored pattern for a URL, indicating a + * mistake explained by `msg`. + */ +predicate isAnInterestingUnanchoredRegExpString(RegExpPatternSource src, string msg) { + exists(string pattern | pattern = src.getPattern() | + // a substring sequence of a protocol and subdomains, perhaps with some regex characters mixed in, followed by a known TLD + pattern + .regexpMatch("(?i)[():|?a-z0-9-\\\\./]+[.]" + RegExpPatterns::commonTLD() + + "([/#?():]\\S*)?") and + // without any anchors + pattern.regexpMatch("[^$^]+") and + // that is not used for capture or replace + not exists(DataFlow::MethodCallNode mcn, string name | name = mcn.getMethodName() | + name = "exec" and + mcn = src.getARegExpObject().getAMethodCall() and + exists(mcn.getAPropertyRead()) + or + exists(DataFlow::Node arg | + arg = mcn.getArgument(0) and + ( + src.getARegExpObject().flowsTo(arg) or + src.(StringRegExpPatternSource).getAUse() = arg + ) + | + name = "replace" + or + name = "match" and exists(mcn.getAPropertyRead()) + ) + ) and + msg = "When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it." + ) +} + +from DataFlow::Node nd, string msg +where + isAnInterestingUnanchoredRegExpString(nd, msg) + or + isAnInterestingSemiAnchoredRegExpString(nd, msg) +select nd, msg diff --git a/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js new file mode 100644 index 000000000000..ef8b2c8d2e15 --- /dev/null +++ b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js @@ -0,0 +1,7 @@ +app.get('/some/path', function(req, res) { + let url = req.param("url"); + // BAD: the host of `url` may be controlled by an attacker + if (url.match(/https?:\/\/www\.example\.com\//)) { + res.redirect(url); + } +}); diff --git a/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js new file mode 100644 index 000000000000..8ba1f60b8325 --- /dev/null +++ b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js @@ -0,0 +1,7 @@ +app.get('/some/path', function(req, res) { + let url = req.param("url"); + // GOOD: the host of `url` can not be controlled by an attacker + if (url.match(/^https?:\/\/www\.example\.com\//)) { + res.redirect(url); + } +}); diff --git a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected index 4360e28cc83c..ffbe31cb5c67 100644 --- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected +++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected @@ -22,3 +22,6 @@ | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here | | tst-IncompleteHostnameRegExp.js:48:41:48:68 | '^https ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here | | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here | +| tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | This regular expression has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | here | +| tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | here | +| tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | here | diff --git a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected new file mode 100644 index 000000000000..a686b1dec7c1 --- /dev/null +++ b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected @@ -0,0 +1,49 @@ +| tst-SemiAnchoredRegExp.js:3:2:3:7 | /^a\|b/ | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:6:2:6:9 | /^a\|b\|c/ | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:12:2:12:9 | /^a\|(b)/ | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:14:2:14:11 | /^(a)\|(b)/ | The alternative '^(a)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:17:2:17:7 | /a\|b$/ | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:20:2:20:9 | /a\|b\|c$/ | The alternative 'c$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:26:2:26:9 | /(a)\|b$/ | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:28:2:28:11 | /(a)\|(b)$/ | The alternative '(b)$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | The alternative '^good.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:31:2:31:25 | /^good\\ ... r\\.com/ | The alternative '^good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:32:2:32:27 | /^good\\ ... \\\\.com/ | The alternative '^good\\\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:37:13:37:18 | "^a\|b" | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:40:13:40:20 | "^a\|b\|c" | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:46:13:46:20 | "^a\|(b)" | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:48:13:48:22 | "^(a)\|(b)" | The alternative '^(a)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:51:13:51:18 | "a\|b$" | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:54:13:54:20 | "a\|b\|c$" | The alternative 'c$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:60:13:60:20 | "(a)\|b$" | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:62:13:62:22 | "(a)\|(b)$" | The alternative '(b)$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | The alternative '^good.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | The alternative '^good.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:66:13:66:38 | '^good\\ ... \\\\.com' | The alternative '^good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:75:2:75:27 | /(\\.xxx ... .zzz)$/ | The alternative '(\\.zzz)$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:77:2:77:23 | /\\.xxx\| ... zzz$/ig | The alternative '\\.zzz$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:78:2:78:19 | /\\.xxx\|\\.yyy\|zzz$/ | The alternative 'zzz$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:81:2:81:28 | /^(xxx ... yyy)/i | The alternative '^(xxx yyy zzz)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:83:2:83:24 | /^(xxx: ... (zzz:)/ | The alternative '^(xxx:)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:84:2:84:23 | /^(xxx? ... zzz\\/)/ | The alternative '^(xxx?:)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:85:2:85:16 | /^@media\|@page/ | The alternative '^@media' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:87:2:87:21 | /^click\|mouse\|touch/ | The alternative '^click' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:88:2:88:43 | /^http: ... r\\.com/ | The alternative '^http://good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:89:2:89:47 | /^https ... r\\.com/ | The alternative '^https?://good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:90:2:90:55 | /^mouse ... ragend/ | The alternative '^mouse' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:91:2:91:14 | /^xxx:\|yyy:/i | The alternative '^xxx:' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:92:2:92:18 | /_xxx\|_yyy\|_zzz$/ | The alternative '_zzz$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-UnanchoredUrlRegExp.js:3:43:3:61 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:4:54:4:72 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:10:2:10:22 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:11:13:11:31 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:13:44:13:62 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:15:13:15:31 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:19:43:19:62 | "https?://good.com/" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:20:43:20:66 | "https? ... m:8080" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:23:3:23:21 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:24:3:24:23 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:25:14:25:32 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:35:2:35:32 | /https? ... 0-9]+)/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:49:11:49:51 | /youtub ... -_]+)/i | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | +| tst-UnanchoredUrlRegExp.js:77:11:77:32 | /vimeo\\ ... 0-9]+)/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | diff --git a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.qlref b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.qlref new file mode 100644 index 000000000000..5860f4b3a82a --- /dev/null +++ b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.qlref @@ -0,0 +1 @@ +Security/CWE-020/MissingRegExpAnchor.ql \ No newline at end of file diff --git a/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js b/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js new file mode 100644 index 000000000000..776e63987297 --- /dev/null +++ b/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js @@ -0,0 +1,122 @@ +(function coreRegExp() { + /^a|/; + /^a|b/; // NOT OK + /a|^b/; + /^a|^b/; + /^a|b|c/; // NOT OK + /a|^b|c/; + /a|b|^c/; + /^a|^b|c/; + + /(^a)|b/; + /^a|(b)/; // NOT OK + /^a|(^b)/; + /^(a)|(b)/; // NOT OK + + + /a|b$/; // NOT OK + /a$|b/; + /a$|b$/; + /a|b|c$/; // NOT OK + /a|b$|c/; + /a$|b|c/; + /a|b$|c$/; + + /a|(b$)/; + /(a)|b$/; // NOT OK + /(a$)|b$/; + /(a)|(b)$/; // NOT OK + + /^good.com|better.com/; // NOT OK + /^good\.com|better\.com/; // NOT OK + /^good\\.com|better\\.com/; // NOT OK +}); + +(function coreString() { + new RegExp("^a|"); + new RegExp("^a|b"); // NOT OK + new RegExp("a|^b"); + new RegExp("^a|^b"); + new RegExp("^a|b|c"); // NOT OK + new RegExp("a|^b|c"); + new RegExp("a|b|^c"); + new RegExp("^a|^b|c"); + + new RegExp("(^a)|b"); + new RegExp("^a|(b)"); // NOT OK + new RegExp("^a|(^b)"); + new RegExp("^(a)|(b)"); // NOT OK + + + new RegExp("a|b$"); // NOT OK + new RegExp("a$|b"); + new RegExp("a$|b$"); + new RegExp("a|b|c$"); // NOT OK + new RegExp("a|b$|c"); + new RegExp("a$|b|c"); + new RegExp("a|b$|c$"); + + new RegExp("a|(b$)"); + new RegExp("(a)|b$"); // NOT OK + new RegExp("(a$)|b$"); + new RegExp("(a)|(b)$"); // NOT OK + + new RegExp('^good.com|better.com'); // NOT OK + new RegExp('^good\.com|better\.com'); // NOT OK + new RegExp('^good\\.com|better\\.com'); // NOT OK +}); + +(function realWorld() { + // real-world examples that have been anonymized a bit + + /* + * NOT OK: flagged + */ + /(\.xxx)|(\.yyy)|(\.zzz)$/; + /(^left|right|center)\sbottom$/; // not flagged at the moment due to multiple anchors + /\.xxx|\.yyy|\.zzz$/ig; + /\.xxx|\.yyy|zzz$/; + /^(?:mouse|contextmenu)|click/; // not flagged at the moment due to nested alternatives + /^([A-Z]|xxx[XY]$)/; // not flagged at the moment due to multiple anchors + /^(xxx yyy zzz)|(xxx yyy)/i; + /^(xxx yyy zzz)|(xxx yyy)|(1st( xxx)? yyy)|xxx|1st/i; // not flagged at the moment due to nested parens + /^(xxx:)|(yyy:)|(zzz:)/; + /^(xxx?:)|(yyy:zzz\/)/; + /^@media|@page/; + /^\s*(xxx?|yyy|zzz):|xxx:yyy\//; // not flagged at the moment due to quantifiers + /^click|mouse|touch/; + /^http:\/\/good\.com|http:\/\/better\.com/; + /^https?:\/\/good\.com|https?:\/\/better\.com/; + /^mouse|touch|click|contextmenu|drop|dragover|dragend/; + /^xxx:|yyy:/i; + /_xxx|_yyy|_zzz$/; + /em|%$/; // not flagged at the moment due to the anchor not being for letters + + /* + * MAYBE OK due to apparent complexity: not flagged + */ + /(?:^[#?]?|&)([^=&]+)(?:=([^&]*))?/g; + /(^\s*|;\s*)\*.*;/m; + /(^\s*|\[)(?:xxx|yyy_(?:xxx|yyy)|xxx|yyy(?:xxx|yyy)?|xxx|yyy)\b/m; + /\s\S| \t|\t |\s$/; + /\{[^}{]*\{|\}[^}{]*\}|\{[^}]*$/g; + /^((\+|\-)\s*\d\d\d\d)|((\+|\-)\d\d\:?\d\d)/; + /^(\/\/)|([a-z]+:(\/\/)?)/; + /^[=?!#%@$]|!(?=[:}])/; + /^[\[\]!:]|[<>]/; + /^for\b|\b(?:xxx|yyy)\b/i; + /^if\b|\b(?:xxx|yyy|zzz)\b/i; + + /* + * OK: not flagged + */ + /$^|only-match/g; + /(#.+)|#$/; + /(NaN| {2}|^$)/; + /[^\n]*(?:\n|[^\n]$)/g; + /^$|\/(?:xxx|yyy)zzz/i; + /^(\/|(xxx|yyy|zzz)$)/; + /^9$|27/; + /^\+|\s*/g; + /xxx_yyy=\w+|^$/; +}); diff --git a/javascript/ql/test/query-tests/Security/CWE-020/tst-UnanchoredUrlRegExp.js b/javascript/ql/test/query-tests/Security/CWE-020/tst-UnanchoredUrlRegExp.js new file mode 100644 index 000000000000..1831f31be533 --- /dev/null +++ b/javascript/ql/test/query-tests/Security/CWE-020/tst-UnanchoredUrlRegExp.js @@ -0,0 +1,106 @@ +(function(x){ + + "http://evil.com/?http://good.com".match("https?://good.com"); // NOT OK + "http://evil.com/?http://good.com".match(new RegExp("https?://good.com")); // NOT OK + "http://evil.com/?http://good.com".match("^https?://good.com"); // OK + "http://evil.com/?http://good.com".match(/^https?:\/\/good.com/); // OK + "http://evil.com/?http://good.com".match("(^https?://good1.com)|(^https?://good2.com)"); // OK + "http://evil.com/?http://good.com".match("(https?://good.com)|(^https?://goodie.com)"); // NOT OK, but not detected + + /https?:\/\/good.com/.exec("http://evil.com/?http://good.com"); // NOT OK + new RegExp("https?://good.com").exec("http://evil.com/?http://good.com"); // NOT OK + + "http://evil.com/?http://good.com".search("https?://good.com"); // NOT OK + + new RegExp("https?://good.com").test("http://evil.com/?http://good.com"); // NOT OK + + "something".match("other"); // OK + "something".match("x.commissary"); // OK + "http://evil.com/?http://good.com".match("https?://good.com/"); // NOT OK + "http://evil.com/?http://good.com".match("https?://good.com:8080"); // NOT OK + + let trustedUrls = [ + "https?://good.com", // NOT OK, referenced below + /https?:\/\/good.com/, // NOT OK, referenced below + new RegExp("https?://good.com"), // NOT OK, referenced below + "^https?://good.com" + ]; + function isTrustedUrl(url) { + for (let trustedUrl of trustedUrls) { + if (url.match(trustedUrl)) return true; + } + return false; + } + + /https?:\/\/good.com\/([0-9]+)/.exec(url); // NOT OK + "https://verygood.com/?id=" + /https?:\/\/good.com\/([0-9]+)/.exec(url)[0]; // OK + "http" + (secure? "s": "") + "://" + "verygood.com/?id=" + /https?:\/\/good.com\/([0-9]+)/.exec(url)[0]; // OK + "http" + (secure? "s": "") + "://" + ("verygood.com/?id=" + /https?:\/\/good.com\/([0-9]+)/.exec(url)[0]); // OK + + // g or .replace? + file = file.replace( + /https:\/\/cdn\.ampproject\.org\/v0\/amp-story-0\.1\.js/g, + hostName + '/dist/v0/amp-story-1.0.max.js' + ); + + // missing context of use + const urlPatterns = [ + { + regex: /youtube.com\/embed\/([a-z0-9\?&=\-_]+)/i, + type: 'iframe', w: 560, h: 314, + url: '//www.youtube.com/embed/$1', + allowFullscreen: true + }]; + + // ditto + F.helpers.media = { + defaults : { + youtube : { + matcher : /(youtube\.com|youtu\.be)\/(watch\?v=|v\/|u\/|embed\/?)?(videoseries\?list=(.*)|[\w-]{11}|\?listType=(.*)&list=(.*)).*/i, + params : { + autoplay : 1, + autohide : 1, + fs : 1, + rel : 0, + hd : 1, + wmode : 'opaque', + enablejsapi : 1 + }, + type : 'iframe', + url : '//www.youtube.com/embed/$3' + }}} + + // ditto + var urlPatterns = [ + {regex: /youtu\.be\/([\w\-.]+)/, type: 'iframe', w: 425, h: 350, url: '//www.youtube.com/embed/$1'}, + {regex: /youtube\.com(.+)v=([^&]+)/, type: 'iframe', w: 425, h: 350, url: '//www.youtube.com/embed/$2'}, + {regex: /vimeo\.com\/([0-9]+)/, type: 'iframe', w: 425, h: 350, url: '//player.vimeo.com/video/$1?title=0&byline=0&portrait=0&color=8dc7dc'}, + ]; + + // check optional successsor to TLD + new RegExp("(Pingdom.com_bot_version_)(\\d+)\\.(\\d+)") + + // replace and spaces + error.replace(/See https:\/\/github\.com\/Squirrel\/Squirrel\.Mac\/issues\/182 for more information/, 'See [this link](https://github.com/Microsoft/vscode/issues/7426#issuecomment-425093469) for more information'); + + // not a url + var sharedScript = //; + + // replace + const repo = repoURL.replace(/http(s)?:\/\/(\d+\.)?github.com\//gi, '') + + // replace and space + cmp.replace(/\n "); + + // replace and space + const helpMsg = /For help see https:\/\/nodejs.org\/en\/docs\/inspector\s*/; + msg = msg.replace(helpMsg, ''); + + // not a url + pkg.source.match(/ Date: Fri, 31 May 2019 15:05:14 +0200 Subject: [PATCH 05/11] JS: address minor review comments --- .../Security/CWE-020/MissingRegExpAnchor.ql | 19 +++-- .../ql/src/semmle/javascript/Regexp.qll | 5 +- .../CWE-020/MissingRegExpAnchor.expected | 70 +++++++++---------- 3 files changed, 47 insertions(+), 47 deletions(-) diff --git a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql index ffd475811870..a15a61a00afe 100644 --- a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql +++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql @@ -20,8 +20,8 @@ import javascript * The canonical example of such a mistake is: `^a|b|c`, which is * parsed as `(^a)|(b)|(c)`. */ -predicate isAnInterestingSemiAnchoredRegExpString(RegExpPatternSource src, string msg) { - exists(string str, string maybeGroupedStr, string regex, string anchorPart, string posString, string escapedDot | +predicate isInterestingSemiAnchoredRegExpString(RegExpPatternSource src, string msg) { + exists(string str, string maybeGroupedStr, string regex, string anchorPart, string escapedDot | // a dot that might be escaped in a regular expression, for example `/\./` or new `RegExp('\\.')` escapedDot = "\\\\\\\\?[.]" and // a string that is mostly free from special reqular expression symbols @@ -30,17 +30,14 @@ predicate isAnInterestingSemiAnchoredRegExpString(RegExpPatternSource src, strin maybeGroupedStr = "(?:" + str + "|\\(" + str + "\\))" and ( // a problematic pattern: `^a|b|...|x` - regex = "(?i)(\\^" + maybeGroupedStr + ")(?:\\|" + maybeGroupedStr + ")+" and - posString = "beginning" + regex = "(?i)(\\^" + maybeGroupedStr + ")(?:\\|" + maybeGroupedStr + ")+" or // a problematic pattern: `a|b|...|x$` - regex = "(?i)(?:" + maybeGroupedStr + "\\|)+(" + maybeGroupedStr + "\\$)" and - posString = "end" + regex = "(?i)(?:" + maybeGroupedStr + "\\|)+(" + maybeGroupedStr + "\\$)" ) and anchorPart = src.getPattern().regexpCapture(regex, 1) and anchorPart.regexpMatch("(?i).*[a-z].*") and - msg = "The alternative '" + anchorPart + "' uses an anchor to match from the " + posString + - " of a string, but the other alternatives of this regular expression do not use anchors." + msg = "Misleading operator precedence. The subexpression '" + anchorPart + "' is anchored, but the other parts of this regular expression are not" ) } @@ -48,7 +45,7 @@ predicate isAnInterestingSemiAnchoredRegExpString(RegExpPatternSource src, strin * Holds if `src` is an unanchored pattern for a URL, indicating a * mistake explained by `msg`. */ -predicate isAnInterestingUnanchoredRegExpString(RegExpPatternSource src, string msg) { +predicate isInterestingUnanchoredRegExpString(RegExpPatternSource src, string msg) { exists(string pattern | pattern = src.getPattern() | // a substring sequence of a protocol and subdomains, perhaps with some regex characters mixed in, followed by a known TLD pattern @@ -80,7 +77,7 @@ predicate isAnInterestingUnanchoredRegExpString(RegExpPatternSource src, string from DataFlow::Node nd, string msg where - isAnInterestingUnanchoredRegExpString(nd, msg) + isInterestingUnanchoredRegExpString(nd, msg) or - isAnInterestingSemiAnchoredRegExpString(nd, msg) + isInterestingSemiAnchoredRegExpString(nd, msg) select nd, msg diff --git a/javascript/ql/src/semmle/javascript/Regexp.qll b/javascript/ql/src/semmle/javascript/Regexp.qll index 773530ba4176..e605418ab371 100644 --- a/javascript/ql/src/semmle/javascript/Regexp.qll +++ b/javascript/ql/src/semmle/javascript/Regexp.qll @@ -41,6 +41,9 @@ abstract class RegExpTerm extends Locatable, @regexpterm { override string toString() { regexpterm(this, _, _, _, result) } + /** Gets the raw source text of this term. */ + string getRawValue() { regexpterm(this, _, _, _, result) } + /** Holds if this regular expression term can match the empty string. */ abstract predicate isNullable(); @@ -453,7 +456,7 @@ class RegExpLiteralPatternSource extends RegExpPatternSource { string pattern; RegExpLiteralPatternSource() { - exists(string raw | raw = asExpr().(RegExpLiteral).getRoot().toString() | + exists(string raw | raw = asExpr().(RegExpLiteral).getRoot().getRawValue() | // hide the fact that `/` is escaped in the literal pattern = raw.regexpReplaceAll("\\\\/", "/") ) diff --git a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected index a686b1dec7c1..d72f512a9413 100644 --- a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected +++ b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected @@ -1,38 +1,38 @@ -| tst-SemiAnchoredRegExp.js:3:2:3:7 | /^a\|b/ | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:6:2:6:9 | /^a\|b\|c/ | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:12:2:12:9 | /^a\|(b)/ | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:14:2:14:11 | /^(a)\|(b)/ | The alternative '^(a)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:17:2:17:7 | /a\|b$/ | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:20:2:20:9 | /a\|b\|c$/ | The alternative 'c$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:26:2:26:9 | /(a)\|b$/ | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:28:2:28:11 | /(a)\|(b)$/ | The alternative '(b)$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | The alternative '^good.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:31:2:31:25 | /^good\\ ... r\\.com/ | The alternative '^good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:32:2:32:27 | /^good\\ ... \\\\.com/ | The alternative '^good\\\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:37:13:37:18 | "^a\|b" | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:40:13:40:20 | "^a\|b\|c" | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:46:13:46:20 | "^a\|(b)" | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:48:13:48:22 | "^(a)\|(b)" | The alternative '^(a)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:51:13:51:18 | "a\|b$" | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:54:13:54:20 | "a\|b\|c$" | The alternative 'c$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:60:13:60:20 | "(a)\|b$" | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:62:13:62:22 | "(a)\|(b)$" | The alternative '(b)$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | The alternative '^good.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | The alternative '^good.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:66:13:66:38 | '^good\\ ... \\\\.com' | The alternative '^good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:75:2:75:27 | /(\\.xxx ... .zzz)$/ | The alternative '(\\.zzz)$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:77:2:77:23 | /\\.xxx\| ... zzz$/ig | The alternative '\\.zzz$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:78:2:78:19 | /\\.xxx\|\\.yyy\|zzz$/ | The alternative 'zzz$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:81:2:81:28 | /^(xxx ... yyy)/i | The alternative '^(xxx yyy zzz)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:83:2:83:24 | /^(xxx: ... (zzz:)/ | The alternative '^(xxx:)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:84:2:84:23 | /^(xxx? ... zzz\\/)/ | The alternative '^(xxx?:)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:85:2:85:16 | /^@media\|@page/ | The alternative '^@media' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:87:2:87:21 | /^click\|mouse\|touch/ | The alternative '^click' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:88:2:88:43 | /^http: ... r\\.com/ | The alternative '^http://good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:89:2:89:47 | /^https ... r\\.com/ | The alternative '^https?://good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:90:2:90:55 | /^mouse ... ragend/ | The alternative '^mouse' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:91:2:91:14 | /^xxx:\|yyy:/i | The alternative '^xxx:' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. | -| tst-SemiAnchoredRegExp.js:92:2:92:18 | /_xxx\|_yyy\|_zzz$/ | The alternative '_zzz$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. | +| tst-SemiAnchoredRegExp.js:3:2:3:7 | /^a\|b/ | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:6:2:6:9 | /^a\|b\|c/ | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:12:2:12:9 | /^a\|(b)/ | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:14:2:14:11 | /^(a)\|(b)/ | Misleading operator precedence. The subexpression '^(a)' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:17:2:17:7 | /a\|b$/ | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:20:2:20:9 | /a\|b\|c$/ | Misleading operator precedence. The subexpression 'c$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:26:2:26:9 | /(a)\|b$/ | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:28:2:28:11 | /(a)\|(b)$/ | Misleading operator precedence. The subexpression '(b)$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:31:2:31:25 | /^good\\ ... r\\.com/ | Misleading operator precedence. The subexpression '^good\\.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:32:2:32:27 | /^good\\ ... \\\\.com/ | Misleading operator precedence. The subexpression '^good\\\\.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:37:13:37:18 | "^a\|b" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:40:13:40:20 | "^a\|b\|c" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:46:13:46:20 | "^a\|(b)" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:48:13:48:22 | "^(a)\|(b)" | Misleading operator precedence. The subexpression '^(a)' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:51:13:51:18 | "a\|b$" | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:54:13:54:20 | "a\|b\|c$" | Misleading operator precedence. The subexpression 'c$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:60:13:60:20 | "(a)\|b$" | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:62:13:62:22 | "(a)\|(b)$" | Misleading operator precedence. The subexpression '(b)$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:66:13:66:38 | '^good\\ ... \\\\.com' | Misleading operator precedence. The subexpression '^good\\.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:75:2:75:27 | /(\\.xxx ... .zzz)$/ | Misleading operator precedence. The subexpression '(\\.zzz)$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:77:2:77:23 | /\\.xxx\| ... zzz$/ig | Misleading operator precedence. The subexpression '\\.zzz$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:78:2:78:19 | /\\.xxx\|\\.yyy\|zzz$/ | Misleading operator precedence. The subexpression 'zzz$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:81:2:81:28 | /^(xxx ... yyy)/i | Misleading operator precedence. The subexpression '^(xxx yyy zzz)' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:83:2:83:24 | /^(xxx: ... (zzz:)/ | Misleading operator precedence. The subexpression '^(xxx:)' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:84:2:84:23 | /^(xxx? ... zzz\\/)/ | Misleading operator precedence. The subexpression '^(xxx?:)' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:85:2:85:16 | /^@media\|@page/ | Misleading operator precedence. The subexpression '^@media' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:87:2:87:21 | /^click\|mouse\|touch/ | Misleading operator precedence. The subexpression '^click' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:88:2:88:43 | /^http: ... r\\.com/ | Misleading operator precedence. The subexpression '^http://good\\.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:89:2:89:47 | /^https ... r\\.com/ | Misleading operator precedence. The subexpression '^https?://good\\.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:90:2:90:55 | /^mouse ... ragend/ | Misleading operator precedence. The subexpression '^mouse' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:91:2:91:14 | /^xxx:\|yyy:/i | Misleading operator precedence. The subexpression '^xxx:' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:92:2:92:18 | /_xxx\|_yyy\|_zzz$/ | Misleading operator precedence. The subexpression '_zzz$' is anchored, but the other parts of this regular expression are not | | tst-UnanchoredUrlRegExp.js:3:43:3:61 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | | tst-UnanchoredUrlRegExp.js:4:54:4:72 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | | tst-UnanchoredUrlRegExp.js:10:2:10:22 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | From 7018a386910b3e991b8b4c9188b0ad4d3158fecf Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Sat, 1 Jun 2019 13:29:33 +0200 Subject: [PATCH 06/11] JS: improve tests and regexp for js/regex/missing-regexp-anchor --- .../Security/CWE-020/MissingRegExpAnchor.ql | 2 +- .../CWE-020/IncompleteHostnameRegExp.expected | 4 +- .../CWE-020/MissingRegExpAnchor.expected | 50 +++++++++---------- .../CWE-020/tst-SemiAnchoredRegExp.js | 6 ++- 4 files changed, 33 insertions(+), 29 deletions(-) diff --git a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql index a15a61a00afe..3bd43d0dd60f 100644 --- a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql +++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql @@ -23,7 +23,7 @@ import javascript predicate isInterestingSemiAnchoredRegExpString(RegExpPatternSource src, string msg) { exists(string str, string maybeGroupedStr, string regex, string anchorPart, string escapedDot | // a dot that might be escaped in a regular expression, for example `/\./` or new `RegExp('\\.')` - escapedDot = "\\\\\\\\?[.]" and + escapedDot = "\\\\[.]" and // a string that is mostly free from special reqular expression symbols str = "(?:(?:" + escapedDot + ")|[a-z:/.?_,@0-9 -])+" and // the string may be wrapped in parentheses diff --git a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected index ffbe31cb5c67..1e337828d8a1 100644 --- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected +++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected @@ -23,5 +23,5 @@ | tst-IncompleteHostnameRegExp.js:48:41:48:68 | '^https ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here | | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here | | tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | This regular expression has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | here | -| tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | here | -| tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | here | +| tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | here | +| tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | here | diff --git a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected index d72f512a9413..dbdadc83eed6 100644 --- a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected +++ b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected @@ -8,31 +8,31 @@ | tst-SemiAnchoredRegExp.js:28:2:28:11 | /(a)\|(b)$/ | Misleading operator precedence. The subexpression '(b)$' is anchored, but the other parts of this regular expression are not | | tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not | | tst-SemiAnchoredRegExp.js:31:2:31:25 | /^good\\ ... r\\.com/ | Misleading operator precedence. The subexpression '^good\\.com' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:32:2:32:27 | /^good\\ ... \\\\.com/ | Misleading operator precedence. The subexpression '^good\\\\.com' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:37:13:37:18 | "^a\|b" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:40:13:40:20 | "^a\|b\|c" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:46:13:46:20 | "^a\|(b)" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:48:13:48:22 | "^(a)\|(b)" | Misleading operator precedence. The subexpression '^(a)' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:51:13:51:18 | "a\|b$" | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:54:13:54:20 | "a\|b\|c$" | Misleading operator precedence. The subexpression 'c$' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:60:13:60:20 | "(a)\|b$" | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:62:13:62:22 | "(a)\|(b)$" | Misleading operator precedence. The subexpression '(b)$' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:66:13:66:38 | '^good\\ ... \\\\.com' | Misleading operator precedence. The subexpression '^good\\.com' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:75:2:75:27 | /(\\.xxx ... .zzz)$/ | Misleading operator precedence. The subexpression '(\\.zzz)$' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:77:2:77:23 | /\\.xxx\| ... zzz$/ig | Misleading operator precedence. The subexpression '\\.zzz$' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:78:2:78:19 | /\\.xxx\|\\.yyy\|zzz$/ | Misleading operator precedence. The subexpression 'zzz$' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:81:2:81:28 | /^(xxx ... yyy)/i | Misleading operator precedence. The subexpression '^(xxx yyy zzz)' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:83:2:83:24 | /^(xxx: ... (zzz:)/ | Misleading operator precedence. The subexpression '^(xxx:)' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:84:2:84:23 | /^(xxx? ... zzz\\/)/ | Misleading operator precedence. The subexpression '^(xxx?:)' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:85:2:85:16 | /^@media\|@page/ | Misleading operator precedence. The subexpression '^@media' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:87:2:87:21 | /^click\|mouse\|touch/ | Misleading operator precedence. The subexpression '^click' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:88:2:88:43 | /^http: ... r\\.com/ | Misleading operator precedence. The subexpression '^http://good\\.com' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:89:2:89:47 | /^https ... r\\.com/ | Misleading operator precedence. The subexpression '^https?://good\\.com' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:90:2:90:55 | /^mouse ... ragend/ | Misleading operator precedence. The subexpression '^mouse' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:91:2:91:14 | /^xxx:\|yyy:/i | Misleading operator precedence. The subexpression '^xxx:' is anchored, but the other parts of this regular expression are not | -| tst-SemiAnchoredRegExp.js:92:2:92:18 | /_xxx\|_yyy\|_zzz$/ | Misleading operator precedence. The subexpression '_zzz$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:39:13:39:18 | "^a\|b" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:42:13:42:20 | "^a\|b\|c" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:48:13:48:20 | "^a\|(b)" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:50:13:50:22 | "^(a)\|(b)" | Misleading operator precedence. The subexpression '^(a)' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:53:13:53:18 | "a\|b$" | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:56:13:56:20 | "a\|b\|c$" | Misleading operator precedence. The subexpression 'c$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:62:13:62:20 | "(a)\|b$" | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:64:13:64:22 | "(a)\|(b)$" | Misleading operator precedence. The subexpression '(b)$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:68:13:68:38 | '^good\\ ... \\\\.com' | Misleading operator precedence. The subexpression '^good\\.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:69:13:69:40 | '^good\\ ... \\\\.com' | Misleading operator precedence. The subexpression '^good\\.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:79:2:79:27 | /(\\.xxx ... .zzz)$/ | Misleading operator precedence. The subexpression '(\\.zzz)$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:81:2:81:23 | /\\.xxx\| ... zzz$/ig | Misleading operator precedence. The subexpression '\\.zzz$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:82:2:82:19 | /\\.xxx\|\\.yyy\|zzz$/ | Misleading operator precedence. The subexpression 'zzz$' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:85:2:85:28 | /^(xxx ... yyy)/i | Misleading operator precedence. The subexpression '^(xxx yyy zzz)' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:87:2:87:24 | /^(xxx: ... (zzz:)/ | Misleading operator precedence. The subexpression '^(xxx:)' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:88:2:88:23 | /^(xxx? ... zzz\\/)/ | Misleading operator precedence. The subexpression '^(xxx?:)' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:89:2:89:16 | /^@media\|@page/ | Misleading operator precedence. The subexpression '^@media' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:91:2:91:21 | /^click\|mouse\|touch/ | Misleading operator precedence. The subexpression '^click' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:92:2:92:43 | /^http: ... r\\.com/ | Misleading operator precedence. The subexpression '^http://good\\.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:93:2:93:47 | /^https ... r\\.com/ | Misleading operator precedence. The subexpression '^https?://good\\.com' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:94:2:94:55 | /^mouse ... ragend/ | Misleading operator precedence. The subexpression '^mouse' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:95:2:95:14 | /^xxx:\|yyy:/i | Misleading operator precedence. The subexpression '^xxx:' is anchored, but the other parts of this regular expression are not | +| tst-SemiAnchoredRegExp.js:96:2:96:18 | /_xxx\|_yyy\|_zzz$/ | Misleading operator precedence. The subexpression '_zzz$' is anchored, but the other parts of this regular expression are not | | tst-UnanchoredUrlRegExp.js:3:43:3:61 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | | tst-UnanchoredUrlRegExp.js:4:54:4:72 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | | tst-UnanchoredUrlRegExp.js:10:2:10:22 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | diff --git a/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js b/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js index 776e63987297..f679e8074c57 100644 --- a/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js +++ b/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js @@ -29,7 +29,9 @@ /^good.com|better.com/; // NOT OK /^good\.com|better\.com/; // NOT OK - /^good\\.com|better\\.com/; // NOT OK + /^good\\.com|better\\.com/; + /^good\\\.com|better\\\.com/; + /^good\\\\.com|better\\\\.com/; }); (function coreString() { @@ -64,6 +66,8 @@ new RegExp('^good.com|better.com'); // NOT OK new RegExp('^good\.com|better\.com'); // NOT OK new RegExp('^good\\.com|better\\.com'); // NOT OK + new RegExp('^good\\\.com|better\\\.com'); // NOT OK + new RegExp('^good\\\\.com|better\\\\.com'); }); (function realWorld() { From 14644270ac6ec499743a1b139bd6cfab7f1ebe03 Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Mon, 3 Jun 2019 08:32:35 +0200 Subject: [PATCH 07/11] JS: fix comment typo --- javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql index 3bd43d0dd60f..52ac93fa175b 100644 --- a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql +++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql @@ -22,7 +22,7 @@ import javascript */ predicate isInterestingSemiAnchoredRegExpString(RegExpPatternSource src, string msg) { exists(string str, string maybeGroupedStr, string regex, string anchorPart, string escapedDot | - // a dot that might be escaped in a regular expression, for example `/\./` or new `RegExp('\\.')` + // a dot that might be escaped in a regular expression, for example `/\./` or `new RegExp('\\.')` escapedDot = "\\\\[.]" and // a string that is mostly free from special reqular expression symbols str = "(?:(?:" + escapedDot + ")|[a-z:/.?_,@0-9 -])+" and From 7b652214c5be7724e2234bba31990ac1dbf97f89 Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Mon, 3 Jun 2019 13:59:39 +0200 Subject: [PATCH 08/11] JS: address docstring comments --- javascript/ql/src/semmle/javascript/Regexp.qll | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/javascript/ql/src/semmle/javascript/Regexp.qll b/javascript/ql/src/semmle/javascript/Regexp.qll index e605418ab371..ebd2387c08a5 100644 --- a/javascript/ql/src/semmle/javascript/Regexp.qll +++ b/javascript/ql/src/semmle/javascript/Regexp.qll @@ -409,7 +409,7 @@ module RegExpPatterns { } /** - * Gets a node whose value may flow (inter-procedurally) to a position where it is interpreted + * Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted * as a part of a regular expression. */ private DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker t) { @@ -426,7 +426,7 @@ private DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker } /** - * Gets a node whose value may flow (inter-procedurally) to a position where it is interpreted + * Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted * as a part of a regular expression. */ private DataFlow::Node regExpSource(DataFlow::Node re) { @@ -439,12 +439,14 @@ private DataFlow::Node regExpSource(DataFlow::Node re) { */ abstract class RegExpPatternSource extends DataFlow::Node { /** - * Gets the pattern of this node. + * Gets the pattern of this node that is interpreted as a part of a + * regular expression. */ abstract string getPattern(); /** - * Gets a regular expression object that is built from the value of this node. + * Gets a regular expression object that is constructed from the pattern + * of this node. */ abstract DataFlow::SourceNode getARegExpObject(); } @@ -477,7 +479,7 @@ class StringRegExpPatternSource extends RegExpPatternSource { StringRegExpPatternSource() { this = regExpSource(use) } /** - * Gets a node that use this source as a regular expression pattern. + * Gets a node that uses this source as a regular expression pattern. */ DataFlow::Node getAUse() { result = use } From bf51c54338c2c1b72a46e389399da8819488e485 Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Mon, 3 Jun 2019 14:23:22 +0200 Subject: [PATCH 09/11] JS: add `RegExpPatternSource::getAParse` to hide the subclasses --- .../CWE-020/IncompleteHostnameRegExp.ql | 4 ++-- .../Security/CWE-020/MissingRegExpAnchor.ql | 2 +- .../ql/src/semmle/javascript/Regexp.qll | 23 +++++++++++-------- .../CWE-020/IncompleteHostnameRegExp.expected | 20 ++++++++-------- 4 files changed, 27 insertions(+), 22 deletions(-) diff --git a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql index ad1e609b2aac..88ebe5cf7d1e 100644 --- a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql +++ b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql @@ -31,10 +31,10 @@ where pattern = re.getPattern() and isIncompleteHostNameRegExpPattern(pattern, hostPart) and ( - if re instanceof StringRegExpPatternSource + if re.getAParse() != re then ( kind = "string, which is used as a regular expression $@," and - aux = re.(StringRegExpPatternSource).getAUse() + aux = re.getAParse() ) else ( kind = "regular expression" and aux = re ) diff --git a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql index 52ac93fa175b..d0141361f4bc 100644 --- a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql +++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql @@ -63,7 +63,7 @@ predicate isInterestingUnanchoredRegExpString(RegExpPatternSource src, string ms arg = mcn.getArgument(0) and ( src.getARegExpObject().flowsTo(arg) or - src.(StringRegExpPatternSource).getAUse() = arg + src.getAParse() = arg ) | name = "replace" diff --git a/javascript/ql/src/semmle/javascript/Regexp.qll b/javascript/ql/src/semmle/javascript/Regexp.qll index ebd2387c08a5..909a52512c72 100644 --- a/javascript/ql/src/semmle/javascript/Regexp.qll +++ b/javascript/ql/src/semmle/javascript/Regexp.qll @@ -438,6 +438,12 @@ private DataFlow::Node regExpSource(DataFlow::Node re) { * as a part of a regular expression. */ abstract class RegExpPatternSource extends DataFlow::Node { + /** + * Gets a node where the pattern of this node is parsed as a part of + * a regular expression. + */ + abstract DataFlow::Node getAParse(); + /** * Gets the pattern of this node that is interpreted as a part of a * regular expression. @@ -454,7 +460,7 @@ abstract class RegExpPatternSource extends DataFlow::Node { /** * A regular expression literal, viewed as the pattern source for itself. */ -class RegExpLiteralPatternSource extends RegExpPatternSource { +private class RegExpLiteralPatternSource extends RegExpPatternSource { string pattern; RegExpLiteralPatternSource() { @@ -464,6 +470,8 @@ class RegExpLiteralPatternSource extends RegExpPatternSource { ) } + override DataFlow::Node getAParse() { result = this } + override string getPattern() { result = pattern } override DataFlow::SourceNode getARegExpObject() { result = this } @@ -473,20 +481,17 @@ class RegExpLiteralPatternSource extends RegExpPatternSource { * A node whose string value may flow to a position where it is interpreted * as a part of a regular expression. */ -class StringRegExpPatternSource extends RegExpPatternSource { - DataFlow::Node use; +private class StringRegExpPatternSource extends RegExpPatternSource { + DataFlow::Node parse; - StringRegExpPatternSource() { this = regExpSource(use) } + StringRegExpPatternSource() { this = regExpSource(parse) } - /** - * Gets a node that uses this source as a regular expression pattern. - */ - DataFlow::Node getAUse() { result = use } + override DataFlow::Node getAParse() { result = parse } override DataFlow::SourceNode getARegExpObject() { exists(DataFlow::InvokeNode constructor | constructor = DataFlow::globalVarRef("RegExp").getAnInvocation() and - use = constructor.getArgument(0) and + parse = constructor.getArgument(0) and result = constructor ) } diff --git a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected index 1e337828d8a1..c128823024eb 100644 --- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected +++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected @@ -1,9 +1,9 @@ | tst-IncompleteHostnameRegExp.js:3:2:3:29 | /^http: ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:3:2:3:29 | /^http: ... le.com/ | here | | tst-IncompleteHostnameRegExp.js:5:2:5:29 | /^http: ... le.net/ | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:5:2:5:29 | /^http: ... le.net/ | here | | tst-IncompleteHostnameRegExp.js:6:2:6:43 | /^http: ... b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:6:2:6:43 | /^http: ... b).com/ | here | -| tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | here | -| tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | here | -| tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | here | +| tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | here | +| tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | here | +| tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | here | | tst-IncompleteHostnameRegExp.js:17:14:17:30 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | here | | tst-IncompleteHostnameRegExp.js:19:17:19:35 | '^test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:20:13:20:26 | `${hostname}$` | here | | tst-IncompleteHostnameRegExp.js:22:27:22:45 | 'test.example.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:23:13:23:27 | domain.hostname | here | @@ -13,15 +13,15 @@ | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | here | | tst-IncompleteHostnameRegExp.js:39:2:39:33 | /^(http ... om\\/)/g | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:39:2:39:33 | /^(http ... om\\/)/g | here | | tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | here | -| tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | here | +| tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | here | | tst-IncompleteHostnameRegExp.js:41:41:41:68 | '^https ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | here | -| tst-IncompleteHostnameRegExp.js:42:13:42:62 | '^http[ ... \\/(.+)' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:42:13:42:62 | '^http[ ... \\/(.+)' | here | +| tst-IncompleteHostnameRegExp.js:42:13:42:62 | '^http[ ... \\/(.+)' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:42:13:42:62 | '^http[ ... \\/(.+)' | here | | tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | here | -| tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here | +| tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here | | tst-IncompleteHostnameRegExp.js:46:2:46:29 | /^(exam ... e.com)/ | This regular expression has an unescaped '.' before 'dev\|example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:46:2:46:29 | /^(exam ... e.com)/ | here | -| tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here | +| tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here | | tst-IncompleteHostnameRegExp.js:48:41:48:68 | '^https ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here | -| tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here | +| tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here | | tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | This regular expression has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | here | -| tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | here | -| tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | here | +| tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | This regular expression has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | here | +| tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | This regular expression has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | here | From 9e0a97e82fa72ea00713109a5a03f111295655e1 Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Mon, 3 Jun 2019 16:39:39 +0200 Subject: [PATCH 10/11] JS: address qhelp review comments --- .../ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp index 0e93e80b70ee..6c2d6aa59f2b 100644 --- a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp +++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp @@ -7,7 +7,7 @@

    Sanitizing untrusted input with regular expressions is a - common technique. However, it is error prone to match untrusted input + common technique. However, it is error-prone to match untrusted input against regular expressions without anchors such as ^ or $. Malicious input can bypass such security checks by embedding one of the allowed patterns in an unexpected location. @@ -17,8 +17,8 @@

    Even if the matching is not done in a security-critical - context, it may still cause undesirable behaviors when the regular - expression matches accidentally. + context, it may still cause undesirable behavior when the regular + expression accidentally matches.

    From 04868e5b97b4abfaf19755854ca932f33b69d96f Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Mon, 3 Jun 2019 17:05:19 +0200 Subject: [PATCH 11/11] JS: format qhelp examples --- .../src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js | 4 ++-- .../src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js index ef8b2c8d2e15..fbf3430c5247 100644 --- a/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js +++ b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js @@ -1,7 +1,7 @@ -app.get('/some/path', function(req, res) { +app.get("/some/path", function(req, res) { let url = req.param("url"); // BAD: the host of `url` may be controlled by an attacker - if (url.match(/https?:\/\/www\.example\.com\//)) { + if (url.match(/https?:\/\/www\.example\.com\//)) { res.redirect(url); } }); diff --git a/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js index 8ba1f60b8325..ebb408eec4be 100644 --- a/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js +++ b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js @@ -1,7 +1,7 @@ -app.get('/some/path', function(req, res) { +app.get("/some/path", function(req, res) { let url = req.param("url"); // GOOD: the host of `url` can not be controlled by an attacker - if (url.match(/^https?:\/\/www\.example\.com\//)) { + if (url.match(/^https?:\/\/www\.example\.com\//)) { res.redirect(url); } });