From 98ae2597bb5c47c4cf7762ca1359452e0813bc9c Mon Sep 17 00:00:00 2001
From: Esben Sparre Andreasen <esben@semmle.com>
Date: Mon, 27 May 2019 13:52:18 +0200
Subject: [PATCH 01/11] JS: refactor `IncompleteHostnameRegExp::regexp` to
 RegExp.qll

---
 .../CWE-020/IncompleteHostnameRegExp.ql       | 32 -----------------
 .../ql/src/semmle/javascript/Regexp.qll       | 36 +++++++++++++++++++
 2 files changed, 36 insertions(+), 32 deletions(-)

diff --git a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql
index 70589092064e..9bd04c3050a8 100644
--- a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql
+++ b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql
@@ -12,38 +12,6 @@
 
 import javascript
 
-/**
- * Gets a node whose value may flow (inter-procedurally) to a position where it is interpreted
- * as a regular expression.
- */
-DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker t) {
-  t.start() and
-  re = result and
-  isInterpretedAsRegExp(result)
-  or
-  exists(DataFlow::TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) |
-    t2 = t.smallstep(result, succ)
-    or
-    any(TaintTracking::AdditionalTaintStep dts).step(result, succ) and
-    t = t2
-  )
-}
-
-DataFlow::Node regExpSource(DataFlow::Node re) {
-  result = regExpSource(re, DataFlow::TypeBackTracker::end())
-}
-
-/** Holds if `re` is a regular expression with value `pattern`. */
-predicate regexp(DataFlow::Node re, string pattern, string kind, DataFlow::Node aux) {
-  re.asExpr().(RegExpLiteral).getValue() = pattern and
-  kind = "regular expression" and
-  aux = re
-  or
-  re = regExpSource(aux) and
-  pattern = re.getStringValue() and
-  kind = "string, which is used as a regular expression $@,"
-}
-
 /**
  * Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`,
  * and `pattern` contains a subtle mistake that allows it to match unexpected hosts.
diff --git a/javascript/ql/src/semmle/javascript/Regexp.qll b/javascript/ql/src/semmle/javascript/Regexp.qll
index 58cd02adf7d5..21962a5b6626 100644
--- a/javascript/ql/src/semmle/javascript/Regexp.qll
+++ b/javascript/ql/src/semmle/javascript/Regexp.qll
@@ -404,3 +404,39 @@ module RegExpPatterns {
     result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
   }
 }
+
+/**
+ * Gets a node whose value may flow (inter-procedurally) to a position where it is interpreted
+ * as a regular expression.
+ */
+private DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker t) {
+  t.start() and
+  re = result and
+  isInterpretedAsRegExp(result)
+  or
+  exists(DataFlow::TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) |
+    t2 = t.smallstep(result, succ)
+    or
+    any(TaintTracking::AdditionalTaintStep dts).step(result, succ) and
+    t = t2
+  )
+}
+
+/**
+ * Gets a node whose value may flow (inter-procedurally) to a position where it is interpreted
+ * as a regular expression.
+ */
+private DataFlow::Node regExpSource(DataFlow::Node re) {
+  result = regExpSource(re, DataFlow::TypeBackTracker::end())
+}
+
+/** Holds if `re` is a regular expression with value `pattern`. */
+predicate regexp(DataFlow::Node re, string pattern, string kind, DataFlow::Node aux) {
+  re.asExpr().(RegExpLiteral).getValue() = pattern and
+  kind = "regular expression" and
+  aux = re
+  or
+  re = regExpSource(aux) and
+  pattern = re.getStringValue() and
+  kind = "string, which is used as a regular expression $@,"
+}

From 3358e49698b412a93e83519be791e58732745f76 Mon Sep 17 00:00:00 2001
From: Esben Sparre Andreasen <esben@semmle.com>
Date: Fri, 31 May 2019 07:45:02 +0200
Subject: [PATCH 02/11] JS: refactor the predicate `RegExp::regexp` to three
 classes.

This preserves the ad hoc message formatting in IncompleteHostnameRegExp.ql
---
 .../CWE-020/IncompleteHostnameRegExp.ql       | 16 ++++-
 .../ql/src/semmle/javascript/Regexp.qll       | 70 ++++++++++++++++---
 2 files changed, 72 insertions(+), 14 deletions(-)

diff --git a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql
index 9bd04c3050a8..ad1e609b2aac 100644
--- a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql
+++ b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql
@@ -26,11 +26,21 @@ predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
             "([():|?a-z0-9-]+(\\\\)?[.]" + RegExpPatterns::commonTLD() + ")" + ".*", 1)
 }
 
-from DataFlow::Node re, string pattern, string hostPart, string kind, DataFlow::Node aux
-where regexp(re, pattern, kind, aux) and
+from RegExpPatternSource re, string pattern, string hostPart, string kind, DataFlow::Node aux
+where
+  pattern = re.getPattern() and
   isIncompleteHostNameRegExpPattern(pattern, hostPart) and
+  (
+    if re instanceof StringRegExpPatternSource
+    then (
+      kind = "string, which is used as a regular expression $@," and
+      aux = re.(StringRegExpPatternSource).getAUse()
+    ) else (
+      kind = "regular expression" and aux = re
+    )
+  ) and
   // ignore patterns with capture groups after the TLD
   not pattern.regexpMatch("(?i).*[.](" + RegExpPatterns::commonTLD() + ").*[(][?]:.*[)].*")
 select re,
   "This " + kind + " has an unescaped '.' before '" + hostPart +
-    "', so it might match more hosts than expected.", aux, "here"
\ No newline at end of file
+    "', so it might match more hosts than expected.", aux, "here"
diff --git a/javascript/ql/src/semmle/javascript/Regexp.qll b/javascript/ql/src/semmle/javascript/Regexp.qll
index 21962a5b6626..773530ba4176 100644
--- a/javascript/ql/src/semmle/javascript/Regexp.qll
+++ b/javascript/ql/src/semmle/javascript/Regexp.qll
@@ -407,7 +407,7 @@ module RegExpPatterns {
 
 /**
  * Gets a node whose value may flow (inter-procedurally) to a position where it is interpreted
- * as a regular expression.
+ * as a part of a regular expression.
  */
 private DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker t) {
   t.start() and
@@ -424,19 +424,67 @@ private DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker
 
 /**
  * Gets a node whose value may flow (inter-procedurally) to a position where it is interpreted
- * as a regular expression.
+ * as a part of a regular expression.
  */
 private DataFlow::Node regExpSource(DataFlow::Node re) {
   result = regExpSource(re, DataFlow::TypeBackTracker::end())
 }
 
-/** Holds if `re` is a regular expression with value `pattern`. */
-predicate regexp(DataFlow::Node re, string pattern, string kind, DataFlow::Node aux) {
-  re.asExpr().(RegExpLiteral).getValue() = pattern and
-  kind = "regular expression" and
-  aux = re
-  or
-  re = regExpSource(aux) and
-  pattern = re.getStringValue() and
-  kind = "string, which is used as a regular expression $@,"
+/**
+ * A node whose value may flow to a position where it is interpreted
+ * as a part of a regular expression.
+ */
+abstract class RegExpPatternSource extends DataFlow::Node {
+  /**
+   * Gets the pattern of this node.
+   */
+  abstract string getPattern();
+
+  /**
+   * Gets a regular expression object that is built from the value of this node.
+   */
+  abstract DataFlow::SourceNode getARegExpObject();
+}
+
+/**
+ * A regular expression literal, viewed as the pattern source for itself.
+ */
+class RegExpLiteralPatternSource extends RegExpPatternSource {
+  string pattern;
+
+  RegExpLiteralPatternSource() {
+    exists(string raw | raw = asExpr().(RegExpLiteral).getRoot().toString() |
+      // hide the fact that `/` is escaped in the literal
+      pattern = raw.regexpReplaceAll("\\\\/", "/")
+    )
+  }
+
+  override string getPattern() { result = pattern }
+
+  override DataFlow::SourceNode getARegExpObject() { result = this }
+}
+
+/**
+ * A node whose string value may flow to a position where it is interpreted
+ * as a part of a regular expression.
+ */
+class StringRegExpPatternSource extends RegExpPatternSource {
+  DataFlow::Node use;
+
+  StringRegExpPatternSource() { this = regExpSource(use) }
+
+  /**
+   * Gets a node that use this source as a regular expression pattern.
+   */
+  DataFlow::Node getAUse() { result = use }
+
+  override DataFlow::SourceNode getARegExpObject() {
+    exists(DataFlow::InvokeNode constructor |
+      constructor = DataFlow::globalVarRef("RegExp").getAnInvocation() and
+      use = constructor.getArgument(0) and
+      result = constructor
+    )
+  }
+
+  override string getPattern() { result = getStringValue() }
 }

From 69db54a03a29a384de8e0bc7296dfb04db456b97 Mon Sep 17 00:00:00 2001
From: Esben Sparre Andreasen <esben@semmle.com>
Date: Fri, 31 May 2019 08:17:41 +0200
Subject: [PATCH 03/11] JS: add anchors to js/incomplete-hostname-regexp
 examples

---
 .../examples/IncompleteHostnameRegExp.js      |  2 +-
 .../CWE-020/IncompleteHostnameRegExp.expected | 28 +++++------
 .../CWE-020/tst-IncompleteHostnameRegExp.js   | 46 +++++++++----------
 3 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/javascript/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.js b/javascript/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.js
index d3dfc3a867d0..11d11a97ca4a 100644
--- a/javascript/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.js
+++ b/javascript/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.js
@@ -2,7 +2,7 @@ app.get('/some/path', function(req, res) {
     let url = req.param('url'),
         host = urlLib.parse(url).host;
     // BAD: the host of `url` may be controlled by an attacker
-    let regex = /((www|beta).)?example.com/;
+    let regex = /^((www|beta).)?example.com/;
     if (host.match(regex)) {
         res.redirect(url);
     }
diff --git a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected
index b34db52ed794..4360e28cc83c 100644
--- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected
@@ -1,24 +1,24 @@
-| tst-IncompleteHostnameRegExp.js:3:2:3:28 | /http:\\ ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:3:2:3:28 | /http:\\ ... le.com/ | here |
-| tst-IncompleteHostnameRegExp.js:5:2:5:28 | /http:\\ ... le.net/ | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:5:2:5:28 | /http:\\ ... le.net/ | here |
-| tst-IncompleteHostnameRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | here |
-| tst-IncompleteHostnameRegExp.js:11:13:11:37 | "http:/ ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:11:13:11:37 | "http:/ ... le.com" | here |
+| tst-IncompleteHostnameRegExp.js:3:2:3:29 | /^http: ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:3:2:3:29 | /^http: ... le.com/ | here |
+| tst-IncompleteHostnameRegExp.js:5:2:5:29 | /^http: ... le.net/ | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:5:2:5:29 | /^http: ... le.net/ | here |
+| tst-IncompleteHostnameRegExp.js:6:2:6:43 | /^http: ... b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:6:2:6:43 | /^http: ... b).com/ | here |
+| tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | here |
 | tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | here |
 | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | here |
 | tst-IncompleteHostnameRegExp.js:17:14:17:30 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | here |
-| tst-IncompleteHostnameRegExp.js:19:17:19:34 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:20:13:20:26 | `${hostname}$` | here |
-| tst-IncompleteHostnameRegExp.js:22:27:22:44 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:23:13:23:27 | domain.hostname | here |
-| tst-IncompleteHostnameRegExp.js:28:23:28:40 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:26:21:26:35 | domain.hostname | here |
-| tst-IncompleteHostnameRegExp.js:30:30:30:47 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:32:21:32:35 | domain.hostname | here |
+| tst-IncompleteHostnameRegExp.js:19:17:19:35 | '^test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:20:13:20:26 | `${hostname}$` | here |
+| tst-IncompleteHostnameRegExp.js:22:27:22:45 | 'test.example.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:23:13:23:27 | domain.hostname | here |
+| tst-IncompleteHostnameRegExp.js:28:23:28:41 | 'test.example.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:26:21:26:35 | domain.hostname | here |
+| tst-IncompleteHostnameRegExp.js:30:30:30:48 | 'test.example.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:32:21:32:35 | domain.hostname | here |
 | tst-IncompleteHostnameRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | This regular expression has an unescaped '.' before ')?example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | here |
 | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | here |
-| tst-IncompleteHostnameRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | here |
-| tst-IncompleteHostnameRegExp.js:40:2:40:29 | /https? ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:40:2:40:29 | /https? ... le.com/ | here |
+| tst-IncompleteHostnameRegExp.js:39:2:39:33 | /^(http ... om\\/)/g | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:39:2:39:33 | /^(http ... om\\/)/g | here |
+| tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | here |
 | tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | here |
 | tst-IncompleteHostnameRegExp.js:41:41:41:68 | '^https ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | here |
-| tst-IncompleteHostnameRegExp.js:42:13:42:61 | 'http[s ... \\/(.+)' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:42:13:42:61 | 'http[s ... \\/(.+)' | here |
+| tst-IncompleteHostnameRegExp.js:42:13:42:62 | '^http[ ... \\/(.+)' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:42:13:42:62 | '^http[ ... \\/(.+)' | here |
 | tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | here |
-| tst-IncompleteHostnameRegExp.js:44:9:44:100 | 'protos ... ernal)' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:100 | 'protos ... ernal)' | here |
-| tst-IncompleteHostnameRegExp.js:46:2:46:26 | /exampl ... le.com/ | This regular expression has an unescaped '.' before 'dev\|example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:46:2:46:26 | /exampl ... le.com/ | here |
+| tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
+| tst-IncompleteHostnameRegExp.js:46:2:46:29 | /^(exam ... e.com)/ | This regular expression has an unescaped '.' before 'dev\|example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:46:2:46:29 | /^(exam ... e.com)/ | here |
 | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here |
 | tst-IncompleteHostnameRegExp.js:48:41:48:68 | '^https ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here |
-| tst-IncompleteHostnameRegExp.js:53:13:53:35 | 'test.' ... le.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:35 | 'test.' ... le.com' | here |
+| tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here |
diff --git a/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js b/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js
index 708fbee16f29..47b85447abcb 100644
--- a/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js
+++ b/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js
@@ -1,54 +1,54 @@
 (function() {
-	/http:\/\/example.com/; // OK
-	/http:\/\/test.example.com/; // NOT OK
-	/http:\/\/test\\.example.com/; // OK
-	/http:\/\/test.example.net/; // NOT OK
-	/http:\/\/test.(example-a|example-b).com/; // NOT OK
-	/http:\/\/(.+)\\.example.com/; // NOT OK, but not yet supported with enough precision
-	/http:\/\/(\\.+)\\.example.com/; // OK
-	/http:\/\/(?:.+)\\.test\\.example.com/; // NOT OK, but not yet supported with enough precision
-	/http:\/\/test.example.com\/(?:.*)/; // OK
-	new RegExp("http://test.example.com"); // NOT OK
+	/^http:\/\/example.com/; // OK
+	/^http:\/\/test.example.com/; // NOT OK
+	/^http:\/\/test\\.example.com/; // OK
+	/^http:\/\/test.example.net/; // NOT OK
+	/^http:\/\/test.(example-a|example-b).com/; // NOT OK
+	/^http:\/\/(.+)\\.example.com/; // NOT OK, but not yet supported with enough precision
+	/^http:\/\/(\\.+)\\.example.com/; // OK
+	/^http:\/\/(?:.+)\\.test\\.example.com/; // NOT OK, but not yet supported with enough precision
+	/^http:\/\/test.example.com\/(?:.*)/; // OK
+	new RegExp("^http://test.example.com"); // NOT OK
 	s.match("^http://test.example.com"); // NOT OK
 
 	function id(e) { return e; }
-	new RegExp(id(id(id("http://test.example.com")))); // NOT OK, but not supported by type tracking
+	new RegExp(id(id(id("^http://test.example.com")))); // NOT OK, but not supported by type tracking
 
 	new RegExp(`test.example.com$`); // NOT OK
 
-	let hostname = 'test.example.com'; // NOT OK
+	let hostname = '^test.example.com'; // NOT OK
 	new RegExp(`${hostname}$`);
 
-	let domain = { hostname: 'test.example.com' };
+	let domain = { hostname: 'test.example.com$' };
 	new RegExp(domain.hostname);
 
 	function convert1(domain) {
 		return new RegExp(domain.hostname);
 	}
-	convert1({ hostname: 'test.example.com' }); // NOT OK
+	convert1({ hostname: 'test.example.com$' }); // NOT OK
 
-	let domains = [ { hostname: 'test.example.com' } ];  // NOT OK
+	let domains = [ { hostname: 'test.example.com$' } ];  // NOT OK
 	function convert2(domain) {
 		return new RegExp(domain.hostname);
 	}
 	domains.map(d => convert2(d));
 
-	/(.+\.(?:example-a|example-b)\.com)/; // NOT OK, but not yet supported with enough precision
+	/^(.+\.(?:example-a|example-b)\.com)/; // NOT OK, but not yet supported with enough precision
 	/^(https?:)?\/\/((service|www).)?example.com(?=$|\/)/; // NOT OK
 	/^(http|https):\/\/www.example.com\/p\/f\//; // NOT OK
-	/\(http:\/\/sub.example.com\/\)/g; // NOT OK
-	/https?:\/\/api.example.com/; // NOT OK
+	/^(http:\/\/sub.example.com\/)/g; // NOT OK
+	/^https?:\/\/api.example.com/; // NOT OK
 	new RegExp('^http://localhost:8000|' + '^https?://.+\.example\.com'); // NOT OK
-	new RegExp('http[s]?:\/\/?sub1\.sub2\.example\.com\/f\/(.+)'); // NOT OK
+	new RegExp('^http[s]?:\/\/?sub1\.sub2\.example\.com\/f\/(.+)'); // NOT OK
 	/^https:\/\/[a-z]*.example.com$/; // NOT OK
-	RegExp('protos?://(localhost|.+.example.net|.+.example-a.com|.+.example-b.com|.+.example.internal)'); // NOT OK
+	RegExp('^protos?://(localhost|.+.example.net|.+.example-a.com|.+.example-b.com|.+.example.internal)'); // NOT OK
 
-	/example.dev|example.com/; // OK, but still flagged
+	/^(example.dev|example.com)/; // OK, but still flagged
 
 	new RegExp('^http://localhost:8000|' + '^https?://.+\.example\.com'); // NOT OK
 
-	var primary = 'example.com';
+	var primary = 'example.com$';
 	new RegExp('test.' + primary); // NOT OK, but not detected
 
-	new RegExp('test.' + 'example.com'); // NOT OK
+	new RegExp('test.' + 'example.com$'); // NOT OK
 });

From 0fa73b8331e1c1ade1a88b151f12517e27c58e5b Mon Sep 17 00:00:00 2001
From: Esben Sparre Andreasen <esben@semmle.com>
Date: Fri, 31 May 2019 08:45:20 +0200
Subject: [PATCH 04/11] JS: add query js/regex/missing-regexp-anchor

---
 change-notes/1.21/analysis-javascript.md      |   1 +
 javascript/config/suites/javascript/security  |   1 +
 .../CWE-020/MissingRegExpAnchor.qhelp         |  77 +++++++++++
 .../Security/CWE-020/MissingRegExpAnchor.ql   |  86 ++++++++++++
 .../examples/MissingRegExpAnchor_BAD.js       |   7 +
 .../examples/MissingRegExpAnchor_GOOD.js      |   7 +
 .../CWE-020/IncompleteHostnameRegExp.expected |   3 +
 .../CWE-020/MissingRegExpAnchor.expected      |  49 +++++++
 .../CWE-020/MissingRegExpAnchor.qlref         |   1 +
 .../CWE-020/tst-SemiAnchoredRegExp.js         | 122 ++++++++++++++++++
 .../CWE-020/tst-UnanchoredUrlRegExp.js        | 106 +++++++++++++++
 11 files changed, 460 insertions(+)
 create mode 100644 javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp
 create mode 100644 javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql
 create mode 100644 javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js
 create mode 100644 javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js
 create mode 100644 javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected
 create mode 100644 javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.qlref
 create mode 100644 javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js
 create mode 100644 javascript/ql/test/query-tests/Security/CWE-020/tst-UnanchoredUrlRegExp.js

diff --git a/change-notes/1.21/analysis-javascript.md b/change-notes/1.21/analysis-javascript.md
index d2faa5bd44a8..8a63c8df65cc 100644
--- a/change-notes/1.21/analysis-javascript.md
+++ b/change-notes/1.21/analysis-javascript.md
@@ -27,6 +27,7 @@
 
 | **Query**                                     | **Tags**                                             | **Purpose**                                                                                                                                                                 |
 |-----------------------------------------------|------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| Missing regular expression anchor (`js/regex/missing-regexp-anchor`) | correctness, security, external/cwe/cwe-20 | Highlights regular expression patterns that may be missing an anchor, indicating a possible violation of [CWE-20](https://cwe.mitre.org/data/definitions/20.html). Results are not shown on LGTM by default. |
 | Prototype pollution (`js/prototype-pollution`)    | security, external/cwe-250, external/cwe-400 | Highlights code that allows an attacker to modify a built-in prototype object through an unsanitized recursive merge function. The results are shown on LGTM by default. |
 
 ## Changes to existing queries
diff --git a/javascript/config/suites/javascript/security b/javascript/config/suites/javascript/security
index 1d48d378db4d..8ff7c7530c15 100644
--- a/javascript/config/suites/javascript/security
+++ b/javascript/config/suites/javascript/security
@@ -3,6 +3,7 @@
 + semmlecode-javascript-queries/Security/CWE-020/IncompleteHostnameRegExp.ql: /Security/CWE/CWE-020
 + semmlecode-javascript-queries/Security/CWE-020/IncompleteUrlSubstringSanitization.ql: /Security/CWE/CWE-020
 + semmlecode-javascript-queries/Security/CWE-020/IncorrectSuffixCheck.ql: /Security/CWE/CWE-020
++ semmlecode-javascript-queries/Security/CWE-020/MissingRegExpAnchor.ql: /Security/CWE/CWE-020
 + semmlecode-javascript-queries/Security/CWE-022/TaintedPath.ql: /Security/CWE/CWE-022
 + semmlecode-javascript-queries/Security/CWE-022/ZipSlip.ql: /Security/CWE/CWE-022
 + semmlecode-javascript-queries/Security/CWE-078/CommandInjection.ql: /Security/CWE/CWE-078
diff --git a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp
new file mode 100644
index 000000000000..0e93e80b70ee
--- /dev/null
+++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp
@@ -0,0 +1,77 @@
+<!DOCTYPE qhelp PUBLIC
+"-//Semmle//qhelp//EN"
+"qhelp.dtd">
+<qhelp>
+
+	<overview>
+		<p>
+
+			Sanitizing untrusted input with regular expressions is a
+			common technique.  However, it is error prone to match untrusted input
+			against regular expressions without anchors such as <code>^</code> or
+			<code>$</code>.  Malicious input can bypass such security checks by
+			embedding one of the allowed patterns in an unexpected location.
+
+		</p>
+
+		<p>
+
+			Even if the matching is not done in a security-critical
+			context, it may still cause undesirable behaviors when the regular
+			expression matches accidentally.
+
+		</p>
+	</overview>
+
+	<recommendation>
+		<p>
+
+			Use anchors to ensure that regular expressions match at
+			the expected locations.
+
+		</p>
+	</recommendation>
+
+	<example>
+
+		<p>
+
+			The following example code checks that a URL redirection
+			will reach the <code>example.com</code> domain, or one of its
+			subdomains, and not some malicious site.
+
+		</p>
+
+		<sample src="examples/MissingRegExpAnchor_BAD.js"/>
+
+		<p>
+
+			The check with the regular expression match is, however, easy to bypass. For example
+			by embedding <code>example.com</code> in the path component:
+			<code>http://evil-example.net/example.com</code>, or in the query
+			string component: <code>http://evil-example.net/?x=example.com</code>.
+
+			Address these shortcomings by using anchors in the regular expression instead:
+
+		</p>
+
+		<sample src="examples/MissingRegExpAnchor_GOOD.js"/>
+
+		<p>
+
+			A related mistake is to write a regular expression with
+			multiple alternatives, but to only include an anchor for one of the
+			alternatives. As an example, the regular expression
+			<code>/^www\\.example\\.com|beta\\.example\\.com/</code> will match the host
+			<code>evil.beta.example.com</code> because the regular expression is parsed
+			as <code>/(^www\\.example\\.com)|(beta\\.example\\.com)/</code>
+
+		</p>
+	</example>
+
+	<references>
+		<li>MDN: <a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions">Regular Expressions</a></li>
+		<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>
+		<li>OWASP: <a href="https://www.owasp.org/index.php/Unvalidated_Redirects_and_Forwards_Cheat_Sheet">XSS Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
+	</references>
+</qhelp>
diff --git a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql
new file mode 100644
index 000000000000..ffd475811870
--- /dev/null
+++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql
@@ -0,0 +1,86 @@
+/**
+ * @name Missing regular expression anchor
+ * @description Regular expressions without anchors can be vulnerable to bypassing.
+ * @kind problem
+ * @problem.severity warning
+ * @precision medium
+ * @id js/regex/missing-regexp-anchor
+ * @tags correctness
+ *       security
+ *       external/cwe/cwe-20
+ */
+
+import javascript
+
+/**
+ * Holds if `src` is a pattern for a collection of alternatives where
+ * only the first or last alternative is anchored, indicating a
+ * precedence mistake explained by `msg`.
+ *
+ * The canonical example of such a mistake is: `^a|b|c`, which is
+ * parsed as `(^a)|(b)|(c)`.
+ */
+predicate isAnInterestingSemiAnchoredRegExpString(RegExpPatternSource src, string msg) {
+  exists(string str, string maybeGroupedStr, string regex, string anchorPart, string posString, string escapedDot |
+    // a dot that might be escaped in a regular expression, for example `/\./` or new `RegExp('\\.')`
+    escapedDot = "\\\\\\\\?[.]" and
+    // a string that is mostly free from special reqular expression symbols
+    str = "(?:(?:" + escapedDot + ")|[a-z:/.?_,@0-9 -])+" and
+    // the string may be wrapped in parentheses
+    maybeGroupedStr = "(?:" + str + "|\\(" + str + "\\))" and
+    (
+      // a problematic pattern: `^a|b|...|x`
+      regex = "(?i)(\\^" + maybeGroupedStr + ")(?:\\|" + maybeGroupedStr + ")+" and
+      posString = "beginning"
+      or
+      // a problematic pattern: `a|b|...|x$`
+      regex = "(?i)(?:" + maybeGroupedStr + "\\|)+(" + maybeGroupedStr + "\\$)" and
+      posString = "end"
+    ) and
+    anchorPart = src.getPattern().regexpCapture(regex, 1) and
+    anchorPart.regexpMatch("(?i).*[a-z].*") and
+    msg = "The alternative '" + anchorPart + "' uses an anchor to match from the " + posString +
+        " of a string, but the other alternatives of this regular expression do not use anchors."
+  )
+}
+
+/**
+ * Holds if `src` is an unanchored pattern for a URL, indicating a
+ * mistake explained by `msg`.
+ */
+predicate isAnInterestingUnanchoredRegExpString(RegExpPatternSource src, string msg) {
+  exists(string pattern | pattern = src.getPattern() |
+    // a substring sequence of a protocol and subdomains, perhaps with some regex characters mixed in, followed by a known TLD
+    pattern
+        .regexpMatch("(?i)[():|?a-z0-9-\\\\./]+[.]" + RegExpPatterns::commonTLD() +
+            "([/#?():]\\S*)?") and
+    // without any anchors
+    pattern.regexpMatch("[^$^]+") and
+    // that is not used for capture or replace
+    not exists(DataFlow::MethodCallNode mcn, string name | name = mcn.getMethodName() |
+      name = "exec" and
+      mcn = src.getARegExpObject().getAMethodCall() and
+      exists(mcn.getAPropertyRead())
+      or
+      exists(DataFlow::Node arg |
+        arg = mcn.getArgument(0) and
+        (
+          src.getARegExpObject().flowsTo(arg) or
+          src.(StringRegExpPatternSource).getAUse() = arg
+        )
+      |
+        name = "replace"
+        or
+        name = "match" and exists(mcn.getAPropertyRead())
+      )
+    ) and
+    msg = "When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it."
+  )
+}
+
+from DataFlow::Node nd, string msg
+where
+  isAnInterestingUnanchoredRegExpString(nd, msg)
+  or
+  isAnInterestingSemiAnchoredRegExpString(nd, msg)
+select nd, msg
diff --git a/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js
new file mode 100644
index 000000000000..ef8b2c8d2e15
--- /dev/null
+++ b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js
@@ -0,0 +1,7 @@
+app.get('/some/path', function(req, res) {
+    let url = req.param("url");
+    // BAD: the host of `url` may be controlled by an attacker
+	if (url.match(/https?:\/\/www\.example\.com\//)) {
+        res.redirect(url);
+    }
+});
diff --git a/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js
new file mode 100644
index 000000000000..8ba1f60b8325
--- /dev/null
+++ b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js
@@ -0,0 +1,7 @@
+app.get('/some/path', function(req, res) {
+    let url = req.param("url");
+    // GOOD: the host of `url` can not be controlled by an attacker
+	if (url.match(/^https?:\/\/www\.example\.com\//)) {
+        res.redirect(url);
+    }
+});
diff --git a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected
index 4360e28cc83c..ffbe31cb5c67 100644
--- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected
@@ -22,3 +22,6 @@
 | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here |
 | tst-IncompleteHostnameRegExp.js:48:41:48:68 | '^https ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here |
 | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here |
+| tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | This regular expression has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | here |
+| tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | here |
+| tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | here |
diff --git a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected
new file mode 100644
index 000000000000..a686b1dec7c1
--- /dev/null
+++ b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected
@@ -0,0 +1,49 @@
+| tst-SemiAnchoredRegExp.js:3:2:3:7 | /^a\|b/ | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:6:2:6:9 | /^a\|b\|c/ | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:12:2:12:9 | /^a\|(b)/ | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:14:2:14:11 | /^(a)\|(b)/ | The alternative '^(a)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:17:2:17:7 | /a\|b$/ | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:20:2:20:9 | /a\|b\|c$/ | The alternative 'c$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:26:2:26:9 | /(a)\|b$/ | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:28:2:28:11 | /(a)\|(b)$/ | The alternative '(b)$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | The alternative '^good.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:31:2:31:25 | /^good\\ ... r\\.com/ | The alternative '^good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:32:2:32:27 | /^good\\ ... \\\\.com/ | The alternative '^good\\\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:37:13:37:18 | "^a\|b" | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:40:13:40:20 | "^a\|b\|c" | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:46:13:46:20 | "^a\|(b)" | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:48:13:48:22 | "^(a)\|(b)" | The alternative '^(a)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:51:13:51:18 | "a\|b$" | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:54:13:54:20 | "a\|b\|c$" | The alternative 'c$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:60:13:60:20 | "(a)\|b$" | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:62:13:62:22 | "(a)\|(b)$" | The alternative '(b)$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | The alternative '^good.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | The alternative '^good.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:66:13:66:38 | '^good\\ ... \\\\.com' | The alternative '^good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:75:2:75:27 | /(\\.xxx ... .zzz)$/ | The alternative '(\\.zzz)$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:77:2:77:23 | /\\.xxx\| ... zzz$/ig | The alternative '\\.zzz$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:78:2:78:19 | /\\.xxx\|\\.yyy\|zzz$/ | The alternative 'zzz$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:81:2:81:28 | /^(xxx  ...  yyy)/i | The alternative '^(xxx yyy zzz)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:83:2:83:24 | /^(xxx: ... (zzz:)/ | The alternative '^(xxx:)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:84:2:84:23 | /^(xxx? ... zzz\\/)/ | The alternative '^(xxx?:)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:85:2:85:16 | /^@media\|@page/ | The alternative '^@media' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:87:2:87:21 | /^click\|mouse\|touch/ | The alternative '^click' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:88:2:88:43 | /^http: ... r\\.com/ | The alternative '^http://good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:89:2:89:47 | /^https ... r\\.com/ | The alternative '^https?://good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:90:2:90:55 | /^mouse ... ragend/ | The alternative '^mouse' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:91:2:91:14 | /^xxx:\|yyy:/i | The alternative '^xxx:' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:92:2:92:18 | /_xxx\|_yyy\|_zzz$/ | The alternative '_zzz$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-UnanchoredUrlRegExp.js:3:43:3:61 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:4:54:4:72 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:10:2:10:22 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:11:13:11:31 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:13:44:13:62 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:15:13:15:31 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:19:43:19:62 | "https?://good.com/" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:20:43:20:66 | "https? ... m:8080" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:23:3:23:21 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:24:3:24:23 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:25:14:25:32 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:35:2:35:32 | /https? ... 0-9]+)/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:49:11:49:51 | /youtub ... -_]+)/i | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:77:11:77:32 | /vimeo\\ ... 0-9]+)/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
diff --git a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.qlref b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.qlref
new file mode 100644
index 000000000000..5860f4b3a82a
--- /dev/null
+++ b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.qlref
@@ -0,0 +1 @@
+Security/CWE-020/MissingRegExpAnchor.ql
\ No newline at end of file
diff --git a/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js b/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js
new file mode 100644
index 000000000000..776e63987297
--- /dev/null
+++ b/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js
@@ -0,0 +1,122 @@
+(function coreRegExp() {
+	/^a|/;
+	/^a|b/; // NOT OK
+	/a|^b/;
+	/^a|^b/;
+	/^a|b|c/; // NOT OK
+	/a|^b|c/;
+	/a|b|^c/;
+	/^a|^b|c/;
+
+	/(^a)|b/;
+	/^a|(b)/; // NOT OK
+	/^a|(^b)/;
+	/^(a)|(b)/; // NOT OK
+
+
+	/a|b$/; // NOT OK
+	/a$|b/;
+	/a$|b$/;
+	/a|b|c$/; // NOT OK
+	/a|b$|c/;
+	/a$|b|c/;
+	/a|b$|c$/;
+
+	/a|(b$)/;
+	/(a)|b$/; // NOT OK
+	/(a$)|b$/;
+	/(a)|(b)$/; // NOT OK
+
+	/^good.com|better.com/; // NOT OK
+	/^good\.com|better\.com/; // NOT OK
+	/^good\\.com|better\\.com/; // NOT OK
+});
+
+(function coreString() {
+	new RegExp("^a|");
+	new RegExp("^a|b"); // NOT OK
+	new RegExp("a|^b");
+	new RegExp("^a|^b");
+	new RegExp("^a|b|c"); // NOT OK
+	new RegExp("a|^b|c");
+	new RegExp("a|b|^c");
+	new RegExp("^a|^b|c");
+
+	new RegExp("(^a)|b");
+	new RegExp("^a|(b)"); // NOT OK
+	new RegExp("^a|(^b)");
+	new RegExp("^(a)|(b)"); // NOT OK
+
+
+	new RegExp("a|b$"); // NOT OK
+	new RegExp("a$|b");
+	new RegExp("a$|b$");
+	new RegExp("a|b|c$"); // NOT OK
+	new RegExp("a|b$|c");
+	new RegExp("a$|b|c");
+	new RegExp("a|b$|c$");
+
+	new RegExp("a|(b$)");
+	new RegExp("(a)|b$"); // NOT OK
+	new RegExp("(a$)|b$");
+	new RegExp("(a)|(b)$"); // NOT OK
+
+	new RegExp('^good.com|better.com'); // NOT OK
+	new RegExp('^good\.com|better\.com'); // NOT OK
+	new RegExp('^good\\.com|better\\.com'); // NOT OK
+});
+
+(function realWorld() {
+	// real-world examples that have been anonymized a bit
+
+	/*
+	 * NOT OK: flagged
+	 */
+	/(\.xxx)|(\.yyy)|(\.zzz)$/;
+	/(^left|right|center)\sbottom$/; // not flagged at the moment due to multiple anchors
+	/\.xxx|\.yyy|\.zzz$/ig;
+	/\.xxx|\.yyy|zzz$/;
+	/^(?:mouse|contextmenu)|click/; // not flagged at the moment due to nested alternatives
+	/^([A-Z]|xxx[XY]$)/; // not flagged at the moment due to multiple anchors
+	/^(xxx yyy zzz)|(xxx yyy)/i;
+	/^(xxx yyy zzz)|(xxx yyy)|(1st( xxx)? yyy)|xxx|1st/i; // not flagged at the moment due to nested parens
+	/^(xxx:)|(yyy:)|(zzz:)/;
+	/^(xxx?:)|(yyy:zzz\/)/;
+	/^@media|@page/;
+	/^\s*(xxx?|yyy|zzz):|xxx:yyy\//; // not flagged at the moment due to quantifiers
+	/^click|mouse|touch/;
+	/^http:\/\/good\.com|http:\/\/better\.com/;
+	/^https?:\/\/good\.com|https?:\/\/better\.com/;
+	/^mouse|touch|click|contextmenu|drop|dragover|dragend/;
+	/^xxx:|yyy:/i;
+	/_xxx|_yyy|_zzz$/;
+	/em|%$/; // not flagged at the moment due to the anchor not being for letters
+
+	/*
+	 * MAYBE OK due to apparent complexity: not flagged
+	 */
+	/(?:^[#?]?|&)([^=&]+)(?:=([^&]*))?/g;
+	/(^\s*|;\s*)\*.*;/m;
+	/(^\s*|\[)(?:xxx|yyy_(?:xxx|yyy)|xxx|yyy(?:xxx|yyy)?|xxx|yyy)\b/m;
+	/\s\S| \t|\t |\s$/;
+	/\{[^}{]*\{|\}[^}{]*\}|\{[^}]*$/g;
+	/^((\+|\-)\s*\d\d\d\d)|((\+|\-)\d\d\:?\d\d)/;
+	/^(\/\/)|([a-z]+:(\/\/)?)/;
+	/^[=?!#%@$]|!(?=[:}])/;
+	/^[\[\]!:]|[<>]/;
+	/^for\b|\b(?:xxx|yyy)\b/i;
+	/^if\b|\b(?:xxx|yyy|zzz)\b/i;
+
+	/*
+	 * OK: not flagged
+	 */
+	/$^|only-match/g;
+	/(#.+)|#$/;
+	/(NaN| {2}|^$)/;
+	/[^\n]*(?:\n|[^\n]$)/g;
+	/^$|\/(?:xxx|yyy)zzz/i;
+	/^(\/|(xxx|yyy|zzz)$)/;
+	/^9$|27/;
+	/^\+|\s*/g;
+	/xxx_yyy=\w+|^$/;
+});
diff --git a/javascript/ql/test/query-tests/Security/CWE-020/tst-UnanchoredUrlRegExp.js b/javascript/ql/test/query-tests/Security/CWE-020/tst-UnanchoredUrlRegExp.js
new file mode 100644
index 000000000000..1831f31be533
--- /dev/null
+++ b/javascript/ql/test/query-tests/Security/CWE-020/tst-UnanchoredUrlRegExp.js
@@ -0,0 +1,106 @@
+(function(x){
+
+	"http://evil.com/?http://good.com".match("https?://good.com"); // NOT OK
+	"http://evil.com/?http://good.com".match(new RegExp("https?://good.com")); // NOT OK
+	"http://evil.com/?http://good.com".match("^https?://good.com"); // OK
+	"http://evil.com/?http://good.com".match(/^https?:\/\/good.com/); // OK
+	"http://evil.com/?http://good.com".match("(^https?://good1.com)|(^https?://good2.com)"); // OK
+	"http://evil.com/?http://good.com".match("(https?://good.com)|(^https?://goodie.com)"); // NOT OK, but not detected
+
+	/https?:\/\/good.com/.exec("http://evil.com/?http://good.com"); // NOT OK
+	new RegExp("https?://good.com").exec("http://evil.com/?http://good.com"); // NOT OK
+
+	"http://evil.com/?http://good.com".search("https?://good.com"); // NOT OK
+
+	new RegExp("https?://good.com").test("http://evil.com/?http://good.com"); // NOT OK
+
+	"something".match("other"); // OK
+	"something".match("x.commissary"); // OK
+	"http://evil.com/?http://good.com".match("https?://good.com/"); // NOT OK
+	"http://evil.com/?http://good.com".match("https?://good.com:8080"); // NOT OK
+
+	let trustedUrls = [
+		"https?://good.com", // NOT OK, referenced below
+		/https?:\/\/good.com/, // NOT OK, referenced below
+		new RegExp("https?://good.com"), // NOT OK, referenced below
+		"^https?://good.com"
+	];
+	function isTrustedUrl(url) {
+		for (let trustedUrl of trustedUrls) {
+			if (url.match(trustedUrl)) return true;
+		}
+		return false;
+	}
+
+	/https?:\/\/good.com\/([0-9]+)/.exec(url); // NOT OK
+	"https://verygood.com/?id=" + /https?:\/\/good.com\/([0-9]+)/.exec(url)[0]; // OK
+	"http" + (secure? "s": "") + "://" + "verygood.com/?id=" + /https?:\/\/good.com\/([0-9]+)/.exec(url)[0]; // OK
+	"http" + (secure? "s": "") + "://" + ("verygood.com/?id=" + /https?:\/\/good.com\/([0-9]+)/.exec(url)[0]); // OK
+
+	// g or .replace?
+	file = file.replace(
+		/https:\/\/cdn\.ampproject\.org\/v0\/amp-story-0\.1\.js/g,
+		hostName + '/dist/v0/amp-story-1.0.max.js'
+	);
+
+	// missing context of use
+	const urlPatterns  = [
+		{
+			regex: /youtube.com\/embed\/([a-z0-9\?&=\-_]+)/i,
+			type: 'iframe', w: 560, h: 314,
+			url: '//www.youtube.com/embed/$1',
+			allowFullscreen: true
+		}];
+
+	// ditto
+	F.helpers.media = {
+		defaults : {
+			youtube : {
+				matcher : /(youtube\.com|youtu\.be)\/(watch\?v=|v\/|u\/|embed\/?)?(videoseries\?list=(.*)|[\w-]{11}|\?listType=(.*)&list=(.*)).*/i,
+				params  : {
+					autoplay    : 1,
+					autohide    : 1,
+					fs          : 1,
+					rel         : 0,
+					hd          : 1,
+					wmode       : 'opaque',
+					enablejsapi : 1
+				},
+				type : 'iframe',
+				url  : '//www.youtube.com/embed/$3'
+			}}}
+
+	// ditto
+	var urlPatterns = [
+		{regex: /youtu\.be\/([\w\-.]+)/, type: 'iframe', w: 425, h: 350, url: '//www.youtube.com/embed/$1'},
+		{regex: /youtube\.com(.+)v=([^&]+)/, type: 'iframe', w: 425, h: 350, url: '//www.youtube.com/embed/$2'},
+		{regex: /vimeo\.com\/([0-9]+)/, type: 'iframe', w: 425, h: 350, url: '//player.vimeo.com/video/$1?title=0&byline=0&portrait=0&color=8dc7dc'},
+	];
+
+	// check optional successsor to TLD
+	new RegExp("(Pingdom.com_bot_version_)(\\d+)\\.(\\d+)")
+
+	// replace and spaces
+	error.replace(/See https:\/\/github\.com\/Squirrel\/Squirrel\.Mac\/issues\/182 for more information/, 'See [this link](https://github.com/Microsoft/vscode/issues/7426#issuecomment-425093469) for more information');
+
+	// not a url
+	var sharedScript = /<script\s.*src="(app:\/\/.+\.gaiamobile\.org)?\/?(shared\/.+)".*>/;
+
+	// replace
+	const repo = repoURL.replace(/http(s)?:\/\/(\d+\.)?github.com\//gi, '')
+
+	// replace and space
+	cmp.replace(/<option value="http:\/\/codemirror.net\/">HEAD<\/option>/,
+	            "<option value=\"http://codemirror.net/\">HEAD</option>\n        <option value=\"http://marijnhaverbeke.nl/git/codemirror?a=blob_plain;hb=" + number + ";f=\">" + number + "</option>");
+
+	// replace and space
+	const helpMsg = /For help see https:\/\/nodejs.org\/en\/docs\/inspector\s*/;
+	msg = msg.replace(helpMsg, '');
+
+	// not a url
+	pkg.source.match(/<a:skin.*?\s+xmlns:a="http:\/\/ajax.org\/2005\/aml"/m)
+
+	// replace
+	path.replace(/engine.io/, "$&-client")
+
+});

From 3289c629f7c36dc0b346f7978a9a741e3f9ef86b Mon Sep 17 00:00:00 2001
From: Esben Sparre Andreasen <esben@semmle.com>
Date: Fri, 31 May 2019 15:05:14 +0200
Subject: [PATCH 05/11] JS: address minor review comments

---
 .../Security/CWE-020/MissingRegExpAnchor.ql   | 19 +++--
 .../ql/src/semmle/javascript/Regexp.qll       |  5 +-
 .../CWE-020/MissingRegExpAnchor.expected      | 70 +++++++++----------
 3 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql
index ffd475811870..a15a61a00afe 100644
--- a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql
+++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql
@@ -20,8 +20,8 @@ import javascript
  * The canonical example of such a mistake is: `^a|b|c`, which is
  * parsed as `(^a)|(b)|(c)`.
  */
-predicate isAnInterestingSemiAnchoredRegExpString(RegExpPatternSource src, string msg) {
-  exists(string str, string maybeGroupedStr, string regex, string anchorPart, string posString, string escapedDot |
+predicate isInterestingSemiAnchoredRegExpString(RegExpPatternSource src, string msg) {
+  exists(string str, string maybeGroupedStr, string regex, string anchorPart, string escapedDot |
     // a dot that might be escaped in a regular expression, for example `/\./` or new `RegExp('\\.')`
     escapedDot = "\\\\\\\\?[.]" and
     // a string that is mostly free from special reqular expression symbols
@@ -30,17 +30,14 @@ predicate isAnInterestingSemiAnchoredRegExpString(RegExpPatternSource src, strin
     maybeGroupedStr = "(?:" + str + "|\\(" + str + "\\))" and
     (
       // a problematic pattern: `^a|b|...|x`
-      regex = "(?i)(\\^" + maybeGroupedStr + ")(?:\\|" + maybeGroupedStr + ")+" and
-      posString = "beginning"
+      regex = "(?i)(\\^" + maybeGroupedStr + ")(?:\\|" + maybeGroupedStr + ")+"
       or
       // a problematic pattern: `a|b|...|x$`
-      regex = "(?i)(?:" + maybeGroupedStr + "\\|)+(" + maybeGroupedStr + "\\$)" and
-      posString = "end"
+      regex = "(?i)(?:" + maybeGroupedStr + "\\|)+(" + maybeGroupedStr + "\\$)"
     ) and
     anchorPart = src.getPattern().regexpCapture(regex, 1) and
     anchorPart.regexpMatch("(?i).*[a-z].*") and
-    msg = "The alternative '" + anchorPart + "' uses an anchor to match from the " + posString +
-        " of a string, but the other alternatives of this regular expression do not use anchors."
+    msg = "Misleading operator precedence. The subexpression '" + anchorPart + "' is anchored, but the other parts of this regular expression are not"
   )
 }
 
@@ -48,7 +45,7 @@ predicate isAnInterestingSemiAnchoredRegExpString(RegExpPatternSource src, strin
  * Holds if `src` is an unanchored pattern for a URL, indicating a
  * mistake explained by `msg`.
  */
-predicate isAnInterestingUnanchoredRegExpString(RegExpPatternSource src, string msg) {
+predicate isInterestingUnanchoredRegExpString(RegExpPatternSource src, string msg) {
   exists(string pattern | pattern = src.getPattern() |
     // a substring sequence of a protocol and subdomains, perhaps with some regex characters mixed in, followed by a known TLD
     pattern
@@ -80,7 +77,7 @@ predicate isAnInterestingUnanchoredRegExpString(RegExpPatternSource src, string
 
 from DataFlow::Node nd, string msg
 where
-  isAnInterestingUnanchoredRegExpString(nd, msg)
+  isInterestingUnanchoredRegExpString(nd, msg)
   or
-  isAnInterestingSemiAnchoredRegExpString(nd, msg)
+  isInterestingSemiAnchoredRegExpString(nd, msg)
 select nd, msg
diff --git a/javascript/ql/src/semmle/javascript/Regexp.qll b/javascript/ql/src/semmle/javascript/Regexp.qll
index 773530ba4176..e605418ab371 100644
--- a/javascript/ql/src/semmle/javascript/Regexp.qll
+++ b/javascript/ql/src/semmle/javascript/Regexp.qll
@@ -41,6 +41,9 @@ abstract class RegExpTerm extends Locatable, @regexpterm {
 
   override string toString() { regexpterm(this, _, _, _, result) }
 
+  /** Gets the raw source text of this term. */
+  string getRawValue() { regexpterm(this, _, _, _, result) }
+
   /** Holds if this regular expression term can match the empty string. */
   abstract predicate isNullable();
 
@@ -453,7 +456,7 @@ class RegExpLiteralPatternSource extends RegExpPatternSource {
   string pattern;
 
   RegExpLiteralPatternSource() {
-    exists(string raw | raw = asExpr().(RegExpLiteral).getRoot().toString() |
+    exists(string raw | raw = asExpr().(RegExpLiteral).getRoot().getRawValue() |
       // hide the fact that `/` is escaped in the literal
       pattern = raw.regexpReplaceAll("\\\\/", "/")
     )
diff --git a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected
index a686b1dec7c1..d72f512a9413 100644
--- a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected
@@ -1,38 +1,38 @@
-| tst-SemiAnchoredRegExp.js:3:2:3:7 | /^a\|b/ | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:6:2:6:9 | /^a\|b\|c/ | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:12:2:12:9 | /^a\|(b)/ | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:14:2:14:11 | /^(a)\|(b)/ | The alternative '^(a)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:17:2:17:7 | /a\|b$/ | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:20:2:20:9 | /a\|b\|c$/ | The alternative 'c$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:26:2:26:9 | /(a)\|b$/ | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:28:2:28:11 | /(a)\|(b)$/ | The alternative '(b)$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | The alternative '^good.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:31:2:31:25 | /^good\\ ... r\\.com/ | The alternative '^good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:32:2:32:27 | /^good\\ ... \\\\.com/ | The alternative '^good\\\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:37:13:37:18 | "^a\|b" | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:40:13:40:20 | "^a\|b\|c" | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:46:13:46:20 | "^a\|(b)" | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:48:13:48:22 | "^(a)\|(b)" | The alternative '^(a)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:51:13:51:18 | "a\|b$" | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:54:13:54:20 | "a\|b\|c$" | The alternative 'c$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:60:13:60:20 | "(a)\|b$" | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:62:13:62:22 | "(a)\|(b)$" | The alternative '(b)$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | The alternative '^good.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | The alternative '^good.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:66:13:66:38 | '^good\\ ... \\\\.com' | The alternative '^good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:75:2:75:27 | /(\\.xxx ... .zzz)$/ | The alternative '(\\.zzz)$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:77:2:77:23 | /\\.xxx\| ... zzz$/ig | The alternative '\\.zzz$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:78:2:78:19 | /\\.xxx\|\\.yyy\|zzz$/ | The alternative 'zzz$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:81:2:81:28 | /^(xxx  ...  yyy)/i | The alternative '^(xxx yyy zzz)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:83:2:83:24 | /^(xxx: ... (zzz:)/ | The alternative '^(xxx:)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:84:2:84:23 | /^(xxx? ... zzz\\/)/ | The alternative '^(xxx?:)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:85:2:85:16 | /^@media\|@page/ | The alternative '^@media' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:87:2:87:21 | /^click\|mouse\|touch/ | The alternative '^click' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:88:2:88:43 | /^http: ... r\\.com/ | The alternative '^http://good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:89:2:89:47 | /^https ... r\\.com/ | The alternative '^https?://good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:90:2:90:55 | /^mouse ... ragend/ | The alternative '^mouse' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:91:2:91:14 | /^xxx:\|yyy:/i | The alternative '^xxx:' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
-| tst-SemiAnchoredRegExp.js:92:2:92:18 | /_xxx\|_yyy\|_zzz$/ | The alternative '_zzz$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:3:2:3:7 | /^a\|b/ | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:6:2:6:9 | /^a\|b\|c/ | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:12:2:12:9 | /^a\|(b)/ | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:14:2:14:11 | /^(a)\|(b)/ | Misleading operator precedence. The subexpression '^(a)' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:17:2:17:7 | /a\|b$/ | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:20:2:20:9 | /a\|b\|c$/ | Misleading operator precedence. The subexpression 'c$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:26:2:26:9 | /(a)\|b$/ | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:28:2:28:11 | /(a)\|(b)$/ | Misleading operator precedence. The subexpression '(b)$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:31:2:31:25 | /^good\\ ... r\\.com/ | Misleading operator precedence. The subexpression '^good\\.com' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:32:2:32:27 | /^good\\ ... \\\\.com/ | Misleading operator precedence. The subexpression '^good\\\\.com' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:37:13:37:18 | "^a\|b" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:40:13:40:20 | "^a\|b\|c" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:46:13:46:20 | "^a\|(b)" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:48:13:48:22 | "^(a)\|(b)" | Misleading operator precedence. The subexpression '^(a)' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:51:13:51:18 | "a\|b$" | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:54:13:54:20 | "a\|b\|c$" | Misleading operator precedence. The subexpression 'c$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:60:13:60:20 | "(a)\|b$" | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:62:13:62:22 | "(a)\|(b)$" | Misleading operator precedence. The subexpression '(b)$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:66:13:66:38 | '^good\\ ... \\\\.com' | Misleading operator precedence. The subexpression '^good\\.com' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:75:2:75:27 | /(\\.xxx ... .zzz)$/ | Misleading operator precedence. The subexpression '(\\.zzz)$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:77:2:77:23 | /\\.xxx\| ... zzz$/ig | Misleading operator precedence. The subexpression '\\.zzz$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:78:2:78:19 | /\\.xxx\|\\.yyy\|zzz$/ | Misleading operator precedence. The subexpression 'zzz$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:81:2:81:28 | /^(xxx  ...  yyy)/i | Misleading operator precedence. The subexpression '^(xxx yyy zzz)' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:83:2:83:24 | /^(xxx: ... (zzz:)/ | Misleading operator precedence. The subexpression '^(xxx:)' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:84:2:84:23 | /^(xxx? ... zzz\\/)/ | Misleading operator precedence. The subexpression '^(xxx?:)' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:85:2:85:16 | /^@media\|@page/ | Misleading operator precedence. The subexpression '^@media' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:87:2:87:21 | /^click\|mouse\|touch/ | Misleading operator precedence. The subexpression '^click' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:88:2:88:43 | /^http: ... r\\.com/ | Misleading operator precedence. The subexpression '^http://good\\.com' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:89:2:89:47 | /^https ... r\\.com/ | Misleading operator precedence. The subexpression '^https?://good\\.com' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:90:2:90:55 | /^mouse ... ragend/ | Misleading operator precedence. The subexpression '^mouse' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:91:2:91:14 | /^xxx:\|yyy:/i | Misleading operator precedence. The subexpression '^xxx:' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:92:2:92:18 | /_xxx\|_yyy\|_zzz$/ | Misleading operator precedence. The subexpression '_zzz$' is anchored, but the other parts of this regular expression are not |
 | tst-UnanchoredUrlRegExp.js:3:43:3:61 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
 | tst-UnanchoredUrlRegExp.js:4:54:4:72 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
 | tst-UnanchoredUrlRegExp.js:10:2:10:22 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |

From 7018a386910b3e991b8b4c9188b0ad4d3158fecf Mon Sep 17 00:00:00 2001
From: Esben Sparre Andreasen <esben@semmle.com>
Date: Sat, 1 Jun 2019 13:29:33 +0200
Subject: [PATCH 06/11] JS: improve tests and regexp for
 js/regex/missing-regexp-anchor

---
 .../Security/CWE-020/MissingRegExpAnchor.ql   |  2 +-
 .../CWE-020/IncompleteHostnameRegExp.expected |  4 +-
 .../CWE-020/MissingRegExpAnchor.expected      | 50 +++++++++----------
 .../CWE-020/tst-SemiAnchoredRegExp.js         |  6 ++-
 4 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql
index a15a61a00afe..3bd43d0dd60f 100644
--- a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql
+++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql
@@ -23,7 +23,7 @@ import javascript
 predicate isInterestingSemiAnchoredRegExpString(RegExpPatternSource src, string msg) {
   exists(string str, string maybeGroupedStr, string regex, string anchorPart, string escapedDot |
     // a dot that might be escaped in a regular expression, for example `/\./` or new `RegExp('\\.')`
-    escapedDot = "\\\\\\\\?[.]" and
+    escapedDot = "\\\\[.]" and
     // a string that is mostly free from special reqular expression symbols
     str = "(?:(?:" + escapedDot + ")|[a-z:/.?_,@0-9 -])+" and
     // the string may be wrapped in parentheses
diff --git a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected
index ffbe31cb5c67..1e337828d8a1 100644
--- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected
@@ -23,5 +23,5 @@
 | tst-IncompleteHostnameRegExp.js:48:41:48:68 | '^https ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here |
 | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here |
 | tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | This regular expression has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | here |
-| tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | here |
-| tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | here |
+| tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | here |
+| tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | here |
diff --git a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected
index d72f512a9413..dbdadc83eed6 100644
--- a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected
@@ -8,31 +8,31 @@
 | tst-SemiAnchoredRegExp.js:28:2:28:11 | /(a)\|(b)$/ | Misleading operator precedence. The subexpression '(b)$' is anchored, but the other parts of this regular expression are not |
 | tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not |
 | tst-SemiAnchoredRegExp.js:31:2:31:25 | /^good\\ ... r\\.com/ | Misleading operator precedence. The subexpression '^good\\.com' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:32:2:32:27 | /^good\\ ... \\\\.com/ | Misleading operator precedence. The subexpression '^good\\\\.com' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:37:13:37:18 | "^a\|b" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:40:13:40:20 | "^a\|b\|c" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:46:13:46:20 | "^a\|(b)" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:48:13:48:22 | "^(a)\|(b)" | Misleading operator precedence. The subexpression '^(a)' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:51:13:51:18 | "a\|b$" | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:54:13:54:20 | "a\|b\|c$" | Misleading operator precedence. The subexpression 'c$' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:60:13:60:20 | "(a)\|b$" | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:62:13:62:22 | "(a)\|(b)$" | Misleading operator precedence. The subexpression '(b)$' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:66:13:66:38 | '^good\\ ... \\\\.com' | Misleading operator precedence. The subexpression '^good\\.com' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:75:2:75:27 | /(\\.xxx ... .zzz)$/ | Misleading operator precedence. The subexpression '(\\.zzz)$' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:77:2:77:23 | /\\.xxx\| ... zzz$/ig | Misleading operator precedence. The subexpression '\\.zzz$' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:78:2:78:19 | /\\.xxx\|\\.yyy\|zzz$/ | Misleading operator precedence. The subexpression 'zzz$' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:81:2:81:28 | /^(xxx  ...  yyy)/i | Misleading operator precedence. The subexpression '^(xxx yyy zzz)' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:83:2:83:24 | /^(xxx: ... (zzz:)/ | Misleading operator precedence. The subexpression '^(xxx:)' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:84:2:84:23 | /^(xxx? ... zzz\\/)/ | Misleading operator precedence. The subexpression '^(xxx?:)' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:85:2:85:16 | /^@media\|@page/ | Misleading operator precedence. The subexpression '^@media' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:87:2:87:21 | /^click\|mouse\|touch/ | Misleading operator precedence. The subexpression '^click' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:88:2:88:43 | /^http: ... r\\.com/ | Misleading operator precedence. The subexpression '^http://good\\.com' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:89:2:89:47 | /^https ... r\\.com/ | Misleading operator precedence. The subexpression '^https?://good\\.com' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:90:2:90:55 | /^mouse ... ragend/ | Misleading operator precedence. The subexpression '^mouse' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:91:2:91:14 | /^xxx:\|yyy:/i | Misleading operator precedence. The subexpression '^xxx:' is anchored, but the other parts of this regular expression are not |
-| tst-SemiAnchoredRegExp.js:92:2:92:18 | /_xxx\|_yyy\|_zzz$/ | Misleading operator precedence. The subexpression '_zzz$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:39:13:39:18 | "^a\|b" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:42:13:42:20 | "^a\|b\|c" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:48:13:48:20 | "^a\|(b)" | Misleading operator precedence. The subexpression '^a' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:50:13:50:22 | "^(a)\|(b)" | Misleading operator precedence. The subexpression '^(a)' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:53:13:53:18 | "a\|b$" | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:56:13:56:20 | "a\|b\|c$" | Misleading operator precedence. The subexpression 'c$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:62:13:62:20 | "(a)\|b$" | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:64:13:64:22 | "(a)\|(b)$" | Misleading operator precedence. The subexpression '(b)$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | Misleading operator precedence. The subexpression '^good.com' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:68:13:68:38 | '^good\\ ... \\\\.com' | Misleading operator precedence. The subexpression '^good\\.com' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:69:13:69:40 | '^good\\ ... \\\\.com' | Misleading operator precedence. The subexpression '^good\\.com' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:79:2:79:27 | /(\\.xxx ... .zzz)$/ | Misleading operator precedence. The subexpression '(\\.zzz)$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:81:2:81:23 | /\\.xxx\| ... zzz$/ig | Misleading operator precedence. The subexpression '\\.zzz$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:82:2:82:19 | /\\.xxx\|\\.yyy\|zzz$/ | Misleading operator precedence. The subexpression 'zzz$' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:85:2:85:28 | /^(xxx  ...  yyy)/i | Misleading operator precedence. The subexpression '^(xxx yyy zzz)' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:87:2:87:24 | /^(xxx: ... (zzz:)/ | Misleading operator precedence. The subexpression '^(xxx:)' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:88:2:88:23 | /^(xxx? ... zzz\\/)/ | Misleading operator precedence. The subexpression '^(xxx?:)' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:89:2:89:16 | /^@media\|@page/ | Misleading operator precedence. The subexpression '^@media' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:91:2:91:21 | /^click\|mouse\|touch/ | Misleading operator precedence. The subexpression '^click' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:92:2:92:43 | /^http: ... r\\.com/ | Misleading operator precedence. The subexpression '^http://good\\.com' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:93:2:93:47 | /^https ... r\\.com/ | Misleading operator precedence. The subexpression '^https?://good\\.com' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:94:2:94:55 | /^mouse ... ragend/ | Misleading operator precedence. The subexpression '^mouse' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:95:2:95:14 | /^xxx:\|yyy:/i | Misleading operator precedence. The subexpression '^xxx:' is anchored, but the other parts of this regular expression are not |
+| tst-SemiAnchoredRegExp.js:96:2:96:18 | /_xxx\|_yyy\|_zzz$/ | Misleading operator precedence. The subexpression '_zzz$' is anchored, but the other parts of this regular expression are not |
 | tst-UnanchoredUrlRegExp.js:3:43:3:61 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
 | tst-UnanchoredUrlRegExp.js:4:54:4:72 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
 | tst-UnanchoredUrlRegExp.js:10:2:10:22 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
diff --git a/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js b/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js
index 776e63987297..f679e8074c57 100644
--- a/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js
+++ b/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js
@@ -29,7 +29,9 @@
 
 	/^good.com|better.com/; // NOT OK
 	/^good\.com|better\.com/; // NOT OK
-	/^good\\.com|better\\.com/; // NOT OK
+	/^good\\.com|better\\.com/;
+	/^good\\\.com|better\\\.com/;
+	/^good\\\\.com|better\\\\.com/;
 });
 
 (function coreString() {
@@ -64,6 +66,8 @@
 	new RegExp('^good.com|better.com'); // NOT OK
 	new RegExp('^good\.com|better\.com'); // NOT OK
 	new RegExp('^good\\.com|better\\.com'); // NOT OK
+	new RegExp('^good\\\.com|better\\\.com'); // NOT OK
+	new RegExp('^good\\\\.com|better\\\\.com');
 });
 
 (function realWorld() {

From 14644270ac6ec499743a1b139bd6cfab7f1ebe03 Mon Sep 17 00:00:00 2001
From: Esben Sparre Andreasen <esben@semmle.com>
Date: Mon, 3 Jun 2019 08:32:35 +0200
Subject: [PATCH 07/11] JS: fix comment typo

---
 javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql
index 3bd43d0dd60f..52ac93fa175b 100644
--- a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql
+++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql
@@ -22,7 +22,7 @@ import javascript
  */
 predicate isInterestingSemiAnchoredRegExpString(RegExpPatternSource src, string msg) {
   exists(string str, string maybeGroupedStr, string regex, string anchorPart, string escapedDot |
-    // a dot that might be escaped in a regular expression, for example `/\./` or new `RegExp('\\.')`
+    // a dot that might be escaped in a regular expression, for example `/\./` or `new RegExp('\\.')`
     escapedDot = "\\\\[.]" and
     // a string that is mostly free from special reqular expression symbols
     str = "(?:(?:" + escapedDot + ")|[a-z:/.?_,@0-9 -])+" and

From 7b652214c5be7724e2234bba31990ac1dbf97f89 Mon Sep 17 00:00:00 2001
From: Esben Sparre Andreasen <esben@semmle.com>
Date: Mon, 3 Jun 2019 13:59:39 +0200
Subject: [PATCH 08/11] JS: address docstring comments

---
 javascript/ql/src/semmle/javascript/Regexp.qll | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/javascript/ql/src/semmle/javascript/Regexp.qll b/javascript/ql/src/semmle/javascript/Regexp.qll
index e605418ab371..ebd2387c08a5 100644
--- a/javascript/ql/src/semmle/javascript/Regexp.qll
+++ b/javascript/ql/src/semmle/javascript/Regexp.qll
@@ -409,7 +409,7 @@ module RegExpPatterns {
 }
 
 /**
- * Gets a node whose value may flow (inter-procedurally) to a position where it is interpreted
+ * Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
  * as a part of a regular expression.
  */
 private DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker t) {
@@ -426,7 +426,7 @@ private DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker
 }
 
 /**
- * Gets a node whose value may flow (inter-procedurally) to a position where it is interpreted
+ * Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
  * as a part of a regular expression.
  */
 private DataFlow::Node regExpSource(DataFlow::Node re) {
@@ -439,12 +439,14 @@ private DataFlow::Node regExpSource(DataFlow::Node re) {
  */
 abstract class RegExpPatternSource extends DataFlow::Node {
   /**
-   * Gets the pattern of this node.
+   * Gets the pattern of this node that is interpreted as a part of a
+   * regular expression.
    */
   abstract string getPattern();
 
   /**
-   * Gets a regular expression object that is built from the value of this node.
+   * Gets a regular expression object that is constructed from the pattern
+   * of this node.
    */
   abstract DataFlow::SourceNode getARegExpObject();
 }
@@ -477,7 +479,7 @@ class StringRegExpPatternSource extends RegExpPatternSource {
   StringRegExpPatternSource() { this = regExpSource(use) }
 
   /**
-   * Gets a node that use this source as a regular expression pattern.
+   * Gets a node that uses this source as a regular expression pattern.
    */
   DataFlow::Node getAUse() { result = use }
 

From bf51c54338c2c1b72a46e389399da8819488e485 Mon Sep 17 00:00:00 2001
From: Esben Sparre Andreasen <esben@semmle.com>
Date: Mon, 3 Jun 2019 14:23:22 +0200
Subject: [PATCH 09/11] JS: add `RegExpPatternSource::getAParse` to hide the
 subclasses

---
 .../CWE-020/IncompleteHostnameRegExp.ql       |  4 ++--
 .../Security/CWE-020/MissingRegExpAnchor.ql   |  2 +-
 .../ql/src/semmle/javascript/Regexp.qll       | 23 +++++++++++--------
 .../CWE-020/IncompleteHostnameRegExp.expected | 20 ++++++++--------
 4 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql
index ad1e609b2aac..88ebe5cf7d1e 100644
--- a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql
+++ b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql
@@ -31,10 +31,10 @@ where
   pattern = re.getPattern() and
   isIncompleteHostNameRegExpPattern(pattern, hostPart) and
   (
-    if re instanceof StringRegExpPatternSource
+    if re.getAParse() != re
     then (
       kind = "string, which is used as a regular expression $@," and
-      aux = re.(StringRegExpPatternSource).getAUse()
+      aux = re.getAParse()
     ) else (
       kind = "regular expression" and aux = re
     )
diff --git a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql
index 52ac93fa175b..d0141361f4bc 100644
--- a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql
+++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql
@@ -63,7 +63,7 @@ predicate isInterestingUnanchoredRegExpString(RegExpPatternSource src, string ms
         arg = mcn.getArgument(0) and
         (
           src.getARegExpObject().flowsTo(arg) or
-          src.(StringRegExpPatternSource).getAUse() = arg
+          src.getAParse() = arg
         )
       |
         name = "replace"
diff --git a/javascript/ql/src/semmle/javascript/Regexp.qll b/javascript/ql/src/semmle/javascript/Regexp.qll
index ebd2387c08a5..909a52512c72 100644
--- a/javascript/ql/src/semmle/javascript/Regexp.qll
+++ b/javascript/ql/src/semmle/javascript/Regexp.qll
@@ -438,6 +438,12 @@ private DataFlow::Node regExpSource(DataFlow::Node re) {
  * as a part of a regular expression.
  */
 abstract class RegExpPatternSource extends DataFlow::Node {
+  /**
+   * Gets a node where the pattern of this node is parsed as a part of
+   * a regular expression.
+   */
+  abstract DataFlow::Node getAParse();
+
   /**
    * Gets the pattern of this node that is interpreted as a part of a
    * regular expression.
@@ -454,7 +460,7 @@ abstract class RegExpPatternSource extends DataFlow::Node {
 /**
  * A regular expression literal, viewed as the pattern source for itself.
  */
-class RegExpLiteralPatternSource extends RegExpPatternSource {
+private class RegExpLiteralPatternSource extends RegExpPatternSource {
   string pattern;
 
   RegExpLiteralPatternSource() {
@@ -464,6 +470,8 @@ class RegExpLiteralPatternSource extends RegExpPatternSource {
     )
   }
 
+  override DataFlow::Node getAParse() { result = this }
+
   override string getPattern() { result = pattern }
 
   override DataFlow::SourceNode getARegExpObject() { result = this }
@@ -473,20 +481,17 @@ class RegExpLiteralPatternSource extends RegExpPatternSource {
  * A node whose string value may flow to a position where it is interpreted
  * as a part of a regular expression.
  */
-class StringRegExpPatternSource extends RegExpPatternSource {
-  DataFlow::Node use;
+private class StringRegExpPatternSource extends RegExpPatternSource {
+  DataFlow::Node parse;
 
-  StringRegExpPatternSource() { this = regExpSource(use) }
+  StringRegExpPatternSource() { this = regExpSource(parse) }
 
-  /**
-   * Gets a node that uses this source as a regular expression pattern.
-   */
-  DataFlow::Node getAUse() { result = use }
+  override DataFlow::Node getAParse() { result = parse }
 
   override DataFlow::SourceNode getARegExpObject() {
     exists(DataFlow::InvokeNode constructor |
       constructor = DataFlow::globalVarRef("RegExp").getAnInvocation() and
-      use = constructor.getArgument(0) and
+      parse = constructor.getArgument(0) and
       result = constructor
     )
   }
diff --git a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected
index 1e337828d8a1..c128823024eb 100644
--- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected
@@ -1,9 +1,9 @@
 | tst-IncompleteHostnameRegExp.js:3:2:3:29 | /^http: ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:3:2:3:29 | /^http: ... le.com/ | here |
 | tst-IncompleteHostnameRegExp.js:5:2:5:29 | /^http: ... le.net/ | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:5:2:5:29 | /^http: ... le.net/ | here |
 | tst-IncompleteHostnameRegExp.js:6:2:6:43 | /^http: ... b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:6:2:6:43 | /^http: ... b).com/ | here |
-| tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | here |
-| tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | here |
-| tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | here |
+| tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | here |
+| tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | here |
+| tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | here |
 | tst-IncompleteHostnameRegExp.js:17:14:17:30 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | here |
 | tst-IncompleteHostnameRegExp.js:19:17:19:35 | '^test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:20:13:20:26 | `${hostname}$` | here |
 | tst-IncompleteHostnameRegExp.js:22:27:22:45 | 'test.example.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:23:13:23:27 | domain.hostname | here |
@@ -13,15 +13,15 @@
 | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | here |
 | tst-IncompleteHostnameRegExp.js:39:2:39:33 | /^(http ... om\\/)/g | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:39:2:39:33 | /^(http ... om\\/)/g | here |
 | tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | here |
-| tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | here |
+| tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | here |
 | tst-IncompleteHostnameRegExp.js:41:41:41:68 | '^https ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | here |
-| tst-IncompleteHostnameRegExp.js:42:13:42:62 | '^http[ ... \\/(.+)' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:42:13:42:62 | '^http[ ... \\/(.+)' | here |
+| tst-IncompleteHostnameRegExp.js:42:13:42:62 | '^http[ ... \\/(.+)' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:42:13:42:62 | '^http[ ... \\/(.+)' | here |
 | tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | here |
-| tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
+| tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
 | tst-IncompleteHostnameRegExp.js:46:2:46:29 | /^(exam ... e.com)/ | This regular expression has an unescaped '.' before 'dev\|example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:46:2:46:29 | /^(exam ... e.com)/ | here |
-| tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here |
+| tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here |
 | tst-IncompleteHostnameRegExp.js:48:41:48:68 | '^https ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here |
-| tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here |
+| tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here |
 | tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | This regular expression has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | here |
-| tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | here |
-| tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | here |
+| tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | This regular expression has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | here |
+| tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | This regular expression has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | here |

From 9e0a97e82fa72ea00713109a5a03f111295655e1 Mon Sep 17 00:00:00 2001
From: Esben Sparre Andreasen <esben@semmle.com>
Date: Mon, 3 Jun 2019 16:39:39 +0200
Subject: [PATCH 10/11] JS: address qhelp review comments

---
 .../ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp       | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp
index 0e93e80b70ee..6c2d6aa59f2b 100644
--- a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp
+++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp
@@ -7,7 +7,7 @@
 		<p>
 
 			Sanitizing untrusted input with regular expressions is a
-			common technique.  However, it is error prone to match untrusted input
+			common technique.  However, it is error-prone to match untrusted input
 			against regular expressions without anchors such as <code>^</code> or
 			<code>$</code>.  Malicious input can bypass such security checks by
 			embedding one of the allowed patterns in an unexpected location.
@@ -17,8 +17,8 @@
 		<p>
 
 			Even if the matching is not done in a security-critical
-			context, it may still cause undesirable behaviors when the regular
-			expression matches accidentally.
+			context, it may still cause undesirable behavior when the regular
+			expression accidentally matches.
 
 		</p>
 	</overview>

From 04868e5b97b4abfaf19755854ca932f33b69d96f Mon Sep 17 00:00:00 2001
From: Esben Sparre Andreasen <esben@semmle.com>
Date: Mon, 3 Jun 2019 17:05:19 +0200
Subject: [PATCH 11/11] JS: format qhelp examples

---
 .../src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js  | 4 ++--
 .../src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js
index ef8b2c8d2e15..fbf3430c5247 100644
--- a/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js
+++ b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js
@@ -1,7 +1,7 @@
-app.get('/some/path', function(req, res) {
+app.get("/some/path", function(req, res) {
     let url = req.param("url");
     // BAD: the host of `url` may be controlled by an attacker
-	if (url.match(/https?:\/\/www\.example\.com\//)) {
+    if (url.match(/https?:\/\/www\.example\.com\//)) {
         res.redirect(url);
     }
 });
diff --git a/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js
index 8ba1f60b8325..ebb408eec4be 100644
--- a/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js
+++ b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js
@@ -1,7 +1,7 @@
-app.get('/some/path', function(req, res) {
+app.get("/some/path", function(req, res) {
     let url = req.param("url");
     // GOOD: the host of `url` can not be controlled by an attacker
-	if (url.match(/^https?:\/\/www\.example\.com\//)) {
+    if (url.match(/^https?:\/\/www\.example\.com\//)) {
         res.redirect(url);
     }
 });