From 1ab11109f9360dbdbd171a7868c0b3d1dc8d0250 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Mon, 24 Sep 2018 16:57:45 +0100 Subject: [PATCH 1/5] JavaScript: Add new query flagging identity replacements. --- change-notes/1.19/analysis-javascript.md | 1 + .../config/suites/javascript/correctness-core | 1 + .../ql/src/RegExp/IdentityReplacement.qhelp | 36 +++++++++++++++ .../ql/src/RegExp/IdentityReplacement.ql | 46 +++++++++++++++++++ .../RegExp/examples/IdentityReplacement.js | 1 + .../examples/IdentityReplacementGood.js | 1 + .../IdentityReplacement.expected | 4 ++ .../IdentityReplacement.js | 1 + .../IdentityReplacement.qlref | 1 + .../IdentityReplacementGood.js | 1 + .../RegExp/IdentityReplacement/tst.js | 4 ++ 11 files changed, 97 insertions(+) create mode 100644 javascript/ql/src/RegExp/IdentityReplacement.qhelp create mode 100644 javascript/ql/src/RegExp/IdentityReplacement.ql create mode 100644 javascript/ql/src/RegExp/examples/IdentityReplacement.js create mode 100644 javascript/ql/src/RegExp/examples/IdentityReplacementGood.js create mode 100644 javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.expected create mode 100644 javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.js create mode 100644 javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.qlref create mode 100644 javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacementGood.js create mode 100644 javascript/ql/test/query-tests/RegExp/IdentityReplacement/tst.js diff --git a/change-notes/1.19/analysis-javascript.md b/change-notes/1.19/analysis-javascript.md index 4415af95d30c..106ba3d0fd1f 100644 --- a/change-notes/1.19/analysis-javascript.md +++ b/change-notes/1.19/analysis-javascript.md @@ -14,6 +14,7 @@ |-----------------------------------------------|------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Enabling Node.js integration for Electron web content renderers (`js/enabling-electron-renderer-node-integration`) | security, frameworks/electron, external/cwe/cwe-094 | Highlights Electron web content renderer preferences with Node.js integration enabled, indicating a violation of [CWE-94](https://cwe.mitre.org/data/definitions/94.html). Results are not shown on LGTM by default. | | Stored cross-site scripting (`js/stored-xss`) | security, external/cwe/cwe-079, external/cwe/cwe-116 | Highlights uncontrolled stored values flowing into HTML content, indicating a violation of [CWE-079](https://cwe.mitre.org/data/definitions/79.html). Results shown on LGTM by default. | +| Replacement of a substring with itself (`js/identity-replacement`) | correctness, security, external/cwe/cwe-116 | Highlights string replacements that replace a string with itself, which usually indicates a mistake. Results shown on LGTM by default. | ## Changes to existing queries diff --git a/javascript/config/suites/javascript/correctness-core b/javascript/config/suites/javascript/correctness-core index 1629edb6e9bc..344e8936a38d 100644 --- a/javascript/config/suites/javascript/correctness-core +++ b/javascript/config/suites/javascript/correctness-core @@ -32,6 +32,7 @@ + semmlecode-javascript-queries/RegExp/BackrefIntoNegativeLookahead.ql: /Correctness/Regular Expressions + semmlecode-javascript-queries/RegExp/DuplicateCharacterInCharacterClass.ql: /Correctness/Regular Expressions + semmlecode-javascript-queries/RegExp/EmptyCharacterClass.ql: /Correctness/Regular Expressions ++ semmlecode-javascript-queries/RegExp/IdentityReplacement.ql: /Correctness/Regular Expressions + semmlecode-javascript-queries/RegExp/UnboundBackref.ql: /Correctness/Regular Expressions + semmlecode-javascript-queries/RegExp/UnmatchableCaret.ql: /Correctness/Regular Expressions + semmlecode-javascript-queries/RegExp/UnmatchableDollar.ql: /Correctness/Regular Expressions diff --git a/javascript/ql/src/RegExp/IdentityReplacement.qhelp b/javascript/ql/src/RegExp/IdentityReplacement.qhelp new file mode 100644 index 000000000000..f1610a782624 --- /dev/null +++ b/javascript/ql/src/RegExp/IdentityReplacement.qhelp @@ -0,0 +1,36 @@ + + + + +

+Replacing a substring with itself has no effect and usually indicates a mistake, such as +misspelling a backslash escape. +

+
+ + +

+Examine the string replacement to find and correct any typos. +

+
+ + +

+The following code snippet attempts to backslash-escape all double quotes in raw +by replacing all instances of " with \": +

+ +

+However, the replacement string '\"' is actually the same as '"', +with \" interpreted as an identity escape, so the replacement does nothing. +Instead, the replacement string should be '\\"': +

+ +
+ + +
  • Mozilla Developer Network: String escape notation.
  • +
    +
    diff --git a/javascript/ql/src/RegExp/IdentityReplacement.ql b/javascript/ql/src/RegExp/IdentityReplacement.ql new file mode 100644 index 000000000000..7ffbd83c3b40 --- /dev/null +++ b/javascript/ql/src/RegExp/IdentityReplacement.ql @@ -0,0 +1,46 @@ +/** + * @name Replacement of a substring with itself + * @description Replacing a substring with itself has no effect and may indicate a mistake. + * @kind problem + * @problem.severity warning + * @id js/identity-replacement + * @precision very-high + * @tags correctness + * security + * external/cwe/cwe-116 + */ + +import javascript + +/** + * Holds if `e`, when used as the first argument of `String.prototype.replace`, matches + * `s` and nothing else. + */ +predicate matchesString(Expr e, string s) { + exists (RegExpConstant c | + matchesConstant(e.(RegExpLiteral).getRoot(), c) and + s = c.getValue() + ) + or + s = e.getStringValue() +} + +/** + * Holds if `t` matches `c` and nothing else. + */ +predicate matchesConstant(RegExpTerm t, RegExpConstant c) { + c = t + or + matchesConstant(t.(RegExpGroup).getAChild(), c) + or + exists (RegExpCharacterClass recc | recc = t and not recc.isInverted() | + recc.getNumChild() = 1 and + matchesConstant(recc.getChild(0), c) + ) +} + +from MethodCallExpr repl, string s +where repl.getMethodName() = "replace" and + matchesString(repl.getArgument(0), s) and + repl.getArgument(1).getStringValue() = s +select repl.getArgument(0), "This replaces '" + s + "' with itself." diff --git a/javascript/ql/src/RegExp/examples/IdentityReplacement.js b/javascript/ql/src/RegExp/examples/IdentityReplacement.js new file mode 100644 index 000000000000..1c5d9251e7a4 --- /dev/null +++ b/javascript/ql/src/RegExp/examples/IdentityReplacement.js @@ -0,0 +1 @@ +var escaped = raw.replace(/"/g, '\"'); diff --git a/javascript/ql/src/RegExp/examples/IdentityReplacementGood.js b/javascript/ql/src/RegExp/examples/IdentityReplacementGood.js new file mode 100644 index 000000000000..0238fc1fe3f4 --- /dev/null +++ b/javascript/ql/src/RegExp/examples/IdentityReplacementGood.js @@ -0,0 +1 @@ +var escaped = raw.replace(/"/g, '\\"'); diff --git a/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.expected b/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.expected new file mode 100644 index 000000000000..58972c3af834 --- /dev/null +++ b/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.expected @@ -0,0 +1,4 @@ +| IdentityReplacement.js:1:27:1:30 | /"/g | This replaces '"' with itself. | +| tst.js:1:13:1:16 | "\\\\" | This replaces '\\' with itself. | +| tst.js:2:13:2:18 | /(\\\\)/ | This replaces '\\' with itself. | +| tst.js:3:13:3:17 | /["]/ | This replaces '"' with itself. | diff --git a/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.js b/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.js new file mode 100644 index 000000000000..1c5d9251e7a4 --- /dev/null +++ b/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.js @@ -0,0 +1 @@ +var escaped = raw.replace(/"/g, '\"'); diff --git a/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.qlref b/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.qlref new file mode 100644 index 000000000000..f8b9c39b11ff --- /dev/null +++ b/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.qlref @@ -0,0 +1 @@ +RegExp/IdentityReplacement.ql \ No newline at end of file diff --git a/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacementGood.js b/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacementGood.js new file mode 100644 index 000000000000..0238fc1fe3f4 --- /dev/null +++ b/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacementGood.js @@ -0,0 +1 @@ +var escaped = raw.replace(/"/g, '\\"'); diff --git a/javascript/ql/test/query-tests/RegExp/IdentityReplacement/tst.js b/javascript/ql/test/query-tests/RegExp/IdentityReplacement/tst.js new file mode 100644 index 000000000000..bdcc89a010d9 --- /dev/null +++ b/javascript/ql/test/query-tests/RegExp/IdentityReplacement/tst.js @@ -0,0 +1,4 @@ +raw.replace("\\", "\\"); // NOT OK +raw.replace(/(\\)/, "\\"); // NOT OK +raw.replace(/["]/, "\""); // NOT OK +raw.replace("\\", "\\\\"); // OK From ec9a3c87a7cdcea17b472bedf4e954f47ee4637e Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Tue, 25 Sep 2018 11:02:03 +0100 Subject: [PATCH 2/5] JavaScript: Do not flag case-insensitive replace. --- javascript/ql/src/RegExp/IdentityReplacement.ql | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/javascript/ql/src/RegExp/IdentityReplacement.ql b/javascript/ql/src/RegExp/IdentityReplacement.ql index 7ffbd83c3b40..0163016bad32 100644 --- a/javascript/ql/src/RegExp/IdentityReplacement.ql +++ b/javascript/ql/src/RegExp/IdentityReplacement.ql @@ -17,9 +17,10 @@ import javascript * `s` and nothing else. */ predicate matchesString(Expr e, string s) { - exists (RegExpConstant c | - matchesConstant(e.(RegExpLiteral).getRoot(), c) and - s = c.getValue() + exists (RegExpLiteral rl | + rl = e and + not rl.isIgnoreCase() and + regExpMatchesString(rl.getRoot(), s) ) or s = e.getStringValue() From 5fb22ba02157ab6483181733b431f4e0105dfa33 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Tue, 25 Sep 2018 11:02:45 +0100 Subject: [PATCH 3/5] JavaScript: Handle zero-width assertions and sequences. --- .../ql/src/RegExp/IdentityReplacement.ql | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/javascript/ql/src/RegExp/IdentityReplacement.ql b/javascript/ql/src/RegExp/IdentityReplacement.ql index 0163016bad32..c735b4377b96 100644 --- a/javascript/ql/src/RegExp/IdentityReplacement.ql +++ b/javascript/ql/src/RegExp/IdentityReplacement.ql @@ -27,16 +27,36 @@ predicate matchesString(Expr e, string s) { } /** - * Holds if `t` matches `c` and nothing else. + * Holds if `t` matches `s` and nothing else. */ -predicate matchesConstant(RegExpTerm t, RegExpConstant c) { - c = t +language[monotonicAggregates] +predicate regExpMatchesString(RegExpTerm t, string s) { + // constants match themselves + s = t.(RegExpConstant).getValue() or - matchesConstant(t.(RegExpGroup).getAChild(), c) + // assertions match the empty string + (t instanceof RegExpCaret or + t instanceof RegExpDollar or + t instanceof RegExpWordBoundary or + t instanceof RegExpNonWordBoundary or + t instanceof RegExpLookahead or + t instanceof RegExpLookbehind) and + s = "" or + // groups match their content + regExpMatchesString(t.(RegExpGroup).getAChild(), s) + or + // single-character classes match that character exists (RegExpCharacterClass recc | recc = t and not recc.isInverted() | recc.getNumChild() = 1 and - matchesConstant(recc.getChild(0), c) + regExpMatchesString(recc.getChild(0), s) + ) + or + // sequences match the concatenation of their elements + exists (RegExpSequence seq | seq = t | + s = concat(int i, RegExpTerm child | child = seq.getChild(i) | + any(string subs | regExpMatchesString(child, subs)) order by i + ) ) } From 659c67c715a6ab3d1454670fb5b920d4980ef5c1 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Tue, 25 Sep 2018 11:03:05 +0100 Subject: [PATCH 4/5] JavaScript: Produce friendlier message for empty-string replacements. --- javascript/ql/src/RegExp/IdentityReplacement.ql | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/javascript/ql/src/RegExp/IdentityReplacement.ql b/javascript/ql/src/RegExp/IdentityReplacement.ql index c735b4377b96..ecb02733337a 100644 --- a/javascript/ql/src/RegExp/IdentityReplacement.ql +++ b/javascript/ql/src/RegExp/IdentityReplacement.ql @@ -60,8 +60,9 @@ predicate regExpMatchesString(RegExpTerm t, string s) { ) } -from MethodCallExpr repl, string s +from MethodCallExpr repl, string s, string friendly where repl.getMethodName() = "replace" and matchesString(repl.getArgument(0), s) and - repl.getArgument(1).getStringValue() = s -select repl.getArgument(0), "This replaces '" + s + "' with itself." + repl.getArgument(1).getStringValue() = s and + (if s = "" then friendly = "the empty string" else friendly = "'" + s + "'") +select repl.getArgument(0), "This replaces " + friendly + " with itself." From 0e63ea1b511a0a4017fc0dfd77df79978ccac935 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Tue, 25 Sep 2018 11:03:15 +0100 Subject: [PATCH 5/5] JavaScript: Update tests. --- .../IdentityReplacement/IdentityReplacement.expected | 8 ++++++++ .../query-tests/RegExp/IdentityReplacement/tst.js | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.expected b/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.expected index 58972c3af834..e707ae9ecdd2 100644 --- a/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.expected +++ b/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.expected @@ -2,3 +2,11 @@ | tst.js:1:13:1:16 | "\\\\" | This replaces '\\' with itself. | | tst.js:2:13:2:18 | /(\\\\)/ | This replaces '\\' with itself. | | tst.js:3:13:3:17 | /["]/ | This replaces '"' with itself. | +| tst.js:6:13:6:18 | /foo/g | This replaces 'foo' with itself. | +| tst.js:9:13:9:17 | /^\\\\/ | This replaces '\\' with itself. | +| tst.js:10:13:10:17 | /\\\\$/ | This replaces '\\' with itself. | +| tst.js:11:13:11:18 | /\\b\\\\/ | This replaces '\\' with itself. | +| tst.js:12:13:12:18 | /\\B\\\\/ | This replaces '\\' with itself. | +| tst.js:13:13:13:22 | /\\\\(?!\\\\)/ | This replaces '\\' with itself. | +| tst.js:14:13:14:23 | /(?