diff --git a/change-notes/1.19/analysis-javascript.md b/change-notes/1.19/analysis-javascript.md index 4415af95d30c..106ba3d0fd1f 100644 --- a/change-notes/1.19/analysis-javascript.md +++ b/change-notes/1.19/analysis-javascript.md @@ -14,6 +14,7 @@ |-----------------------------------------------|------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Enabling Node.js integration for Electron web content renderers (`js/enabling-electron-renderer-node-integration`) | security, frameworks/electron, external/cwe/cwe-094 | Highlights Electron web content renderer preferences with Node.js integration enabled, indicating a violation of [CWE-94](https://cwe.mitre.org/data/definitions/94.html). Results are not shown on LGTM by default. | | Stored cross-site scripting (`js/stored-xss`) | security, external/cwe/cwe-079, external/cwe/cwe-116 | Highlights uncontrolled stored values flowing into HTML content, indicating a violation of [CWE-079](https://cwe.mitre.org/data/definitions/79.html). Results shown on LGTM by default. | +| Replacement of a substring with itself (`js/identity-replacement`) | correctness, security, external/cwe/cwe-116 | Highlights string replacements that replace a string with itself, which usually indicates a mistake. Results shown on LGTM by default. | ## Changes to existing queries diff --git a/javascript/config/suites/javascript/correctness-core b/javascript/config/suites/javascript/correctness-core index 1629edb6e9bc..344e8936a38d 100644 --- a/javascript/config/suites/javascript/correctness-core +++ b/javascript/config/suites/javascript/correctness-core @@ -32,6 +32,7 @@ + semmlecode-javascript-queries/RegExp/BackrefIntoNegativeLookahead.ql: /Correctness/Regular Expressions + semmlecode-javascript-queries/RegExp/DuplicateCharacterInCharacterClass.ql: /Correctness/Regular Expressions + semmlecode-javascript-queries/RegExp/EmptyCharacterClass.ql: /Correctness/Regular Expressions ++ semmlecode-javascript-queries/RegExp/IdentityReplacement.ql: /Correctness/Regular Expressions + semmlecode-javascript-queries/RegExp/UnboundBackref.ql: /Correctness/Regular Expressions + semmlecode-javascript-queries/RegExp/UnmatchableCaret.ql: /Correctness/Regular Expressions + semmlecode-javascript-queries/RegExp/UnmatchableDollar.ql: /Correctness/Regular Expressions diff --git a/javascript/ql/src/RegExp/IdentityReplacement.qhelp b/javascript/ql/src/RegExp/IdentityReplacement.qhelp new file mode 100644 index 000000000000..f1610a782624 --- /dev/null +++ b/javascript/ql/src/RegExp/IdentityReplacement.qhelp @@ -0,0 +1,36 @@ + + + + +

+Replacing a substring with itself has no effect and usually indicates a mistake, such as +misspelling a backslash escape. +

+
+ + +

+Examine the string replacement to find and correct any typos. +

+
+ + +

+The following code snippet attempts to backslash-escape all double quotes in raw +by replacing all instances of " with \": +

+ +

+However, the replacement string '\"' is actually the same as '"', +with \" interpreted as an identity escape, so the replacement does nothing. +Instead, the replacement string should be '\\"': +

+ +
+ + +
  • Mozilla Developer Network: String escape notation.
  • +
    +
    diff --git a/javascript/ql/src/RegExp/IdentityReplacement.ql b/javascript/ql/src/RegExp/IdentityReplacement.ql new file mode 100644 index 000000000000..ecb02733337a --- /dev/null +++ b/javascript/ql/src/RegExp/IdentityReplacement.ql @@ -0,0 +1,68 @@ +/** + * @name Replacement of a substring with itself + * @description Replacing a substring with itself has no effect and may indicate a mistake. + * @kind problem + * @problem.severity warning + * @id js/identity-replacement + * @precision very-high + * @tags correctness + * security + * external/cwe/cwe-116 + */ + +import javascript + +/** + * Holds if `e`, when used as the first argument of `String.prototype.replace`, matches + * `s` and nothing else. + */ +predicate matchesString(Expr e, string s) { + exists (RegExpLiteral rl | + rl = e and + not rl.isIgnoreCase() and + regExpMatchesString(rl.getRoot(), s) + ) + or + s = e.getStringValue() +} + +/** + * Holds if `t` matches `s` and nothing else. + */ +language[monotonicAggregates] +predicate regExpMatchesString(RegExpTerm t, string s) { + // constants match themselves + s = t.(RegExpConstant).getValue() + or + // assertions match the empty string + (t instanceof RegExpCaret or + t instanceof RegExpDollar or + t instanceof RegExpWordBoundary or + t instanceof RegExpNonWordBoundary or + t instanceof RegExpLookahead or + t instanceof RegExpLookbehind) and + s = "" + or + // groups match their content + regExpMatchesString(t.(RegExpGroup).getAChild(), s) + or + // single-character classes match that character + exists (RegExpCharacterClass recc | recc = t and not recc.isInverted() | + recc.getNumChild() = 1 and + regExpMatchesString(recc.getChild(0), s) + ) + or + // sequences match the concatenation of their elements + exists (RegExpSequence seq | seq = t | + s = concat(int i, RegExpTerm child | child = seq.getChild(i) | + any(string subs | regExpMatchesString(child, subs)) order by i + ) + ) +} + +from MethodCallExpr repl, string s, string friendly +where repl.getMethodName() = "replace" and + matchesString(repl.getArgument(0), s) and + repl.getArgument(1).getStringValue() = s and + (if s = "" then friendly = "the empty string" else friendly = "'" + s + "'") +select repl.getArgument(0), "This replaces " + friendly + " with itself." diff --git a/javascript/ql/src/RegExp/examples/IdentityReplacement.js b/javascript/ql/src/RegExp/examples/IdentityReplacement.js new file mode 100644 index 000000000000..1c5d9251e7a4 --- /dev/null +++ b/javascript/ql/src/RegExp/examples/IdentityReplacement.js @@ -0,0 +1 @@ +var escaped = raw.replace(/"/g, '\"'); diff --git a/javascript/ql/src/RegExp/examples/IdentityReplacementGood.js b/javascript/ql/src/RegExp/examples/IdentityReplacementGood.js new file mode 100644 index 000000000000..0238fc1fe3f4 --- /dev/null +++ b/javascript/ql/src/RegExp/examples/IdentityReplacementGood.js @@ -0,0 +1 @@ +var escaped = raw.replace(/"/g, '\\"'); diff --git a/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.expected b/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.expected new file mode 100644 index 000000000000..e707ae9ecdd2 --- /dev/null +++ b/javascript/ql/test/query-tests/RegExp/IdentityReplacement/IdentityReplacement.expected @@ -0,0 +1,12 @@ +| IdentityReplacement.js:1:27:1:30 | /"/g | This replaces '"' with itself. | +| tst.js:1:13:1:16 | "\\\\" | This replaces '\\' with itself. | +| tst.js:2:13:2:18 | /(\\\\)/ | This replaces '\\' with itself. | +| tst.js:3:13:3:17 | /["]/ | This replaces '"' with itself. | +| tst.js:6:13:6:18 | /foo/g | This replaces 'foo' with itself. | +| tst.js:9:13:9:17 | /^\\\\/ | This replaces '\\' with itself. | +| tst.js:10:13:10:17 | /\\\\$/ | This replaces '\\' with itself. | +| tst.js:11:13:11:18 | /\\b\\\\/ | This replaces '\\' with itself. | +| tst.js:12:13:12:18 | /\\B\\\\/ | This replaces '\\' with itself. | +| tst.js:13:13:13:22 | /\\\\(?!\\\\)/ | This replaces '\\' with itself. | +| tst.js:14:13:14:23 | /(?