+
+
+
+Interpreting hard-coded data, such as string literals containing hexadecimal numbers,
+as code or as an import path is typical of malicious backdoor code that has been
+implanted into an otherwise trusted code base and is trying to hide its true purpose
+from casual readers or automated scanning tools.
+
+
+
+
+
+Examine the code in question carefully to ascertain its provenance and its true purpose.
+If the code is benign, it should always be possible to rewrite it without relying
+on dynamically interpreting data as code, improving both clarity and safety.
+
+
+
+
+
+As an example of malicious code using this obfuscation technique, consider the following
+simplified version of a snippet of backdoor code that was discovered in a dependency of
+the popular event-stream npm package:
+
+
+
+While this shows only the first few lines of code, it already looks very suspicious
+since it takes a hard-coded string literal, hex-decodes it and then uses it as an
+import path. The only reason to do so is to hide the name of the file being imported.
+
+
+
+
+
+OWASP:
+Trojan Horse.
+
+
+The npm Blog:
+Details about the event-stream incident.
+
+
+
+
diff --git a/javascript/ql/src/Security/CWE-506/HardcodedDataInterpretedAsCode.ql b/javascript/ql/src/Security/CWE-506/HardcodedDataInterpretedAsCode.ql
new file mode 100644
index 000000000000..e274ac45d37a
--- /dev/null
+++ b/javascript/ql/src/Security/CWE-506/HardcodedDataInterpretedAsCode.ql
@@ -0,0 +1,22 @@
+/**
+ * @name Hard-coded data interpreted as code
+ * @description Transforming hard-coded data (such as hexadecimal constants) into code
+ * to be executed is a technique often associated with backdoors and should
+ * be avoided.
+ * @kind path-problem
+ * @problem.severity error
+ * @precision medium
+ * @id js/hardcoded-data-interpreted-as-code
+ * @tags security
+ * external/cwe/cwe-506
+ */
+
+import javascript
+import semmle.javascript.security.dataflow.HardcodedDataInterpretedAsCode::HardcodedDataInterpretedAsCode
+import DataFlow::PathGraph
+
+from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select sink.getNode(), source, sink,
+ "Hard-coded data from $@ is interpreted as " + sink.getNode().(Sink).getKind() + ".",
+ source.getNode(), "here"
diff --git a/javascript/ql/src/Security/CWE-506/examples/HardcodedDataInterpretedAsCode.js b/javascript/ql/src/Security/CWE-506/examples/HardcodedDataInterpretedAsCode.js
new file mode 100644
index 000000000000..dcf862d194e6
--- /dev/null
+++ b/javascript/ql/src/Security/CWE-506/examples/HardcodedDataInterpretedAsCode.js
@@ -0,0 +1,8 @@
+var r = require;
+
+function e(r) {
+ return Buffer.from(r, "hex").toString()
+}
+
+// BAD: hexadecimal constant decoded and interpreted as import path
+var n = r(e("2e2f746573742f64617461"));
diff --git a/javascript/ql/src/semmle/javascript/NodeJS.qll b/javascript/ql/src/semmle/javascript/NodeJS.qll
index 0d0827a5ed24..9503e9637cde 100644
--- a/javascript/ql/src/semmle/javascript/NodeJS.qll
+++ b/javascript/ql/src/semmle/javascript/NodeJS.qll
@@ -132,7 +132,13 @@ predicate findNodeModulesFolder(Folder f, Folder nodeModules, int distance) {
*/
private class RequireVariable extends Variable {
RequireVariable() {
- exists (ModuleScope m | this = m.getVariable("require"))
+ this = any(ModuleScope m).getVariable("require")
+ or
+ // cover cases where we failed to detect Node.js code
+ this.(GlobalVariable).getName() = "require"
+ or
+ // track through assignments to other variables
+ this.getAnAssignedExpr().(VarAccess).getVariable() instanceof RequireVariable
}
}
@@ -149,7 +155,9 @@ private predicate moduleInFile(Module m, File f) {
class Require extends CallExpr, Import {
Require() {
exists (RequireVariable req |
- this.getCallee() = req.getAnAccess()
+ this.getCallee() = req.getAnAccess() and
+ // `mjs` files explicitly disallow `require`
+ getFile().getExtension() != "mjs"
)
}
diff --git a/javascript/ql/src/semmle/javascript/frameworks/NodeJSLib.qll b/javascript/ql/src/semmle/javascript/frameworks/NodeJSLib.qll
index 2d4b574fcca2..884c0e022800 100644
--- a/javascript/ql/src/semmle/javascript/frameworks/NodeJSLib.qll
+++ b/javascript/ql/src/semmle/javascript/frameworks/NodeJSLib.qll
@@ -297,7 +297,22 @@ module NodeJSLib {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
pred = tainted and succ = this
}
+ }
+
+ /**
+ * A model of taint propagation through `new Buffer` and `Buffer.from`.
+ */
+ private class BufferTaintStep extends TaintTracking::AdditionalTaintStep, DataFlow::InvokeNode {
+ BufferTaintStep() {
+ this = DataFlow::globalVarRef("Buffer").getAnInstantiation()
+ or
+ this = DataFlow::globalVarRef("Buffer").getAMemberInvocation("from")
+ }
+ override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
+ pred = getArgument(0) and
+ succ = this
+ }
}
/**
diff --git a/javascript/ql/src/semmle/javascript/security/dataflow/HardcodedDataInterpretedAsCode.qll b/javascript/ql/src/semmle/javascript/security/dataflow/HardcodedDataInterpretedAsCode.qll
new file mode 100644
index 000000000000..f7ff4042f7e6
--- /dev/null
+++ b/javascript/ql/src/semmle/javascript/security/dataflow/HardcodedDataInterpretedAsCode.qll
@@ -0,0 +1,97 @@
+/**
+ * Provides a taint-tracking configuration for reasoning about hard-coded data
+ * being interpreted as code.
+ */
+
+import javascript
+private import semmle.javascript.security.dataflow.CodeInjection
+
+module HardcodedDataInterpretedAsCode {
+ /**
+ * A data flow source for hard-coded data.
+ */
+ abstract class Source extends DataFlow::Node {
+ /** Gets a flow label for which this is a source. */
+ DataFlow::FlowLabel getLabel() {
+ result = DataFlow::FlowLabel::data()
+ }
+ }
+
+ /**
+ * A data flow sink for code injection.
+ */
+ abstract class Sink extends DataFlow::Node {
+ /** Gets a flow label for which this is a sink. */
+ abstract DataFlow::FlowLabel getLabel();
+
+ /** Gets a description of what kind of sink this is. */
+ abstract string getKind();
+ }
+
+ /**
+ * A sanitizer for hard-coded data.
+ */
+ abstract class Sanitizer extends DataFlow::Node {}
+
+ /**
+ * A taint-tracking configuration for reasoning about hard-coded data
+ * being interpreted as code
+ */
+ class Configuration extends TaintTracking::Configuration {
+ Configuration() {
+ this = "HardcodedDataInterpretedAsCode"
+ }
+
+ override predicate isSource(DataFlow::Node source, DataFlow::FlowLabel lbl) {
+ source.(Source).getLabel() = lbl
+ }
+
+ override predicate isSink(DataFlow::Node nd, DataFlow::FlowLabel lbl) {
+ nd.(Sink).getLabel() = lbl
+ }
+
+ override predicate isSanitizer(DataFlow::Node node) {
+ node instanceof Sanitizer
+ }
+ }
+
+ /**
+ * A constant string consisting of eight or more hexadecimal characters (including at
+ * least one digit), viewed as a source of hard-coded data that should not be
+ * interpreted as code.
+ */
+ private class DefaultSource extends Source, DataFlow::ValueNode {
+ DefaultSource() {
+ exists (string val | val = astNode.(Expr).getStringValue() |
+ val.regexpMatch("[0-9a-fA-F]{8,}") and
+ val.regexpMatch(".*[0-9].*")
+ )
+ }
+ }
+
+ /**
+ * A code injection sink; hard-coded data should not flow here.
+ */
+ private class DefaultCodeInjectionSink extends Sink {
+ DefaultCodeInjectionSink() { this instanceof CodeInjection::Sink }
+ override DataFlow::FlowLabel getLabel() { result = DataFlow::FlowLabel::taint() }
+ override string getKind() { result = "code" }
+ }
+
+ /**
+ * An argument to `require` path; hard-coded data should not flow here.
+ */
+ private class RequireArgumentSink extends Sink {
+ RequireArgumentSink() {
+ this = any(Require r).getAnArgument().flow()
+ }
+
+ override DataFlow::FlowLabel getLabel() {
+ result = DataFlow::FlowLabel::data()
+ or
+ result = DataFlow::FlowLabel::taint()
+ }
+
+ override string getKind() { result = "an import path" }
+ }
+}
diff --git a/javascript/ql/test/library-tests/NodeJS/Require.expected b/javascript/ql/test/library-tests/NodeJS/Require.expected
index a7ec65d67a2e..491c80a9a875 100644
--- a/javascript/ql/test/library-tests/NodeJS/Require.expected
+++ b/javascript/ql/test/library-tests/NodeJS/Require.expected
@@ -1,14 +1,18 @@
-| a.js:1:9:1:22 | require('./b') | ./b | b.js:1:1:8:0 |