diff --git a/javascript/ql/src/semmle/javascript/dataflow/DataFlow.qll b/javascript/ql/src/semmle/javascript/dataflow/DataFlow.qll index e013e7c41754..6d9551fbb8cb 100644 --- a/javascript/ql/src/semmle/javascript/dataflow/DataFlow.qll +++ b/javascript/ql/src/semmle/javascript/dataflow/DataFlow.qll @@ -1221,4 +1221,5 @@ module DataFlow { import TypeInference import Configuration import TrackedNodes + import TypeTracking } diff --git a/javascript/ql/src/semmle/javascript/dataflow/Sources.qll b/javascript/ql/src/semmle/javascript/dataflow/Sources.qll index f6a3ce3f3916..436855732d28 100644 --- a/javascript/ql/src/semmle/javascript/dataflow/Sources.qll +++ b/javascript/ql/src/semmle/javascript/dataflow/Sources.qll @@ -7,6 +7,7 @@ */ import javascript +private import semmle.javascript.dataflow.TypeTracking /** * A source node for local data flow, that is, a node from which local data flow is tracked. @@ -153,6 +154,34 @@ class SourceNode extends DataFlow::Node { DataFlow::SourceNode getAPropertySource(string prop) { result.flowsTo(getAPropertyWrite(prop).getRhs()) } + + /** + * EXPERIMENTAL. + * + * Gets a node that this node may flow to using one heap and/or interprocedural step. + * + * See `TypeTracker` for more details about how to use this. + */ + DataFlow::SourceNode track(TypeTracker t2, TypeTracker t) { + exists(StepSummary summary | + StepSummary::step(this, result, summary) and + t = StepSummary::append(t2, summary) + ) + } + + /** + * EXPERIMENTAL. + * + * Gets a node that may flow into this one using one heap and/or interprocedural step. + * + * See `TypeBackTracker` for more details about how to use this. 
+ */ + DataFlow::SourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { + exists(StepSummary summary | + StepSummary::step(result, this, summary) and + t = StepSummary::prepend(summary, t2) + ) + } } module SourceNode { diff --git a/javascript/ql/src/semmle/javascript/dataflow/TrackedNodes.qll b/javascript/ql/src/semmle/javascript/dataflow/TrackedNodes.qll index a33e486859f0..fcadd0a3968f 100644 --- a/javascript/ql/src/semmle/javascript/dataflow/TrackedNodes.qll +++ b/javascript/ql/src/semmle/javascript/dataflow/TrackedNodes.qll @@ -4,6 +4,7 @@ */ import javascript +private import internal.FlowSteps as FlowSteps /** * A data flow node that should be tracked inter-procedurally. diff --git a/javascript/ql/src/semmle/javascript/dataflow/TypeTracking.qll b/javascript/ql/src/semmle/javascript/dataflow/TypeTracking.qll new file mode 100644 index 000000000000..d752d466b0f8 --- /dev/null +++ b/javascript/ql/src/semmle/javascript/dataflow/TypeTracking.qll @@ -0,0 +1,253 @@ +/** + * Provides the `TypeTracker` class for tracking types interprocedurally. + * + * This provides an alternative to `DataFlow::TrackedNode` and `AbstractValue` + * for tracking certain types interprocedurally without computing which source + * a given value came from. + */ + +import javascript +private import internal.FlowSteps + +/** + * A pair of booleans, indicating whether a path goes through a return and/or a call. + * + * Identical to `TPathSummary` except without flow labels. + */ +private newtype TStepSummary = MkStepSummary(boolean hasReturn, boolean hasCall) { + (hasReturn = true or hasReturn = false) and + (hasCall = true or hasCall = false) +} + +/** + * INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead. + * + * Summary of the steps needed to track a value to a given dataflow node. 
+ */ +class StepSummary extends TStepSummary { + Boolean hasReturn; + + Boolean hasCall; + + StepSummary() { this = MkStepSummary(hasReturn, hasCall) } + + /** Indicates whether the path represented by this summary contains any return steps. */ + boolean hasReturn() { result = hasReturn } + + /** Indicates whether the path represented by this summary contains any call steps. */ + boolean hasCall() { result = hasCall } + + /** + * Gets the summary for the path obtained by appending `that` to `this`. + * + * Note that a path containing a `return` step cannot be appended to a path containing + * a `call` step in order to maintain well-formedness. + */ + StepSummary append(StepSummary that) { + exists(Boolean hasReturn2, Boolean hasCall2 | + that = MkStepSummary(hasReturn2, hasCall2) + | + result = MkStepSummary(hasReturn.booleanOr(hasReturn2), hasCall.booleanOr(hasCall2)) and + // avoid constructing invalid paths + not (hasCall = true and hasReturn2 = true) + ) + } + + /** + * Gets the summary for the path obtained by appending `this` to `that`. + */ + StepSummary prepend(StepSummary that) { result = that.append(this) } + + /** Gets a textual representation of this path summary. */ + string toString() { + exists(string withReturn, string withCall | + (if hasReturn = true then withReturn = "with" else withReturn = "without") and + (if hasCall = true then withCall = "with" else withCall = "without") + | + result = "path " + withReturn + " return steps and " + withCall + " call steps" + ) + } +} + +module StepSummary { + /** + * Gets a summary describing a path without any calls or returns. + */ + StepSummary level() { result = MkStepSummary(false, false) } + + /** + * Gets a summary describing a path with one or more calls, but no returns. + */ + StepSummary call() { result = MkStepSummary(false, true) } + + /** + * Gets a summary describing a path with one or more returns, but no calls. 
+ */ + StepSummary return() { result = MkStepSummary(true, false) } + + /** + * INTERNAL: Use `SourceNode.track()` or `SourceNode.backtrack()` instead. + */ + predicate step(DataFlow::SourceNode pred, DataFlow::SourceNode succ, StepSummary summary) { + exists (DataFlow::Node predNode | pred.flowsTo(predNode) | + // Flow through properties of objects + propertyFlowStep(predNode, succ) and + summary = level() + or + // Flow through global variables + globalFlowStep(predNode, succ) and + summary = level() + or + // Flow into function + callStep(predNode, succ) and + summary = call() + or + // Flow out of function + returnStep(predNode, succ) and + summary = return() + or + // Flow through an instance field between members of the same class + DataFlow::localFieldStep(predNode, succ) and + summary = level() + ) + } + + /** + * INTERNAL. Do not use. + * + * Appends a step summary onto a type-tracking summary. + */ + TypeTracker append(TypeTracker type, StepSummary summary) { + not (type.hasCall() = true and summary.hasReturn() = true) and + result.hasCall() = type.hasCall().booleanOr(summary.hasCall()) + } + + /** + * INTERNAL. Do not use. + * + * Prepends a step summary before a backwards type-tracking summary. + */ + TypeBackTracker prepend(StepSummary summary, TypeBackTracker type) { + not (type.hasReturn() = true and summary.hasCall() = true) and + result.hasReturn() = type.hasReturn().booleanOr(summary.hasReturn()) + } +} + +private newtype TTypeTracker = MkTypeTracker(boolean hasCall) { + hasCall = true or hasCall = false +} + +/** + * EXPERIMENTAL. + * + * Summary of the steps needed to track a value to a given dataflow node. + * + * This can be used to track objects that implement a certain API in order to + * recognize calls to that API. Note that type-tracking does not provide a + * source/sink relation, that is, it may determine that a node has a given type, + * but it won't determine where that type came from. 
+ * + * It is recommended that all uses of this type are written in the following form, + * for tracking some type `myType`: + * ``` + * DataFlow::SourceNode myType(DataFlow::TypeTracker t) { + * t.start() and + * result = < source of myType > + * or + * exists (DataFlow::TypeTracker t2 | + * result = myType(t2).track(t2, t) + * ) + * } + * + * DataFlow::SourceNode myType() { result = myType(_) } + * ``` + * + * To track values backwards, which can be useful for tracking + * the type of a callback, use the `TypeBackTracker` class instead. + */ +class TypeTracker extends TTypeTracker { + Boolean hasCall; + + TypeTracker() { this = MkTypeTracker(hasCall) } + + string toString() { + hasCall = true and result = "type tracker with call steps" + or + hasCall = false and result = "type tracker without call steps" + } + + /** + * Holds if this is the starting point of type tracking. + */ + predicate start() { + hasCall = false + } + + /** + * INTERNAL. DO NOT USE. + * + * Gets a boolean indicating whether this type has been tracked into a call. + */ + boolean hasCall() { + result = hasCall + } +} + +private newtype TTypeBackTracker = MkTypeBackTracker(boolean hasReturn) { + hasReturn = true or hasReturn = false +} + +/** + * EXPERIMENTAL. + * + * Summary of the steps needed to back-track a use of a value to a given dataflow node. + * + * This can be used to track callbacks that are passed to a certain API call, and are + * therefore expected to be called with a certain type of value. + * + * Note that type back-tracking does not provide a source/sink relation, that is, + * it may determine that a node will be used in an API call somewhere, but it won't + * determine exactly where that use was, or the path that led to the use. 
+ * + * It is recommended that all uses of this type are written in the following form, + * for back-tracking some callback type `myCallback`: + * ``` + * DataFlow::SourceNode myCallback(DataFlow::TypeBackTracker t) { + * t.start() and + * result = (< some API call >).getArgument(< n >).getALocalSource() + * or + * exists (DataFlow::TypeBackTracker t2 | + * result = myCallback(t2).backtrack(t2, t) + * ) + * } + * + * DataFlow::SourceNode myCallback() { result = myCallback(_) } + * ``` + */ +class TypeBackTracker extends TTypeBackTracker { + Boolean hasReturn; + + TypeBackTracker() { this = MkTypeBackTracker(hasReturn) } + + string toString() { + hasReturn = true and result = "type back-tracker with return steps" + or + hasReturn = false and result = "type back-tracker without return steps" + } + + /** + * Holds if this is the starting point of type back-tracking. + */ + predicate start() { + hasReturn = false + } + + /** + * INTERNAL. DO NOT USE. + * + * Gets a boolean indicating whether this type has been back-tracked into a call through a return edge. + */ + boolean hasReturn() { + result = hasReturn + } +} diff --git a/javascript/ql/test/library-tests/TypeTracking/ClassStyle.expected b/javascript/ql/test/library-tests/TypeTracking/ClassStyle.expected new file mode 100644 index 000000000000..e1bdb73500f5 --- /dev/null +++ b/javascript/ql/test/library-tests/TypeTracking/ClassStyle.expected @@ -0,0 +1,26 @@ +test_ApiObject +| tst.js:3:11:3:21 | new myapi() | +| tst.js:15:10:15:21 | api.chain1() | +| tst.js:15:10:15:30 | api.cha ... hain2() | +test_Connection +| tst.js:6:15:6:18 | conn | +| tst.js:10:5:10:19 | this.connection | +| tst.js:15:10:15:49 | api.cha ... ction() | +| tst.js:18:7:18:21 | getConnection() | +| tst.js:30:9:30:23 | getConnection() | +| tst.js:39:7:39:21 | getConnection() | +| tst.js:47:7:47:21 | getConnection() | +test_DataCallback +| tst.js:9:11:9:12 | cb | +| tst.js:20:1:22:1 | functio ... 
ata);\\n} | +| tst.js:29:26:29:27 | cb | +| tst.js:32:17:32:26 | data => {} | +| tst.js:37:10:37:19 | data => {} | +| tst.js:39:32:39:45 | getDataCurry() | +| tst.js:44:19:44:20 | cb | +| tst.js:47:32:47:60 | identit ... llback) | +test_DataValue +| tst.js:20:18:20:21 | data | +| tst.js:24:19:24:22 | data | +| tst.js:32:17:32:20 | data | +| tst.js:37:10:37:13 | data | diff --git a/javascript/ql/test/library-tests/TypeTracking/ClassStyle.ql b/javascript/ql/test/library-tests/TypeTracking/ClassStyle.ql new file mode 100644 index 000000000000..f965a40102fd --- /dev/null +++ b/javascript/ql/test/library-tests/TypeTracking/ClassStyle.ql @@ -0,0 +1,91 @@ +import javascript + +string chainableMethod() { + result = "chain1" or + result = "chain2" +} + +class ApiObject extends DataFlow::NewNode { + ApiObject() { + this = DataFlow::moduleImport("@test/myapi").getAnInstantiation() + } + + DataFlow::SourceNode ref(DataFlow::TypeTracker t) { + t.start() and + result = this + or + t.start() and + result = ref(_).getAMethodCall(chainableMethod()) + or + exists(DataFlow::TypeTracker t2 | + result = ref(t2).track(t2, t) + ) + } + + DataFlow::SourceNode ref() { + result = ref(_) + } +} + +class Connection extends DataFlow::SourceNode { + ApiObject api; + + Connection() { + this = api.ref().getAMethodCall("createConnection") + } + + DataFlow::SourceNode ref(DataFlow::TypeTracker t) { + t.start() and + result = this + or + exists(DataFlow::TypeTracker t2 | + result = ref(t2).track(t2, t) + ) + } + + DataFlow::SourceNode ref() { + result = ref(_) + } + + DataFlow::SourceNode getACallbackNode(DataFlow::TypeBackTracker t) { + t.start() and + result = ref().getAMethodCall("getData").getArgument(0).getALocalSource() + or + exists(DataFlow::TypeBackTracker t2 | + result = getACallbackNode(t2).backtrack(t2, t) + ) + } + + DataFlow::FunctionNode getACallback() { + result = getACallbackNode(_).getAFunctionValue() + } +} + +class DataValue extends DataFlow::SourceNode { + Connection connection; 
+ + DataValue() { + this = connection.getACallback().getParameter(0) + } + + DataFlow::SourceNode ref(DataFlow::TypeTracker t) { + t.start() and + result = this + or + exists(DataFlow::TypeTracker t2 | + result = ref(t2).track(t2, t) + ) + } + + DataFlow::SourceNode ref() { + result = ref(_) + } +} + +query DataFlow::SourceNode test_ApiObject() { result = any(ApiObject obj).ref() } + +query DataFlow::SourceNode test_Connection() { result = any(Connection c).ref() } + +query DataFlow::SourceNode test_DataCallback() { result = any(Connection c).getACallbackNode(_) } + +query DataFlow::SourceNode test_DataValue() { result = any(DataValue v).ref() } diff --git a/javascript/ql/test/library-tests/TypeTracking/PredicateStyle.expected b/javascript/ql/test/library-tests/TypeTracking/PredicateStyle.expected new file mode 100644 index 000000000000..0c0fea7a9286 --- /dev/null +++ b/javascript/ql/test/library-tests/TypeTracking/PredicateStyle.expected @@ -0,0 +1,26 @@ +apiObject +| tst.js:3:11:3:21 | new myapi() | +| tst.js:15:10:15:21 | api.chain1() | +| tst.js:15:10:15:30 | api.cha ... hain2() | +connection +| type tracker with call steps | tst.js:6:15:6:18 | conn | +| type tracker with call steps | tst.js:10:5:10:19 | this.connection | +| type tracker without call steps | tst.js:15:10:15:49 | api.cha ... ction() | +| type tracker without call steps | tst.js:18:7:18:21 | getConnection() | +| type tracker without call steps | tst.js:30:9:30:23 | getConnection() | +| type tracker without call steps | tst.js:39:7:39:21 | getConnection() | +| type tracker without call steps | tst.js:47:7:47:21 | getConnection() | +dataCallback +| tst.js:9:11:9:12 | cb | +| tst.js:20:1:22:1 | functio ... ata);\\n} | +| tst.js:29:26:29:27 | cb | +| tst.js:32:17:32:26 | data => {} | +| tst.js:37:10:37:19 | data => {} | +| tst.js:39:32:39:45 | getDataCurry() | +| tst.js:44:19:44:20 | cb | +| tst.js:47:32:47:60 | identit ... 
llback) | +dataValue +| tst.js:20:18:20:21 | data | +| tst.js:24:19:24:22 | data | +| tst.js:32:17:32:20 | data | +| tst.js:37:10:37:13 | data | diff --git a/javascript/ql/test/library-tests/TypeTracking/PredicateStyle.ql b/javascript/ql/test/library-tests/TypeTracking/PredicateStyle.ql new file mode 100644 index 000000000000..678dc12de0af --- /dev/null +++ b/javascript/ql/test/library-tests/TypeTracking/PredicateStyle.ql @@ -0,0 +1,61 @@ +import javascript + +string chainableMethod() { + result = "chain1" or + result = "chain2" +} + +DataFlow::SourceNode apiObject(DataFlow::TypeTracker t) { + t.start() and + result = DataFlow::moduleImport("@test/myapi").getAnInstantiation() + or + t.start() and + result = apiObject(_).getAMethodCall(chainableMethod()) + or + exists(DataFlow::TypeTracker t2 | + result = apiObject(t2).track(t2, t) + ) +} + +query DataFlow::SourceNode apiObject() { + result = apiObject(_) +} + +query DataFlow::SourceNode connection(DataFlow::TypeTracker t) { + t.start() and + result = apiObject().getAMethodCall("createConnection") + or + exists(DataFlow::TypeTracker t2 | + result = connection(t2).track(t2, t) + ) +} + +DataFlow::SourceNode connection() { + result = connection(_) +} + +DataFlow::SourceNode dataCallback(DataFlow::TypeBackTracker t) { + t.start() and + result = connection().getAMethodCall("getData").getArgument(0).getALocalSource() + or + exists(DataFlow::TypeBackTracker t2 | + result = dataCallback(t2).backtrack(t2, t) + ) +} + +query DataFlow::SourceNode dataCallback() { + result = dataCallback(_) +} + +DataFlow::SourceNode dataValue(DataFlow::TypeTracker t) { + t.start() and + result = dataCallback().getAFunctionValue().getParameter(0) + or + exists(DataFlow::TypeTracker t2 | + result = dataValue(t2).track(t2, t) + ) +} + +query DataFlow::SourceNode dataValue() { + result = dataValue(_) +} diff --git a/javascript/ql/test/library-tests/TypeTracking/tst.js b/javascript/ql/test/library-tests/TypeTracking/tst.js new file mode 100644 
index 000000000000..8749aa201b62 --- /dev/null +++ b/javascript/ql/test/library-tests/TypeTracking/tst.js @@ -0,0 +1,51 @@ +import myapi from "@test/myapi"; + +let api = new myapi(); + +class C { + constructor(conn) { + this.connection = conn; + } + getData(cb) { + this.connection.getData(cb); + } +} + +function getConnection() { + return api.chain1().chain2().createConnection(); +} + +new C(getConnection()).getData(useData); + +function useData(data) { + useData2(data); +} + +function useData2(data) { +} + + +// Test tracking of callback into function +function getDataIndirect(cb) { + new C(getConnection()).getData(cb); +} +getDataIndirect(data => {}); +getDataIndirect(); // suppress precision gains from single-call special case + +// Test tracking of callback out of function +function getDataCurry() { + return data => {}; +} +new C(getConnection()).getData(getDataCurry()); +getDataCurry(); // suppress precision gains from single-call special case + + +// Test call/return matching of callback tracking +function identity(cb) { + return cb; +} +new C(getConnection()).getData(identity(realGetDataCallback)); +identity(fakeGetDataCallback); + +function realGetDataCallback(data) {} // not found due to missing summarization +function fakeGetDataCallback(notData) {} // should not be found