From 58f7596519745dee1b3e0171d2dbcadc922becd5 Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Thu, 29 Nov 2018 10:45:29 -0800 Subject: [PATCH 01/15] C++: IR-based dataflow --- config/identical-files.json | 127 +- .../semmle/code/cpp/ir/dataflow/DataFlow.qll | 21 + .../semmle/code/cpp/ir/dataflow/DataFlow2.qll | 38 + .../semmle/code/cpp/ir/dataflow/DataFlow3.qll | 38 + .../semmle/code/cpp/ir/dataflow/DataFlow4.qll | 38 + .../code/cpp/ir/dataflow/TaintTracking.qll | 189 ++ .../ir/dataflow/internal/DataFlowDispatch.qll | 73 + .../cpp/ir/dataflow/internal/DataFlowImpl.qll | 1614 +++++++++++++++++ .../ir/dataflow/internal/DataFlowImpl2.qll | 1614 +++++++++++++++++ .../ir/dataflow/internal/DataFlowImpl3.qll | 1614 +++++++++++++++++ .../ir/dataflow/internal/DataFlowImpl4.qll | 1614 +++++++++++++++++ .../dataflow/internal/DataFlowImplCommon.qll | 284 +++ .../ir/dataflow/internal/DataFlowPrivate.qll | 189 ++ .../cpp/ir/dataflow/internal/DataFlowUtil.qll | 143 ++ .../aliased_ssa/Instruction.qll | 30 + .../ir/implementation/aliased_ssa/Operand.qll | 7 + .../cpp/ir/implementation/raw/Instruction.qll | 30 + .../cpp/ir/implementation/raw/Operand.qll | 7 + .../unaliased_ssa/Instruction.qll | 30 + .../implementation/unaliased_ssa/Operand.qll | 7 + .../dataflow-tests/IRDataflowTestCommon.qll | 29 + .../dataflow/dataflow-tests/test_ir.expected | 28 + .../dataflow/dataflow-tests/test_ir.ql | 5 + 23 files changed, 7716 insertions(+), 53 deletions(-) create mode 100644 cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow.qll create mode 100644 cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow2.qll create mode 100644 cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow3.qll create mode 100644 cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow4.qll create mode 100644 cpp/ql/src/semmle/code/cpp/ir/dataflow/TaintTracking.qll create mode 100644 cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll create mode 100644 cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll 
create mode 100644 cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl2.qll create mode 100644 cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl3.qll create mode 100644 cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl4.qll create mode 100644 cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll create mode 100644 cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll create mode 100644 cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll create mode 100644 cpp/ql/test/library-tests/dataflow/dataflow-tests/IRDataflowTestCommon.qll create mode 100644 cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected create mode 100644 cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.ql diff --git a/config/identical-files.json b/config/identical-files.json index f4098f6bef9b..59793e8dc455 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -1,55 +1,76 @@ { - "C++ IR Instruction": [ - "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll" - ], - "C++ IR IRBlock": [ - "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRBlock.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRBlock.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRBlock.qll" - ], - "C++ IR IRVariable": [ - "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRVariable.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRVariable.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRVariable.qll" - ], - "C++ IR FunctionIR": [ - "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/FunctionIR.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/FunctionIR.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/FunctionIR.qll" - ], - "C++ IR 
Operand": [ - "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Operand.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll" - ], - "C++ IR IRImpl": [ - "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IR.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IR.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IR.qll" - ], - "C++ IR IRSanityImpl": [ - "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRSanity.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRSanity.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRSanity.qll" - ], - "C++ IR PrintIRImpl": [ - "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/PrintIR.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/PrintIR.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.qll" - ], - "C++ SSA AliasAnalysis": [ - "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasAnalysis.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasAnalysis.qll" - ], - "C++ SSA SSAConstruction": [ - "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConstruction.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstruction.qll" - ], - "C++ IR ValueNumber": [ - "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/gvn/ValueNumbering.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/ValueNumbering.qll", - "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/ValueNumbering.qll" - ] + "DataFlow Java/C++": [ + "java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl.qll", + "java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl2.qll", + "java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl3.qll", + "java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl4.qll", + 
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl5.qll", + "java/ql/src/semmle/code/java/dataflow/internal/DataFlowImplDepr.qll", + "cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl.qll", + "cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl2.qll", + "cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl3.qll", + "cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl4.qll", + "cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll", + "cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl2.qll", + "cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl3.qll", + "cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl4.qll" + ], + "DataFlow Java/C++ Common": [ + "java/ql/src/semmle/code/java/dataflow/internal/DataFlowImplCommon.qll", + "cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImplCommon.qll", + "cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll" + ], + "C++ IR Instruction": [ + "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll" + ], + "C++ IR IRBlock": [ + "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRBlock.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRBlock.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRBlock.qll" + ], + "C++ IR IRVariable": [ + "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRVariable.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRVariable.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRVariable.qll" + ], + "C++ IR FunctionIR": [ + "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/FunctionIR.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/FunctionIR.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/FunctionIR.qll" + ], + "C++ IR Operand": [ + 
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Operand.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll" + ], + "C++ IR IRImpl": [ + "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IR.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IR.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IR.qll" + ], + "C++ IR IRSanityImpl": [ + "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRSanity.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRSanity.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRSanity.qll" + ], + "C++ IR PrintIRImpl": [ + "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/PrintIR.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/PrintIR.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.qll" + ], + "C++ SSA AliasAnalysis": [ + "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasAnalysis.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasAnalysis.qll" + ], + "C++ SSA SSAConstruction": [ + "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConstruction.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstruction.qll" + ], + "C++ IR ValueNumber": [ + "cpp/ql/src/semmle/code/cpp/ir/implementation/raw/gvn/ValueNumbering.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/ValueNumbering.qll", + "cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/ValueNumbering.qll" + ] } diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow.qll new file mode 100644 index 000000000000..a2c9a905f589 --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow.qll @@ -0,0 +1,21 @@ +/** + * Provides a library for local (intra-procedural) and global 
(inter-procedural) + * data flow analysis: deciding whether data can flow from a _source_ to a + * _sink_. + * + * Unless configured otherwise, _flow_ means that the exact value of + * the source may reach the sink. We do not track flow across pointer + * dereferences or array indexing. To track these types of flow, where the + * exact value may not be preserved, import + * `semmle.code.cpp.ir.dataflow.TaintTracking`. + * + * To use global (interprocedural) data flow, extend the class + * `DataFlow::Configuration` as documented on that class. To use local + * (intraprocedural) data flow, invoke `DataFlow::localFlow` or + * `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`. + */ +import cpp + +module DataFlow { + import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl +} diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow2.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow2.qll new file mode 100644 index 000000000000..9d849c510c51 --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow2.qll @@ -0,0 +1,38 @@ +/** + * Provides a `DataFlow2` module, which is a copy of the `DataFlow` module. Use + * this class when data-flow configurations must depend on each other. Two + * classes extending `DataFlow::Configuration` should never depend on each + * other, but one of them should instead depend on a + * `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a + * `DataFlow4::Configuration`. + * + * See `semmle.code.cpp.dataflow.DataFlow` for the full documentation. + */ +import cpp + +module DataFlow2 { + import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl2 + + /** + * This class exists to prevent mutual recursion between the user-overridden + * member predicates of `Configuration` and the rest of the data-flow library. + * Good performance cannot be guaranteed in the presence of such recursion, so + * it should be replaced by using more than one copy of the data flow library. 
+ * Four copies are available: `DataFlow` through `DataFlow4`. + */ + private abstract + class ConfigurationRecursionPrevention extends Configuration { + bindingset[this] + ConfigurationRecursionPrevention() { any() } + + override predicate hasFlow(Node source, Node sink) { + strictcount(Node n | this.isSource(n)) < 0 + or + strictcount(Node n | this.isSink(n)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 + or + super.hasFlow(source, sink) + } + } +} diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow3.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow3.qll new file mode 100644 index 000000000000..459b80d5e279 --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow3.qll @@ -0,0 +1,38 @@ +/** + * Provides a `DataFlow3` module, which is a copy of the `DataFlow` module. Use + * this class when data-flow configurations must depend on each other. Two + * classes extending `DataFlow::Configuration` should never depend on each + * other, but one of them should instead depend on a + * `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a + * `DataFlow4::Configuration`. + * + * See `semmle.code.cpp.dataflow.DataFlow` for the full documentation. + */ +import cpp + +module DataFlow3 { + import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl3 + + /** + * This class exists to prevent mutual recursion between the user-overridden + * member predicates of `Configuration` and the rest of the data-flow library. + * Good performance cannot be guaranteed in the presence of such recursion, so + * it should be replaced by using more than one copy of the data flow library. + * Four copies are available: `DataFlow` through `DataFlow4`. 
+ */ + private abstract + class ConfigurationRecursionPrevention extends Configuration { + bindingset[this] + ConfigurationRecursionPrevention() { any() } + + override predicate hasFlow(Node source, Node sink) { + strictcount(Node n | this.isSource(n)) < 0 + or + strictcount(Node n | this.isSink(n)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 + or + super.hasFlow(source, sink) + } + } +} diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow4.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow4.qll new file mode 100644 index 000000000000..c67509bd7e4f --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow4.qll @@ -0,0 +1,38 @@ +/** + * Provides a `DataFlow4` module, which is a copy of the `DataFlow` module. Use + * this class when data-flow configurations must depend on each other. Two + * classes extending `DataFlow::Configuration` should never depend on each + * other, but one of them should instead depend on a + * `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a + * `DataFlow4::Configuration`. + * + * See `semmle.code.cpp.dataflow.DataFlow` for the full documentation. + */ +import cpp + +module DataFlow4 { + import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl4 + + /** + * This class exists to prevent mutual recursion between the user-overridden + * member predicates of `Configuration` and the rest of the data-flow library. + * Good performance cannot be guaranteed in the presence of such recursion, so + * it should be replaced by using more than one copy of the data flow library. + * Four copies are available: `DataFlow` through `DataFlow4`. 
+ */ + private abstract + class ConfigurationRecursionPrevention extends Configuration { + bindingset[this] + ConfigurationRecursionPrevention() { any() } + + override predicate hasFlow(Node source, Node sink) { + strictcount(Node n | this.isSource(n)) < 0 + or + strictcount(Node n | this.isSink(n)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 + or + super.hasFlow(source, sink) + } + } +} diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/TaintTracking.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/TaintTracking.qll new file mode 100644 index 000000000000..068b35fc12a2 --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/TaintTracking.qll @@ -0,0 +1,189 @@ +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) taint-tracking analyses. + * + * We define _taint propagation_ informally to mean that a substantial part of + * the information from the source is preserved at the sink. For example, taint + * propagates from `x` to `x + 100`, but it does not propagate from `x` to `x > + * 100` since we consider a single bit of information to be too little. + */ +import semmle.code.cpp.ir.dataflow.DataFlow +import semmle.code.cpp.ir.dataflow.DataFlow2 +private import semmle.code.cpp.ir.IR + +module TaintTracking { + + /** + * A configuration of interprocedural taint tracking analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the taint tracking library must define its own unique extension of + * this abstract class. + * + * A taint-tracking configuration is a special data flow configuration + * (`DataFlow::Configuration`) that allows for flow through nodes that do not + * necessarily preserve values but are still relevant from a taint-tracking + * perspective. (For example, string concatenation, where one of the operands + * is tainted.) 
+ * + * To create a configuration, extend this class with a subclass whose + * characteristic predicate is a unique singleton string. For example, write + * + * ``` + * class MyAnalysisConfiguration extends TaintTracking::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isSanitizer`. + * // Optionally override `isAdditionalTaintStep`. + * } + * ``` + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ``` + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but it is unsupported to depend on a + * `TaintTracking::Configuration` or a `DataFlow::Configuration` in the + * overridden predicates that define sources, sinks, or additional steps. + * Instead, the dependency should go to a `TaintTracking::Configuration2` or + * a `DataFlow{2,3,4}::Configuration`. + */ + abstract class Configuration extends DataFlow::Configuration { + bindingset[this] + Configuration() { any() } + + /** Holds if `source` is a taint source. */ + // overridden to provide taint-tracking specific qldoc + abstract override predicate isSource(DataFlow::Node source); + + /** Holds if `sink` is a taint sink. */ + // overridden to provide taint-tracking specific qldoc + abstract override predicate isSink(DataFlow::Node sink); + + /** + * Holds if taint should not flow into `node`. + */ + predicate isSanitizer(DataFlow::Node node) { none() } + + /** + * Holds if the additional taint propagation step + * from `source` to `target` must be taken into account in the analysis. + * This step will only be followed if `target` is not in the `isSanitizer` + * predicate. 
+ */ + predicate isAdditionalTaintStep(DataFlow::Node source, + DataFlow::Node target) + { none() } + + final override + predicate isBarrier(DataFlow::Node node) { isSanitizer(node) } + + final override + predicate isAdditionalFlowStep(DataFlow::Node source, DataFlow::Node target) { + this.isAdditionalTaintStep(source, target) + or + localTaintStep(source, target) + } + } + + /** + * A taint-tracking configuration that is backed by the `DataFlow2` library + * instead of `DataFlow`. Use this class when taint-tracking configurations + * or data-flow configurations must depend on each other. + * + * See `TaintTracking::Configuration` for the full documentation. + */ + abstract class Configuration2 extends DataFlow2::Configuration { + bindingset[this] + Configuration2() { any() } + + /** Holds if `source` is a taint source. */ + // overridden to provide taint-tracking specific qldoc + abstract override predicate isSource(DataFlow::Node source); + + /** Holds if `sink` is a taint sink. */ + // overridden to provide taint-tracking specific qldoc + abstract override predicate isSink(DataFlow::Node sink); + + /** + * Holds if taint should not flow into `node`. + */ + predicate isSanitizer(DataFlow::Node node) { none() } + + /** + * Holds if the additional taint propagation step + * from `source` to `target` must be taken into account in the analysis. + * This step will only be followed if `target` is not in the `isSanitizer` + * predicate. + */ + predicate isAdditionalTaintStep(DataFlow::Node source, + DataFlow::Node target) + { none() } + + final override + predicate isBarrier(DataFlow::Node node) { isSanitizer(node) } + + final override + predicate isAdditionalFlowStep(DataFlow::Node source, DataFlow::Node target) { + this.isAdditionalTaintStep(source, target) + or + localTaintStep(source, target) + } + } + + /** + * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local + * (intra-procedural) step. 
+ */ + predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + // Taint can flow using ordinary data flow. + DataFlow::localFlowStep(nodeFrom, nodeTo) + or + // Taint can flow through expressions that alter the value but preserve + // more than one bit of it _or_ expressions that follow data through + // pointer indirections. + not nodeTo instanceof CompareInstruction and + not nodeTo instanceof InvokeInstruction and + nodeTo.getAnOperand() = nodeFrom + } + + /** + * Holds if taint may propagate from `source` to `sink` in zero or more local + * (intra-procedural) steps. + */ + predicate localTaint(DataFlow::Node source, DataFlow::Node sink) { + localTaintStep*(source, sink) + } + + /** + * Holds if we do not propagate taint from `fromExpr` to `toExpr` + * even though `toExpr` is the AST parent of `fromExpr`. + */ + private predicate noParentExprFlow(Expr fromExpr, Expr toExpr) { + fromExpr = toExpr.(ConditionalExpr).getCondition() + or + fromExpr = toExpr.(CommaExpr).getLeftOperand() + or + fromExpr = toExpr.(AssignExpr).getLValue() // LHS of `=` + } + + /** + * Holds if we do not propagate taint from a child of `e` to `e` itself. 
+ */ + private predicate noFlowFromChildExpr(Expr e) { + e instanceof ComparisonOperation + or + e instanceof LogicalAndExpr + or + e instanceof LogicalOrExpr + or + e instanceof Call + or + e instanceof SizeofOperator + or + e instanceof AlignofOperator + } + +} \ No newline at end of file diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll new file mode 100644 index 000000000000..cb2339cfdc26 --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll @@ -0,0 +1,73 @@ +private import cpp +private import DataFlowPrivate + +Function viableImpl(MethodAccess ma) { + result = ma.getTarget() +} + +Function viableCallable(Call call) { + result = call.getTarget() +} + +/** + * Holds if the call context `ctx` reduces the set of viable dispatch + * targets of `ma` in `c`. + */ +predicate reducedViableImplInCallContext(MethodAccess ma, Callable c, Call ctx) { + none() +} + +/** + * Gets a viable dispatch target of `ma` in the context `ctx`. This is + * restricted to those `ma`s for which a context might make a difference. + */ +private Method viableImplInCallContext(MethodAccess ma, Call ctx) { + // stub implementation + result = viableImpl(ma) and + viableCallable(ctx) = ma.getEnclosingFunction() +} + +/** + * Gets a viable dispatch target of `ma` in the context `ctx`. This is + * restricted to those `ma`s for which the context makes a difference. + */ +Method prunedViableImplInCallContext(MethodAccess ma, Call ctx) { + result = viableImplInCallContext(ma, ctx) and + reducedViableImplInCallContext(ma, _, ctx) +} + +/** + * Holds if data might flow from `ma` to a return statement in some + * configuration. 
+ */ +private predicate maybeChainedReturn(MethodAccess ma) { + exists(ReturnStmt ret | + exists(ret.getExpr()) and + ret.getEnclosingFunction() = ma.getEnclosingFunction() and + not ma.getParent() instanceof ExprStmt + ) +} + +/** + * Holds if flow returning from `m` to `ma` might return further and if + * this path restricts the set of call sites that can be returned to. + */ +predicate reducedViableImplInReturn(Method m, MethodAccess ma) { + exists(int tgts, int ctxtgts | + m = viableImpl(ma) and + ctxtgts = count(Call ctx | m = viableImplInCallContext(ma, ctx)) and + tgts = strictcount(Call ctx | viableCallable(ctx) = ma.getEnclosingFunction()) and + ctxtgts < tgts + ) and + maybeChainedReturn(ma) +} + +/** + * Gets a viable dispatch target of `ma` in the context `ctx`. This is + * restricted to those `ma`s and results for which the return flow from the + * result to `ma` restricts the possible context `ctx`. + */ +Method prunedViableImplInCallContextReverse(MethodAccess ma, Call ctx) { + result = viableImplInCallContext(ma, ctx) and + reducedViableImplInReturn(result, ma) +} diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll new file mode 100644 index 000000000000..9d28e37cc8ad --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll @@ -0,0 +1,1614 @@ +/** + * Provides an implementation of global (interprocedural) data flow. This file + * re-exports the local (intraprocedural) data flow analysis from `DataFlowUtil` + * and adds a global analysis, mainly exposed through the `Configuration` class. + * This file exists in several identical copies, allowing queries to use + * multiple `Configuration` classes that depend on each other without + * introducing mutual recursion among those configurations. 
+ */ + +import DataFlowUtil +private import DataFlowPrivate +private import DataFlowDispatch +private import DataFlowImplCommon + +/** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. + * For example, write + * + * ``` + * class MyAnalysisConfiguration extends DataFlow::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. + * } + * ``` + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ``` + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but two classes extending + * `DataFlow::Configuration` should never depend on each other. One of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. + */ +abstract class Configuration extends string { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + abstract predicate isSource(Node source); + + /** + * Holds if `sink` is a relevant data flow sink. + */ + abstract predicate isSink(Node sink); + + /** Holds if data flow through `node` is prohibited. */ + predicate isBarrier(Node node) { none() } + + /** Holds if data flow from `node1` to `node2` is prohibited. */ + predicate isBarrierEdge(Node node1, Node node2) { none() } + + /** + * Holds if the additional flow step from `node1` to `node2` must be taken + * into account in the analysis. 
+ */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** + * Gets the virtual dispatch branching limit when calculating field flow. + * This can be overridden to a smaller value to improve performance (a + * value of 0 disables field flow), or a larger value to get more results. + */ + int fieldFlowBranchLimit() { result = 2 } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + */ + predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + * + * The corresponding paths are generated from the end-points and the graph + * included in the module `PathGraph`. + */ + predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowTo(Node sink) { hasFlow(_, sink) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowToExpr(Expr sink) { hasFlowTo(exprNode(sink)) } + + /** DEPRECATED: use `hasFlow` instead. */ + deprecated predicate hasFlowForward(Node source, Node sink) { hasFlow(source, sink) } + + /** DEPRECATED: use `hasFlow` instead. */ + deprecated predicate hasFlowBackward(Node source, Node sink) { hasFlow(source, sink) } +} + +/** + * Holds if the additional step from `node1` to `node2` jumps between callables. 
+ */ +private predicate additionalJumpStep(Node node1, Node node2, Configuration config) { + config.isAdditionalFlowStep(node1, node2) and + node1.getEnclosingCallable() != node2.getEnclosingCallable() +} + +pragma[noinline] +private predicate isAdditionalFlowStep( + Node node1, Node node2, Callable callable1, Callable callable2, Configuration config +) { + config.isAdditionalFlowStep(node1, node2) and + callable1 = node1.getEnclosingCallable() and + callable2 = node2.getEnclosingCallable() +} + +/** + * Holds if the additional step from `node1` to `node2` does not jump between callables. + */ +private predicate additionalLocalFlowStep(Node node1, Node node2, Configuration config) { + exists(Callable callable | isAdditionalFlowStep(node1, node2, callable, callable, config)) +} + +/** + * Holds if data can flow from `node1` to `node2` through a static field or + * variable capture. + */ +private predicate jumpStep(Node node1, Node node2, boolean preservesValue, Configuration config) { + jumpStep(node1, node2) and preservesValue = true + or + additionalJumpStep(node1, node2, config) and preservesValue = false +} + +/** + * Holds if data can flow in one local step from `node1` to `node2` taking + * additional steps from the configuration into account. + */ +private predicate localFlowStep(Node node1, Node node2, boolean preservesValue, Configuration config) { + localFlowStep(node1, node2) and not config.isBarrierEdge(node1, node2) and preservesValue = true + or + additionalLocalFlowStep(node1, node2, config) and preservesValue = false +} + +pragma[noinline] +private Method returnNodeGetEnclosingCallable(ReturnNode ret) { + result = ret.getEnclosingCallable() +} + +/** + * Holds if field flow should be used for the given configuration. + */ +private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } + +/** + * Holds if `node` is reachable from a source in the given configuration + * ignoring call contexts. 
+ */ +private predicate nodeCandFwd1(Node node, boolean stored, Configuration config) { + not config.isBarrier(node) and + ( + config.isSource(node) and stored = false + or + exists(Node mid, boolean preservesValue | + nodeCandFwd1(mid, stored, config) and + localFlowStep(mid, node, preservesValue, config) and + (stored = false or preservesValue = true) + ) + or + exists(Node mid, boolean preservesValue | + nodeCandFwd1(mid, stored, config) and + jumpStep(mid, node, preservesValue, config) and + (stored = false or preservesValue = true) + ) + or + // store + exists(Node mid | + useFieldFlow(config) and + nodeCandFwd1(mid, _, config) and + store(mid, _, node) and + stored = true + ) + or + // read + exists(Node mid, Content f | + nodeCandFwd1(mid, true, config) and + read(mid, f, node) and + storeCandFwd1(f, unbind(config)) and + (stored = false or stored = true) + ) + or + // flow into a callable + exists(Node arg | + nodeCandFwd1(arg, stored, config) and + viableParamArg(node, arg) + ) + or + // flow out of an argument + exists(PostUpdateNode mid, ParameterNode p | + nodeCandFwd1(mid, stored, config) and + parameterValueFlowsToUpdate(p, mid) and + viableParamArg(p, node.(PostUpdateNode).getPreUpdateNode()) + ) + or + // flow out of a callable + exists(Method m, MethodAccess ma, ReturnNode ret | + nodeCandFwd1(ret, stored, config) and + m = returnNodeGetEnclosingCallable(ret) and + m = viableImpl(ma) and + node.asExpr() = ma + ) + ) +} + +/** + * Holds if `f` is the target of a store in the flow covered by `nodeCandFwd1`. + */ +private predicate storeCandFwd1(Content f, Configuration config) { + exists(Node mid, Node node | + not config.isBarrier(node) and + useFieldFlow(config) and + nodeCandFwd1(mid, _, config) and + store(mid, f, node) + ) +} + +bindingset[result, b] +private boolean unbindBool(boolean b) { result != b.booleanNot() } + +/** + * Holds if `node` is part of a path from a source to a sink in the given + * configuration ignoring call contexts. 
+ */ +pragma[nomagic] +private predicate nodeCand1(Node node, boolean stored, Configuration config) { + nodeCandFwd1(node, false, config) and + config.isSink(node) and + stored = false + or + nodeCandFwd1(node, unbindBool(stored), unbind(config)) and + ( + exists(Node mid, boolean preservesValue | + localFlowStep(node, mid, preservesValue, config) and + nodeCand1(mid, stored, config) and + (stored = false or preservesValue = true) + ) + or + exists(Node mid, boolean preservesValue | + jumpStep(node, mid, preservesValue, config) and + nodeCand1(mid, stored, config) and + (stored = false or preservesValue = true) + ) + or + // store + exists(Node mid, Content f | + store(node, f, mid) and + readCand1(f, unbind(config)) and + nodeCand1(mid, true, config) and + (stored = false or stored = true) + ) + or + // read + exists(Node mid, Content f | + read(node, f, mid) and + storeCandFwd1(f, unbind(config)) and + nodeCand1(mid, _, config) and + stored = true + ) + or + // flow into a callable + exists(Node param | + viableParamArg(param, node) and + nodeCand1(param, stored, config) + ) + or + // flow out of an argument + exists(PostUpdateNode mid, ParameterNode p | + parameterValueFlowsToUpdate(p, node) and + viableParamArg(p, mid.getPreUpdateNode()) and + nodeCand1(mid, stored, config) + ) + or + // flow out of a callable + exists(Method m, ExprNode ma | + nodeCand1(ma, stored, config) and + m = returnNodeGetEnclosingCallable(node) and + m = viableImpl(ma.getExpr()) + ) + ) +} + +/** + * Holds if `f` is the target of a read in the flow covered by `nodeCand1`. 
+ */ +private predicate readCand1(Content f, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCandFwd1(node, true, unbind(config)) and + read(node, f, mid) and + storeCandFwd1(f, unbind(config)) and + nodeCand1(mid, _, config) + ) +} + +/** + * Holds if there is a path from `p` to `node` in the same callable that is + * part of a path from a source to a sink taking simple call contexts into + * consideration. This is restricted to paths that do not necessarily + * preserve the value of `p` by making use of at least one additional step + * from the configuration. + */ +pragma[nomagic] +private predicate simpleParameterFlow(ParameterNode p, Node node, RefType t, Configuration config) { + nodeCand1(node, false, config) and + p = node and + t = getErasedRepr(node.getType()) and + not parameterValueFlowsThrough(p) + or + nodeCand1(node, false, unbind(config)) and + exists(Node mid | + simpleParameterFlow(p, mid, t, config) and + localFlowStep(mid, node, true, config) and + compatibleTypes(t, node.getType()) + ) + or + nodeCand1(node, false, unbind(config)) and + exists(Node mid | + simpleParameterFlow(p, mid, _, config) and + localFlowStep(mid, node, false, config) and + t = getErasedRepr(node.getType()) + ) + or + nodeCand1(node, false, unbind(config)) and + exists(Node mid | + simpleParameterFlow(p, mid, t, config) and + localStoreReadStep(mid, node) and + compatibleTypes(t, node.getType()) + ) + or + // value flow through a callable + nodeCand1(node, false, config) and + exists(Node arg | + simpleParameterFlow(p, arg, t, config) and + argumentValueFlowsThrough(arg, node) and + compatibleTypes(t, node.getType()) + ) + or + // flow through a callable + nodeCand1(node, false, config) and + exists(Node arg | + simpleParameterFlow(p, arg, _, config) and + simpleArgumentFlowsThrough(arg, node, t, config) + ) +} + +/** + * Holds if data can flow from `arg` through the `call` taking simple call + * contexts into consideration and that 
this is part of a path from a source + * to a sink. This is restricted to paths through the `call` that do not + * necessarily preserve the value of `arg` by making use of at least one + * additional step from the configuration. + */ +private predicate simpleArgumentFlowsThrough( + ArgumentNode arg, ExprNode call, RefType t, Configuration config +) { + exists(ParameterNode param, ReturnNode ret | + nodeCand1(arg, false, unbind(config)) and + nodeCand1(call, false, unbind(config)) and + viableParamArg(param, arg) and + simpleParameterFlow(param, ret, t, config) and + arg.argumentOf(call.getExpr(), _) + ) +} + +/** + * Holds if data can flow from `node1` to `node2` by a step through a method. `preservesValue` is false when the step relies on `simpleArgumentFlowsThrough` and true when it relies on `argumentValueFlowsThrough`. + */ +private predicate flowThroughMethod( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + simpleArgumentFlowsThrough(node1, node2, _, config) and preservesValue = false + or + argumentValueFlowsThrough(node1, node2) and preservesValue = true +} + +/** + * Holds if data can flow from `node1` to `node2` in one local step or a step + * through a method. + */ +private predicate localFlowStepOrFlowThroughMethod( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + localFlowStep(node1, node2, preservesValue, config) or + flowThroughMethod(node1, node2, preservesValue, config) +} + +/** + * Holds if data can flow out of a callable from `node1` to `node2`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. 
+ */ +private predicate flowOutOfCallable(Node node1, Node node2, Configuration config) { + nodeCand1(node1, _, unbind(config)) and + nodeCand1(node2, _, config) and + ( + // flow out of an argument + exists(ParameterNode p | + parameterValueFlowsToUpdate(p, node1) and + viableParamArg(p, node2.(PostUpdateNode).getPreUpdateNode()) + ) + or + // flow out of a method + exists(Method m, MethodAccess ma, ReturnNode ret | + ret = node1 and + m = returnNodeGetEnclosingCallable(ret) and + m = viableImpl(ma) and + node2.asExpr() = ma + ) + ) +} + +/** + * Holds if data can flow into a callable and that this step is part of a + * path from a source to a sink. + */ +private predicate flowIntoCallable(Node node1, Node node2, Configuration config) { + viableParamArg(node2, node1) and + nodeCand1(node1, _, unbind(config)) and + nodeCand1(node2, _, config) +} + +/** + * Gets the amount of forward branching on the origin of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int branch(Node n1, Configuration conf) { + result = strictcount(Node n | flowOutOfCallable(n1, n, conf) or flowIntoCallable(n1, n, conf)) +} + +/** + * Gets the amount of backward branching on the target of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int join(Node n2, Configuration conf) { + result = strictcount(Node n | flowOutOfCallable(n, n2, conf) or flowIntoCallable(n, n2, conf)) +} + +/** + * Holds if data can flow out of a callable from `node1` to `node2`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. The + * `allowsFieldFlow` flag indicates whether the branching is within the limit + * specified by the configuration. 
+ */ +private predicate flowOutOfCallable( + Node node1, Node node2, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallable(node1, node2, config) and + exists(int b, int j | + b = branch(node1, config) and + j = join(node2, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if data can flow into a callable and that this step is part of a + * path from a source to a sink. The `allowsFieldFlow` flag indicates whether + * the branching is within the limit specified by the configuration. + */ +private predicate flowIntoCallable( + Node node1, Node node2, boolean allowsFieldFlow, Configuration config +) { + flowIntoCallable(node1, node2, config) and + exists(int b, int j | + b = branch(node1, config) and + j = join(node2, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if `node` is reachable from a source in the given configuration, within + * the nodes covered by `nodeCand1`, taking simple call contexts into consideration. 
+ */ +private predicate nodeCandFwd2(Node node, boolean fromArg, boolean stored, Configuration config) { + nodeCand1(node, false, config) and + config.isSource(node) and + fromArg = false and + stored = false + or + nodeCand1(node, unbindBool(stored), unbind(config)) and + ( + exists(Node mid, boolean preservesValue | + nodeCandFwd2(mid, fromArg, stored, config) and + localFlowStepOrFlowThroughMethod(mid, node, preservesValue, config) and + (stored = false or preservesValue = true) + ) + or + exists(Node mid, boolean preservesValue | + nodeCandFwd2(mid, _, stored, config) and + jumpStep(mid, node, preservesValue, config) and + fromArg = false and + (stored = false or preservesValue = true) + ) + or + // store + exists(Node mid, Content f | + nodeCandFwd2(mid, fromArg, _, config) and + store(mid, f, node) and + readCand1(f, unbind(config)) and + stored = true + ) + or + // read + exists(Node mid, Content f | + nodeCandFwd2(mid, fromArg, true, config) and + read(mid, f, node) and + storeCandFwd2(f, unbind(config)) and + (stored = false or stored = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + nodeCandFwd2(mid, _, stored, config) and + flowIntoCallable(mid, node, allowsFieldFlow, config) and + fromArg = true and + (stored = false or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + nodeCandFwd2(mid, false, stored, config) and + flowOutOfCallable(mid, node, allowsFieldFlow, config) and + fromArg = false and + (stored = false or allowsFieldFlow = true) + ) + ) +} + +/** + * Holds if `f` is the target of a store in the flow covered by `nodeCandFwd2`. 
+ */ +private predicate storeCandFwd2(Content f, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCand1(node, true, unbind(config)) and + nodeCandFwd2(mid, _, _, config) and + store(mid, f, node) and + readCand1(f, unbind(config)) + ) +} + +/** + * Holds if `node` is part of a path from a source to a sink in the given + * configuration taking simple call contexts into consideration. + */ +private predicate nodeCand2(Node node, boolean toReturn, boolean stored, Configuration config) { + nodeCandFwd2(node, _, false, config) and + config.isSink(node) and + toReturn = false and + stored = false + or + nodeCandFwd2(node, _, unbindBool(stored), unbind(config)) and + ( + exists(Node mid, boolean preservesValue | + localFlowStepOrFlowThroughMethod(node, mid, preservesValue, config) and + nodeCand2(mid, toReturn, stored, config) and + (stored = false or preservesValue = true) + ) + or + exists(Node mid, boolean preservesValue | + jumpStep(node, mid, preservesValue, config) and + nodeCand2(mid, _, stored, config) and + toReturn = false and + (stored = false or preservesValue = true) + ) + or + // store + exists(Node mid, Content f | + store(node, f, mid) and + readCand2(f, unbind(config)) and + nodeCand2(mid, toReturn, true, config) and + (stored = false or stored = true) + ) + or + // read + exists(Node mid, Content f | + read(node, f, mid) and + storeCandFwd2(f, unbind(config)) and + nodeCand2(mid, toReturn, _, config) and + stored = true + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowIntoCallable(node, mid, allowsFieldFlow, config) and + nodeCand2(mid, false, stored, config) and + toReturn = false and + (stored = false or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowOutOfCallable(node, mid, allowsFieldFlow, config) and + nodeCand2(mid, _, stored, config) and + toReturn = true and + (stored = false or allowsFieldFlow = true) + ) + ) +} + +/** + * Holds if `f` is the target of a read 
in the flow covered by `nodeCand2`. + */ +private predicate readCand2(Content f, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCandFwd2(node, _, true, unbind(config)) and + read(node, f, mid) and + storeCandFwd2(f, unbind(config)) and + nodeCand2(mid, _, _, config) + ) +} + +private predicate storeCand(Content f, Configuration conf) { + exists(Node n1, Node n2 | + store(n1, f, n2) and + nodeCand2(n1, _, _, conf) and + nodeCand2(n2, _, _, unbind(conf)) + ) +} + +private predicate readCand(Content f, Configuration conf) { readCand2(f, conf) } + +/** + * Holds if `f` is the target of both a store and a read in the path graph + * covered by `nodeCand2`. + */ +pragma[noinline] +private predicate readStoreCand(Content f, Configuration conf) { + storeCand(f, conf) and + readCand(f, conf) +} + +private predicate nodeCand(Node node, Configuration config) { nodeCand2(node, _, _, config) } + +/** + * Holds if `node` can be the first node in a maximal subsequence of local + * flow steps in a dataflow path. + */ +private predicate localFlowEntry(Node node, Configuration config) { + nodeCand(node, config) and + ( + config.isSource(node) or + jumpStep(_, node, _, config) or + node instanceof ParameterNode or + node.asExpr() instanceof MethodAccess or + node instanceof PostUpdateNode or + read(_, _, node) or + node.asExpr() instanceof CastExpr + ) +} + +/** + * Holds if `node` can be the last node in a maximal subsequence of local + * flow steps in a dataflow path. 
+ */ +private predicate localFlowExit(Node node, Configuration config) { + exists(Node next | nodeCand(next, config) | + jumpStep(node, next, _, config) or + flowIntoCallable(node, next, config) or + flowOutOfCallable(node, next, config) or + flowThroughMethod(node, next, _, config) or + store(node, _, next) or + read(node, _, next) + ) + or + node.asExpr() instanceof CastExpr + or + config.isSink(node) +} + +/** + * Holds if the local path from `node1` to `node2` is a prefix of a maximal + * subsequence of local flow steps in a dataflow path. + * + * This is the transitive closure of `localFlowStep` beginning at `localFlowEntry`. + */ +private predicate localFlowStepPlus( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + localFlowEntry(node1, config) and + localFlowStep(node1, node2, preservesValue, config) and + node1 != node2 and + nodeCand(node2, unbind(config)) + or + exists(Node mid, boolean pv1, boolean pv2 | + localFlowStepPlus(node1, mid, pv1, config) and + localFlowStep(mid, node2, pv2, config) and + not mid.asExpr() instanceof CastExpr and + preservesValue = pv1.booleanAnd(pv2) and + nodeCand(node2, unbind(config)) + ) +} + +/** + * Holds if `node1` can step to `node2` in one or more local steps and this + * path can occur as a maximal subsequence of local steps in a dataflow path. + */ +pragma[noinline] +private predicate localFlowBigStep( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + localFlowStepPlus(node1, node2, preservesValue, config) and + localFlowExit(node2, config) +} + +private newtype TAccessPathFront = + TFrontNil(Type t) or + TFrontHead(Content f) + +/** + * The front of an `AccessPath`. This is either a head or a nil. 
+ */ +private class AccessPathFront extends TAccessPathFront { + string toString() { + exists(Type t | this = TFrontNil(t) | result = ppReprType(t)) + or + exists(Content f | this = TFrontHead(f) | result = f.toString()) + } + + Type getType() { + this = TFrontNil(result) + or + exists(Content head | this = TFrontHead(head) | result = head.getContainerType()) + } + + predicate headUsesContent(Content f) { this = TFrontHead(f) } +} + +private class AccessPathFrontNil extends AccessPathFront, TFrontNil { } + +/** + * A `Node` at which a cast can occur such that the type should be checked. + */ +private class CastingNode extends Node { + CastingNode() { + this instanceof ParameterNode or + this.asExpr() instanceof CastExpr or + this.asExpr() instanceof MethodAccess or + this.(PostUpdateNode).getPreUpdateNode() instanceof ArgumentNode + } +} + +/** + * Holds if data can flow from a source to `node` with the given `apf`. + */ +private predicate flowCandFwd(Node node, boolean fromArg, AccessPathFront apf, Configuration config) { + flowCandFwd0(node, fromArg, apf, config) and + if node instanceof CastingNode then compatibleTypes(node.getType(), apf.getType()) else any() +} + +private predicate flowCandFwd0(Node node, boolean fromArg, AccessPathFront apf, Configuration config) { + nodeCand2(node, _, false, config) and + config.isSource(node) and + fromArg = false and + apf = TFrontNil(getErasedRepr(node.getType())) + or + nodeCand(node, unbind(config)) and + ( + exists(Node mid | + flowCandFwd(mid, fromArg, apf, config) and + localFlowBigStep(mid, node, true, config) + ) + or + exists(Node mid, AccessPathFront apf0 | + flowCandFwd(mid, fromArg, apf0, config) and + localFlowBigStep(mid, node, false, config) and + apf0 instanceof AccessPathFrontNil and + apf = TFrontNil(getErasedRepr(node.getType())) + ) + or + exists(Node mid | + flowCandFwd(mid, _, apf, config) and + jumpStep(mid, node, true, config) and + fromArg = false + ) + or + exists(Node mid, AccessPathFront apf0 | 
+ flowCandFwd(mid, _, apf0, config) and + jumpStep(mid, node, false, config) and + fromArg = false and + apf0 instanceof AccessPathFrontNil and + apf = TFrontNil(getErasedRepr(node.getType())) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowCandFwd(mid, _, apf, config) and + flowIntoCallable(mid, node, allowsFieldFlow, config) and + fromArg = true and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowCandFwd(mid, false, apf, config) and + flowOutOfCallable(mid, node, allowsFieldFlow, config) and + fromArg = false and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean preservesValue, AccessPathFront apf0 | + flowCandFwd(mid, fromArg, apf0, config) and + flowThroughMethod(mid, node, preservesValue, config) and + ( + preservesValue = true and apf = apf0 + or + preservesValue = false and + apf0 instanceof AccessPathFrontNil and + apf = TFrontNil(getErasedRepr(node.getType())) + ) + ) + ) + or + exists(Node mid, Content f | + flowCandFwd(mid, fromArg, _, config) and + store(mid, f, node) and + nodeCand(node, unbind(config)) and + apf.headUsesContent(f) + ) + or + exists(Node mid, Content f, AccessPathFront apf0 | + flowCandFwd(mid, fromArg, apf0, config) and + read(mid, f, node) and + nodeCand(node, config) and + apf0.headUsesContent(f) and + consCandFwd(f, apf, unbind(config)) + ) +} + +private predicate consCandFwd(Content f, AccessPathFront apf, Configuration config) { + exists(Node mid, Node n | + flowCandFwd(mid, _, apf, config) and + store(mid, f, n) and + nodeCand(n, unbind(config)) and + readStoreCand(f, unbind(config)) and + compatibleTypes(apf.getType(), f.getType()) + ) +} + +/** + * Holds if data can flow from a source to `node` with the given `apf` and + * from there flow to a sink. 
+ */ +private predicate flowCand(Node node, boolean toReturn, AccessPathFront apf, Configuration config) { + flowCand0(node, toReturn, apf, config) and + flowCandFwd(node, _, apf, config) +} + +private predicate flowCand0(Node node, boolean toReturn, AccessPathFront apf, Configuration config) { + flowCandFwd(node, _, apf, config) and + config.isSink(node) and + toReturn = false and + apf instanceof AccessPathFrontNil + or + ( + exists(Node mid | + localFlowBigStep(node, mid, true, config) and + flowCand(mid, toReturn, apf, config) + ) + or + exists(Node mid, AccessPathFront apf0 | + flowCandFwd(node, _, apf, config) and + localFlowBigStep(node, mid, false, config) and + flowCand(mid, toReturn, apf0, config) and + apf0 instanceof AccessPathFrontNil and + apf instanceof AccessPathFrontNil + ) + or + exists(Node mid | + jumpStep(node, mid, true, config) and + flowCand(mid, _, apf, config) and + toReturn = false + ) + or + exists(Node mid, AccessPathFront apf0 | + flowCandFwd(node, _, apf, config) and + jumpStep(node, mid, false, config) and + flowCand(mid, _, apf0, config) and + toReturn = false and + apf0 instanceof AccessPathFrontNil and + apf instanceof AccessPathFrontNil + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowIntoCallable(node, mid, allowsFieldFlow, config) and + flowCand(mid, false, apf, config) and + toReturn = false and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowOutOfCallable(node, mid, allowsFieldFlow, config) and + flowCand(mid, _, apf, config) and + toReturn = true and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean preservesValue, AccessPathFront apf0 | + flowThroughMethod(node, mid, preservesValue, config) and + flowCand(mid, toReturn, apf0, config) and + ( + preservesValue = true and apf = apf0 + or + preservesValue = false and + apf0 instanceof AccessPathFrontNil and + apf instanceof AccessPathFrontNil 
and + flowCandFwd(node, _, apf, config) + ) + ) + or + exists(Node mid, Content f, AccessPathFront apf0 | + store(node, f, mid) and + flowCand(mid, toReturn, apf0, config) and + apf0.headUsesContent(f) and + consCand(f, apf, unbind(config)) + ) + or + exists(Node mid, Content f, AccessPathFront apf0 | + read(node, f, mid) and + flowCand(mid, toReturn, apf0, config) and + consCandFwd(f, apf0, unbind(config)) and + apf.headUsesContent(f) + ) + ) +} + +private predicate consCand(Content f, AccessPathFront apf, Configuration config) { + consCandFwd(f, apf, config) and + exists(Node mid, Node n, AccessPathFront apf0 | + flowCandFwd(n, _, apf0, config) and + apf0.headUsesContent(f) and + read(n, f, mid) and + flowCand(mid, _, apf, config) + ) +} + +private newtype TAccessPath = + TNil(Type t) or + TCons(Content f, int len) { len in [1 .. 5] } + +/** + * Conceptually a list of `Content`s followed by a `Type`, but only the first + * element of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. 
+ */ +private class AccessPath extends TAccessPath { + abstract string toString(); + + Content getHead() { this = TCons(result, _) } + + int len() { + this = TNil(_) and result = 0 + or + this = TCons(_, result) + } + + Type getType() { + this = TNil(result) + or + exists(Content head | this = TCons(head, _) | result = head.getContainerType()) + } + + abstract AccessPathFront getFront(); +} + +private class AccessPathNil extends AccessPath, TNil { + override string toString() { exists(Type t | this = TNil(t) | result = ppReprType(t)) } + + override AccessPathFront getFront() { exists(Type t | this = TNil(t) | result = TFrontNil(t)) } +} + +private class AccessPathCons extends AccessPath, TCons { + override string toString() { + exists(Content f, int len | this = TCons(f, len) | + result = f.toString() + ", ... (" + len.toString() + ")" + ) + } + + override AccessPathFront getFront() { + exists(Content f | this = TCons(f, _) | result = TFrontHead(f)) + } +} + +/** Holds if `ap0` corresponds to the cons of `f` and `ap`. */ +private predicate pop(AccessPath ap0, Content f, AccessPath ap) { + ap0.getFront().headUsesContent(f) and + consCand(f, ap.getFront(), _) and + ap0.len() = 1 + ap.len() +} + +/** Holds if `ap0` corresponds to the cons of `f` and `ap` and `apf` is the front of `ap`. */ +pragma[noinline] +private predicate popWithFront(AccessPath ap0, Content f, AccessPathFront apf, AccessPath ap) { + pop(ap0, f, ap) and apf = ap.getFront() +} + +/** Holds if `ap` corresponds to the cons of `f` and `ap0`. */ +private predicate push(AccessPath ap0, Content f, AccessPath ap) { pop(ap, f, ap0) } + +/** + * Holds if data can flow from a source to `node` with the given `ap`. 
+ */ +private predicate flowFwd( + Node node, boolean fromArg, AccessPathFront apf, AccessPath ap, Configuration config +) { + flowFwd0(node, fromArg, apf, ap, config) and + flowCand(node, _, apf, config) +} + +private predicate flowFwd0( + Node node, boolean fromArg, AccessPathFront apf, AccessPath ap, Configuration config +) { + flowCand(node, _, _, config) and + config.isSource(node) and + fromArg = false and + ap = TNil(getErasedRepr(node.getType())) and + apf = ap.(AccessPathNil).getFront() + or + flowCand(node, _, _, unbind(config)) and + ( + exists(Node mid | + flowFwd(mid, fromArg, apf, ap, config) and + localFlowBigStep(mid, node, true, config) + ) + or + exists(Node mid, AccessPath ap0 | + flowFwd(mid, fromArg, _, ap0, config) and + localFlowBigStep(mid, node, false, config) and + ap0 instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) and + apf = ap.(AccessPathNil).getFront() + ) + or + exists(Node mid | + flowFwd(mid, _, apf, ap, config) and + jumpStep(mid, node, true, config) and + fromArg = false + ) + or + exists(Node mid, AccessPath ap0 | + flowFwd(mid, _, _, ap0, config) and + jumpStep(mid, node, false, config) and + fromArg = false and + ap0 instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) and + apf = ap.(AccessPathNil).getFront() + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowFwd(mid, _, apf, ap, config) and + flowIntoCallable(mid, node, allowsFieldFlow, config) and + fromArg = true and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowFwd(mid, false, apf, ap, config) and + flowOutOfCallable(mid, node, allowsFieldFlow, config) and + fromArg = false and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean preservesValue, AccessPathFront apf0, AccessPath ap0 | + flowFwd(mid, fromArg, apf0, ap0, config) and + flowThroughMethod(mid, node, preservesValue, config) and + ( + preservesValue = 
true and ap = ap0 and apf = apf0 + or + preservesValue = false and + ap0 instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) and + apf = ap.(AccessPathNil).getFront() + ) + ) + ) + or + exists(Content f, AccessPath ap0 | + flowFwdStore(node, f, ap0, apf, fromArg, config) and + push(ap0, f, ap) + ) + or + exists(Content f, AccessPath ap0 | + flowFwdRead(node, f, ap0, fromArg, config) and + popWithFront(ap0, f, apf, ap) + ) +} + +pragma[nomagic] +private predicate flowFwdStore( + Node node, Content f, AccessPath ap0, AccessPathFront apf, boolean fromArg, Configuration config +) { + exists(Node mid, AccessPathFront apf0 | + flowFwd(mid, fromArg, apf0, ap0, config) and + flowFwdStoreAux(mid, f, node, apf0, apf, config) + ) +} + +private predicate flowFwdStoreAux( + Node mid, Content f, Node node, AccessPathFront apf0, AccessPathFront apf, Configuration config +) { + store(mid, f, node) and + consCand(f, apf0, config) and + apf.headUsesContent(f) and + flowCand(node, _, apf, unbind(config)) +} + +pragma[nomagic] +private predicate flowFwdRead( + Node node, Content f, AccessPath ap0, boolean fromArg, Configuration config +) { + exists(Node mid, AccessPathFront apf0 | + flowFwd(mid, fromArg, apf0, ap0, config) and + read(mid, f, node) and + apf0.headUsesContent(f) and + flowCand(node, _, _, unbind(config)) + ) +} + +/** + * Holds if data can flow from a source to `node` with the given `ap` and + * from there flow to a sink. 
+ */ +private predicate flow(Node node, boolean toReturn, AccessPath ap, Configuration config) { + flow0(node, toReturn, ap, config) and + flowFwd(node, _, _, ap, config) +} + +private predicate flow0(Node node, boolean toReturn, AccessPath ap, Configuration config) { + flowFwd(node, _, _, ap, config) and + config.isSink(node) and + toReturn = false and + ap instanceof AccessPathNil + or + ( + exists(Node mid | + localFlowBigStep(node, mid, true, config) and + flow(mid, toReturn, ap, config) + ) + or + exists(Node mid, AccessPath ap0 | + flowFwd(node, _, _, ap, config) and + localFlowBigStep(node, mid, false, config) and + flow(mid, toReturn, ap0, config) and + ap0 instanceof AccessPathNil and + ap instanceof AccessPathNil + ) + or + exists(Node mid | + jumpStep(node, mid, true, config) and + flow(mid, _, ap, config) and + toReturn = false + ) + or + exists(Node mid, AccessPath ap0 | + flowFwd(node, _, _, ap, config) and + jumpStep(node, mid, false, config) and + flow(mid, _, ap0, config) and + toReturn = false and + ap0 instanceof AccessPathNil and + ap instanceof AccessPathNil + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowIntoCallable(node, mid, allowsFieldFlow, config) and + flow(mid, false, ap, config) and + toReturn = false and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowOutOfCallable(node, mid, allowsFieldFlow, config) and + flow(mid, _, ap, config) and + toReturn = true and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean preservesValue, AccessPath ap0 | + flowThroughMethod(node, mid, preservesValue, config) and + flow(mid, toReturn, ap0, config) and + ( + preservesValue = true and ap = ap0 + or + preservesValue = false and + ap0 instanceof AccessPathNil and + ap instanceof AccessPathNil and + flowFwd(node, _, _, ap, config) + ) + ) + or + exists(Content f, AccessPath ap0 | + flowStore(node, f, toReturn, ap0, config) and + 
pop(ap0, f, ap) + ) + or + exists(Content f, AccessPath ap0 | + flowRead(node, f, toReturn, ap0, config) and + push(ap0, f, ap) + ) + ) +} + +pragma[nomagic] +private predicate flowStore( + Node node, Content f, boolean toReturn, AccessPath ap0, Configuration config +) { + exists(Node mid | + store(node, f, mid) and + flow(mid, toReturn, ap0, config) + ) +} + +pragma[nomagic] +private predicate flowRead( + Node node, Content f, boolean toReturn, AccessPath ap0, Configuration config +) { + exists(Node mid | + read(node, f, mid) and + flow(mid, toReturn, ap0, config) + ) +} + +bindingset[conf, result] +private Configuration unbind(Configuration conf) { result >= conf and result <= conf } + +private predicate flow(Node n, Configuration config) { flow(n, _, _, config) } + +private newtype TPathNode = + TPathNodeMid(Node node, CallContext cc, AccessPath ap, Configuration config) { + // A PathNode is introduced by a source ... + flow(node, config) and + config.isSource(node) and + cc instanceof CallContextAny and + ap = TNil(getErasedRepr(node.getType())) + or + // ... or a step from an existing PathNode to another node. + exists(PathNodeMid mid | + flowStep(mid, node, cc, ap) and + config = mid.getConfiguration() and + flow(node, _, ap, unbind(config)) + ) + } or + TPathNodeSink(Node node, Configuration config) { + // The AccessPath on a sink is empty. + config.isSink(node) and + flow(node, config) + } + +/** + * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +abstract class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { result = getNode().toString() + ppAp() } + + /** Gets the source location for this element. */ + Location getLocation() { result = getNode().getLocation() } + + /** Gets the underlying `Node`. */ + abstract Node getNode(); + + /** Gets the associated configuration. 
*/ + abstract Configuration getConfiguration(); + + /** Gets a successor. */ + abstract PathNode getSucc(); + + private string ppAp() { + this instanceof PathNodeSink and result = "" + or + result = " [" + this.(PathNodeMid).getAp().toString() + "]" + } +} + +/** Holds if `n` can reach a sink. */ +private predicate reach(PathNode n) { n instanceof PathNodeSink or reach(n.getSucc()) } + +/** Holds if `n1.getSucc() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNode n1, PathNode n2) { n1.getSucc() = n2 and reach(n2) } + +private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PathNode a, PathNode b) { pathSucc(a, b) } +} + +/** + * An intermediate flow graph node. This is a tuple consisting of a `Node`, + * a `CallContext`, an `AccessPath`, and a `Configuration`. 
+ */ +private class PathNodeMid extends PathNode, TPathNodeMid { + Node node; + + CallContext cc; + + AccessPath ap; + + Configuration config; + + PathNodeMid() { this = TPathNodeMid(node, cc, ap, config) } + + override Node getNode() { result = node } + + CallContext getCallContext() { result = cc } + + AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + private PathNodeMid getSuccMid() { + flowStep(this, result.getNode(), result.getCallContext(), result.getAp()) and + result.getConfiguration() = unbind(this.getConfiguration()) + } + + override PathNode getSucc() { + // an intermediate step to another intermediate node + result = getSuccMid() + or + // a final step to a sink via one or more local steps + localFlowStepPlus(node, result.getNode(), _, config) and + ap instanceof AccessPathNil and + result instanceof PathNodeSink and + result.getConfiguration() = unbind(this.getConfiguration()) + or + // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges + exists(PathNodeMid mid | + mid = getSuccMid() and + mid.getNode() = result.getNode() and + mid.getAp() instanceof AccessPathNil and + result instanceof PathNodeSink and + result.getConfiguration() = unbind(mid.getConfiguration()) + ) + or + // a direct step from a source to a sink if a node is both + this instanceof PathNodeSource and + result instanceof PathNodeSink and + this.getNode() = result.getNode() and + result.getConfiguration() = unbind(this.getConfiguration()) + } +} + +/** + * A flow graph node corresponding to a source. + */ +private class PathNodeSource extends PathNodeMid { + PathNodeSource() { + getConfiguration().isSource(getNode()) and + getCallContext() instanceof CallContextAny and + getAp() instanceof AccessPathNil + } +} + +/** + * A flow graph node corresponding to a sink. 
This is disjoint from the + * intermediate nodes in order to uniquely correspond to a given sink by + * excluding the `CallContext`. + */ +private class PathNodeSink extends PathNode, TPathNodeSink { + Node node; + + Configuration config; + + PathNodeSink() { this = TPathNodeSink(node, config) } + + override Node getNode() { result = node } + + override Configuration getConfiguration() { result = config } + + override PathNode getSucc() { none() } +} + +/** + * Holds if data may flow from `mid` to `node`. The last step in or out of + * a callable is recorded by `cc`. + */ +private predicate flowStep(PathNodeMid mid, Node node, CallContext cc, AccessPath ap) { + localFlowBigStep(mid.getNode(), node, true, mid.getConfiguration()) and + cc = mid.getCallContext() and + ap = mid.getAp() + or + localFlowBigStep(mid.getNode(), node, false, mid.getConfiguration()) and + cc = mid.getCallContext() and + mid.getAp() instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) + or + jumpStep(mid.getNode(), node, true, mid.getConfiguration()) and + cc instanceof CallContextAny and + ap = mid.getAp() + or + jumpStep(mid.getNode(), node, false, mid.getConfiguration()) and + cc instanceof CallContextAny and + mid.getAp() instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) + or + contentReadStep(mid, node, ap) and cc = mid.getCallContext() + or + exists(Content f, AccessPath ap0 | contentStoreStep(mid, node, ap0, f, cc) and push(ap0, f, ap)) + or + flowOutOfArgument(mid, node, cc) and ap = mid.getAp() + or + flowIntoCallable(mid, node, _, cc, _) and ap = mid.getAp() + or + flowOutOfMethod(mid, node.asExpr(), cc) and ap = mid.getAp() + or + flowThroughMethod(mid, node.asExpr(), cc) and ap = TNil(getErasedRepr(node.getType())) + or + valueFlowThroughMethod(mid, node.asExpr(), cc) and ap = mid.getAp() +} + +private predicate contentReadStep(PathNodeMid mid, Node node, AccessPath ap) { + exists(Content f, AccessPath ap0 | + ap0 = mid.getAp() and + 
read(mid.getNode(), f, node) and + pop(ap0, f, ap) + ) +} + +pragma[noinline] +private predicate contentStoreStep( + PathNodeMid mid, Node node, AccessPath ap0, Content f, CallContext cc +) { + ap0 = mid.getAp() and + store(mid.getNode(), f, node) and + cc = mid.getCallContext() +} + +/** + * Holds if data may flow from `mid` to an exit of `m` in the context + * `innercc`, and the path did not flow through a parameter of `m`. + */ +private predicate flowOutOfMethod0(PathNodeMid mid, Method m, CallContext innercc) { + exists(ReturnNode ret | + ret = mid.getNode() and + innercc = mid.getCallContext() and + m = returnNodeGetEnclosingCallable(ret) and + not innercc instanceof CallContextCall + ) +} + +/** + * Holds if data may flow from `mid` to `ma`. The last step of this path + * is a return from a method and is recorded by `cc`, if needed. + */ +pragma[noinline] +private predicate flowOutOfMethod(PathNodeMid mid, MethodAccess ma, CallContext cc) { + exists(Method m, CallContext innercc | + flowOutOfMethod0(mid, m, innercc) and + resolveReturn(innercc, m, ma) + | + if reducedViableImplInReturn(m, ma) then cc = TReturn(m, ma) else cc = TAnyCallContext() + ) +} + +private predicate flowOutOfArgument(PathNodeMid mid, PostUpdateNode node, CallContext cc) { + exists( + PostUpdateNode n, ParameterNode p, Callable callable, CallContext innercc, int i, Call call, + ArgumentNode arg + | + mid.getNode() = n and + parameterValueFlowsToUpdate(p, n) and + innercc = mid.getCallContext() and + p.isParameterOf(callable, i) and + resolveReturn(innercc, callable, call) and + node.getPreUpdateNode() = arg and + arg.argumentOf(call, i) and + flow(node, unbind(mid.getConfiguration())) + | + if reducedViableImplInReturn(callable, call) + then cc = TReturn(callable, call) + else cc = TAnyCallContext() + ) +} + +/** + * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`. 
+ */ +pragma[noinline] +private predicate flowIntoArg(PathNodeMid mid, int i, CallContext cc, Call call, boolean emptyAp) { + exists(ArgumentNode arg, AccessPath ap | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() + | + ap instanceof AccessPathNil and emptyAp = true + or + ap instanceof AccessPathCons and emptyAp = false + ) +} + +pragma[noinline] +private predicate parameterCand(Callable callable, int i, Configuration config) { + exists(ParameterNode p | + flow(p, config) and + p.isParameterOf(callable, i) + ) +} + +pragma[nomagic] +private predicate flowIntoCallable0( + PathNodeMid mid, Callable callable, int i, CallContext outercc, Call call, boolean emptyAp +) { + flowIntoArg(mid, i, outercc, call, emptyAp) and + callable = resolveCall(call, outercc) and + parameterCand(callable, any(int j | j <= i and j >= i), mid.getConfiguration()) +} + +/** + * Holds if data may flow from `mid` to `p` through `call`. The contexts + * before and after entering the callable are `outercc` and `innercc`, + * respectively. + */ +private predicate flowIntoCallable( + PathNodeMid mid, ParameterNode p, CallContext outercc, CallContextCall innercc, Call call +) { + exists(int i, Callable callable, boolean emptyAp | + flowIntoCallable0(mid, callable, i, outercc, call, emptyAp) and + p.isParameterOf(callable, i) + | + if reducedViableImplInCallContext(_, callable, call) + then innercc = TSpecificCall(call, i, emptyAp) + else innercc = TSomeCall(p, emptyAp) + ) +} + +/** Holds if data may flow from `p` to a return statement in the callable. 
*/ +pragma[nomagic] +private predicate paramFlowsThrough(ParameterNode p, CallContextCall cc, Configuration config) { + exists(PathNodeMid mid, ReturnNode ret | + mid.getNode() = ret and + cc = mid.getCallContext() and + config = mid.getConfiguration() and + mid.getAp() instanceof AccessPathNil + | + cc = TSomeCall(p, true) + or + exists(int i | cc = TSpecificCall(_, i, true) | + p.isParameterOf(returnNodeGetEnclosingCallable(ret), i) + ) + ) +} + +/** + * Holds if data may flow from `mid` to an argument of `methodcall`, + * through a called method `m`, and back out through a return statement in + * `m`. The context `cc` is restored to its value prior to entering `m`. + */ +pragma[noinline] +private predicate flowThroughMethod(PathNodeMid mid, Call methodcall, CallContext cc) { + exists(ParameterNode p, CallContext innercc | + flowIntoCallable(mid, p, cc, innercc, methodcall) and + paramFlowsThrough(p, innercc, unbind(mid.getConfiguration())) and + not parameterValueFlowsThrough(p) and + mid.getAp() instanceof AccessPathNil + ) +} + +private predicate valueFlowThroughMethod(PathNodeMid mid, Call methodcall, CallContext cc) { + exists(ParameterNode p | + flowIntoCallable(mid, p, cc, _, methodcall) and + parameterValueFlowsThrough(p) + ) +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +private predicate flowsTo( + PathNodeSource flowsource, PathNodeSink flowsink, Node source, Node sink, + Configuration configuration +) { + flowsource.getConfiguration() = configuration and + flowsource.getNode() = source and + pathSuccPlus(flowsource, flowsink) and + flowsink.getNode() = sink +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. 
+ */ +predicate flowsTo(Node source, Node sink, Configuration configuration) { + flowsTo(_, _, source, sink, configuration) +} diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl2.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl2.qll new file mode 100644 index 000000000000..9d28e37cc8ad --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl2.qll @@ -0,0 +1,1614 @@ +/** + * Provides an implementation of global (interprocedural) data flow. This file + * re-exports the local (intraprocedural) data flow analysis from `DataFlowUtil` + * and adds a global analysis, mainly exposed through the `Configuration` class. + * This file exists in several identical copies, allowing queries to use + * multiple `Configuration` classes that depend on each other without + * introducing mutual recursion among those configurations. + */ + +import DataFlowUtil +private import DataFlowPrivate +private import DataFlowDispatch +private import DataFlowImplCommon + +/** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. + * For example, write + * + * ``` + * class MyAnalysisConfiguration extends DataFlow::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. 
+ * } + * ``` + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ``` + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but two classes extending + * `DataFlow::Configuration` should never depend on each other. One of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. + */ +abstract class Configuration extends string { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + abstract predicate isSource(Node source); + + /** + * Holds if `sink` is a relevant data flow sink. + */ + abstract predicate isSink(Node sink); + + /** Holds if data flow through `node` is prohibited. */ + predicate isBarrier(Node node) { none() } + + /** Holds if data flow from `node1` to `node2` is prohibited. */ + predicate isBarrierEdge(Node node1, Node node2) { none() } + + /** + * Holds if the additional flow step from `node1` to `node2` must be taken + * into account in the analysis. + */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** + * Gets the virtual dispatch branching limit when calculating field flow. + * This can be overridden to a smaller value to improve performance (a + * value of 0 disables field flow), or a larger value to get more results. + */ + int fieldFlowBranchLimit() { result = 2 } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + */ + predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + * + * The corresponding paths are generated from the end-points and the graph + * included in the module `PathGraph`. 
+ */ + predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowTo(Node sink) { hasFlow(_, sink) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowToExpr(Expr sink) { hasFlowTo(exprNode(sink)) } + + /** DEPRECATED: use `hasFlow` instead. */ + deprecated predicate hasFlowForward(Node source, Node sink) { hasFlow(source, sink) } + + /** DEPRECATED: use `hasFlow` instead. */ + deprecated predicate hasFlowBackward(Node source, Node sink) { hasFlow(source, sink) } +} + +/** + * Holds if the additional step from `node1` to `node2` jumps between callables. + */ +private predicate additionalJumpStep(Node node1, Node node2, Configuration config) { + config.isAdditionalFlowStep(node1, node2) and + node1.getEnclosingCallable() != node2.getEnclosingCallable() +} + +pragma[noinline] +private predicate isAdditionalFlowStep( + Node node1, Node node2, Callable callable1, Callable callable2, Configuration config +) { + config.isAdditionalFlowStep(node1, node2) and + callable1 = node1.getEnclosingCallable() and + callable2 = node2.getEnclosingCallable() +} + +/** + * Holds if the additional step from `node1` to `node2` does not jump between callables. + */ +private predicate additionalLocalFlowStep(Node node1, Node node2, Configuration config) { + exists(Callable callable | isAdditionalFlowStep(node1, node2, callable, callable, config)) +} + +/** + * Holds if data can flow from `node1` to `node2` through a static field or + * variable capture. 
+ */ +private predicate jumpStep(Node node1, Node node2, boolean preservesValue, Configuration config) { + jumpStep(node1, node2) and preservesValue = true + or + additionalJumpStep(node1, node2, config) and preservesValue = false +} + +/** + * Holds if data can flow in one local step from `node1` to `node2` taking + * additional steps from the configuration into account. + */ +private predicate localFlowStep(Node node1, Node node2, boolean preservesValue, Configuration config) { + localFlowStep(node1, node2) and not config.isBarrierEdge(node1, node2) and preservesValue = true + or + additionalLocalFlowStep(node1, node2, config) and preservesValue = false +} + +pragma[noinline] +private Method returnNodeGetEnclosingCallable(ReturnNode ret) { + result = ret.getEnclosingCallable() +} + +/** + * Holds if field flow should be used for the given configuration. + */ +private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } + +/** + * Holds if `node` is reachable from a source in the given configuration + * ignoring call contexts. 
+ */ +private predicate nodeCandFwd1(Node node, boolean stored, Configuration config) { + not config.isBarrier(node) and + ( + config.isSource(node) and stored = false + or + exists(Node mid, boolean preservesValue | + nodeCandFwd1(mid, stored, config) and + localFlowStep(mid, node, preservesValue, config) and + (stored = false or preservesValue = true) + ) + or + exists(Node mid, boolean preservesValue | + nodeCandFwd1(mid, stored, config) and + jumpStep(mid, node, preservesValue, config) and + (stored = false or preservesValue = true) + ) + or + // store + exists(Node mid | + useFieldFlow(config) and + nodeCandFwd1(mid, _, config) and + store(mid, _, node) and + stored = true + ) + or + // read + exists(Node mid, Content f | + nodeCandFwd1(mid, true, config) and + read(mid, f, node) and + storeCandFwd1(f, unbind(config)) and + (stored = false or stored = true) + ) + or + // flow into a callable + exists(Node arg | + nodeCandFwd1(arg, stored, config) and + viableParamArg(node, arg) + ) + or + // flow out of an argument + exists(PostUpdateNode mid, ParameterNode p | + nodeCandFwd1(mid, stored, config) and + parameterValueFlowsToUpdate(p, mid) and + viableParamArg(p, node.(PostUpdateNode).getPreUpdateNode()) + ) + or + // flow out of a callable + exists(Method m, MethodAccess ma, ReturnNode ret | + nodeCandFwd1(ret, stored, config) and + m = returnNodeGetEnclosingCallable(ret) and + m = viableImpl(ma) and + node.asExpr() = ma + ) + ) +} + +/** + * Holds if `f` is the target of a store in the flow covered by `nodeCandFwd1`. + */ +private predicate storeCandFwd1(Content f, Configuration config) { + exists(Node mid, Node node | + not config.isBarrier(node) and + useFieldFlow(config) and + nodeCandFwd1(mid, _, config) and + store(mid, f, node) + ) +} + +bindingset[result, b] +private boolean unbindBool(boolean b) { result != b.booleanNot() } + +/** + * Holds if `node` is part of a path from a source to a sink in the given + * configuration ignoring call contexts. 
+ */ +pragma[nomagic] +private predicate nodeCand1(Node node, boolean stored, Configuration config) { + nodeCandFwd1(node, false, config) and + config.isSink(node) and + stored = false + or + nodeCandFwd1(node, unbindBool(stored), unbind(config)) and + ( + exists(Node mid, boolean preservesValue | + localFlowStep(node, mid, preservesValue, config) and + nodeCand1(mid, stored, config) and + (stored = false or preservesValue = true) + ) + or + exists(Node mid, boolean preservesValue | + jumpStep(node, mid, preservesValue, config) and + nodeCand1(mid, stored, config) and + (stored = false or preservesValue = true) + ) + or + // store + exists(Node mid, Content f | + store(node, f, mid) and + readCand1(f, unbind(config)) and + nodeCand1(mid, true, config) and + (stored = false or stored = true) + ) + or + // read + exists(Node mid, Content f | + read(node, f, mid) and + storeCandFwd1(f, unbind(config)) and + nodeCand1(mid, _, config) and + stored = true + ) + or + // flow into a callable + exists(Node param | + viableParamArg(param, node) and + nodeCand1(param, stored, config) + ) + or + // flow out of an argument + exists(PostUpdateNode mid, ParameterNode p | + parameterValueFlowsToUpdate(p, node) and + viableParamArg(p, mid.getPreUpdateNode()) and + nodeCand1(mid, stored, config) + ) + or + // flow out of a callable + exists(Method m, ExprNode ma | + nodeCand1(ma, stored, config) and + m = returnNodeGetEnclosingCallable(node) and + m = viableImpl(ma.getExpr()) + ) + ) +} + +/** + * Holds if `f` is the target of a read in the flow covered by `nodeCand1`. 
+ */ +private predicate readCand1(Content f, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCandFwd1(node, true, unbind(config)) and + read(node, f, mid) and + storeCandFwd1(f, unbind(config)) and + nodeCand1(mid, _, config) + ) +} + +/** + * Holds if there is a path from `p` to `node` in the same callable that is + * part of a path from a source to a sink taking simple call contexts into + * consideration. This is restricted to paths that does not necessarily + * preserve the value of `p` by making use of at least one additional step + * from the configuration. + */ +pragma[nomagic] +private predicate simpleParameterFlow(ParameterNode p, Node node, RefType t, Configuration config) { + nodeCand1(node, false, config) and + p = node and + t = getErasedRepr(node.getType()) and + not parameterValueFlowsThrough(p) + or + nodeCand1(node, false, unbind(config)) and + exists(Node mid | + simpleParameterFlow(p, mid, t, config) and + localFlowStep(mid, node, true, config) and + compatibleTypes(t, node.getType()) + ) + or + nodeCand1(node, false, unbind(config)) and + exists(Node mid | + simpleParameterFlow(p, mid, _, config) and + localFlowStep(mid, node, false, config) and + t = getErasedRepr(node.getType()) + ) + or + nodeCand1(node, false, unbind(config)) and + exists(Node mid | + simpleParameterFlow(p, mid, t, config) and + localStoreReadStep(mid, node) and + compatibleTypes(t, node.getType()) + ) + or + // value flow through a callable + nodeCand1(node, false, config) and + exists(Node arg | + simpleParameterFlow(p, arg, t, config) and + argumentValueFlowsThrough(arg, node) and + compatibleTypes(t, node.getType()) + ) + or + // flow through a callable + nodeCand1(node, false, config) and + exists(Node arg | + simpleParameterFlow(p, arg, _, config) and + simpleArgumentFlowsThrough(arg, node, t, config) + ) +} + +/** + * Holds if data can flow from `arg` through the `call` taking simple call + * contexts into consideration and that 
this is part of a path from a source + * to a sink. This is restricted to paths through the `call` that does not + * necessarily preserve the value of `arg` by making use of at least one + * additional step from the configuration. + */ +private predicate simpleArgumentFlowsThrough( + ArgumentNode arg, ExprNode call, RefType t, Configuration config +) { + exists(ParameterNode param, ReturnNode ret | + nodeCand1(arg, false, unbind(config)) and + nodeCand1(call, false, unbind(config)) and + viableParamArg(param, arg) and + simpleParameterFlow(param, ret, t, config) and + arg.argumentOf(call.getExpr(), _) + ) +} + +/** + * Holds if data can flow from `node1` to `node2` by a step through a method. + */ +private predicate flowThroughMethod( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + simpleArgumentFlowsThrough(node1, node2, _, config) and preservesValue = false + or + argumentValueFlowsThrough(node1, node2) and preservesValue = true +} + +/** + * Holds if data can flow from `node1` to `node2` in one local step or a step + * through a method. + */ +private predicate localFlowStepOrFlowThroughMethod( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + localFlowStep(node1, node2, preservesValue, config) or + flowThroughMethod(node1, node2, preservesValue, config) +} + +/** + * Holds if data can flow out of a callable from `node1` to `node2`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. 
+ */ +private predicate flowOutOfCallable(Node node1, Node node2, Configuration config) { + nodeCand1(node1, _, unbind(config)) and + nodeCand1(node2, _, config) and + ( + // flow out of an argument + exists(ParameterNode p | + parameterValueFlowsToUpdate(p, node1) and + viableParamArg(p, node2.(PostUpdateNode).getPreUpdateNode()) + ) + or + // flow out of a method + exists(Method m, MethodAccess ma, ReturnNode ret | + ret = node1 and + m = returnNodeGetEnclosingCallable(ret) and + m = viableImpl(ma) and + node2.asExpr() = ma + ) + ) +} + +/** + * Holds if data can flow into a callable and that this step is part of a + * path from a source to a sink. + */ +private predicate flowIntoCallable(Node node1, Node node2, Configuration config) { + viableParamArg(node2, node1) and + nodeCand1(node1, _, unbind(config)) and + nodeCand1(node2, _, config) +} + +/** + * Gets the amount of forward branching on the origin of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int branch(Node n1, Configuration conf) { + result = strictcount(Node n | flowOutOfCallable(n1, n, conf) or flowIntoCallable(n1, n, conf)) +} + +/** + * Gets the amount of backward branching on the target of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int join(Node n2, Configuration conf) { + result = strictcount(Node n | flowOutOfCallable(n, n2, conf) or flowIntoCallable(n, n2, conf)) +} + +/** + * Holds if data can flow out of a callable from `node1` to `node2`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. The + * `allowsFieldFlow` flag indicates whether the branching is within the limit + * specified by the configuration. 
+ */ +private predicate flowOutOfCallable( + Node node1, Node node2, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallable(node1, node2, config) and + exists(int b, int j | + b = branch(node1, config) and + j = join(node2, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if data can flow into a callable and that this step is part of a + * path from a source to a sink. The `allowsFieldFlow` flag indicates whether + * the branching is within the limit specified by the configuration. + */ +private predicate flowIntoCallable( + Node node1, Node node2, boolean allowsFieldFlow, Configuration config +) { + flowIntoCallable(node1, node2, config) and + exists(int b, int j | + b = branch(node1, config) and + j = join(node2, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if `node` is part of a path from a source to a sink in the given + * configuration taking simple call contexts into consideration. 
+ */ +private predicate nodeCandFwd2(Node node, boolean fromArg, boolean stored, Configuration config) { + nodeCand1(node, false, config) and + config.isSource(node) and + fromArg = false and + stored = false + or + nodeCand1(node, unbindBool(stored), unbind(config)) and + ( + exists(Node mid, boolean preservesValue | + nodeCandFwd2(mid, fromArg, stored, config) and + localFlowStepOrFlowThroughMethod(mid, node, preservesValue, config) and + (stored = false or preservesValue = true) + ) + or + exists(Node mid, boolean preservesValue | + nodeCandFwd2(mid, _, stored, config) and + jumpStep(mid, node, preservesValue, config) and + fromArg = false and + (stored = false or preservesValue = true) + ) + or + // store + exists(Node mid, Content f | + nodeCandFwd2(mid, fromArg, _, config) and + store(mid, f, node) and + readCand1(f, unbind(config)) and + stored = true + ) + or + // read + exists(Node mid, Content f | + nodeCandFwd2(mid, fromArg, true, config) and + read(mid, f, node) and + storeCandFwd2(f, unbind(config)) and + (stored = false or stored = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + nodeCandFwd2(mid, _, stored, config) and + flowIntoCallable(mid, node, allowsFieldFlow, config) and + fromArg = true and + (stored = false or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + nodeCandFwd2(mid, false, stored, config) and + flowOutOfCallable(mid, node, allowsFieldFlow, config) and + fromArg = false and + (stored = false or allowsFieldFlow = true) + ) + ) +} + +/** + * Holds if `f` is the target of a store in the flow covered by `nodeCandFwd2`. 
+ */ +private predicate storeCandFwd2(Content f, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCand1(node, true, unbind(config)) and + nodeCandFwd2(mid, _, _, config) and + store(mid, f, node) and + readCand1(f, unbind(config)) + ) +} + +/** + * Holds if `node` is part of a path from a source to a sink in the given + * configuration taking simple call contexts into consideration. + */ +private predicate nodeCand2(Node node, boolean toReturn, boolean stored, Configuration config) { + nodeCandFwd2(node, _, false, config) and + config.isSink(node) and + toReturn = false and + stored = false + or + nodeCandFwd2(node, _, unbindBool(stored), unbind(config)) and + ( + exists(Node mid, boolean preservesValue | + localFlowStepOrFlowThroughMethod(node, mid, preservesValue, config) and + nodeCand2(mid, toReturn, stored, config) and + (stored = false or preservesValue = true) + ) + or + exists(Node mid, boolean preservesValue | + jumpStep(node, mid, preservesValue, config) and + nodeCand2(mid, _, stored, config) and + toReturn = false and + (stored = false or preservesValue = true) + ) + or + // store + exists(Node mid, Content f | + store(node, f, mid) and + readCand2(f, unbind(config)) and + nodeCand2(mid, toReturn, true, config) and + (stored = false or stored = true) + ) + or + // read + exists(Node mid, Content f | + read(node, f, mid) and + storeCandFwd2(f, unbind(config)) and + nodeCand2(mid, toReturn, _, config) and + stored = true + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowIntoCallable(node, mid, allowsFieldFlow, config) and + nodeCand2(mid, false, stored, config) and + toReturn = false and + (stored = false or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowOutOfCallable(node, mid, allowsFieldFlow, config) and + nodeCand2(mid, _, stored, config) and + toReturn = true and + (stored = false or allowsFieldFlow = true) + ) + ) +} + +/** + * Holds if `f` is the target of a read 
in the flow covered by `nodeCand2`. + */ +private predicate readCand2(Content f, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCandFwd2(node, _, true, unbind(config)) and + read(node, f, mid) and + storeCandFwd2(f, unbind(config)) and + nodeCand2(mid, _, _, config) + ) +} + +private predicate storeCand(Content f, Configuration conf) { + exists(Node n1, Node n2 | + store(n1, f, n2) and + nodeCand2(n1, _, _, conf) and + nodeCand2(n2, _, _, unbind(conf)) + ) +} + +private predicate readCand(Content f, Configuration conf) { readCand2(f, conf) } + +/** + * Holds if `f` is the target of both a store and a read in the path graph + * covered by `nodeCand2`. + */ +pragma[noinline] +private predicate readStoreCand(Content f, Configuration conf) { + storeCand(f, conf) and + readCand(f, conf) +} + +private predicate nodeCand(Node node, Configuration config) { nodeCand2(node, _, _, config) } + +/** + * Holds if `node` can be the first node in a maximal subsequence of local + * flow steps in a dataflow path. + */ +private predicate localFlowEntry(Node node, Configuration config) { + nodeCand(node, config) and + ( + config.isSource(node) or + jumpStep(_, node, _, config) or + node instanceof ParameterNode or + node.asExpr() instanceof MethodAccess or + node instanceof PostUpdateNode or + read(_, _, node) or + node.asExpr() instanceof CastExpr + ) +} + +/** + * Holds if `node` can be the last node in a maximal subsequence of local + * flow steps in a dataflow path. 
+ */ +private predicate localFlowExit(Node node, Configuration config) { + exists(Node next | nodeCand(next, config) | + jumpStep(node, next, _, config) or + flowIntoCallable(node, next, config) or + flowOutOfCallable(node, next, config) or + flowThroughMethod(node, next, _, config) or + store(node, _, next) or + read(node, _, next) + ) + or + node.asExpr() instanceof CastExpr + or + config.isSink(node) +} + +/** + * Holds if the local path from `node1` to `node2` is a prefix of a maximal + * subsequence of local flow steps in a dataflow path. + * + * This is the transitive closure of `localFlowStep` beginning at `localFlowEntry`. + */ +private predicate localFlowStepPlus( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + localFlowEntry(node1, config) and + localFlowStep(node1, node2, preservesValue, config) and + node1 != node2 and + nodeCand(node2, unbind(config)) + or + exists(Node mid, boolean pv1, boolean pv2 | + localFlowStepPlus(node1, mid, pv1, config) and + localFlowStep(mid, node2, pv2, config) and + not mid.asExpr() instanceof CastExpr and + preservesValue = pv1.booleanAnd(pv2) and + nodeCand(node2, unbind(config)) + ) +} + +/** + * Holds if `node1` can step to `node2` in one or more local steps and this + * path can occur as a maximal subsequence of local steps in a dataflow path. + */ +pragma[noinline] +private predicate localFlowBigStep( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + localFlowStepPlus(node1, node2, preservesValue, config) and + localFlowExit(node2, config) +} + +private newtype TAccessPathFront = + TFrontNil(Type t) or + TFrontHead(Content f) + +/** + * The front of an `AccessPath`. This is either a head or a nil. 
+ */ +private class AccessPathFront extends TAccessPathFront { + string toString() { + exists(Type t | this = TFrontNil(t) | result = ppReprType(t)) + or + exists(Content f | this = TFrontHead(f) | result = f.toString()) + } + + Type getType() { + this = TFrontNil(result) + or + exists(Content head | this = TFrontHead(head) | result = head.getContainerType()) + } + + predicate headUsesContent(Content f) { this = TFrontHead(f) } +} + +private class AccessPathFrontNil extends AccessPathFront, TFrontNil { } + +/** + * A `Node` at which a cast can occur such that the type should be checked. + */ +private class CastingNode extends Node { + CastingNode() { + this instanceof ParameterNode or + this.asExpr() instanceof CastExpr or + this.asExpr() instanceof MethodAccess or + this.(PostUpdateNode).getPreUpdateNode() instanceof ArgumentNode + } +} + +/** + * Holds if data can flow from a source to `node` with the given `apf`. + */ +private predicate flowCandFwd(Node node, boolean fromArg, AccessPathFront apf, Configuration config) { + flowCandFwd0(node, fromArg, apf, config) and + if node instanceof CastingNode then compatibleTypes(node.getType(), apf.getType()) else any() +} + +private predicate flowCandFwd0(Node node, boolean fromArg, AccessPathFront apf, Configuration config) { + nodeCand2(node, _, false, config) and + config.isSource(node) and + fromArg = false and + apf = TFrontNil(getErasedRepr(node.getType())) + or + nodeCand(node, unbind(config)) and + ( + exists(Node mid | + flowCandFwd(mid, fromArg, apf, config) and + localFlowBigStep(mid, node, true, config) + ) + or + exists(Node mid, AccessPathFront apf0 | + flowCandFwd(mid, fromArg, apf0, config) and + localFlowBigStep(mid, node, false, config) and + apf0 instanceof AccessPathFrontNil and + apf = TFrontNil(getErasedRepr(node.getType())) + ) + or + exists(Node mid | + flowCandFwd(mid, _, apf, config) and + jumpStep(mid, node, true, config) and + fromArg = false + ) + or + exists(Node mid, AccessPathFront apf0 | 
+ flowCandFwd(mid, _, apf0, config) and + jumpStep(mid, node, false, config) and + fromArg = false and + apf0 instanceof AccessPathFrontNil and + apf = TFrontNil(getErasedRepr(node.getType())) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowCandFwd(mid, _, apf, config) and + flowIntoCallable(mid, node, allowsFieldFlow, config) and + fromArg = true and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowCandFwd(mid, false, apf, config) and + flowOutOfCallable(mid, node, allowsFieldFlow, config) and + fromArg = false and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean preservesValue, AccessPathFront apf0 | + flowCandFwd(mid, fromArg, apf0, config) and + flowThroughMethod(mid, node, preservesValue, config) and + ( + preservesValue = true and apf = apf0 + or + preservesValue = false and + apf0 instanceof AccessPathFrontNil and + apf = TFrontNil(getErasedRepr(node.getType())) + ) + ) + ) + or + exists(Node mid, Content f | + flowCandFwd(mid, fromArg, _, config) and + store(mid, f, node) and + nodeCand(node, unbind(config)) and + apf.headUsesContent(f) + ) + or + exists(Node mid, Content f, AccessPathFront apf0 | + flowCandFwd(mid, fromArg, apf0, config) and + read(mid, f, node) and + nodeCand(node, config) and + apf0.headUsesContent(f) and + consCandFwd(f, apf, unbind(config)) + ) +} + +private predicate consCandFwd(Content f, AccessPathFront apf, Configuration config) { + exists(Node mid, Node n | + flowCandFwd(mid, _, apf, config) and + store(mid, f, n) and + nodeCand(n, unbind(config)) and + readStoreCand(f, unbind(config)) and + compatibleTypes(apf.getType(), f.getType()) + ) +} + +/** + * Holds if data can flow from a source to `node` with the given `apf` and + * from there flow to a sink. 
+ */ +private predicate flowCand(Node node, boolean toReturn, AccessPathFront apf, Configuration config) { + flowCand0(node, toReturn, apf, config) and + flowCandFwd(node, _, apf, config) +} + +private predicate flowCand0(Node node, boolean toReturn, AccessPathFront apf, Configuration config) { + flowCandFwd(node, _, apf, config) and + config.isSink(node) and + toReturn = false and + apf instanceof AccessPathFrontNil + or + ( + exists(Node mid | + localFlowBigStep(node, mid, true, config) and + flowCand(mid, toReturn, apf, config) + ) + or + exists(Node mid, AccessPathFront apf0 | + flowCandFwd(node, _, apf, config) and + localFlowBigStep(node, mid, false, config) and + flowCand(mid, toReturn, apf0, config) and + apf0 instanceof AccessPathFrontNil and + apf instanceof AccessPathFrontNil + ) + or + exists(Node mid | + jumpStep(node, mid, true, config) and + flowCand(mid, _, apf, config) and + toReturn = false + ) + or + exists(Node mid, AccessPathFront apf0 | + flowCandFwd(node, _, apf, config) and + jumpStep(node, mid, false, config) and + flowCand(mid, _, apf0, config) and + toReturn = false and + apf0 instanceof AccessPathFrontNil and + apf instanceof AccessPathFrontNil + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowIntoCallable(node, mid, allowsFieldFlow, config) and + flowCand(mid, false, apf, config) and + toReturn = false and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowOutOfCallable(node, mid, allowsFieldFlow, config) and + flowCand(mid, _, apf, config) and + toReturn = true and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean preservesValue, AccessPathFront apf0 | + flowThroughMethod(node, mid, preservesValue, config) and + flowCand(mid, toReturn, apf0, config) and + ( + preservesValue = true and apf = apf0 + or + preservesValue = false and + apf0 instanceof AccessPathFrontNil and + apf instanceof AccessPathFrontNil 
and + flowCandFwd(node, _, apf, config) + ) + ) + or + exists(Node mid, Content f, AccessPathFront apf0 | + store(node, f, mid) and + flowCand(mid, toReturn, apf0, config) and + apf0.headUsesContent(f) and + consCand(f, apf, unbind(config)) + ) + or + exists(Node mid, Content f, AccessPathFront apf0 | + read(node, f, mid) and + flowCand(mid, toReturn, apf0, config) and + consCandFwd(f, apf0, unbind(config)) and + apf.headUsesContent(f) + ) + ) +} + +private predicate consCand(Content f, AccessPathFront apf, Configuration config) { + consCandFwd(f, apf, config) and + exists(Node mid, Node n, AccessPathFront apf0 | + flowCandFwd(n, _, apf0, config) and + apf0.headUsesContent(f) and + read(n, f, mid) and + flowCand(mid, _, apf, config) + ) +} + +private newtype TAccessPath = + TNil(Type t) or + TCons(Content f, int len) { len in [1 .. 5] } + +/** + * Conceptually a list of `Content`s followed by a `Type`, but only the first + * element of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. 
+ */ +private class AccessPath extends TAccessPath { + abstract string toString(); + + Content getHead() { this = TCons(result, _) } + + int len() { + this = TNil(_) and result = 0 + or + this = TCons(_, result) + } + + Type getType() { + this = TNil(result) + or + exists(Content head | this = TCons(head, _) | result = head.getContainerType()) + } + + abstract AccessPathFront getFront(); +} + +private class AccessPathNil extends AccessPath, TNil { + override string toString() { exists(Type t | this = TNil(t) | result = ppReprType(t)) } + + override AccessPathFront getFront() { exists(Type t | this = TNil(t) | result = TFrontNil(t)) } +} + +private class AccessPathCons extends AccessPath, TCons { + override string toString() { + exists(Content f, int len | this = TCons(f, len) | + result = f.toString() + ", ... (" + len.toString() + ")" + ) + } + + override AccessPathFront getFront() { + exists(Content f | this = TCons(f, _) | result = TFrontHead(f)) + } +} + +/** Holds if `ap0` corresponds to the cons of `f` and `ap`. */ +private predicate pop(AccessPath ap0, Content f, AccessPath ap) { + ap0.getFront().headUsesContent(f) and + consCand(f, ap.getFront(), _) and + ap0.len() = 1 + ap.len() +} + +/** Holds if `ap0` corresponds to the cons of `f` and `ap` and `apf` is the front of `ap`. */ +pragma[noinline] +private predicate popWithFront(AccessPath ap0, Content f, AccessPathFront apf, AccessPath ap) { + pop(ap0, f, ap) and apf = ap.getFront() +} + +/** Holds if `ap` corresponds to the cons of `f` and `ap0`. */ +private predicate push(AccessPath ap0, Content f, AccessPath ap) { pop(ap, f, ap0) } + +/** + * Holds if data can flow from a source to `node` with the given `ap`. 
+ */ +private predicate flowFwd( + Node node, boolean fromArg, AccessPathFront apf, AccessPath ap, Configuration config +) { + flowFwd0(node, fromArg, apf, ap, config) and + flowCand(node, _, apf, config) +} + +private predicate flowFwd0( + Node node, boolean fromArg, AccessPathFront apf, AccessPath ap, Configuration config +) { + flowCand(node, _, _, config) and + config.isSource(node) and + fromArg = false and + ap = TNil(getErasedRepr(node.getType())) and + apf = ap.(AccessPathNil).getFront() + or + flowCand(node, _, _, unbind(config)) and + ( + exists(Node mid | + flowFwd(mid, fromArg, apf, ap, config) and + localFlowBigStep(mid, node, true, config) + ) + or + exists(Node mid, AccessPath ap0 | + flowFwd(mid, fromArg, _, ap0, config) and + localFlowBigStep(mid, node, false, config) and + ap0 instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) and + apf = ap.(AccessPathNil).getFront() + ) + or + exists(Node mid | + flowFwd(mid, _, apf, ap, config) and + jumpStep(mid, node, true, config) and + fromArg = false + ) + or + exists(Node mid, AccessPath ap0 | + flowFwd(mid, _, _, ap0, config) and + jumpStep(mid, node, false, config) and + fromArg = false and + ap0 instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) and + apf = ap.(AccessPathNil).getFront() + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowFwd(mid, _, apf, ap, config) and + flowIntoCallable(mid, node, allowsFieldFlow, config) and + fromArg = true and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowFwd(mid, false, apf, ap, config) and + flowOutOfCallable(mid, node, allowsFieldFlow, config) and + fromArg = false and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean preservesValue, AccessPathFront apf0, AccessPath ap0 | + flowFwd(mid, fromArg, apf0, ap0, config) and + flowThroughMethod(mid, node, preservesValue, config) and + ( + preservesValue = 
true and ap = ap0 and apf = apf0 + or + preservesValue = false and + ap0 instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) and + apf = ap.(AccessPathNil).getFront() + ) + ) + ) + or + exists(Content f, AccessPath ap0 | + flowFwdStore(node, f, ap0, apf, fromArg, config) and + push(ap0, f, ap) + ) + or + exists(Content f, AccessPath ap0 | + flowFwdRead(node, f, ap0, fromArg, config) and + popWithFront(ap0, f, apf, ap) + ) +} + +pragma[nomagic] +private predicate flowFwdStore( + Node node, Content f, AccessPath ap0, AccessPathFront apf, boolean fromArg, Configuration config +) { + exists(Node mid, AccessPathFront apf0 | + flowFwd(mid, fromArg, apf0, ap0, config) and + flowFwdStoreAux(mid, f, node, apf0, apf, config) + ) +} + +private predicate flowFwdStoreAux( + Node mid, Content f, Node node, AccessPathFront apf0, AccessPathFront apf, Configuration config +) { + store(mid, f, node) and + consCand(f, apf0, config) and + apf.headUsesContent(f) and + flowCand(node, _, apf, unbind(config)) +} + +pragma[nomagic] +private predicate flowFwdRead( + Node node, Content f, AccessPath ap0, boolean fromArg, Configuration config +) { + exists(Node mid, AccessPathFront apf0 | + flowFwd(mid, fromArg, apf0, ap0, config) and + read(mid, f, node) and + apf0.headUsesContent(f) and + flowCand(node, _, _, unbind(config)) + ) +} + +/** + * Holds if data can flow from a source to `node` with the given `ap` and + * from there flow to a sink. 
+ */ +private predicate flow(Node node, boolean toReturn, AccessPath ap, Configuration config) { + flow0(node, toReturn, ap, config) and + flowFwd(node, _, _, ap, config) +} + +private predicate flow0(Node node, boolean toReturn, AccessPath ap, Configuration config) { + flowFwd(node, _, _, ap, config) and + config.isSink(node) and + toReturn = false and + ap instanceof AccessPathNil + or + ( + exists(Node mid | + localFlowBigStep(node, mid, true, config) and + flow(mid, toReturn, ap, config) + ) + or + exists(Node mid, AccessPath ap0 | + flowFwd(node, _, _, ap, config) and + localFlowBigStep(node, mid, false, config) and + flow(mid, toReturn, ap0, config) and + ap0 instanceof AccessPathNil and + ap instanceof AccessPathNil + ) + or + exists(Node mid | + jumpStep(node, mid, true, config) and + flow(mid, _, ap, config) and + toReturn = false + ) + or + exists(Node mid, AccessPath ap0 | + flowFwd(node, _, _, ap, config) and + jumpStep(node, mid, false, config) and + flow(mid, _, ap0, config) and + toReturn = false and + ap0 instanceof AccessPathNil and + ap instanceof AccessPathNil + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowIntoCallable(node, mid, allowsFieldFlow, config) and + flow(mid, false, ap, config) and + toReturn = false and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowOutOfCallable(node, mid, allowsFieldFlow, config) and + flow(mid, _, ap, config) and + toReturn = true and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean preservesValue, AccessPath ap0 | + flowThroughMethod(node, mid, preservesValue, config) and + flow(mid, toReturn, ap0, config) and + ( + preservesValue = true and ap = ap0 + or + preservesValue = false and + ap0 instanceof AccessPathNil and + ap instanceof AccessPathNil and + flowFwd(node, _, _, ap, config) + ) + ) + or + exists(Content f, AccessPath ap0 | + flowStore(node, f, toReturn, ap0, config) and + 
pop(ap0, f, ap) + ) + or + exists(Content f, AccessPath ap0 | + flowRead(node, f, toReturn, ap0, config) and + push(ap0, f, ap) + ) + ) +} + +pragma[nomagic] +private predicate flowStore( + Node node, Content f, boolean toReturn, AccessPath ap0, Configuration config +) { + exists(Node mid | + store(node, f, mid) and + flow(mid, toReturn, ap0, config) + ) +} + +pragma[nomagic] +private predicate flowRead( + Node node, Content f, boolean toReturn, AccessPath ap0, Configuration config +) { + exists(Node mid | + read(node, f, mid) and + flow(mid, toReturn, ap0, config) + ) +} + +bindingset[conf, result] +private Configuration unbind(Configuration conf) { result >= conf and result <= conf } + +private predicate flow(Node n, Configuration config) { flow(n, _, _, config) } + +private newtype TPathNode = + TPathNodeMid(Node node, CallContext cc, AccessPath ap, Configuration config) { + // A PathNode is introduced by a source ... + flow(node, config) and + config.isSource(node) and + cc instanceof CallContextAny and + ap = TNil(getErasedRepr(node.getType())) + or + // ... or a step from an existing PathNode to another node. + exists(PathNodeMid mid | + flowStep(mid, node, cc, ap) and + config = mid.getConfiguration() and + flow(node, _, ap, unbind(config)) + ) + } or + TPathNodeSink(Node node, Configuration config) { + // The AccessPath on a sink is empty. + config.isSink(node) and + flow(node, config) + } + +/** + * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +abstract class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { result = getNode().toString() + ppAp() } + + /** Gets the source location for this element. */ + Location getLocation() { result = getNode().getLocation() } + + /** Gets the underlying `Node`. */ + abstract Node getNode(); + + /** Gets the associated configuration. 
*/ + abstract Configuration getConfiguration(); + + /** Gets a successor. */ + abstract PathNode getSucc(); + + private string ppAp() { + this instanceof PathNodeSink and result = "" + or + result = " [" + this.(PathNodeMid).getAp().toString() + "]" + } +} + +/** Holds if `n` can reach a sink. */ +private predicate reach(PathNode n) { n instanceof PathNodeSink or reach(n.getSucc()) } + +/** Holds if `n1.getSucc() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNode n1, PathNode n2) { n1.getSucc() = n2 and reach(n2) } + +private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PathNode a, PathNode b) { pathSucc(a, b) } +} + +/** + * An intermediate flow graph node. This is a tuple consisting of a `Node`, + * a `CallContext`, an `AccessPath`, and a `Configuration`.
+ */ +private class PathNodeMid extends PathNode, TPathNodeMid { + Node node; + + CallContext cc; + + AccessPath ap; + + Configuration config; + + PathNodeMid() { this = TPathNodeMid(node, cc, ap, config) } + + override Node getNode() { result = node } + + CallContext getCallContext() { result = cc } + + AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + private PathNodeMid getSuccMid() { + flowStep(this, result.getNode(), result.getCallContext(), result.getAp()) and + result.getConfiguration() = unbind(this.getConfiguration()) + } + + override PathNode getSucc() { + // an intermediate step to another intermediate node + result = getSuccMid() + or + // a final step to a sink via one or more local steps + localFlowStepPlus(node, result.getNode(), _, config) and + ap instanceof AccessPathNil and + result instanceof PathNodeSink and + result.getConfiguration() = unbind(this.getConfiguration()) + or + // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges + exists(PathNodeMid mid | + mid = getSuccMid() and + mid.getNode() = result.getNode() and + mid.getAp() instanceof AccessPathNil and + result instanceof PathNodeSink and + result.getConfiguration() = unbind(mid.getConfiguration()) + ) + or + // a direct step from a source to a sink if a node is both + this instanceof PathNodeSource and + result instanceof PathNodeSink and + this.getNode() = result.getNode() and + result.getConfiguration() = unbind(this.getConfiguration()) + } +} + +/** + * A flow graph node corresponding to a source. + */ +private class PathNodeSource extends PathNodeMid { + PathNodeSource() { + getConfiguration().isSource(getNode()) and + getCallContext() instanceof CallContextAny and + getAp() instanceof AccessPathNil + } +} + +/** + * A flow graph node corresponding to a sink. 
This is disjoint from the + * intermediate nodes in order to uniquely correspond to a given sink by + * excluding the `CallContext`. + */ +private class PathNodeSink extends PathNode, TPathNodeSink { + Node node; + + Configuration config; + + PathNodeSink() { this = TPathNodeSink(node, config) } + + override Node getNode() { result = node } + + override Configuration getConfiguration() { result = config } + + override PathNode getSucc() { none() } +} + +/** + * Holds if data may flow from `mid` to `node`. The last step in or out of + * a callable is recorded by `cc`. + */ +private predicate flowStep(PathNodeMid mid, Node node, CallContext cc, AccessPath ap) { + localFlowBigStep(mid.getNode(), node, true, mid.getConfiguration()) and + cc = mid.getCallContext() and + ap = mid.getAp() + or + localFlowBigStep(mid.getNode(), node, false, mid.getConfiguration()) and + cc = mid.getCallContext() and + mid.getAp() instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) + or + jumpStep(mid.getNode(), node, true, mid.getConfiguration()) and + cc instanceof CallContextAny and + ap = mid.getAp() + or + jumpStep(mid.getNode(), node, false, mid.getConfiguration()) and + cc instanceof CallContextAny and + mid.getAp() instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) + or + contentReadStep(mid, node, ap) and cc = mid.getCallContext() + or + exists(Content f, AccessPath ap0 | contentStoreStep(mid, node, ap0, f, cc) and push(ap0, f, ap)) + or + flowOutOfArgument(mid, node, cc) and ap = mid.getAp() + or + flowIntoCallable(mid, node, _, cc, _) and ap = mid.getAp() + or + flowOutOfMethod(mid, node.asExpr(), cc) and ap = mid.getAp() + or + flowThroughMethod(mid, node.asExpr(), cc) and ap = TNil(getErasedRepr(node.getType())) + or + valueFlowThroughMethod(mid, node.asExpr(), cc) and ap = mid.getAp() +} + +private predicate contentReadStep(PathNodeMid mid, Node node, AccessPath ap) { + exists(Content f, AccessPath ap0 | + ap0 = mid.getAp() and + 
read(mid.getNode(), f, node) and + pop(ap0, f, ap) + ) +} + +pragma[noinline] +private predicate contentStoreStep( + PathNodeMid mid, Node node, AccessPath ap0, Content f, CallContext cc +) { + ap0 = mid.getAp() and + store(mid.getNode(), f, node) and + cc = mid.getCallContext() +} + +/** + * Holds if data may flow from `mid` to an exit of `m` in the context + * `innercc`, and the path did not flow through a parameter of `m`. + */ +private predicate flowOutOfMethod0(PathNodeMid mid, Method m, CallContext innercc) { + exists(ReturnNode ret | + ret = mid.getNode() and + innercc = mid.getCallContext() and + m = returnNodeGetEnclosingCallable(ret) and + not innercc instanceof CallContextCall + ) +} + +/** + * Holds if data may flow from `mid` to `ma`. The last step of this path + * is a return from a method and is recorded by `cc`, if needed. + */ +pragma[noinline] +private predicate flowOutOfMethod(PathNodeMid mid, MethodAccess ma, CallContext cc) { + exists(Method m, CallContext innercc | + flowOutOfMethod0(mid, m, innercc) and + resolveReturn(innercc, m, ma) + | + if reducedViableImplInReturn(m, ma) then cc = TReturn(m, ma) else cc = TAnyCallContext() + ) +} + +private predicate flowOutOfArgument(PathNodeMid mid, PostUpdateNode node, CallContext cc) { + exists( + PostUpdateNode n, ParameterNode p, Callable callable, CallContext innercc, int i, Call call, + ArgumentNode arg + | + mid.getNode() = n and + parameterValueFlowsToUpdate(p, n) and + innercc = mid.getCallContext() and + p.isParameterOf(callable, i) and + resolveReturn(innercc, callable, call) and + node.getPreUpdateNode() = arg and + arg.argumentOf(call, i) and + flow(node, unbind(mid.getConfiguration())) + | + if reducedViableImplInReturn(callable, call) + then cc = TReturn(callable, call) + else cc = TAnyCallContext() + ) +} + +/** + * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`. 
+ */ +pragma[noinline] +private predicate flowIntoArg(PathNodeMid mid, int i, CallContext cc, Call call, boolean emptyAp) { + exists(ArgumentNode arg, AccessPath ap | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() + | + ap instanceof AccessPathNil and emptyAp = true + or + ap instanceof AccessPathCons and emptyAp = false + ) +} + +pragma[noinline] +private predicate parameterCand(Callable callable, int i, Configuration config) { + exists(ParameterNode p | + flow(p, config) and + p.isParameterOf(callable, i) + ) +} + +pragma[nomagic] +private predicate flowIntoCallable0( + PathNodeMid mid, Callable callable, int i, CallContext outercc, Call call, boolean emptyAp +) { + flowIntoArg(mid, i, outercc, call, emptyAp) and + callable = resolveCall(call, outercc) and + parameterCand(callable, any(int j | j <= i and j >= i), mid.getConfiguration()) +} + +/** + * Holds if data may flow from `mid` to `p` through `call`. The contexts + * before and after entering the callable are `outercc` and `innercc`, + * respectively. + */ +private predicate flowIntoCallable( + PathNodeMid mid, ParameterNode p, CallContext outercc, CallContextCall innercc, Call call +) { + exists(int i, Callable callable, boolean emptyAp | + flowIntoCallable0(mid, callable, i, outercc, call, emptyAp) and + p.isParameterOf(callable, i) + | + if reducedViableImplInCallContext(_, callable, call) + then innercc = TSpecificCall(call, i, emptyAp) + else innercc = TSomeCall(p, emptyAp) + ) +} + +/** Holds if data may flow from `p` to a return statement in the callable. 
*/ +pragma[nomagic] +private predicate paramFlowsThrough(ParameterNode p, CallContextCall cc, Configuration config) { + exists(PathNodeMid mid, ReturnNode ret | + mid.getNode() = ret and + cc = mid.getCallContext() and + config = mid.getConfiguration() and + mid.getAp() instanceof AccessPathNil + | + cc = TSomeCall(p, true) + or + exists(int i | cc = TSpecificCall(_, i, true) | + p.isParameterOf(returnNodeGetEnclosingCallable(ret), i) + ) + ) +} + +/** + * Holds if data may flow from `mid` to an argument of `methodcall`, + * through a called method `m`, and back out through a return statement in + * `m`. The context `cc` is restored to its value prior to entering `m`. + */ +pragma[noinline] +private predicate flowThroughMethod(PathNodeMid mid, Call methodcall, CallContext cc) { + exists(ParameterNode p, CallContext innercc | + flowIntoCallable(mid, p, cc, innercc, methodcall) and + paramFlowsThrough(p, innercc, unbind(mid.getConfiguration())) and + not parameterValueFlowsThrough(p) and + mid.getAp() instanceof AccessPathNil + ) +} + +private predicate valueFlowThroughMethod(PathNodeMid mid, Call methodcall, CallContext cc) { + exists(ParameterNode p | + flowIntoCallable(mid, p, cc, _, methodcall) and + parameterValueFlowsThrough(p) + ) +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +private predicate flowsTo( + PathNodeSource flowsource, PathNodeSink flowsink, Node source, Node sink, + Configuration configuration +) { + flowsource.getConfiguration() = configuration and + flowsource.getNode() = source and + pathSuccPlus(flowsource, flowsink) and + flowsink.getNode() = sink +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. 
+ */ +predicate flowsTo(Node source, Node sink, Configuration configuration) { + flowsTo(_, _, source, sink, configuration) +} diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl3.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl3.qll new file mode 100644 index 000000000000..9d28e37cc8ad --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl3.qll @@ -0,0 +1,1614 @@ +/** + * Provides an implementation of global (interprocedural) data flow. This file + * re-exports the local (intraprocedural) data flow analysis from `DataFlowUtil` + * and adds a global analysis, mainly exposed through the `Configuration` class. + * This file exists in several identical copies, allowing queries to use + * multiple `Configuration` classes that depend on each other without + * introducing mutual recursion among those configurations. + */ + +import DataFlowUtil +private import DataFlowPrivate +private import DataFlowDispatch +private import DataFlowImplCommon + +/** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. + * For example, write + * + * ``` + * class MyAnalysisConfiguration extends DataFlow::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. 
+ * } + * ``` + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ``` + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but two classes extending + * `DataFlow::Configuration` should never depend on each other. One of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. + */ +abstract class Configuration extends string { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + abstract predicate isSource(Node source); + + /** + * Holds if `sink` is a relevant data flow sink. + */ + abstract predicate isSink(Node sink); + + /** Holds if data flow through `node` is prohibited. */ + predicate isBarrier(Node node) { none() } + + /** Holds if data flow from `node1` to `node2` is prohibited. */ + predicate isBarrierEdge(Node node1, Node node2) { none() } + + /** + * Holds if the additional flow step from `node1` to `node2` must be taken + * into account in the analysis. + */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** + * Gets the virtual dispatch branching limit when calculating field flow. + * This can be overridden to a smaller value to improve performance (a + * value of 0 disables field flow), or a larger value to get more results. + */ + int fieldFlowBranchLimit() { result = 2 } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + */ + predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + * + * The corresponding paths are generated from the end-points and the graph + * included in the module `PathGraph`. 
+ */ + predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowTo(Node sink) { hasFlow(_, sink) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowToExpr(Expr sink) { hasFlowTo(exprNode(sink)) } + + /** DEPRECATED: use `hasFlow` instead. */ + deprecated predicate hasFlowForward(Node source, Node sink) { hasFlow(source, sink) } + + /** DEPRECATED: use `hasFlow` instead. */ + deprecated predicate hasFlowBackward(Node source, Node sink) { hasFlow(source, sink) } +} + +/** + * Holds if the additional step from `node1` to `node2` jumps between callables. + */ +private predicate additionalJumpStep(Node node1, Node node2, Configuration config) { + config.isAdditionalFlowStep(node1, node2) and + node1.getEnclosingCallable() != node2.getEnclosingCallable() +} + +pragma[noinline] +private predicate isAdditionalFlowStep( + Node node1, Node node2, Callable callable1, Callable callable2, Configuration config +) { + config.isAdditionalFlowStep(node1, node2) and + callable1 = node1.getEnclosingCallable() and + callable2 = node2.getEnclosingCallable() +} + +/** + * Holds if the additional step from `node1` to `node2` does not jump between callables. + */ +private predicate additionalLocalFlowStep(Node node1, Node node2, Configuration config) { + exists(Callable callable | isAdditionalFlowStep(node1, node2, callable, callable, config)) +} + +/** + * Holds if data can flow from `node1` to `node2` through a static field or + * variable capture. 
+ */ +private predicate jumpStep(Node node1, Node node2, boolean preservesValue, Configuration config) { + jumpStep(node1, node2) and preservesValue = true + or + additionalJumpStep(node1, node2, config) and preservesValue = false +} + +/** + * Holds if data can flow in one local step from `node1` to `node2` taking + * additional steps from the configuration into account. + */ +private predicate localFlowStep(Node node1, Node node2, boolean preservesValue, Configuration config) { + localFlowStep(node1, node2) and not config.isBarrierEdge(node1, node2) and preservesValue = true + or + additionalLocalFlowStep(node1, node2, config) and preservesValue = false +} + +pragma[noinline] +private Method returnNodeGetEnclosingCallable(ReturnNode ret) { + result = ret.getEnclosingCallable() +} + +/** + * Holds if field flow should be used for the given configuration. + */ +private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } + +/** + * Holds if `node` is reachable from a source in the given configuration + * ignoring call contexts. 
+ */ +private predicate nodeCandFwd1(Node node, boolean stored, Configuration config) { + not config.isBarrier(node) and + ( + // source + config.isSource(node) and stored = false + or + // local flow step + exists(Node mid, boolean preservesValue | + nodeCandFwd1(mid, stored, config) and + localFlowStep(mid, node, preservesValue, config) and + (stored = false or preservesValue = true) + ) + or + // jump step + exists(Node mid, boolean preservesValue | + nodeCandFwd1(mid, stored, config) and + jumpStep(mid, node, preservesValue, config) and + (stored = false or preservesValue = true) + ) + or + // store + exists(Node mid | + useFieldFlow(config) and + nodeCandFwd1(mid, _, config) and + store(mid, _, node) and + stored = true + ) + or + // read + exists(Node mid, Content f | + nodeCandFwd1(mid, true, config) and + read(mid, f, node) and + storeCandFwd1(f, unbind(config)) and + (stored = false or stored = true) + ) + or + // flow into a callable + exists(Node arg | + nodeCandFwd1(arg, stored, config) and + viableParamArg(node, arg) + ) + or + // flow out of an argument + exists(PostUpdateNode mid, ParameterNode p | + nodeCandFwd1(mid, stored, config) and + parameterValueFlowsToUpdate(p, mid) and + viableParamArg(p, node.(PostUpdateNode).getPreUpdateNode()) + ) + or + // flow out of a callable + exists(Method m, MethodAccess ma, ReturnNode ret | + nodeCandFwd1(ret, stored, config) and + m = returnNodeGetEnclosingCallable(ret) and + m = viableImpl(ma) and + node.asExpr() = ma + ) + ) +} + +/** + * Holds if `f` is the target of a store in the flow covered by `nodeCandFwd1`. + */ +private predicate storeCandFwd1(Content f, Configuration config) { + exists(Node mid, Node node | + not config.isBarrier(node) and + useFieldFlow(config) and + nodeCandFwd1(mid, _, config) and + store(mid, f, node) + ) +} + +/** + * Gets the boolean value equal to `b`, stated as an inequality against + * `b.booleanNot()` instead of as a direct equality. + * + * NOTE(review): presumably this indirection exists to influence how the + * optimizer binds `b` at call sites - confirm. + */ +bindingset[result, b] +private boolean unbindBool(boolean b) { result != b.booleanNot() } + +/** + * Holds if `node` is part of a path from a source to a sink in the given + * configuration ignoring call contexts.
+ */ +pragma[nomagic] +private predicate nodeCand1(Node node, boolean stored, Configuration config) { + // base case: `node` is a sink that is reachable from a source + nodeCandFwd1(node, false, config) and + config.isSink(node) and + stored = false + or + nodeCandFwd1(node, unbindBool(stored), unbind(config)) and + ( + // local flow step + exists(Node mid, boolean preservesValue | + localFlowStep(node, mid, preservesValue, config) and + nodeCand1(mid, stored, config) and + (stored = false or preservesValue = true) + ) + or + // jump step + exists(Node mid, boolean preservesValue | + jumpStep(node, mid, preservesValue, config) and + nodeCand1(mid, stored, config) and + (stored = false or preservesValue = true) + ) + or + // store + exists(Node mid, Content f | + store(node, f, mid) and + readCand1(f, unbind(config)) and + nodeCand1(mid, true, config) and + (stored = false or stored = true) + ) + or + // read + exists(Node mid, Content f | + read(node, f, mid) and + storeCandFwd1(f, unbind(config)) and + nodeCand1(mid, _, config) and + stored = true + ) + or + // flow into a callable + exists(Node param | + viableParamArg(param, node) and + nodeCand1(param, stored, config) + ) + or + // flow out of an argument + exists(PostUpdateNode mid, ParameterNode p | + parameterValueFlowsToUpdate(p, node) and + viableParamArg(p, mid.getPreUpdateNode()) and + nodeCand1(mid, stored, config) + ) + or + // flow out of a callable + exists(Method m, ExprNode ma | + nodeCand1(ma, stored, config) and + m = returnNodeGetEnclosingCallable(node) and + m = viableImpl(ma.getExpr()) + ) + ) +} + +/** + * Holds if `f` is the target of a read in the flow covered by `nodeCand1`.
+ */ +private predicate readCand1(Content f, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCandFwd1(node, true, unbind(config)) and + read(node, f, mid) and + storeCandFwd1(f, unbind(config)) and + nodeCand1(mid, _, config) + ) +} + +/** + * Holds if there is a path from `p` to `node` in the same callable that is + * part of a path from a source to a sink taking simple call contexts into + * consideration. This is restricted to paths that do not necessarily + * preserve the value of `p` by making use of at least one additional step + * from the configuration. + */ +pragma[nomagic] +private predicate simpleParameterFlow(ParameterNode p, Node node, RefType t, Configuration config) { + // base case: the parameter itself + nodeCand1(node, false, config) and + p = node and + t = getErasedRepr(node.getType()) and + not parameterValueFlowsThrough(p) + or + // value-preserving local step; the tracked type `t` is kept + nodeCand1(node, false, unbind(config)) and + exists(Node mid | + simpleParameterFlow(p, mid, t, config) and + localFlowStep(mid, node, true, config) and + compatibleTypes(t, node.getType()) + ) + or + // additional (non-value-preserving) local step; `t` is reset to the type of `node` + nodeCand1(node, false, unbind(config)) and + exists(Node mid | + simpleParameterFlow(p, mid, _, config) and + localFlowStep(mid, node, false, config) and + t = getErasedRepr(node.getType()) + ) + or + // local store followed by a read + nodeCand1(node, false, unbind(config)) and + exists(Node mid | + simpleParameterFlow(p, mid, t, config) and + localStoreReadStep(mid, node) and + compatibleTypes(t, node.getType()) + ) + or + // value flow through a callable + nodeCand1(node, false, config) and + exists(Node arg | + simpleParameterFlow(p, arg, t, config) and + argumentValueFlowsThrough(arg, node) and + compatibleTypes(t, node.getType()) + ) + or + // flow through a callable + nodeCand1(node, false, config) and + exists(Node arg | + simpleParameterFlow(p, arg, _, config) and + simpleArgumentFlowsThrough(arg, node, t, config) + ) +} + +/** + * Holds if data can flow from `arg` through the `call` taking simple call + * contexts into consideration and that
this is part of a path from a source + * to a sink. This is restricted to paths through the `call` that do not + * necessarily preserve the value of `arg` by making use of at least one + * additional step from the configuration. + */ +private predicate simpleArgumentFlowsThrough( + ArgumentNode arg, ExprNode call, RefType t, Configuration config +) { + exists(ParameterNode param, ReturnNode ret | + nodeCand1(arg, false, unbind(config)) and + nodeCand1(call, false, unbind(config)) and + viableParamArg(param, arg) and + simpleParameterFlow(param, ret, t, config) and + arg.argumentOf(call.getExpr(), _) + ) +} + +/** + * Holds if data can flow from `node1` to `node2` by a step through a method. + * The `preservesValue` flag is `true` for value-preserving flow through the + * method and `false` for flow that uses `simpleArgumentFlowsThrough`. + */ +private predicate flowThroughMethod( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + simpleArgumentFlowsThrough(node1, node2, _, config) and preservesValue = false + or + argumentValueFlowsThrough(node1, node2) and preservesValue = true +} + +/** + * Holds if data can flow from `node1` to `node2` in one local step or a step + * through a method. + */ +private predicate localFlowStepOrFlowThroughMethod( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + localFlowStep(node1, node2, preservesValue, config) or + flowThroughMethod(node1, node2, preservesValue, config) +} + +/** + * Holds if data can flow out of a callable from `node1` to `node2`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink.
+ */ +private predicate flowOutOfCallable(Node node1, Node node2, Configuration config) { + nodeCand1(node1, _, unbind(config)) and + nodeCand1(node2, _, config) and + ( + // flow out of an argument + exists(ParameterNode p | + parameterValueFlowsToUpdate(p, node1) and + viableParamArg(p, node2.(PostUpdateNode).getPreUpdateNode()) + ) + or + // flow out of a method + exists(Method m, MethodAccess ma, ReturnNode ret | + ret = node1 and + m = returnNodeGetEnclosingCallable(ret) and + m = viableImpl(ma) and + node2.asExpr() = ma + ) + ) +} + +/** + * Holds if data can flow into a callable, from the argument `node1` to the + * parameter `node2`, and that this step is part of a path from a source to + * a sink. + */ +private predicate flowIntoCallable(Node node1, Node node2, Configuration config) { + viableParamArg(node2, node1) and + nodeCand1(node1, _, unbind(config)) and + nodeCand1(node2, _, config) +} + +/** + * Gets the amount of forward branching on the origin of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int branch(Node n1, Configuration conf) { + result = strictcount(Node n | flowOutOfCallable(n1, n, conf) or flowIntoCallable(n1, n, conf)) +} + +/** + * Gets the amount of backward branching on the target of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int join(Node n2, Configuration conf) { + result = strictcount(Node n | flowOutOfCallable(n, n2, conf) or flowIntoCallable(n, n2, conf)) +} + +/** + * Holds if data can flow out of a callable from `node1` to `node2`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. The + * `allowsFieldFlow` flag indicates whether the branching is within the limit + * specified by the configuration.
+ */ +private predicate flowOutOfCallable( + Node node1, Node node2, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallable(node1, node2, config) and + exists(int b, int j | + b = branch(node1, config) and + j = join(node2, config) and + // field flow is allowed across this edge only if the smaller of the + // forward and backward branching counts is within the configured limit + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if data can flow into a callable and that this step is part of a + * path from a source to a sink. The `allowsFieldFlow` flag indicates whether + * the branching is within the limit specified by the configuration. + */ +private predicate flowIntoCallable( + Node node1, Node node2, boolean allowsFieldFlow, Configuration config +) { + flowIntoCallable(node1, node2, config) and + exists(int b, int j | + b = branch(node1, config) and + j = join(node2, config) and + // field flow is allowed across this edge only if the smaller of the + // forward and backward branching counts is within the configured limit + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if `node` is part of a path from a source to a sink in the given + * configuration taking simple call contexts into consideration.
+ */ +private predicate nodeCandFwd2(Node node, boolean fromArg, boolean stored, Configuration config) { + // base case: a source that is covered by `nodeCand1` + nodeCand1(node, false, config) and + config.isSource(node) and + fromArg = false and + stored = false + or + nodeCand1(node, unbindBool(stored), unbind(config)) and + ( + // local flow step or flow through a method + exists(Node mid, boolean preservesValue | + nodeCandFwd2(mid, fromArg, stored, config) and + localFlowStepOrFlowThroughMethod(mid, node, preservesValue, config) and + (stored = false or preservesValue = true) + ) + or + // jump step; resets `fromArg` + exists(Node mid, boolean preservesValue | + nodeCandFwd2(mid, _, stored, config) and + jumpStep(mid, node, preservesValue, config) and + fromArg = false and + (stored = false or preservesValue = true) + ) + or + // store + exists(Node mid, Content f | + nodeCandFwd2(mid, fromArg, _, config) and + store(mid, f, node) and + readCand1(f, unbind(config)) and + stored = true + ) + or + // read + exists(Node mid, Content f | + nodeCandFwd2(mid, fromArg, true, config) and + read(mid, f, node) and + storeCandFwd2(f, unbind(config)) and + (stored = false or stored = true) + ) + or + // flow into a callable + exists(Node mid, boolean allowsFieldFlow | + nodeCandFwd2(mid, _, stored, config) and + flowIntoCallable(mid, node, allowsFieldFlow, config) and + fromArg = true and + (stored = false or allowsFieldFlow = true) + ) + or + // flow out of a callable; only for flow that did not enter through an argument + exists(Node mid, boolean allowsFieldFlow | + nodeCandFwd2(mid, false, stored, config) and + flowOutOfCallable(mid, node, allowsFieldFlow, config) and + fromArg = false and + (stored = false or allowsFieldFlow = true) + ) + ) +} + +/** + * Holds if `f` is the target of a store in the flow covered by `nodeCandFwd2`.
+ */ +private predicate storeCandFwd2(Content f, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCand1(node, true, unbind(config)) and + nodeCandFwd2(mid, _, _, config) and + store(mid, f, node) and + readCand1(f, unbind(config)) + ) +} + +/** + * Holds if `node` is part of a path from a source to a sink in the given + * configuration taking simple call contexts into consideration. + */ +private predicate nodeCand2(Node node, boolean toReturn, boolean stored, Configuration config) { + // base case: a sink that is covered by `nodeCandFwd2` + nodeCandFwd2(node, _, false, config) and + config.isSink(node) and + toReturn = false and + stored = false + or + nodeCandFwd2(node, _, unbindBool(stored), unbind(config)) and + ( + // local flow step or flow through a method + exists(Node mid, boolean preservesValue | + localFlowStepOrFlowThroughMethod(node, mid, preservesValue, config) and + nodeCand2(mid, toReturn, stored, config) and + (stored = false or preservesValue = true) + ) + or + // jump step; resets `toReturn` + exists(Node mid, boolean preservesValue | + jumpStep(node, mid, preservesValue, config) and + nodeCand2(mid, _, stored, config) and + toReturn = false and + (stored = false or preservesValue = true) + ) + or + // store + exists(Node mid, Content f | + store(node, f, mid) and + readCand2(f, unbind(config)) and + nodeCand2(mid, toReturn, true, config) and + (stored = false or stored = true) + ) + or + // read + exists(Node mid, Content f | + read(node, f, mid) and + storeCandFwd2(f, unbind(config)) and + nodeCand2(mid, toReturn, _, config) and + stored = true + ) + or + // flow into a callable; only towards flow that does not leave through a return + exists(Node mid, boolean allowsFieldFlow | + flowIntoCallable(node, mid, allowsFieldFlow, config) and + nodeCand2(mid, false, stored, config) and + toReturn = false and + (stored = false or allowsFieldFlow = true) + ) + or + // flow out of a callable + exists(Node mid, boolean allowsFieldFlow | + flowOutOfCallable(node, mid, allowsFieldFlow, config) and + nodeCand2(mid, _, stored, config) and + toReturn = true and + (stored = false or allowsFieldFlow = true) + ) + ) +} + +/** + * Holds if `f` is the target of a read
in the flow covered by `nodeCand2`. + */ +private predicate readCand2(Content f, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCandFwd2(node, _, true, unbind(config)) and + read(node, f, mid) and + storeCandFwd2(f, unbind(config)) and + nodeCand2(mid, _, _, config) + ) +} + +/** + * Holds if `f` is the target of a store between two nodes that are both + * covered by `nodeCand2`. + */ +private predicate storeCand(Content f, Configuration conf) { + exists(Node n1, Node n2 | + store(n1, f, n2) and + nodeCand2(n1, _, _, conf) and + nodeCand2(n2, _, _, unbind(conf)) + ) +} + +/** Holds if `f` is the target of a read as given by `readCand2`. */ +private predicate readCand(Content f, Configuration conf) { readCand2(f, conf) } + +/** + * Holds if `f` is the target of both a store and a read in the path graph + * covered by `nodeCand2`. + */ +pragma[noinline] +private predicate readStoreCand(Content f, Configuration conf) { + storeCand(f, conf) and + readCand(f, conf) +} + +/** Holds if `node` is covered by `nodeCand2`, ignoring its boolean flags. */ +private predicate nodeCand(Node node, Configuration config) { nodeCand2(node, _, _, config) } + +/** + * Holds if `node` can be the first node in a maximal subsequence of local + * flow steps in a dataflow path. + */ +private predicate localFlowEntry(Node node, Configuration config) { + nodeCand(node, config) and + ( + config.isSource(node) or + jumpStep(_, node, _, config) or + node instanceof ParameterNode or + node.asExpr() instanceof MethodAccess or + node instanceof PostUpdateNode or + read(_, _, node) or + node.asExpr() instanceof CastExpr + ) +} + +/** + * Holds if `node` can be the last node in a maximal subsequence of local + * flow steps in a dataflow path.
+ */ +private predicate localFlowExit(Node node, Configuration config) { + // `node` has a non-local successor that is a candidate node + exists(Node next | nodeCand(next, config) | + jumpStep(node, next, _, config) or + flowIntoCallable(node, next, config) or + flowOutOfCallable(node, next, config) or + flowThroughMethod(node, next, _, config) or + store(node, _, next) or + read(node, _, next) + ) + or + node.asExpr() instanceof CastExpr + or + config.isSink(node) +} + +/** + * Holds if the local path from `node1` to `node2` is a prefix of a maximal + * subsequence of local flow steps in a dataflow path. + * + * This is the transitive closure of `localFlowStep` beginning at `localFlowEntry`. + */ +private predicate localFlowStepPlus( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + localFlowEntry(node1, config) and + localFlowStep(node1, node2, preservesValue, config) and + node1 != node2 and + nodeCand(node2, unbind(config)) + or + exists(Node mid, boolean pv1, boolean pv2 | + localFlowStepPlus(node1, mid, pv1, config) and + localFlowStep(mid, node2, pv2, config) and + // do not extend a local path through a cast expression + not mid.asExpr() instanceof CastExpr and + // the combined path preserves the value only if both parts do + preservesValue = pv1.booleanAnd(pv2) and + nodeCand(node2, unbind(config)) + ) +} + +/** + * Holds if `node1` can step to `node2` in one or more local steps and this + * path can occur as a maximal subsequence of local steps in a dataflow path. + */ +pragma[noinline] +private predicate localFlowBigStep( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + localFlowStepPlus(node1, node2, preservesValue, config) and + localFlowExit(node2, config) +} + +/** + * The internal representation of `AccessPathFront`: either a nil carrying a + * `Type` or a head carrying a `Content`. + */ +private newtype TAccessPathFront = + TFrontNil(Type t) or + TFrontHead(Content f) + +/** + * The front of an `AccessPath`. This is either a head or a nil.
+ */ +private class AccessPathFront extends TAccessPathFront { + /** Gets a textual representation of this access path front. */ + string toString() { + exists(Type t | this = TFrontNil(t) | result = ppReprType(t)) + or + exists(Content f | this = TFrontHead(f) | result = f.toString()) + } + + /** Gets the type of a nil front, or the container type of the head `Content`. */ + Type getType() { + this = TFrontNil(result) + or + exists(Content head | this = TFrontHead(head) | result = head.getContainerType()) + } + + /** Holds if this front is a head with content `f`. */ + predicate headUsesContent(Content f) { this = TFrontHead(f) } +} + +/** An `AccessPathFront` that is a nil. */ +private class AccessPathFrontNil extends AccessPathFront, TFrontNil { } + +/** + * A `Node` at which a cast can occur such that the type should be checked. + */ +private class CastingNode extends Node { + CastingNode() { + this instanceof ParameterNode or + this.asExpr() instanceof CastExpr or + this.asExpr() instanceof MethodAccess or + this.(PostUpdateNode).getPreUpdateNode() instanceof ArgumentNode + } +} + +/** + * Holds if data can flow from a source to `node` with the given `apf`. + */ +private predicate flowCandFwd(Node node, boolean fromArg, AccessPathFront apf, Configuration config) { + flowCandFwd0(node, fromArg, apf, config) and + if node instanceof CastingNode then compatibleTypes(node.getType(), apf.getType()) else any() +} + +/** + * Holds if data can flow from a source to `node` with the given `apf`, before + * the type compatibility check that `flowCandFwd` applies at casting nodes. + */ +private predicate flowCandFwd0(Node node, boolean fromArg, AccessPathFront apf, Configuration config) { + nodeCand2(node, _, false, config) and + config.isSource(node) and + fromArg = false and + apf = TFrontNil(getErasedRepr(node.getType())) + or + nodeCand(node, unbind(config)) and + ( + exists(Node mid | + flowCandFwd(mid, fromArg, apf, config) and + localFlowBigStep(mid, node, true, config) + ) + or + exists(Node mid, AccessPathFront apf0 | + flowCandFwd(mid, fromArg, apf0, config) and + localFlowBigStep(mid, node, false, config) and + apf0 instanceof AccessPathFrontNil and + apf = TFrontNil(getErasedRepr(node.getType())) + ) + or + exists(Node mid | + flowCandFwd(mid, _, apf, config) and + jumpStep(mid, node, true, config) and + fromArg = false + ) + or + exists(Node mid, AccessPathFront apf0 |
+ flowCandFwd(mid, _, apf0, config) and + jumpStep(mid, node, false, config) and + fromArg = false and + apf0 instanceof AccessPathFrontNil and + apf = TFrontNil(getErasedRepr(node.getType())) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowCandFwd(mid, _, apf, config) and + flowIntoCallable(mid, node, allowsFieldFlow, config) and + fromArg = true and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowCandFwd(mid, false, apf, config) and + flowOutOfCallable(mid, node, allowsFieldFlow, config) and + fromArg = false and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean preservesValue, AccessPathFront apf0 | + flowCandFwd(mid, fromArg, apf0, config) and + flowThroughMethod(mid, node, preservesValue, config) and + ( + preservesValue = true and apf = apf0 + or + preservesValue = false and + apf0 instanceof AccessPathFrontNil and + apf = TFrontNil(getErasedRepr(node.getType())) + ) + ) + ) + or + // store: the new front is headed by the stored content + exists(Node mid, Content f | + flowCandFwd(mid, fromArg, _, config) and + store(mid, f, node) and + nodeCand(node, unbind(config)) and + apf.headUsesContent(f) + ) + or + // read: the front before the read must be headed by the content that is read + exists(Node mid, Content f, AccessPathFront apf0 | + flowCandFwd(mid, fromArg, apf0, config) and + read(mid, f, node) and + nodeCand(node, config) and + apf0.headUsesContent(f) and + consCandFwd(f, apf, unbind(config)) + ) +} + +/** + * Holds if some node that is reached with front `apf` in `flowCandFwd` is + * stored into `f`, where `f` is both stored and read (`readStoreCand`) and + * the type of `apf` is compatible with the type of `f`. + */ +private predicate consCandFwd(Content f, AccessPathFront apf, Configuration config) { + exists(Node mid, Node n | + flowCandFwd(mid, _, apf, config) and + store(mid, f, n) and + nodeCand(n, unbind(config)) and + readStoreCand(f, unbind(config)) and + compatibleTypes(apf.getType(), f.getType()) + ) +} + +/** + * Holds if data can flow from a source to `node` with the given `apf` and + * from there flow to a sink.
+ */ +private predicate flowCand(Node node, boolean toReturn, AccessPathFront apf, Configuration config) { + flowCand0(node, toReturn, apf, config) and + flowCandFwd(node, _, apf, config) +} + +/** + * Holds if `node` with the given `apf` is a sink covered by `flowCandFwd`, or + * has a successor that satisfies `flowCand`. This is the step relation that + * `flowCand` intersects with `flowCandFwd`. + */ +private predicate flowCand0(Node node, boolean toReturn, AccessPathFront apf, Configuration config) { + flowCandFwd(node, _, apf, config) and + config.isSink(node) and + toReturn = false and + apf instanceof AccessPathFrontNil + or + ( + exists(Node mid | + localFlowBigStep(node, mid, true, config) and + flowCand(mid, toReturn, apf, config) + ) + or + exists(Node mid, AccessPathFront apf0 | + flowCandFwd(node, _, apf, config) and + localFlowBigStep(node, mid, false, config) and + flowCand(mid, toReturn, apf0, config) and + apf0 instanceof AccessPathFrontNil and + apf instanceof AccessPathFrontNil + ) + or + exists(Node mid | + jumpStep(node, mid, true, config) and + flowCand(mid, _, apf, config) and + toReturn = false + ) + or + exists(Node mid, AccessPathFront apf0 | + flowCandFwd(node, _, apf, config) and + jumpStep(node, mid, false, config) and + flowCand(mid, _, apf0, config) and + toReturn = false and + apf0 instanceof AccessPathFrontNil and + apf instanceof AccessPathFrontNil + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowIntoCallable(node, mid, allowsFieldFlow, config) and + flowCand(mid, false, apf, config) and + toReturn = false and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowOutOfCallable(node, mid, allowsFieldFlow, config) and + flowCand(mid, _, apf, config) and + toReturn = true and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean preservesValue, AccessPathFront apf0 | + flowThroughMethod(node, mid, preservesValue, config) and + flowCand(mid, toReturn, apf0, config) and + ( + preservesValue = true and apf = apf0 + or + preservesValue = false and + apf0 instanceof AccessPathFrontNil and + apf instanceof AccessPathFrontNil
and + flowCandFwd(node, _, apf, config) + ) + ) + or + // store + exists(Node mid, Content f, AccessPathFront apf0 | + store(node, f, mid) and + flowCand(mid, toReturn, apf0, config) and + apf0.headUsesContent(f) and + consCand(f, apf, unbind(config)) + ) + or + // read + exists(Node mid, Content f, AccessPathFront apf0 | + read(node, f, mid) and + flowCand(mid, toReturn, apf0, config) and + consCandFwd(f, apf0, unbind(config)) and + apf.headUsesContent(f) + ) + ) +} + +/** + * Holds if `consCandFwd(f, apf, config)` holds and there is a read of `f` + * from a node whose front is headed by `f` to a node that satisfies + * `flowCand` with front `apf`. + */ +private predicate consCand(Content f, AccessPathFront apf, Configuration config) { + consCandFwd(f, apf, config) and + exists(Node mid, Node n, AccessPathFront apf0 | + flowCandFwd(n, _, apf0, config) and + apf0.headUsesContent(f) and + read(n, f, mid) and + flowCand(mid, _, apf, config) + ) +} + +/** + * The internal representation of `AccessPath`: either a nil carrying a `Type` + * or a cons carrying the head `Content` and a length in the range 1 to 5. + */ +private newtype TAccessPath = + TNil(Type t) or + TCons(Content f, int len) { len in [1 .. 5] } + +/** + * Conceptually a list of `Content`s followed by a `Type`, but only the first + * element of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object.
+ */ +private class AccessPath extends TAccessPath { + /** Gets a textual representation of this access path. */ + abstract string toString(); + + /** Gets the first `Content` of this access path, if it is non-empty. */ + Content getHead() { this = TCons(result, _) } + + /** Gets the length of this access path; an empty access path has length 0. */ + int len() { + this = TNil(_) and result = 0 + or + this = TCons(_, result) + } + + /** Gets the type of an empty access path, or the container type of the head `Content`. */ + Type getType() { + this = TNil(result) + or + exists(Content head | this = TCons(head, _) | result = head.getContainerType()) + } + + /** Gets the front of this access path. */ + abstract AccessPathFront getFront(); +} + +/** An empty access path, consisting only of a type. */ +private class AccessPathNil extends AccessPath, TNil { + override string toString() { exists(Type t | this = TNil(t) | result = ppReprType(t)) } + + override AccessPathFront getFront() { exists(Type t | this = TNil(t) | result = TFrontNil(t)) } +} + +/** A non-empty access path, represented by its head `Content` and its length. */ +private class AccessPathCons extends AccessPath, TCons { + override string toString() { + exists(Content f, int len | this = TCons(f, len) | + result = f.toString() + ", ... (" + len.toString() + ")" + ) + } + + override AccessPathFront getFront() { + exists(Content f | this = TCons(f, _) | result = TFrontHead(f)) + } +} + +/** Holds if `ap0` corresponds to the cons of `f` and `ap`. */ +private predicate pop(AccessPath ap0, Content f, AccessPath ap) { + ap0.getFront().headUsesContent(f) and + consCand(f, ap.getFront(), _) and + ap0.len() = 1 + ap.len() +} + +/** Holds if `ap0` corresponds to the cons of `f` and `ap` and `apf` is the front of `ap`. */ +pragma[noinline] +private predicate popWithFront(AccessPath ap0, Content f, AccessPathFront apf, AccessPath ap) { + pop(ap0, f, ap) and apf = ap.getFront() +} + +/** Holds if `ap` corresponds to the cons of `f` and `ap0`. */ +private predicate push(AccessPath ap0, Content f, AccessPath ap) { pop(ap, f, ap0) } + +/** + * Holds if data can flow from a source to `node` with the given `ap`.
+ */ +private predicate flowFwd( + Node node, boolean fromArg, AccessPathFront apf, AccessPath ap, Configuration config +) { + flowFwd0(node, fromArg, apf, ap, config) and + flowCand(node, _, apf, config) +} + +/** + * Holds if `node` is a source covered by `flowCand`, or is reached by one + * step from a node that satisfies `flowFwd`, with access path `ap` whose + * front is `apf`. This is the step relation that `flowFwd` intersects with + * `flowCand`. + */ +private predicate flowFwd0( + Node node, boolean fromArg, AccessPathFront apf, AccessPath ap, Configuration config +) { + flowCand(node, _, _, config) and + config.isSource(node) and + fromArg = false and + ap = TNil(getErasedRepr(node.getType())) and + apf = ap.(AccessPathNil).getFront() + or + flowCand(node, _, _, unbind(config)) and + ( + exists(Node mid | + flowFwd(mid, fromArg, apf, ap, config) and + localFlowBigStep(mid, node, true, config) + ) + or + exists(Node mid, AccessPath ap0 | + flowFwd(mid, fromArg, _, ap0, config) and + localFlowBigStep(mid, node, false, config) and + ap0 instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) and + apf = ap.(AccessPathNil).getFront() + ) + or + exists(Node mid | + flowFwd(mid, _, apf, ap, config) and + jumpStep(mid, node, true, config) and + fromArg = false + ) + or + exists(Node mid, AccessPath ap0 | + flowFwd(mid, _, _, ap0, config) and + jumpStep(mid, node, false, config) and + fromArg = false and + ap0 instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) and + apf = ap.(AccessPathNil).getFront() + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowFwd(mid, _, apf, ap, config) and + flowIntoCallable(mid, node, allowsFieldFlow, config) and + fromArg = true and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowFwd(mid, false, apf, ap, config) and + flowOutOfCallable(mid, node, allowsFieldFlow, config) and + fromArg = false and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean preservesValue, AccessPathFront apf0, AccessPath ap0 | + flowFwd(mid, fromArg, apf0, ap0, config) and + flowThroughMethod(mid, node, preservesValue, config) and + ( + preservesValue =
true and ap = ap0 and apf = apf0 + or + preservesValue = false and + ap0 instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) and + apf = ap.(AccessPathNil).getFront() + ) + ) + ) + or + // store: push `f` onto the access path + exists(Content f, AccessPath ap0 | + flowFwdStore(node, f, ap0, apf, fromArg, config) and + push(ap0, f, ap) + ) + or + // read: pop `f` off the access path + exists(Content f, AccessPath ap0 | + flowFwdRead(node, f, ap0, fromArg, config) and + popWithFront(ap0, f, apf, ap) + ) +} + +/** + * Holds if `node` is the target of a store of `f` from a node that satisfies + * `flowFwd` with access path `ap0`, and `apf` is the front after the store. + */ +pragma[nomagic] +private predicate flowFwdStore( + Node node, Content f, AccessPath ap0, AccessPathFront apf, boolean fromArg, Configuration config +) { + exists(Node mid, AccessPathFront apf0 | + flowFwd(mid, fromArg, apf0, ap0, config) and + flowFwdStoreAux(mid, f, node, apf0, apf, config) + ) +} + +/** + * Holds if `mid` is stored into `f` at `node`, where `consCand(f, apf0, config)` + * holds, `apf` is headed by `f`, and `node` satisfies `flowCand` with `apf`. + */ +private predicate flowFwdStoreAux( + Node mid, Content f, Node node, AccessPathFront apf0, AccessPathFront apf, Configuration config +) { + store(mid, f, node) and + consCand(f, apf0, config) and + apf.headUsesContent(f) and + flowCand(node, _, apf, unbind(config)) +} + +/** + * Holds if `node` is the target of a read of `f` from a node that satisfies + * `flowFwd` with access path `ap0` whose front is headed by `f`. + */ +pragma[nomagic] +private predicate flowFwdRead( + Node node, Content f, AccessPath ap0, boolean fromArg, Configuration config +) { + exists(Node mid, AccessPathFront apf0 | + flowFwd(mid, fromArg, apf0, ap0, config) and + read(mid, f, node) and + apf0.headUsesContent(f) and + flowCand(node, _, _, unbind(config)) + ) +} + +/** + * Holds if data can flow from a source to `node` with the given `ap` and + * from there flow to a sink.
+ */ +private predicate flow(Node node, boolean toReturn, AccessPath ap, Configuration config) { + flow0(node, toReturn, ap, config) and + flowFwd(node, _, _, ap, config) +} + +private predicate flow0(Node node, boolean toReturn, AccessPath ap, Configuration config) { + flowFwd(node, _, _, ap, config) and + config.isSink(node) and + toReturn = false and + ap instanceof AccessPathNil + or + ( + exists(Node mid | + localFlowBigStep(node, mid, true, config) and + flow(mid, toReturn, ap, config) + ) + or + exists(Node mid, AccessPath ap0 | + flowFwd(node, _, _, ap, config) and + localFlowBigStep(node, mid, false, config) and + flow(mid, toReturn, ap0, config) and + ap0 instanceof AccessPathNil and + ap instanceof AccessPathNil + ) + or + exists(Node mid | + jumpStep(node, mid, true, config) and + flow(mid, _, ap, config) and + toReturn = false + ) + or + exists(Node mid, AccessPath ap0 | + flowFwd(node, _, _, ap, config) and + jumpStep(node, mid, false, config) and + flow(mid, _, ap0, config) and + toReturn = false and + ap0 instanceof AccessPathNil and + ap instanceof AccessPathNil + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowIntoCallable(node, mid, allowsFieldFlow, config) and + flow(mid, false, ap, config) and + toReturn = false and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowOutOfCallable(node, mid, allowsFieldFlow, config) and + flow(mid, _, ap, config) and + toReturn = true and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean preservesValue, AccessPath ap0 | + flowThroughMethod(node, mid, preservesValue, config) and + flow(mid, toReturn, ap0, config) and + ( + preservesValue = true and ap = ap0 + or + preservesValue = false and + ap0 instanceof AccessPathNil and + ap instanceof AccessPathNil and + flowFwd(node, _, _, ap, config) + ) + ) + or + exists(Content f, AccessPath ap0 | + flowStore(node, f, toReturn, ap0, config) and + 
pop(ap0, f, ap) + ) + or + exists(Content f, AccessPath ap0 | + flowRead(node, f, toReturn, ap0, config) and + push(ap0, f, ap) + ) + ) +} + +pragma[nomagic] +private predicate flowStore( + Node node, Content f, boolean toReturn, AccessPath ap0, Configuration config +) { + exists(Node mid | + store(node, f, mid) and + flow(mid, toReturn, ap0, config) + ) +} + +pragma[nomagic] +private predicate flowRead( + Node node, Content f, boolean toReturn, AccessPath ap0, Configuration config +) { + exists(Node mid | + read(node, f, mid) and + flow(mid, toReturn, ap0, config) + ) +} + +bindingset[conf, result] +private Configuration unbind(Configuration conf) { result >= conf and result <= conf } + +private predicate flow(Node n, Configuration config) { flow(n, _, _, config) } + +private newtype TPathNode = + TPathNodeMid(Node node, CallContext cc, AccessPath ap, Configuration config) { + // A PathNode is introduced by a source ... + flow(node, config) and + config.isSource(node) and + cc instanceof CallContextAny and + ap = TNil(getErasedRepr(node.getType())) + or + // ... or a step from an existing PathNode to another node. + exists(PathNodeMid mid | + flowStep(mid, node, cc, ap) and + config = mid.getConfiguration() and + flow(node, _, ap, unbind(config)) + ) + } or + TPathNodeSink(Node node, Configuration config) { + // The AccessPath on a sink is empty. + config.isSink(node) and + flow(node, config) + } + +/** + * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +abstract class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { result = getNode().toString() + ppAp() } + + /** Gets the source location for this element. */ + Location getLocation() { result = getNode().getLocation() } + + /** Gets the underlying `Node`. */ + abstract Node getNode(); + + /** Gets the associated configuration. 
*/ + abstract Configuration getConfiguration(); + + /** Gets a successor. */ + abstract PathNode getSucc(); + + private string ppAp() { + this instanceof PathNodeSink and result = "" + or + result = " [" + this.(PathNodeMid).getAp().toString() + "]" + } +} + +/** Holds if `n` can reach a sink. */ +private predicate reach(PathNode n) { n instanceof PathNodeSink or reach(n.getSucc()) } + +/** Holds if `n1.getSucc() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNode n1, PathNode n2) { n1.getSucc() = n2 and reach(n2) } + +private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PathNode a, PathNode b) { pathSucc(a, b) } +} + +/** + * An intermediate flow graph node. This is a tuple consisting of a `Node`, + * a `CallContext`, an `AccessPath`, and a `Configuration`. 
+ */ +private class PathNodeMid extends PathNode, TPathNodeMid { + Node node; + + CallContext cc; + + AccessPath ap; + + Configuration config; + + PathNodeMid() { this = TPathNodeMid(node, cc, ap, config) } + + override Node getNode() { result = node } + + CallContext getCallContext() { result = cc } + + AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + private PathNodeMid getSuccMid() { + flowStep(this, result.getNode(), result.getCallContext(), result.getAp()) and + result.getConfiguration() = unbind(this.getConfiguration()) + } + + override PathNode getSucc() { + // an intermediate step to another intermediate node + result = getSuccMid() + or + // a final step to a sink via one or more local steps + localFlowStepPlus(node, result.getNode(), _, config) and + ap instanceof AccessPathNil and + result instanceof PathNodeSink and + result.getConfiguration() = unbind(this.getConfiguration()) + or + // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges + exists(PathNodeMid mid | + mid = getSuccMid() and + mid.getNode() = result.getNode() and + mid.getAp() instanceof AccessPathNil and + result instanceof PathNodeSink and + result.getConfiguration() = unbind(mid.getConfiguration()) + ) + or + // a direct step from a source to a sink if a node is both + this instanceof PathNodeSource and + result instanceof PathNodeSink and + this.getNode() = result.getNode() and + result.getConfiguration() = unbind(this.getConfiguration()) + } +} + +/** + * A flow graph node corresponding to a source. + */ +private class PathNodeSource extends PathNodeMid { + PathNodeSource() { + getConfiguration().isSource(getNode()) and + getCallContext() instanceof CallContextAny and + getAp() instanceof AccessPathNil + } +} + +/** + * A flow graph node corresponding to a sink. 
This is disjoint from the + * intermediate nodes in order to uniquely correspond to a given sink by + * excluding the `CallContext`. + */ +private class PathNodeSink extends PathNode, TPathNodeSink { + Node node; + + Configuration config; + + PathNodeSink() { this = TPathNodeSink(node, config) } + + override Node getNode() { result = node } + + override Configuration getConfiguration() { result = config } + + override PathNode getSucc() { none() } +} + +/** + * Holds if data may flow from `mid` to `node`. The last step in or out of + * a callable is recorded by `cc`. + */ +private predicate flowStep(PathNodeMid mid, Node node, CallContext cc, AccessPath ap) { + localFlowBigStep(mid.getNode(), node, true, mid.getConfiguration()) and + cc = mid.getCallContext() and + ap = mid.getAp() + or + localFlowBigStep(mid.getNode(), node, false, mid.getConfiguration()) and + cc = mid.getCallContext() and + mid.getAp() instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) + or + jumpStep(mid.getNode(), node, true, mid.getConfiguration()) and + cc instanceof CallContextAny and + ap = mid.getAp() + or + jumpStep(mid.getNode(), node, false, mid.getConfiguration()) and + cc instanceof CallContextAny and + mid.getAp() instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) + or + contentReadStep(mid, node, ap) and cc = mid.getCallContext() + or + exists(Content f, AccessPath ap0 | contentStoreStep(mid, node, ap0, f, cc) and push(ap0, f, ap)) + or + flowOutOfArgument(mid, node, cc) and ap = mid.getAp() + or + flowIntoCallable(mid, node, _, cc, _) and ap = mid.getAp() + or + flowOutOfMethod(mid, node.asExpr(), cc) and ap = mid.getAp() + or + flowThroughMethod(mid, node.asExpr(), cc) and ap = TNil(getErasedRepr(node.getType())) + or + valueFlowThroughMethod(mid, node.asExpr(), cc) and ap = mid.getAp() +} + +private predicate contentReadStep(PathNodeMid mid, Node node, AccessPath ap) { + exists(Content f, AccessPath ap0 | + ap0 = mid.getAp() and + 
read(mid.getNode(), f, node) and + pop(ap0, f, ap) + ) +} + +pragma[noinline] +private predicate contentStoreStep( + PathNodeMid mid, Node node, AccessPath ap0, Content f, CallContext cc +) { + ap0 = mid.getAp() and + store(mid.getNode(), f, node) and + cc = mid.getCallContext() +} + +/** + * Holds if data may flow from `mid` to an exit of `m` in the context + * `innercc`, and the path did not flow through a parameter of `m`. + */ +private predicate flowOutOfMethod0(PathNodeMid mid, Method m, CallContext innercc) { + exists(ReturnNode ret | + ret = mid.getNode() and + innercc = mid.getCallContext() and + m = returnNodeGetEnclosingCallable(ret) and + not innercc instanceof CallContextCall + ) +} + +/** + * Holds if data may flow from `mid` to `ma`. The last step of this path + * is a return from a method and is recorded by `cc`, if needed. + */ +pragma[noinline] +private predicate flowOutOfMethod(PathNodeMid mid, MethodAccess ma, CallContext cc) { + exists(Method m, CallContext innercc | + flowOutOfMethod0(mid, m, innercc) and + resolveReturn(innercc, m, ma) + | + if reducedViableImplInReturn(m, ma) then cc = TReturn(m, ma) else cc = TAnyCallContext() + ) +} + +private predicate flowOutOfArgument(PathNodeMid mid, PostUpdateNode node, CallContext cc) { + exists( + PostUpdateNode n, ParameterNode p, Callable callable, CallContext innercc, int i, Call call, + ArgumentNode arg + | + mid.getNode() = n and + parameterValueFlowsToUpdate(p, n) and + innercc = mid.getCallContext() and + p.isParameterOf(callable, i) and + resolveReturn(innercc, callable, call) and + node.getPreUpdateNode() = arg and + arg.argumentOf(call, i) and + flow(node, unbind(mid.getConfiguration())) + | + if reducedViableImplInReturn(callable, call) + then cc = TReturn(callable, call) + else cc = TAnyCallContext() + ) +} + +/** + * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`. 
+ */ +pragma[noinline] +private predicate flowIntoArg(PathNodeMid mid, int i, CallContext cc, Call call, boolean emptyAp) { + exists(ArgumentNode arg, AccessPath ap | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() + | + ap instanceof AccessPathNil and emptyAp = true + or + ap instanceof AccessPathCons and emptyAp = false + ) +} + +pragma[noinline] +private predicate parameterCand(Callable callable, int i, Configuration config) { + exists(ParameterNode p | + flow(p, config) and + p.isParameterOf(callable, i) + ) +} + +pragma[nomagic] +private predicate flowIntoCallable0( + PathNodeMid mid, Callable callable, int i, CallContext outercc, Call call, boolean emptyAp +) { + flowIntoArg(mid, i, outercc, call, emptyAp) and + callable = resolveCall(call, outercc) and + parameterCand(callable, any(int j | j <= i and j >= i), mid.getConfiguration()) +} + +/** + * Holds if data may flow from `mid` to `p` through `call`. The contexts + * before and after entering the callable are `outercc` and `innercc`, + * respectively. + */ +private predicate flowIntoCallable( + PathNodeMid mid, ParameterNode p, CallContext outercc, CallContextCall innercc, Call call +) { + exists(int i, Callable callable, boolean emptyAp | + flowIntoCallable0(mid, callable, i, outercc, call, emptyAp) and + p.isParameterOf(callable, i) + | + if reducedViableImplInCallContext(_, callable, call) + then innercc = TSpecificCall(call, i, emptyAp) + else innercc = TSomeCall(p, emptyAp) + ) +} + +/** Holds if data may flow from `p` to a return statement in the callable. 
*/ +pragma[nomagic] +private predicate paramFlowsThrough(ParameterNode p, CallContextCall cc, Configuration config) { + exists(PathNodeMid mid, ReturnNode ret | + mid.getNode() = ret and + cc = mid.getCallContext() and + config = mid.getConfiguration() and + mid.getAp() instanceof AccessPathNil + | + cc = TSomeCall(p, true) + or + exists(int i | cc = TSpecificCall(_, i, true) | + p.isParameterOf(returnNodeGetEnclosingCallable(ret), i) + ) + ) +} + +/** + * Holds if data may flow from `mid` to an argument of `methodcall`, + * through a called method `m`, and back out through a return statement in + * `m`. The context `cc` is restored to its value prior to entering `m`. + */ +pragma[noinline] +private predicate flowThroughMethod(PathNodeMid mid, Call methodcall, CallContext cc) { + exists(ParameterNode p, CallContext innercc | + flowIntoCallable(mid, p, cc, innercc, methodcall) and + paramFlowsThrough(p, innercc, unbind(mid.getConfiguration())) and + not parameterValueFlowsThrough(p) and + mid.getAp() instanceof AccessPathNil + ) +} + +private predicate valueFlowThroughMethod(PathNodeMid mid, Call methodcall, CallContext cc) { + exists(ParameterNode p | + flowIntoCallable(mid, p, cc, _, methodcall) and + parameterValueFlowsThrough(p) + ) +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +private predicate flowsTo( + PathNodeSource flowsource, PathNodeSink flowsink, Node source, Node sink, + Configuration configuration +) { + flowsource.getConfiguration() = configuration and + flowsource.getNode() = source and + pathSuccPlus(flowsource, flowsink) and + flowsink.getNode() = sink +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. 
+ */ +predicate flowsTo(Node source, Node sink, Configuration configuration) { + flowsTo(_, _, source, sink, configuration) +} diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl4.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl4.qll new file mode 100644 index 000000000000..9d28e37cc8ad --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl4.qll @@ -0,0 +1,1614 @@ +/** + * Provides an implementation of global (interprocedural) data flow. This file + * re-exports the local (intraprocedural) data flow analysis from `DataFlowUtil` + * and adds a global analysis, mainly exposed through the `Configuration` class. + * This file exists in several identical copies, allowing queries to use + * multiple `Configuration` classes that depend on each other without + * introducing mutual recursion among those configurations. + */ + +import DataFlowUtil +private import DataFlowPrivate +private import DataFlowDispatch +private import DataFlowImplCommon + +/** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. + * For example, write + * + * ``` + * class MyAnalysisConfiguration extends DataFlow::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. 
+ * } + * ``` + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ``` + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but two classes extending + * `DataFlow::Configuration` should never depend on each other. One of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. + */ +abstract class Configuration extends string { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + abstract predicate isSource(Node source); + + /** + * Holds if `sink` is a relevant data flow sink. + */ + abstract predicate isSink(Node sink); + + /** Holds if data flow through `node` is prohibited. */ + predicate isBarrier(Node node) { none() } + + /** Holds if data flow from `node1` to `node2` is prohibited. */ + predicate isBarrierEdge(Node node1, Node node2) { none() } + + /** + * Holds if the additional flow step from `node1` to `node2` must be taken + * into account in the analysis. + */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** + * Gets the virtual dispatch branching limit when calculating field flow. + * This can be overridden to a smaller value to improve performance (a + * value of 0 disables field flow), or a larger value to get more results. + */ + int fieldFlowBranchLimit() { result = 2 } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + */ + predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + * + * The corresponding paths are generated from the end-points and the graph + * included in the module `PathGraph`. 
+ */ + predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowTo(Node sink) { hasFlow(_, sink) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowToExpr(Expr sink) { hasFlowTo(exprNode(sink)) } + + /** DEPRECATED: use `hasFlow` instead. */ + deprecated predicate hasFlowForward(Node source, Node sink) { hasFlow(source, sink) } + + /** DEPRECATED: use `hasFlow` instead. */ + deprecated predicate hasFlowBackward(Node source, Node sink) { hasFlow(source, sink) } +} + +/** + * Holds if the additional step from `node1` to `node2` jumps between callables. + */ +private predicate additionalJumpStep(Node node1, Node node2, Configuration config) { + config.isAdditionalFlowStep(node1, node2) and + node1.getEnclosingCallable() != node2.getEnclosingCallable() +} + +pragma[noinline] +private predicate isAdditionalFlowStep( + Node node1, Node node2, Callable callable1, Callable callable2, Configuration config +) { + config.isAdditionalFlowStep(node1, node2) and + callable1 = node1.getEnclosingCallable() and + callable2 = node2.getEnclosingCallable() +} + +/** + * Holds if the additional step from `node1` to `node2` does not jump between callables. + */ +private predicate additionalLocalFlowStep(Node node1, Node node2, Configuration config) { + exists(Callable callable | isAdditionalFlowStep(node1, node2, callable, callable, config)) +} + +/** + * Holds if data can flow from `node1` to `node2` through a static field or + * variable capture, or through an additional step of `config` that jumps between callables. 
+ */ +private predicate jumpStep(Node node1, Node node2, boolean preservesValue, Configuration config) { + jumpStep(node1, node2) and preservesValue = true + or + additionalJumpStep(node1, node2, config) and preservesValue = false +} + +/** + * Holds if data can flow in one local step from `node1` to `node2` taking + * additional steps from the configuration into account. + */ +private predicate localFlowStep(Node node1, Node node2, boolean preservesValue, Configuration config) { + localFlowStep(node1, node2) and not config.isBarrierEdge(node1, node2) and preservesValue = true + or + additionalLocalFlowStep(node1, node2, config) and preservesValue = false +} + +pragma[noinline] +private Method returnNodeGetEnclosingCallable(ReturnNode ret) { + result = ret.getEnclosingCallable() +} + +/** + * Holds if field flow should be used for the given configuration. + */ +private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } + +/** + * Holds if `node` is reachable from a source in the given configuration + * ignoring call contexts. 
+ */ +private predicate nodeCandFwd1(Node node, boolean stored, Configuration config) { + not config.isBarrier(node) and + ( + config.isSource(node) and stored = false + or + exists(Node mid, boolean preservesValue | + nodeCandFwd1(mid, stored, config) and + localFlowStep(mid, node, preservesValue, config) and + (stored = false or preservesValue = true) + ) + or + exists(Node mid, boolean preservesValue | + nodeCandFwd1(mid, stored, config) and + jumpStep(mid, node, preservesValue, config) and + (stored = false or preservesValue = true) + ) + or + // store + exists(Node mid | + useFieldFlow(config) and + nodeCandFwd1(mid, _, config) and + store(mid, _, node) and + stored = true + ) + or + // read + exists(Node mid, Content f | + nodeCandFwd1(mid, true, config) and + read(mid, f, node) and + storeCandFwd1(f, unbind(config)) and + (stored = false or stored = true) + ) + or + // flow into a callable + exists(Node arg | + nodeCandFwd1(arg, stored, config) and + viableParamArg(node, arg) + ) + or + // flow out of an argument + exists(PostUpdateNode mid, ParameterNode p | + nodeCandFwd1(mid, stored, config) and + parameterValueFlowsToUpdate(p, mid) and + viableParamArg(p, node.(PostUpdateNode).getPreUpdateNode()) + ) + or + // flow out of a callable + exists(Method m, MethodAccess ma, ReturnNode ret | + nodeCandFwd1(ret, stored, config) and + m = returnNodeGetEnclosingCallable(ret) and + m = viableImpl(ma) and + node.asExpr() = ma + ) + ) +} + +/** + * Holds if `f` is the target of a store in the flow covered by `nodeCandFwd1`. + */ +private predicate storeCandFwd1(Content f, Configuration config) { + exists(Node mid, Node node | + not config.isBarrier(node) and + useFieldFlow(config) and + nodeCandFwd1(mid, _, config) and + store(mid, f, node) + ) +} + +bindingset[result, b] +private boolean unbindBool(boolean b) { result != b.booleanNot() } + +/** + * Holds if `node` is part of a path from a source to a sink in the given + * configuration ignoring call contexts. 
+ */ +pragma[nomagic] +private predicate nodeCand1(Node node, boolean stored, Configuration config) { + nodeCandFwd1(node, false, config) and + config.isSink(node) and + stored = false + or + nodeCandFwd1(node, unbindBool(stored), unbind(config)) and + ( + exists(Node mid, boolean preservesValue | + localFlowStep(node, mid, preservesValue, config) and + nodeCand1(mid, stored, config) and + (stored = false or preservesValue = true) + ) + or + exists(Node mid, boolean preservesValue | + jumpStep(node, mid, preservesValue, config) and + nodeCand1(mid, stored, config) and + (stored = false or preservesValue = true) + ) + or + // store + exists(Node mid, Content f | + store(node, f, mid) and + readCand1(f, unbind(config)) and + nodeCand1(mid, true, config) and + (stored = false or stored = true) + ) + or + // read + exists(Node mid, Content f | + read(node, f, mid) and + storeCandFwd1(f, unbind(config)) and + nodeCand1(mid, _, config) and + stored = true + ) + or + // flow into a callable + exists(Node param | + viableParamArg(param, node) and + nodeCand1(param, stored, config) + ) + or + // flow out of an argument + exists(PostUpdateNode mid, ParameterNode p | + parameterValueFlowsToUpdate(p, node) and + viableParamArg(p, mid.getPreUpdateNode()) and + nodeCand1(mid, stored, config) + ) + or + // flow out of a callable + exists(Method m, ExprNode ma | + nodeCand1(ma, stored, config) and + m = returnNodeGetEnclosingCallable(node) and + m = viableImpl(ma.getExpr()) + ) + ) +} + +/** + * Holds if `f` is the target of a read in the flow covered by `nodeCand1`. 
+ */ +private predicate readCand1(Content f, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCandFwd1(node, true, unbind(config)) and + read(node, f, mid) and + storeCandFwd1(f, unbind(config)) and + nodeCand1(mid, _, config) + ) +} + +/** + * Holds if there is a path from `p` to `node` in the same callable that is + * part of a path from a source to a sink taking simple call contexts into + * consideration. This is restricted to paths that do not necessarily + * preserve the value of `p` by making use of at least one additional step + * from the configuration. + */ +pragma[nomagic] +private predicate simpleParameterFlow(ParameterNode p, Node node, RefType t, Configuration config) { + nodeCand1(node, false, config) and + p = node and + t = getErasedRepr(node.getType()) and + not parameterValueFlowsThrough(p) + or + nodeCand1(node, false, unbind(config)) and + exists(Node mid | + simpleParameterFlow(p, mid, t, config) and + localFlowStep(mid, node, true, config) and + compatibleTypes(t, node.getType()) + ) + or + nodeCand1(node, false, unbind(config)) and + exists(Node mid | + simpleParameterFlow(p, mid, _, config) and + localFlowStep(mid, node, false, config) and + t = getErasedRepr(node.getType()) + ) + or + nodeCand1(node, false, unbind(config)) and + exists(Node mid | + simpleParameterFlow(p, mid, t, config) and + localStoreReadStep(mid, node) and + compatibleTypes(t, node.getType()) + ) + or + // value flow through a callable + nodeCand1(node, false, config) and + exists(Node arg | + simpleParameterFlow(p, arg, t, config) and + argumentValueFlowsThrough(arg, node) and + compatibleTypes(t, node.getType()) + ) + or + // flow through a callable + nodeCand1(node, false, config) and + exists(Node arg | + simpleParameterFlow(p, arg, _, config) and + simpleArgumentFlowsThrough(arg, node, t, config) + ) +} + +/** + * Holds if data can flow from `arg` through the `call` taking simple call + * contexts into consideration and that 
this is part of a path from a source + * to a sink. This is restricted to paths through the `call` that do not + * necessarily preserve the value of `arg` by making use of at least one + * additional step from the configuration. + */ +private predicate simpleArgumentFlowsThrough( + ArgumentNode arg, ExprNode call, RefType t, Configuration config +) { + exists(ParameterNode param, ReturnNode ret | + nodeCand1(arg, false, unbind(config)) and + nodeCand1(call, false, unbind(config)) and + viableParamArg(param, arg) and + simpleParameterFlow(param, ret, t, config) and + arg.argumentOf(call.getExpr(), _) + ) +} + +/** + * Holds if data can flow from `node1` to `node2` by a step through a method. + */ +private predicate flowThroughMethod( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + simpleArgumentFlowsThrough(node1, node2, _, config) and preservesValue = false + or + argumentValueFlowsThrough(node1, node2) and preservesValue = true +} + +/** + * Holds if data can flow from `node1` to `node2` in one local step or a step + * through a method. + */ +private predicate localFlowStepOrFlowThroughMethod( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + localFlowStep(node1, node2, preservesValue, config) or + flowThroughMethod(node1, node2, preservesValue, config) +} + +/** + * Holds if data can flow out of a callable from `node1` to `node2`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. 
+ */ +private predicate flowOutOfCallable(Node node1, Node node2, Configuration config) { + nodeCand1(node1, _, unbind(config)) and + nodeCand1(node2, _, config) and + ( + // flow out of an argument + exists(ParameterNode p | + parameterValueFlowsToUpdate(p, node1) and + viableParamArg(p, node2.(PostUpdateNode).getPreUpdateNode()) + ) + or + // flow out of a method + exists(Method m, MethodAccess ma, ReturnNode ret | + ret = node1 and + m = returnNodeGetEnclosingCallable(ret) and + m = viableImpl(ma) and + node2.asExpr() = ma + ) + ) +} + +/** + * Holds if data can flow into a callable and that this step is part of a + * path from a source to a sink. + */ +private predicate flowIntoCallable(Node node1, Node node2, Configuration config) { + viableParamArg(node2, node1) and + nodeCand1(node1, _, unbind(config)) and + nodeCand1(node2, _, config) +} + +/** + * Gets the amount of forward branching on the origin of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int branch(Node n1, Configuration conf) { + result = strictcount(Node n | flowOutOfCallable(n1, n, conf) or flowIntoCallable(n1, n, conf)) +} + +/** + * Gets the amount of backward branching on the target of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int join(Node n2, Configuration conf) { + result = strictcount(Node n | flowOutOfCallable(n, n2, conf) or flowIntoCallable(n, n2, conf)) +} + +/** + * Holds if data can flow out of a callable from `node1` to `node2`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. The + * `allowsFieldFlow` flag indicates whether the branching is within the limit + * specified by the configuration. 
+ */ +private predicate flowOutOfCallable( + Node node1, Node node2, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallable(node1, node2, config) and + exists(int b, int j | + b = branch(node1, config) and + j = join(node2, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if data can flow into a callable and that this step is part of a + * path from a source to a sink. The `allowsFieldFlow` flag indicates whether + * the branching is within the limit specified by the configuration. + */ +private predicate flowIntoCallable( + Node node1, Node node2, boolean allowsFieldFlow, Configuration config +) { + flowIntoCallable(node1, node2, config) and + exists(int b, int j | + b = branch(node1, config) and + j = join(node2, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if `node` is part of a path from a source to a sink in the given + * configuration taking simple call contexts into consideration. 
+ */ +private predicate nodeCandFwd2(Node node, boolean fromArg, boolean stored, Configuration config) { + nodeCand1(node, false, config) and + config.isSource(node) and + fromArg = false and + stored = false + or + nodeCand1(node, unbindBool(stored), unbind(config)) and + ( + exists(Node mid, boolean preservesValue | + nodeCandFwd2(mid, fromArg, stored, config) and + localFlowStepOrFlowThroughMethod(mid, node, preservesValue, config) and + (stored = false or preservesValue = true) + ) + or + exists(Node mid, boolean preservesValue | + nodeCandFwd2(mid, _, stored, config) and + jumpStep(mid, node, preservesValue, config) and + fromArg = false and + (stored = false or preservesValue = true) + ) + or + // store + exists(Node mid, Content f | + nodeCandFwd2(mid, fromArg, _, config) and + store(mid, f, node) and + readCand1(f, unbind(config)) and + stored = true + ) + or + // read + exists(Node mid, Content f | + nodeCandFwd2(mid, fromArg, true, config) and + read(mid, f, node) and + storeCandFwd2(f, unbind(config)) and + (stored = false or stored = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + nodeCandFwd2(mid, _, stored, config) and + flowIntoCallable(mid, node, allowsFieldFlow, config) and + fromArg = true and + (stored = false or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + nodeCandFwd2(mid, false, stored, config) and + flowOutOfCallable(mid, node, allowsFieldFlow, config) and + fromArg = false and + (stored = false or allowsFieldFlow = true) + ) + ) +} + +/** + * Holds if `f` is the target of a store in the flow covered by `nodeCandFwd2`. 
+ */ +private predicate storeCandFwd2(Content f, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCand1(node, true, unbind(config)) and + nodeCandFwd2(mid, _, _, config) and + store(mid, f, node) and + readCand1(f, unbind(config)) + ) +} + +/** + * Holds if `node` is part of a path from a source to a sink in the given + * configuration taking simple call contexts into consideration. + */ +private predicate nodeCand2(Node node, boolean toReturn, boolean stored, Configuration config) { + nodeCandFwd2(node, _, false, config) and + config.isSink(node) and + toReturn = false and + stored = false + or + nodeCandFwd2(node, _, unbindBool(stored), unbind(config)) and + ( + exists(Node mid, boolean preservesValue | + localFlowStepOrFlowThroughMethod(node, mid, preservesValue, config) and + nodeCand2(mid, toReturn, stored, config) and + (stored = false or preservesValue = true) + ) + or + exists(Node mid, boolean preservesValue | + jumpStep(node, mid, preservesValue, config) and + nodeCand2(mid, _, stored, config) and + toReturn = false and + (stored = false or preservesValue = true) + ) + or + // store + exists(Node mid, Content f | + store(node, f, mid) and + readCand2(f, unbind(config)) and + nodeCand2(mid, toReturn, true, config) and + (stored = false or stored = true) + ) + or + // read + exists(Node mid, Content f | + read(node, f, mid) and + storeCandFwd2(f, unbind(config)) and + nodeCand2(mid, toReturn, _, config) and + stored = true + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowIntoCallable(node, mid, allowsFieldFlow, config) and + nodeCand2(mid, false, stored, config) and + toReturn = false and + (stored = false or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowOutOfCallable(node, mid, allowsFieldFlow, config) and + nodeCand2(mid, _, stored, config) and + toReturn = true and + (stored = false or allowsFieldFlow = true) + ) + ) +} + +/** + * Holds if `f` is the target of a read 
in the flow covered by `nodeCand2`. + */ +private predicate readCand2(Content f, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCandFwd2(node, _, true, unbind(config)) and + read(node, f, mid) and + storeCandFwd2(f, unbind(config)) and + nodeCand2(mid, _, _, config) + ) +} + +private predicate storeCand(Content f, Configuration conf) { + exists(Node n1, Node n2 | + store(n1, f, n2) and + nodeCand2(n1, _, _, conf) and + nodeCand2(n2, _, _, unbind(conf)) + ) +} + +private predicate readCand(Content f, Configuration conf) { readCand2(f, conf) } + +/** + * Holds if `f` is the target of both a store and a read in the path graph + * covered by `nodeCand2`. + */ +pragma[noinline] +private predicate readStoreCand(Content f, Configuration conf) { + storeCand(f, conf) and + readCand(f, conf) +} + +private predicate nodeCand(Node node, Configuration config) { nodeCand2(node, _, _, config) } + +/** + * Holds if `node` can be the first node in a maximal subsequence of local + * flow steps in a dataflow path. + */ +private predicate localFlowEntry(Node node, Configuration config) { + nodeCand(node, config) and + ( + config.isSource(node) or + jumpStep(_, node, _, config) or + node instanceof ParameterNode or + node.asExpr() instanceof MethodAccess or + node instanceof PostUpdateNode or + read(_, _, node) or + node.asExpr() instanceof CastExpr + ) +} + +/** + * Holds if `node` can be the last node in a maximal subsequence of local + * flow steps in a dataflow path. 
+ */ +private predicate localFlowExit(Node node, Configuration config) { + exists(Node next | nodeCand(next, config) | + jumpStep(node, next, _, config) or + flowIntoCallable(node, next, config) or + flowOutOfCallable(node, next, config) or + flowThroughMethod(node, next, _, config) or + store(node, _, next) or + read(node, _, next) + ) + or + node.asExpr() instanceof CastExpr + or + config.isSink(node) +} + +/** + * Holds if the local path from `node1` to `node2` is a prefix of a maximal + * subsequence of local flow steps in a dataflow path. + * + * This is the transitive closure of `localFlowStep` beginning at `localFlowEntry`. + */ +private predicate localFlowStepPlus( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + localFlowEntry(node1, config) and + localFlowStep(node1, node2, preservesValue, config) and + node1 != node2 and + nodeCand(node2, unbind(config)) + or + exists(Node mid, boolean pv1, boolean pv2 | + localFlowStepPlus(node1, mid, pv1, config) and + localFlowStep(mid, node2, pv2, config) and + not mid.asExpr() instanceof CastExpr and + preservesValue = pv1.booleanAnd(pv2) and + nodeCand(node2, unbind(config)) + ) +} + +/** + * Holds if `node1` can step to `node2` in one or more local steps and this + * path can occur as a maximal subsequence of local steps in a dataflow path. + */ +pragma[noinline] +private predicate localFlowBigStep( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + localFlowStepPlus(node1, node2, preservesValue, config) and + localFlowExit(node2, config) +} + +private newtype TAccessPathFront = + TFrontNil(Type t) or + TFrontHead(Content f) + +/** + * The front of an `AccessPath`. This is either a head or a nil. 
+ */ +private class AccessPathFront extends TAccessPathFront { + /** Gets a textual representation of this front. */ string toString() { + exists(Type t | this = TFrontNil(t) | result = ppReprType(t)) + or + exists(Content f | this = TFrontHead(f) | result = f.toString()) + } + + /** Gets the type of a nil front, or the container type of the head content. */ Type getType() { + this = TFrontNil(result) + or + exists(Content head | this = TFrontHead(head) | result = head.getContainerType()) + } + + /** Holds if this front is the head content `f`. */ predicate headUsesContent(Content f) { this = TFrontHead(f) } +} + +/** An `AccessPathFront` that is nil. */ private class AccessPathFrontNil extends AccessPathFront, TFrontNil { } + +/** + * A `Node` at which a cast can occur such that the type should be checked. + */ +private class CastingNode extends Node { + CastingNode() { + this instanceof ParameterNode or + this.asExpr() instanceof CastExpr or + this.asExpr() instanceof MethodAccess or + this.(PostUpdateNode).getPreUpdateNode() instanceof ArgumentNode + } +} + +/** + * Holds if data can flow from a source to `node` with the given `apf`. At a `CastingNode` the type of `node` must also be compatible with the type of `apf`. + */ +private predicate flowCandFwd(Node node, boolean fromArg, AccessPathFront apf, Configuration config) { + flowCandFwd0(node, fromArg, apf, config) and + if node instanceof CastingNode then compatibleTypes(node.getType(), apf.getType()) else any() +} + +/** The recursive definition behind `flowCandFwd`, before the `CastingNode` type check is applied. */ private predicate flowCandFwd0(Node node, boolean fromArg, AccessPathFront apf, Configuration config) { + nodeCand2(node, _, false, config) and + config.isSource(node) and + fromArg = false and + apf = TFrontNil(getErasedRepr(node.getType())) + or + nodeCand(node, unbind(config)) and + ( + exists(Node mid | + flowCandFwd(mid, fromArg, apf, config) and + localFlowBigStep(mid, node, true, config) + ) + or + exists(Node mid, AccessPathFront apf0 | + flowCandFwd(mid, fromArg, apf0, config) and + localFlowBigStep(mid, node, false, config) and + apf0 instanceof AccessPathFrontNil and + apf = TFrontNil(getErasedRepr(node.getType())) + ) + or + exists(Node mid | + flowCandFwd(mid, _, apf, config) and + jumpStep(mid, node, true, config) and + fromArg = false + ) + or + exists(Node mid, AccessPathFront apf0 |
+ flowCandFwd(mid, _, apf0, config) and + jumpStep(mid, node, false, config) and + fromArg = false and + apf0 instanceof AccessPathFrontNil and + apf = TFrontNil(getErasedRepr(node.getType())) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowCandFwd(mid, _, apf, config) and + flowIntoCallable(mid, node, allowsFieldFlow, config) and + fromArg = true and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowCandFwd(mid, false, apf, config) and + flowOutOfCallable(mid, node, allowsFieldFlow, config) and + fromArg = false and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean preservesValue, AccessPathFront apf0 | + flowCandFwd(mid, fromArg, apf0, config) and + flowThroughMethod(mid, node, preservesValue, config) and + ( + preservesValue = true and apf = apf0 + or + preservesValue = false and + apf0 instanceof AccessPathFrontNil and + apf = TFrontNil(getErasedRepr(node.getType())) + ) + ) + ) + or + exists(Node mid, Content f | + flowCandFwd(mid, fromArg, _, config) and + store(mid, f, node) and + nodeCand(node, unbind(config)) and + apf.headUsesContent(f) + ) + or + exists(Node mid, Content f, AccessPathFront apf0 | + flowCandFwd(mid, fromArg, apf0, config) and + read(mid, f, node) and + nodeCand(node, config) and + apf0.headUsesContent(f) and + consCandFwd(f, apf, unbind(config)) + ) +} + +/** Holds if forward flow reaches the origin of a `store` of `f` with front `apf`, where `f` satisfies `readStoreCand` and the type of `apf` is compatible with the type of `f`. */ private predicate consCandFwd(Content f, AccessPathFront apf, Configuration config) { + exists(Node mid, Node n | + flowCandFwd(mid, _, apf, config) and + store(mid, f, n) and + nodeCand(n, unbind(config)) and + readStoreCand(f, unbind(config)) and + compatibleTypes(apf.getType(), f.getType()) + ) +} + +/** + * Holds if data can flow from a source to `node` with the given `apf` and + * from there flow to a sink.
+ */ +private predicate flowCand(Node node, boolean toReturn, AccessPathFront apf, Configuration config) { + flowCand0(node, toReturn, apf, config) and + flowCandFwd(node, _, apf, config) +} + +/** The recursive definition behind `flowCand`: `node` is a sink reached with a nil front, or `node` steps to a `mid` for which `flowCand` already holds. */ private predicate flowCand0(Node node, boolean toReturn, AccessPathFront apf, Configuration config) { + flowCandFwd(node, _, apf, config) and + config.isSink(node) and + toReturn = false and + apf instanceof AccessPathFrontNil + or + ( + exists(Node mid | + localFlowBigStep(node, mid, true, config) and + flowCand(mid, toReturn, apf, config) + ) + or + exists(Node mid, AccessPathFront apf0 | + flowCandFwd(node, _, apf, config) and + localFlowBigStep(node, mid, false, config) and + flowCand(mid, toReturn, apf0, config) and + apf0 instanceof AccessPathFrontNil and + apf instanceof AccessPathFrontNil + ) + or + exists(Node mid | + jumpStep(node, mid, true, config) and + flowCand(mid, _, apf, config) and + toReturn = false + ) + or + exists(Node mid, AccessPathFront apf0 | + flowCandFwd(node, _, apf, config) and + jumpStep(node, mid, false, config) and + flowCand(mid, _, apf0, config) and + toReturn = false and + apf0 instanceof AccessPathFrontNil and + apf instanceof AccessPathFrontNil + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowIntoCallable(node, mid, allowsFieldFlow, config) and + flowCand(mid, false, apf, config) and + toReturn = false and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowOutOfCallable(node, mid, allowsFieldFlow, config) and + flowCand(mid, _, apf, config) and + toReturn = true and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean preservesValue, AccessPathFront apf0 | + flowThroughMethod(node, mid, preservesValue, config) and + flowCand(mid, toReturn, apf0, config) and + ( + preservesValue = true and apf = apf0 + or + preservesValue = false and + apf0 instanceof AccessPathFrontNil and + apf instanceof AccessPathFrontNil
and + flowCandFwd(node, _, apf, config) + ) + ) + or + exists(Node mid, Content f, AccessPathFront apf0 | + store(node, f, mid) and + flowCand(mid, toReturn, apf0, config) and + apf0.headUsesContent(f) and + consCand(f, apf, unbind(config)) + ) + or + exists(Node mid, Content f, AccessPathFront apf0 | + read(node, f, mid) and + flowCand(mid, toReturn, apf0, config) and + consCandFwd(f, apf0, unbind(config)) and + apf.headUsesContent(f) + ) + ) +} + +/** Holds if `consCandFwd(f, apf, config)` holds and some `read` of `f`, from a node reached by forward flow with head `f`, leads to a node satisfying `flowCand` with front `apf`. */ private predicate consCand(Content f, AccessPathFront apf, Configuration config) { + consCandFwd(f, apf, config) and + exists(Node mid, Node n, AccessPathFront apf0 | + flowCandFwd(n, _, apf0, config) and + apf0.headUsesContent(f) and + read(n, f, mid) and + flowCand(mid, _, apf, config) + ) +} + +/** The representation of `AccessPath`: a nil path carrying a `Type`, or a head `Content` together with a length between 1 and 5. */ private newtype TAccessPath = + TNil(Type t) or + TCons(Content f, int len) { len in [1 .. 5] } + +/** + * Conceptually a list of `Content`s followed by a `Type`, but only the first + * element of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object.
+ */ +private class AccessPath extends TAccessPath { + /** Gets a textual representation of this access path. */ abstract string toString(); + + /** Gets the head `Content`, if this access path is a cons. */ Content getHead() { this = TCons(result, _) } + + /** Gets the length: 0 for a nil, otherwise the length recorded in the cons. */ int len() { + this = TNil(_) and result = 0 + or + this = TCons(_, result) + } + + /** Gets the type of a nil, or the container type of the head content. */ Type getType() { + this = TNil(result) + or + exists(Content head | this = TCons(head, _) | result = head.getContainerType()) + } + + /** Gets the front of this access path. */ abstract AccessPathFront getFront(); +} + +/** An empty access path, carrying only a `Type`. */ private class AccessPathNil extends AccessPath, TNil { + override string toString() { exists(Type t | this = TNil(t) | result = ppReprType(t)) } + + override AccessPathFront getFront() { exists(Type t | this = TNil(t) | result = TFrontNil(t)) } +} + +/** A non-empty access path, represented by its head `Content` and its length. */ private class AccessPathCons extends AccessPath, TCons { + override string toString() { + exists(Content f, int len | this = TCons(f, len) | + result = f.toString() + ", ... (" + len.toString() + ")" + ) + } + + override AccessPathFront getFront() { + exists(Content f | this = TCons(f, _) | result = TFrontHead(f)) + } +} + +/** Holds if `ap0` corresponds to the cons of `f` and `ap`. */ +private predicate pop(AccessPath ap0, Content f, AccessPath ap) { + ap0.getFront().headUsesContent(f) and + consCand(f, ap.getFront(), _) and + ap0.len() = 1 + ap.len() +} + +/** Holds if `ap0` corresponds to the cons of `f` and `ap` and `apf` is the front of `ap`. */ +pragma[noinline] +private predicate popWithFront(AccessPath ap0, Content f, AccessPathFront apf, AccessPath ap) { + pop(ap0, f, ap) and apf = ap.getFront() +} + +/** Holds if `ap` corresponds to the cons of `f` and `ap0`. */ +private predicate push(AccessPath ap0, Content f, AccessPath ap) { pop(ap, f, ap0) } + +/** + * Holds if data can flow from a source to `node` with the given `ap`.
+ */ +private predicate flowFwd( + Node node, boolean fromArg, AccessPathFront apf, AccessPath ap, Configuration config +) { + flowFwd0(node, fromArg, apf, ap, config) and + flowCand(node, _, apf, config) +} + +/** The recursive definition behind `flowFwd`, before the result is restricted to `flowCand(node, _, apf, config)`. */ private predicate flowFwd0( + Node node, boolean fromArg, AccessPathFront apf, AccessPath ap, Configuration config +) { + flowCand(node, _, _, config) and + config.isSource(node) and + fromArg = false and + ap = TNil(getErasedRepr(node.getType())) and + apf = ap.(AccessPathNil).getFront() + or + flowCand(node, _, _, unbind(config)) and + ( + exists(Node mid | + flowFwd(mid, fromArg, apf, ap, config) and + localFlowBigStep(mid, node, true, config) + ) + or + exists(Node mid, AccessPath ap0 | + flowFwd(mid, fromArg, _, ap0, config) and + localFlowBigStep(mid, node, false, config) and + ap0 instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) and + apf = ap.(AccessPathNil).getFront() + ) + or + exists(Node mid | + flowFwd(mid, _, apf, ap, config) and + jumpStep(mid, node, true, config) and + fromArg = false + ) + or + exists(Node mid, AccessPath ap0 | + flowFwd(mid, _, _, ap0, config) and + jumpStep(mid, node, false, config) and + fromArg = false and + ap0 instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) and + apf = ap.(AccessPathNil).getFront() + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowFwd(mid, _, apf, ap, config) and + flowIntoCallable(mid, node, allowsFieldFlow, config) and + fromArg = true and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowFwd(mid, false, apf, ap, config) and + flowOutOfCallable(mid, node, allowsFieldFlow, config) and + fromArg = false and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean preservesValue, AccessPathFront apf0, AccessPath ap0 | + flowFwd(mid, fromArg, apf0, ap0, config) and + flowThroughMethod(mid, node, preservesValue, config) and + ( + preservesValue =
true and ap = ap0 and apf = apf0 + or + preservesValue = false and + ap0 instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) and + apf = ap.(AccessPathNil).getFront() + ) + ) + ) + or + exists(Content f, AccessPath ap0 | + flowFwdStore(node, f, ap0, apf, fromArg, config) and + push(ap0, f, ap) + ) + or + exists(Content f, AccessPath ap0 | + flowFwdRead(node, f, ap0, fromArg, config) and + popWithFront(ap0, f, apf, ap) + ) +} + +/** Holds if forward flow reaches some `mid` with access path `ap0`, and a `store` of `f` leads from `mid` to `node`, where `apf` is the head `f`. */ pragma[nomagic] +private predicate flowFwdStore( + Node node, Content f, AccessPath ap0, AccessPathFront apf, boolean fromArg, Configuration config +) { + exists(Node mid, AccessPathFront apf0 | + flowFwd(mid, fromArg, apf0, ap0, config) and + flowFwdStoreAux(mid, f, node, apf0, apf, config) + ) +} + +/** Holds if `store(mid, f, node)` holds, `consCand(f, apf0, config)` holds, `apf` is the head `f`, and `node` satisfies `flowCand` with `apf`. */ private predicate flowFwdStoreAux( + Node mid, Content f, Node node, AccessPathFront apf0, AccessPathFront apf, Configuration config +) { + store(mid, f, node) and + consCand(f, apf0, config) and + apf.headUsesContent(f) and + flowCand(node, _, apf, unbind(config)) +} + +/** Holds if forward flow reaches some `mid` with access path `ap0` whose front is the head `f`, and a `read` of `f` leads from `mid` to `node`. */ pragma[nomagic] +private predicate flowFwdRead( + Node node, Content f, AccessPath ap0, boolean fromArg, Configuration config +) { + exists(Node mid, AccessPathFront apf0 | + flowFwd(mid, fromArg, apf0, ap0, config) and + read(mid, f, node) and + apf0.headUsesContent(f) and + flowCand(node, _, _, unbind(config)) + ) +} + +/** + * Holds if data can flow from a source to `node` with the given `ap` and + * from there flow to a sink.
+ */ +private predicate flow(Node node, boolean toReturn, AccessPath ap, Configuration config) { + flow0(node, toReturn, ap, config) and + flowFwd(node, _, _, ap, config) +} + +/** The recursive definition behind `flow`: `node` is a sink reached with a nil access path, or `node` steps to a `mid` for which `flow` already holds. */ private predicate flow0(Node node, boolean toReturn, AccessPath ap, Configuration config) { + flowFwd(node, _, _, ap, config) and + config.isSink(node) and + toReturn = false and + ap instanceof AccessPathNil + or + ( + exists(Node mid | + localFlowBigStep(node, mid, true, config) and + flow(mid, toReturn, ap, config) + ) + or + exists(Node mid, AccessPath ap0 | + flowFwd(node, _, _, ap, config) and + localFlowBigStep(node, mid, false, config) and + flow(mid, toReturn, ap0, config) and + ap0 instanceof AccessPathNil and + ap instanceof AccessPathNil + ) + or + exists(Node mid | + jumpStep(node, mid, true, config) and + flow(mid, _, ap, config) and + toReturn = false + ) + or + exists(Node mid, AccessPath ap0 | + flowFwd(node, _, _, ap, config) and + jumpStep(node, mid, false, config) and + flow(mid, _, ap0, config) and + toReturn = false and + ap0 instanceof AccessPathNil and + ap instanceof AccessPathNil + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowIntoCallable(node, mid, allowsFieldFlow, config) and + flow(mid, false, ap, config) and + toReturn = false and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowOutOfCallable(node, mid, allowsFieldFlow, config) and + flow(mid, _, ap, config) and + toReturn = true and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean preservesValue, AccessPath ap0 | + flowThroughMethod(node, mid, preservesValue, config) and + flow(mid, toReturn, ap0, config) and + ( + preservesValue = true and ap = ap0 + or + preservesValue = false and + ap0 instanceof AccessPathNil and + ap instanceof AccessPathNil and + flowFwd(node, _, _, ap, config) + ) + ) + or + exists(Content f, AccessPath ap0 | + flowStore(node, f, toReturn, ap0, config) and +
pop(ap0, f, ap) + ) + or + exists(Content f, AccessPath ap0 | + flowRead(node, f, toReturn, ap0, config) and + push(ap0, f, ap) + ) + ) +} + +/** Holds if a `store` of `f` leads from `node` to some `mid` satisfying `flow(mid, toReturn, ap0, config)`. */ pragma[nomagic] +private predicate flowStore( + Node node, Content f, boolean toReturn, AccessPath ap0, Configuration config +) { + exists(Node mid | + store(node, f, mid) and + flow(mid, toReturn, ap0, config) + ) +} + +/** Holds if a `read` of `f` leads from `node` to some `mid` satisfying `flow(mid, toReturn, ap0, config)`. */ pragma[nomagic] +private predicate flowRead( + Node node, Content f, boolean toReturn, AccessPath ap0, Configuration config +) { + exists(Node mid | + read(node, f, mid) and + flow(mid, toReturn, ap0, config) + ) +} + +/** Gets a `Configuration` equal to `conf`; the equality is spelled as a pair of inequalities rather than `=`. NOTE(review): presumably this is meant to influence how the optimiser joins on the configuration column - confirm. */ bindingset[conf, result] +private Configuration unbind(Configuration conf) { result >= conf and result <= conf } + +/** Holds if `flow(n, _, _, config)` holds for some return flag and access path. */ private predicate flow(Node n, Configuration config) { flow(n, _, _, config) } + +/** The representation of `PathNode`: intermediate nodes and sink nodes. */ private newtype TPathNode = + TPathNodeMid(Node node, CallContext cc, AccessPath ap, Configuration config) { + // A PathNode is introduced by a source ... + flow(node, config) and + config.isSource(node) and + cc instanceof CallContextAny and + ap = TNil(getErasedRepr(node.getType())) + or + // ... or a step from an existing PathNode to another node. + exists(PathNodeMid mid | + flowStep(mid, node, cc, ap) and + config = mid.getConfiguration() and + flow(node, _, ap, unbind(config)) + ) + } or + TPathNodeSink(Node node, Configuration config) { + // The AccessPath on a sink is empty. + config.isSink(node) and + flow(node, config) + } + +/** + * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +abstract class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { result = getNode().toString() + ppAp() } + + /** Gets the source location for this element. */ + Location getLocation() { result = getNode().getLocation() } + + /** Gets the underlying `Node`. */ + abstract Node getNode(); + + /** Gets the associated configuration.
*/ + abstract Configuration getConfiguration(); + + /** Gets a successor. */ + abstract PathNode getSucc(); + + /** Gets the access path suffix appended by `toString`; this is the empty string for sinks. */ private string ppAp() { + this instanceof PathNodeSink and result = "" + or + result = " [" + this.(PathNodeMid).getAp().toString() + "]" + } +} + +/** Holds if `n` can reach a sink. */ +private predicate reach(PathNode n) { n instanceof PathNodeSink or reach(n.getSucc()) } + +/** Holds if `n1.getSucc() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNode n1, PathNode n2) { n1.getSucc() = n2 and reach(n2) } + +/** Holds if `n2` is reachable from `n1` via one or more `pathSucc` edges. */ private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PathNode a, PathNode b) { pathSucc(a, b) } +} + +/** + * An intermediate flow graph node. This is a tuple consisting of a `Node`, + * a `CallContext`, an `AccessPath`, and a `Configuration`.
+ */ +private class PathNodeMid extends PathNode, TPathNodeMid { + Node node; + + CallContext cc; + + AccessPath ap; + + Configuration config; + + PathNodeMid() { this = TPathNodeMid(node, cc, ap, config) } + + override Node getNode() { result = node } + + /** Gets the call context of this node. */ CallContext getCallContext() { result = cc } + + /** Gets the access path of this node. */ AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + /** Gets an intermediate node reached from this node by one `flowStep` under the same configuration. */ private PathNodeMid getSuccMid() { + flowStep(this, result.getNode(), result.getCallContext(), result.getAp()) and + result.getConfiguration() = unbind(this.getConfiguration()) + } + + override PathNode getSucc() { + // an intermediate step to another intermediate node + result = getSuccMid() + or + // a final step to a sink via one or more local steps + localFlowStepPlus(node, result.getNode(), _, config) and + ap instanceof AccessPathNil and + result instanceof PathNodeSink and + result.getConfiguration() = unbind(this.getConfiguration()) + or + // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges + exists(PathNodeMid mid | + mid = getSuccMid() and + mid.getNode() = result.getNode() and + mid.getAp() instanceof AccessPathNil and + result instanceof PathNodeSink and + result.getConfiguration() = unbind(mid.getConfiguration()) + ) + or + // a direct step from a source to a sink if a node is both + this instanceof PathNodeSource and + result instanceof PathNodeSink and + this.getNode() = result.getNode() and + result.getConfiguration() = unbind(this.getConfiguration()) + } +} + +/** + * A flow graph node corresponding to a source. + */ +private class PathNodeSource extends PathNodeMid { + PathNodeSource() { + getConfiguration().isSource(getNode()) and + getCallContext() instanceof CallContextAny and + getAp() instanceof AccessPathNil + } +} + +/** + * A flow graph node corresponding to a sink.
This is disjoint from the + * intermediate nodes in order to uniquely correspond to a given sink by + * excluding the `CallContext`. + */ +private class PathNodeSink extends PathNode, TPathNodeSink { + Node node; + + Configuration config; + + PathNodeSink() { this = TPathNodeSink(node, config) } + + override Node getNode() { result = node } + + override Configuration getConfiguration() { result = config } + + /** Has no result: a sink node has no successors. */ override PathNode getSucc() { none() } +} + +/** + * Holds if data may flow from `mid` to `node`. The last step in or out of + * a callable is recorded by `cc`. The access path at `node` is `ap`. + */ +private predicate flowStep(PathNodeMid mid, Node node, CallContext cc, AccessPath ap) { + localFlowBigStep(mid.getNode(), node, true, mid.getConfiguration()) and + cc = mid.getCallContext() and + ap = mid.getAp() + or + localFlowBigStep(mid.getNode(), node, false, mid.getConfiguration()) and + cc = mid.getCallContext() and + mid.getAp() instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) + or + jumpStep(mid.getNode(), node, true, mid.getConfiguration()) and + cc instanceof CallContextAny and + ap = mid.getAp() + or + jumpStep(mid.getNode(), node, false, mid.getConfiguration()) and + cc instanceof CallContextAny and + mid.getAp() instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) + or + contentReadStep(mid, node, ap) and cc = mid.getCallContext() + or + exists(Content f, AccessPath ap0 | contentStoreStep(mid, node, ap0, f, cc) and push(ap0, f, ap)) + or + flowOutOfArgument(mid, node, cc) and ap = mid.getAp() + or + flowIntoCallable(mid, node, _, cc, _) and ap = mid.getAp() + or + flowOutOfMethod(mid, node.asExpr(), cc) and ap = mid.getAp() + or + flowThroughMethod(mid, node.asExpr(), cc) and ap = TNil(getErasedRepr(node.getType())) + or + valueFlowThroughMethod(mid, node.asExpr(), cc) and ap = mid.getAp() +} + +/** Holds if a `read` of some content `f` leads from `mid` to `node`, and popping `f` off the access path of `mid` gives `ap`. */ private predicate contentReadStep(PathNodeMid mid, Node node, AccessPath ap) { + exists(Content f, AccessPath ap0 | + ap0 = mid.getAp() and +
read(mid.getNode(), f, node) and + pop(ap0, f, ap) + ) +} + +/** Holds if a `store` of `f` leads from `mid` to `node`, where `ap0` and `cc` are the access path and call context of `mid`. */ pragma[noinline] +private predicate contentStoreStep( + PathNodeMid mid, Node node, AccessPath ap0, Content f, CallContext cc +) { + ap0 = mid.getAp() and + store(mid.getNode(), f, node) and + cc = mid.getCallContext() +} + +/** + * Holds if data may flow from `mid` to an exit of `m` in the context + * `innercc`, and the path did not flow through a parameter of `m`. + */ +private predicate flowOutOfMethod0(PathNodeMid mid, Method m, CallContext innercc) { + exists(ReturnNode ret | + ret = mid.getNode() and + innercc = mid.getCallContext() and + m = returnNodeGetEnclosingCallable(ret) and + not innercc instanceof CallContextCall + ) +} + +/** + * Holds if data may flow from `mid` to `ma`. The last step of this path + * is a return from a method and is recorded by `cc`, if needed. + */ +pragma[noinline] +private predicate flowOutOfMethod(PathNodeMid mid, MethodAccess ma, CallContext cc) { + exists(Method m, CallContext innercc | + flowOutOfMethod0(mid, m, innercc) and + resolveReturn(innercc, m, ma) + | + if reducedViableImplInReturn(m, ma) then cc = TReturn(m, ma) else cc = TAnyCallContext() + ) +} + +/** Holds if `mid` is a post-update node that a parameter flows to by value (`parameterValueFlowsToUpdate`), and `node` is the post-update node of the corresponding argument of a call that `resolveReturn` accepts for the call context of `mid`. The return is recorded by `cc`, if needed. */ private predicate flowOutOfArgument(PathNodeMid mid, PostUpdateNode node, CallContext cc) { + exists( + PostUpdateNode n, ParameterNode p, Callable callable, CallContext innercc, int i, Call call, + ArgumentNode arg + | + mid.getNode() = n and + parameterValueFlowsToUpdate(p, n) and + innercc = mid.getCallContext() and + p.isParameterOf(callable, i) and + resolveReturn(innercc, callable, call) and + node.getPreUpdateNode() = arg and + arg.argumentOf(call, i) and + flow(node, unbind(mid.getConfiguration())) + | + if reducedViableImplInReturn(callable, call) + then cc = TReturn(callable, call) + else cc = TAnyCallContext() + ) +} + +/** + * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`. `emptyAp` is `true` if the access path of `mid` is nil and `false` if it is a cons.
+ */ +pragma[noinline] +private predicate flowIntoArg(PathNodeMid mid, int i, CallContext cc, Call call, boolean emptyAp) { + exists(ArgumentNode arg, AccessPath ap | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() + | + ap instanceof AccessPathNil and emptyAp = true + or + ap instanceof AccessPathCons and emptyAp = false + ) +} + +/** Holds if the `i`th parameter of `callable` satisfies `flow` for `config`. */ pragma[noinline] +private predicate parameterCand(Callable callable, int i, Configuration config) { + exists(ParameterNode p | + flow(p, config) and + p.isParameterOf(callable, i) + ) +} + +/** Holds if `mid` is the `i`th argument of `call` in context `outercc`, `callable` is a target of `call` given by `resolveCall`, and the `i`th parameter of `callable` is a `parameterCand`. */ pragma[nomagic] +private predicate flowIntoCallable0( + PathNodeMid mid, Callable callable, int i, CallContext outercc, Call call, boolean emptyAp +) { + flowIntoArg(mid, i, outercc, call, emptyAp) and + callable = resolveCall(call, outercc) and + parameterCand(callable, any(int j | j <= i and j >= i), mid.getConfiguration()) +} + +/** + * Holds if data may flow from `mid` to `p` through `call`. The contexts + * before and after entering the callable are `outercc` and `innercc`, + * respectively. + */ +private predicate flowIntoCallable( + PathNodeMid mid, ParameterNode p, CallContext outercc, CallContextCall innercc, Call call +) { + exists(int i, Callable callable, boolean emptyAp | + flowIntoCallable0(mid, callable, i, outercc, call, emptyAp) and + p.isParameterOf(callable, i) + | + if reducedViableImplInCallContext(_, callable, call) + then innercc = TSpecificCall(call, i, emptyAp) + else innercc = TSomeCall(p, emptyAp) + ) +} + +/** Holds if data may flow from `p` to a return statement in the callable.
*/ +pragma[nomagic] +private predicate paramFlowsThrough(ParameterNode p, CallContextCall cc, Configuration config) { + exists(PathNodeMid mid, ReturnNode ret | + mid.getNode() = ret and + cc = mid.getCallContext() and + config = mid.getConfiguration() and + mid.getAp() instanceof AccessPathNil + | + cc = TSomeCall(p, true) + or + exists(int i | cc = TSpecificCall(_, i, true) | + p.isParameterOf(returnNodeGetEnclosingCallable(ret), i) + ) + ) +} + +/** + * Holds if data may flow from `mid` to an argument of `methodcall`, + * through a called method `m`, and back out through a return statement in + * `m`. The context `cc` is restored to its value prior to entering `m`. + */ +pragma[noinline] +private predicate flowThroughMethod(PathNodeMid mid, Call methodcall, CallContext cc) { + exists(ParameterNode p, CallContext innercc | + flowIntoCallable(mid, p, cc, innercc, methodcall) and + paramFlowsThrough(p, innercc, unbind(mid.getConfiguration())) and + not parameterValueFlowsThrough(p) and + mid.getAp() instanceof AccessPathNil + ) +} + +/** Holds if data may flow from `mid` into `methodcall` through a parameter `p` for which `parameterValueFlowsThrough(p)` holds. The context `cc` is the one in effect before the call. */ private predicate valueFlowThroughMethod(PathNodeMid mid, Call methodcall, CallContext cc) { + exists(ParameterNode p | + flowIntoCallable(mid, p, cc, _, methodcall) and + parameterValueFlowsThrough(p) + ) +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. Here `flowsource` and `flowsink` are the path nodes for `source` and `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +private predicate flowsTo( + PathNodeSource flowsource, PathNodeSink flowsink, Node source, Node sink, + Configuration configuration +) { + flowsource.getConfiguration() = configuration and + flowsource.getNode() = source and + pathSuccPlus(flowsource, flowsink) and + flowsink.getNode() = sink +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks.
+ */ +predicate flowsTo(Node source, Node sink, Configuration configuration) { + flowsTo(_, _, source, sink, configuration) +} diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll new file mode 100644 index 000000000000..02c9919723b5 --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll @@ -0,0 +1,284 @@ +import DataFlowUtil +private import DataFlowPrivate +private import DataFlowDispatch + +/** Provides cached helper predicates (such as `store`, `read`, and `viableParamArg`) and the `TCallContext` type. */ cached +private module ImplCommon { + /** + * Holds if `p` is the `i`th parameter of a viable dispatch target of `call`. + * The instance parameter is considered to have index `-1`. + */ + pragma[nomagic] + private predicate viableParam(Call call, int i, ParameterNode p) { + exists(Callable callable | + callable = viableCallable(call) and + p.isParameterOf(callable, i) + ) + } + + /** + * Holds if `arg` is a possible argument to `p` taking virtual dispatch into account. + */ + cached + predicate viableParamArg(ParameterNode p, ArgumentNode arg) { + exists(int i, Call call | + viableParam(call, i, p) and + arg.argumentOf(call, i) + ) + } + + /** + * Holds if `p` can flow to `node` in the same callable using only + * value-preserving steps. Each step also requires the type of `node` to be compatible with the type of `p`. + */ + private predicate parameterValueFlow(ParameterNode p, Node node) { + p = node + or + exists(Node mid | + parameterValueFlow(p, mid) and + localFlowStep(mid, node) and + compatibleTypes(p.getType(), node.getType()) + ) + or + // flow through a callable + exists(Node arg | + parameterValueFlow(p, arg) and + argumentValueFlowsThrough(arg, node) and + compatibleTypes(p.getType(), node.getType()) + ) + } + + /** + * Holds if `p` can flow to a `ReturnNode` in the same callable using only + * value-preserving steps.
+ */ + cached + predicate parameterValueFlowsThrough(ParameterNode p) { + exists(ReturnNode ret | parameterValueFlow(p, ret)) + } + + /** + * Holds if `arg` flows through `call` using only value-preserving steps. The types of `arg` and `call` must be compatible. + */ + cached + predicate argumentValueFlowsThrough(ArgumentNode arg, ExprNode call) { + exists(ParameterNode param | + viableParamArg(param, arg) and + parameterValueFlowsThrough(param) and + arg.argumentOf(call.getExpr(), _) and + compatibleTypes(arg.getType(), call.getType()) + ) + } + + /** + * Holds if `p` can flow to the pre-update node of `n` in the same callable + * using only value-preserving steps. + */ + cached + predicate parameterValueFlowsToUpdate(ParameterNode p, PostUpdateNode n) { + parameterValueFlow(p, n.getPreUpdateNode()) + } + + /** + * Holds if data can flow from `node1` to `node2` in one local step or a step + * through a value-preserving method. + */ + private predicate localValueStep(Node node1, Node node2) { + localFlowStep(node1, node2) or + argumentValueFlowsThrough(node1, node2) + } + + /* + * Calculation of `predicate store(Node node1, Content f, Node node2)`: + * There are three cases: + * - The base case: A direct local assignment given by `storeStep`. + * - A call to a method or constructor with two arguments, `arg1` and `arg2`, + * such that the call has the side-effect `arg2.f = arg1`. + * - A call to a method that returns an object in which an argument has been + * stored. + * `storeViaSideEffect` covers the first two cases, and `storeReturn` covers + * the third case. + */ + + /** + * Holds if data can flow from `node1` to `node2` via a direct assignment to + * `f` or via a call that acts as a setter.
+ */ + cached + predicate store(Node node1, Content f, Node node2) { + storeViaSideEffect(node1, f, node2) or + storeReturn(node1, f, node2) + } + + /** Holds if `node1` is stored into `f` of the pre-update node of `node2`, either by a `storeStep` (only counted when some `readStep` of `f` exists) or by a call that `setterCall` identifies as a setter. */ private predicate storeViaSideEffect(Node node1, Content f, PostUpdateNode node2) { + storeStep(node1, f, node2) and readStep(_, f, _) + or + exists(Call call, int i1, int i2 | + setterCall(call, i1, i2, f) and + node1.(ArgumentNode).argumentOf(call, i1) and + node2.getPreUpdateNode().(ArgumentNode).argumentOf(call, i2) and + compatibleTypes(node1.getTypeBound(), f.getType()) and + compatibleTypes(node2.getTypeBound(), f.getContainerType()) + ) + } + + /** Holds if the value of parameter `p1` is stored into `f` of an object that the distinct parameter `p2` flows to by value. */ pragma[nomagic] + private predicate setterInParam(ParameterNode p1, Content f, ParameterNode p2) { + exists(Node n1, PostUpdateNode n2 | + parameterValueFlow(p1, n1) and + storeViaSideEffect(n1, f, n2) and + parameterValueFlow(p2, n2.getPreUpdateNode()) and + p1 != p2 + ) + } + + /** Holds if a viable target of `call` has parameters at positions `i1` and `i2` for which `setterInParam` holds with content `f`. */ pragma[nomagic] + private predicate setterCall(Call call, int i1, int i2, Content f) { + exists(Callable callable, ParameterNode p1, ParameterNode p2 | + setterInParam(p1, f, p2) and + callable = viableCallable(call) and + p1.isParameterOf(callable, i1) and + p2.isParameterOf(callable, i2) + ) + } + + /** Holds if `node1` is an argument of the call `node2`, and the corresponding parameter satisfies `setterReturn` for `f`; the types of both nodes must be compatible with `f`. */ private predicate storeReturn(Node node1, Content f, Node node2) { + exists(ParameterNode p, ArgumentNode arg | + arg = node1 and + viableParamArg(p, arg) and + setterReturn(p, f) and + arg.argumentOf(node2.asExpr(), _) and + compatibleTypes(node1.getTypeBound(), f.getType()) and + compatibleTypes(node2.getTypeBound(), f.getContainerType()) + ) + } + + /** Holds if the value of `p` is stored into `f` of a node that reaches a `ReturnNode` through zero or more `localValueStep`s. */ private predicate setterReturn(ParameterNode p, Content f) { + exists(Node n1, Node n2, ReturnNode ret | + parameterValueFlow(p, n1) and + store(n1, f, n2) and + localValueStep*(n2, ret) + ) + } + + /** + * Holds if data can flow from `node1` to `node2` via a direct read of `f` or + * via a getter.
+ */ + cached + predicate read(Node node1, Content f, Node node2) { + readStep(node1, f, node2) and storeStep(_, f, _) + or + exists(ParameterNode p, ArgumentNode arg | + arg = node1 and + viableParamArg(p, arg) and + getter(p, f) and + arg.argumentOf(node2.asExpr(), _) and + compatibleTypes(node1.getTypeBound(), f.getContainerType()) and + compatibleTypes(node2.getTypeBound(), f.getType()) + ) + } + + /** Holds if `f` is read from a node that `p` flows to by value, and the result of that read reaches a `ReturnNode` through zero or more `localValueStep`s. */ private predicate getter(ParameterNode p, Content f) { + exists(Node n1, Node n2, ReturnNode ret | + parameterValueFlow(p, n1) and + read(n1, f, n2) and + localValueStep*(n2, ret) + ) + } + + /** Holds if `node1` is stored into some content that is read back into `node2`, with zero or more `localValueStep`s between the `store` and the `read`. */ cached + predicate localStoreReadStep(Node node1, Node node2) { + exists(Node mid1, Node mid2, Content f | + store(node1, f, mid1) and + localValueStep*(mid1, mid2) and + read(mid2, f, node2) + ) + } + + /** + * Holds if `call` passes an implicit or explicit instance argument, i.e., an + * expression that reaches a `this` parameter. + */ + private predicate callHasInstanceArgument(Call call) { + exists(ArgumentNode arg | arg.argumentOf(call, -1)) + } + + cached + newtype TCallContext = + TAnyCallContext() or + TSpecificCall(Call call, int i, boolean emptyAp) { + reducedViableImplInCallContext(_, _, call) and + (emptyAp = true or emptyAp = false) and + ( + exists(call.getArgument(i)) + or + i = -1 and callHasInstanceArgument(call) + ) + } or + TSomeCall(ParameterNode p, boolean emptyAp) { emptyAp = true or emptyAp = false } or + TReturn(Method m, MethodAccess ma) { reducedViableImplInReturn(m, ma) } +} +import ImplCommon + +/** + * A call context to restrict the targets of virtual dispatch and match the + * call sites of flow into a method with flow out of a method. + * + * There are four cases: + * - `TAnyCallContext()` : No restrictions on method flow. + * - `TSpecificCall(Call call, int i, boolean emptyAp)` : Flow entered through the `i`th + * parameter at the given `call`. This call improves the set of viable + * dispatch targets for at least one method call in the current callable.
+ * - `TSomeCall(ParameterNode p)` : Flow entered through parameter `p`. The + * originating call does not improve the set of dispatch targets for any + * method call in the current callable and was therefore not recorded. + * - `TReturn(Method m, MethodAccess ma)` : Flow reached `ma` from `m` and + * this dispatch target of `ma` implies a reduced set of dispatch origins + * to which data may flow if it should reach a `return` statement. + */ +abstract class CallContext extends TCallContext { abstract string toString(); } + +class CallContextAny extends CallContext, TAnyCallContext { + override string toString() { result = "CcAny" } +} + +abstract class CallContextCall extends CallContext { } + +class CallContextSpecificCall extends CallContextCall, TSpecificCall { + override string toString() { result = "CcCall" } +} + +class CallContextSomeCall extends CallContextCall, TSomeCall { + override string toString() { result = "CcSomeCall" } +} + +class CallContextReturn extends CallContext, TReturn { + override string toString() { result = "CcReturn" } +} + +bindingset[cc, callable] +predicate resolveReturn(CallContext cc, Callable callable, Call call) { + cc instanceof CallContextAny and callable = viableCallable(call) + or + exists(Method m0, MethodAccess ma0 | + ma0.getEnclosingCallable() = callable and + cc = TReturn(m0, ma0) and + m0 = prunedViableImplInCallContextReverse(ma0, call) + ) +} + +bindingset[call, cc] +Callable resolveCall(Call call, CallContext cc) { + exists(Call ctx | cc = TSpecificCall(ctx, _, _) | + if reducedViableImplInCallContext(call, _, ctx) + then result = prunedViableImplInCallContext(call, ctx) + else result = viableCallable(call) + ) + or + result = viableCallable(call) and cc instanceof CallContextSomeCall + or + result = viableCallable(call) and cc instanceof CallContextAny + or + result = viableCallable(call) and cc instanceof CallContextReturn +} diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll 
b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll new file mode 100644 index 000000000000..e9c1247d41c3 --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -0,0 +1,189 @@ +private import cpp +private import DataFlowUtil + +/** + * A data flow node that occurs as the argument of a call and is passed as-is + * to the callable. Arguments that are wrapped in an implicit varargs array + * creation are not included, but the implicitly created array is. + * Instance arguments are also included. + */ +class ArgumentNode extends Node { + ArgumentNode() { + exists(CallInstruction call | + this = call.getAnArgument() + ) + } + + /** + * Holds if this argument occurs at the given position in the given call. + * The instance argument is considered to have index `-1`. + */ + predicate argumentOf(Call call, int pos) { + exists (CallInstruction callInstr | + callInstr.getAST() = call and + ( + this = callInstr.getPositionalArgument(pos) or + this = callInstr.getThisArgument() and pos = -1 + ) + ) + } +} + +/** A data flow node that occurs as the result of a `ReturnStmt`. */ +class ReturnNode extends Node { + ReturnNode() { + exists(ReturnValueInstruction ret | this = ret.getReturnValue() ) + } +} + +/** + * Holds if data can flow from `node1` to `node2` in a way that loses the + * calling context. For example, this would happen with flow through a + * global or static variable. + */ +predicate jumpStep(Node n1, Node n2) { + none() +} + +/** + * Holds if `call` does not pass an implicit or explicit qualifier, i.e., a + * `this` parameter. + */ +predicate callHasQualifier(Call call) { + call.hasQualifier() + or + call.getTarget() instanceof Destructor +} + +private newtype TContent = TFieldContent(Field f) or TCollectionContent() or TArrayContent() + +/** + * A reference contained in an object. Examples include instance fields, the + * contents of a collection object, or the contents of an array. 
+ */ +class Content extends TContent { + /** Gets a textual representation of this element. */ + abstract string toString(); + predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) { + path = "" and sl = 0 and sc = 0 and el = 0 and ec = 0 + } + /** Gets the type of the object containing this content. */ + abstract RefType getContainerType(); + /** Gets the type of this content. */ + abstract Type getType(); +} +private class FieldContent extends Content, TFieldContent { + Field f; + FieldContent() { this = TFieldContent(f) } + Field getField() { result = f } + override string toString() { result = f.toString() } + override predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) { + f.getLocation().hasLocationInfo(path, sl, sc, el, ec) + } + override RefType getContainerType() { result = f.getDeclaringType() } + override Type getType() { result = f.getType() } +} +private class CollectionContent extends Content, TCollectionContent { + override string toString() { result = "collection" } + override RefType getContainerType() { none() } + override Type getType() { none() } +} +private class ArrayContent extends Content, TArrayContent { + override string toString() { result = "array" } + override RefType getContainerType() { none() } + override Type getType() { none() } +} + +/** + * Holds if data can flow from `node1` to `node2` via an assignment to `f`. + * Thus, `node2` references an object with a field `f` that contains the + * value of `node1`. + */ +predicate storeStep(Node node1, Content f, PostUpdateNode node2) { + none() // stub implementation +} + +/** + * Holds if data can flow from `node1` to `node2` via a read of `f`. + * Thus, `node1` references an object with a field `f` whose value ends up in + * `node2`. + */ +predicate readStep(Node node1, Content f, Node node2) { + none() // stub implementation +} + +/** + * Gets a representative (boxed) type for `t` for the purpose of pruning + * possible flow. 
A single type is used for all numeric types to account for + * numeric conversions, and otherwise the erasure is used. + */ +RefType getErasedRepr(Type t) { + suppressUnusedType(t) and + result instanceof VoidType // stub implementation +} + +/** Gets a string representation of a type returned by `getErasedRepr`. */ +string ppReprType(Type t) { + result = t.toString() +} + +/** + * Holds if `t1` and `t2` are compatible, that is, whether data can flow from + * a node of type `t1` to a node of type `t2`. + */ +pragma[inline] +predicate compatibleTypes(Type t1, Type t2) { + any() // stub implementation +} + +private predicate suppressUnusedType(Type t) { any() } + +////////////////////////////////////////////////////////////////////////////// +// Java QL library compatibility wrappers +////////////////////////////////////////////////////////////////////////////// + +class RefType extends Type { +} + +class CastExpr extends Expr { + CastExpr() { none() } // stub implementation +} + +/** An argument to a call. */ +class Argument extends Expr { + Call call; + int pos; + + Argument() { + call.getArgument(pos) = this + } + + /** Gets the call that has this argument. */ + Call getCall() { result = call } + + /** Gets the position of this argument. */ + int getPosition() { + result = pos + } +} + +class Callable extends Function { } + +/** + * An alias for `Function` in the C++ library. In the Java library, a `Method` + * is any callable except a constructor. + */ +class Method extends Function { } + +/** + * An alias for `FunctionCall` in the C++ library. In the Java library, a + * `MethodAccess` is any `Call` that does not call a constructor. + */ +class MethodAccess extends FunctionCall { + /** + * INTERNAL: Do not use. Alternative name for `getEnclosingFunction`. 
+ */ + Callable getEnclosingCallable() { + result = this.getEnclosingFunction() + } +} diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll new file mode 100644 index 000000000000..7b90906ef76b --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -0,0 +1,143 @@ +/** + * Provides C++-specific definitions for use in the data flow library. + */ +import cpp +import semmle.code.cpp.ir.IR + +/** + * A node in a data flow graph. + * + * A node can be either an expression, a parameter, or an uninitialized local + * variable. Such nodes are created with `DataFlow::exprNode`, + * `DataFlow::parameterNode`, and `DataFlow::uninitializedNode` respectively. +*/ +class Node extends Instruction { + /** + * INTERNAL: Do not use. Alternative name for `getFunction`. + */ + Function getEnclosingCallable() { + result = this.getFunction() + } + + /** Gets the type of this node. */ + Type getType() { + result = this.asExpr().getType() + or + result = this.getAST().(Variable).getType() + } + + /** Gets the expression corresponding to this node, if any. */ + Expr asExpr() { result = this.getUnconvertedResultExpression() } + + /** Gets the parameter corresponding to this node, if any. */ + Parameter asParameter() { result = this.(ParameterNode).getParameter() } + + /** + * Gets the uninitialized local variable corresponding to this node, if + * any. + */ + LocalVariable asUninitialized() { + result = this.(UninitializedNode).getLocalVariable() + } + + /** + * Gets an upper bound on the type of this node. + */ + Type getTypeBound() { result = getType() } +} + +/** + * An expression, viewed as a node in a data flow graph. + */ +class ExprNode extends Node { + ExprNode() { getAST() instanceof Expr } + Expr getExpr() { result = getAST() } +} + +/** + * The value of a parameter at function entry, viewed as a node in a data + * flow graph. 
+ */ +class ParameterNode extends Node, InitializeParameterInstruction { + /** + * Holds if this node is the parameter of `c` at the specified (zero-based) + * position. The implicit `this` parameter is considered to have index `-1`. + */ + predicate isParameterOf(Function f, int i) { + f.getParameter(i) = getParameter() + } +} + +/** + * The value of an uninitialized local variable, viewed as a node in a data + * flow graph. + */ +class UninitializedNode extends Node, UninitializedInstruction { + /** Gets the uninitialized local variable corresponding to this node. */ + LocalVariable getLocalVariable() { result = this.getAST().(VariableDeclarationEntry).getDeclaration()} +} + +/** + * A node associated with an object after an operation that might have + * changed its state. + * + * This can be either the argument to a callable after the callable returns + * (which might have mutated the argument), or the qualifier of a field after + * an update to the field. + * + * Nodes corresponding to AST elements, for example `ExprNode`, usually refer + * to the value before the update with the exception of `ClassInstanceExpr`, + * which represents the value after the constructor has run. + */ +abstract class PostUpdateNode extends Node { + /** + * Gets the node before the state update. + */ + abstract Node getPreUpdateNode(); +} + +class StoreDestinationAsPostUpdateNode extends PostUpdateNode { + StoreInstruction si; + StoreDestinationAsPostUpdateNode() { + this = si.getDestinationAddress() + } + + override Node getPreUpdateNode() { + result = si.getDestinationAddress() + } +} + +/** + * Gets the `Node` corresponding to `e`. + */ +ExprNode exprNode(Expr e) { result.getExpr() = e } + +/** + * Gets the `Node` corresponding to the value of `p` at function entry. + */ +ParameterNode parameterNode(Parameter p) { result.getParameter() = p } + +/** + * Gets the `Node` corresponding to the value of an uninitialized local + * variable `v`. 
+ */ +UninitializedNode uninitializedNode(LocalVariable v) { + result.getLocalVariable() = v +} + +/** + * Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local + * (intra-procedural) step. + */ +predicate localFlowStep(Node nodeFrom, Node nodeTo) { + nodeTo.(CopyInstruction).getSourceValue() = nodeFrom or + nodeTo.(PhiInstruction).getAnOperand().getDefinitionInstruction() = nodeFrom +} + +/** + * Holds if data flows from `source` to `sink` in zero or more local + * (intra-procedural) steps. + */ +predicate localFlow(Node source, Node sink) { + localFlowStep*(source, sink) +} diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll index de3087770ea5..36722d4372ae 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll @@ -1106,9 +1106,39 @@ class CallInstruction extends Instruction { opcode instanceof Opcode::Call } + /** + * Gets the `Instruction` that computes the target function of the call. This is usually a + * `FunctionAddress` instruction, but can also be an arbitrary instruction that produces a + * function pointer. + */ final Instruction getCallTarget() { result = getAnOperand().(CallTargetOperand).getDefinitionInstruction() } + + /** + * Gets all of the arguments of the call, including the `this` pointer, if any. + */ + final Instruction getAnArgument() { + result = getAnOperand().(ArgumentOperand).getDefinitionInstruction() + } + + /** + * Gets the `this` pointer argument of the call, if any. + */ + final Instruction getThisArgument() { + result = getAnOperand().(ThisArgumentOperand).getDefinitionInstruction() + } + + /** + * Gets the argument at the specified index. 
+ */ + final Instruction getPositionalArgument(int index) { + exists(PositionalArgumentOperand operand | + operand = getAnOperand() and + operand.getIndex() = index and + result = operand.getDefinitionInstruction() + ) + } } /** diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll index 6fc6af477fa1..edf8e141764e 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll @@ -304,6 +304,13 @@ class PositionalArgumentOperand extends ArgumentOperand { override string toString() { result = "Arg(" + argIndex + ")" } + + /** + * Gets the zero-based index of the argument. + */ + final int getIndex() { + result = argIndex + } } class SideEffectOperand extends NonPhiOperand { diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll index de3087770ea5..36722d4372ae 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll @@ -1106,9 +1106,39 @@ class CallInstruction extends Instruction { opcode instanceof Opcode::Call } + /** + * Gets the `Instruction` that computes the target function of the call. This is usually a + * `FunctionAddress` instruction, but can also be an arbitrary instruction that produces a + * function pointer. + */ final Instruction getCallTarget() { result = getAnOperand().(CallTargetOperand).getDefinitionInstruction() } + + /** + * Gets all of the arguments of the call, including the `this` pointer, if any. + */ + final Instruction getAnArgument() { + result = getAnOperand().(ArgumentOperand).getDefinitionInstruction() + } + + /** + * Gets the `this` pointer argument of the call, if any. 
+ */ + final Instruction getThisArgument() { + result = getAnOperand().(ThisArgumentOperand).getDefinitionInstruction() + } + + /** + * Gets the argument at the specified index. + */ + final Instruction getPositionalArgument(int index) { + exists(PositionalArgumentOperand operand | + operand = getAnOperand() and + operand.getIndex() = index and + result = operand.getDefinitionInstruction() + ) + } } /** diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Operand.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Operand.qll index 6fc6af477fa1..edf8e141764e 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Operand.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Operand.qll @@ -304,6 +304,13 @@ class PositionalArgumentOperand extends ArgumentOperand { override string toString() { result = "Arg(" + argIndex + ")" } + + /** + * Gets the zero-based index of the argument. + */ + final int getIndex() { + result = argIndex + } } class SideEffectOperand extends NonPhiOperand { diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll index de3087770ea5..36722d4372ae 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll @@ -1106,9 +1106,39 @@ class CallInstruction extends Instruction { opcode instanceof Opcode::Call } + /** + * Gets the `Instruction` that computes the target function of the call. This is usually a + * `FunctionAddress` instruction, but can also be an arbitrary instruction that produces a + * function pointer. + */ final Instruction getCallTarget() { result = getAnOperand().(CallTargetOperand).getDefinitionInstruction() } + + /** + * Gets all of the arguments of the call, including the `this` pointer, if any. 
+ */ + final Instruction getAnArgument() { + result = getAnOperand().(ArgumentOperand).getDefinitionInstruction() + } + + /** + * Gets the `this` pointer argument of the call, if any. + */ + final Instruction getThisArgument() { + result = getAnOperand().(ThisArgumentOperand).getDefinitionInstruction() + } + + /** + * Gets the argument at the specified index. + */ + final Instruction getPositionalArgument(int index) { + exists(PositionalArgumentOperand operand | + operand = getAnOperand() and + operand.getIndex() = index and + result = operand.getDefinitionInstruction() + ) + } } /** diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll index 6fc6af477fa1..edf8e141764e 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll @@ -304,6 +304,13 @@ class PositionalArgumentOperand extends ArgumentOperand { override string toString() { result = "Arg(" + argIndex + ")" } + + /** + * Gets the zero-based index of the argument. + */ + final int getIndex() { + result = argIndex + } } class SideEffectOperand extends NonPhiOperand { diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/IRDataflowTestCommon.qll b/cpp/ql/test/library-tests/dataflow/dataflow-tests/IRDataflowTestCommon.qll new file mode 100644 index 000000000000..ce2eb1ac97ad --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/IRDataflowTestCommon.qll @@ -0,0 +1,29 @@ +import cpp +import semmle.code.cpp.ir.dataflow.DataFlow + +/** Common data flow configuration to be used by tests. 
*/ +class TestAllocationConfig extends DataFlow::Configuration { + TestAllocationConfig() { + this = "TestAllocationConfig" + } + + override predicate isSource(DataFlow::Node source) { + source.asExpr().(FunctionCall).getTarget().getName() = "source" + or + source.asParameter().getName().matches("source%") + or + // Track uninitialized variables + exists(source.asUninitialized()) + } + + override predicate isSink(DataFlow::Node sink) { + exists(FunctionCall call | + call.getTarget().getName() = "sink" and + sink.asExpr() = call.getAnArgument() + ) + } + + override predicate isBarrier(DataFlow::Node barrier) { + barrier.asExpr().(VariableAccess).getTarget().hasName("barrier") + } +} diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected new file mode 100644 index 000000000000..3e1beebf2799 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected @@ -0,0 +1,28 @@ +| test.cpp:7:8:7:9 | Load: t1 | test.cpp:6:12:6:17 | Call: call to source | +| test.cpp:9:8:9:9 | Load: t1 | test.cpp:6:12:6:17 | Call: call to source | +| test.cpp:10:8:10:9 | Load: t2 | test.cpp:6:12:6:17 | Call: call to source | +| test.cpp:15:8:15:9 | Load: t2 | test.cpp:6:12:6:17 | Call: call to source | +| test.cpp:21:8:21:9 | Load: t1 | test.cpp:6:12:6:17 | Call: call to source | +| test.cpp:26:8:26:9 | Load: t1 | test.cpp:6:12:6:17 | Call: call to source | +| test.cpp:30:8:30:8 | Load: t | test.cpp:35:10:35:15 | Call: call to source | +| test.cpp:31:8:31:8 | Load: c | test.cpp:36:13:36:18 | Call: call to source | +| test.cpp:58:10:58:10 | Load: t | test.cpp:50:14:50:19 | Call: call to source | +| test.cpp:90:8:90:14 | Load: source1 | test.cpp:89:28:89:34 | InitializeParameter: source1 | +| test.cpp:92:8:92:14 | Load: source1 | test.cpp:89:28:89:34 | InitializeParameter: source1 | +| test.cpp:132:22:132:23 | Load: m1 | test.cpp:122:18:122:30 | InitializeParameter: sourceStruct1 
| +| test.cpp:140:22:140:23 | Load: m1 | test.cpp:122:18:122:30 | InitializeParameter: sourceStruct1 | +| test.cpp:192:8:192:8 | Load: s | test.cpp:199:33:199:38 | Call: call to source | +| test.cpp:205:8:205:8 | Load: x | test.cpp:212:34:212:39 | Call: call to source | +| test.cpp:226:8:226:8 | Load: y | test.cpp:219:11:219:16 | Call: call to source | +| test.cpp:308:12:308:12 | Load: x | test.cpp:293:14:293:19 | Call: call to source | +| test.cpp:337:14:337:14 | Load: x | test.cpp:353:17:353:22 | Call: call to source | +| true_upon_entry.cpp:13:8:13:8 | Load: x | true_upon_entry.cpp:9:11:9:16 | Call: call to source | +| true_upon_entry.cpp:21:8:21:8 | Load: x | true_upon_entry.cpp:17:11:17:16 | Call: call to source | +| true_upon_entry.cpp:29:8:29:8 | Load: x | true_upon_entry.cpp:27:9:27:14 | Call: call to source | +| true_upon_entry.cpp:39:8:39:8 | Load: x | true_upon_entry.cpp:33:11:33:16 | Call: call to source | +| true_upon_entry.cpp:49:8:49:8 | Load: x | true_upon_entry.cpp:43:11:43:16 | Call: call to source | +| true_upon_entry.cpp:57:8:57:8 | Load: x | true_upon_entry.cpp:54:11:54:16 | Call: call to source | +| true_upon_entry.cpp:66:8:66:8 | Load: x | true_upon_entry.cpp:62:11:62:16 | Call: call to source | +| true_upon_entry.cpp:78:8:78:8 | Load: x | true_upon_entry.cpp:70:11:70:16 | Call: call to source | +| true_upon_entry.cpp:86:8:86:8 | Load: x | true_upon_entry.cpp:83:11:83:16 | Call: call to source | +| true_upon_entry.cpp:105:8:105:8 | Load: x | true_upon_entry.cpp:98:11:98:16 | Call: call to source | diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.ql b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.ql new file mode 100644 index 000000000000..3debabfefa29 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.ql @@ -0,0 +1,5 @@ +import IRDataflowTestCommon + +from DataFlow::Node sink, DataFlow::Node source, TestAllocationConfig cfg +where cfg.hasFlow(source, sink) +select sink, source From 
ae8f18c0b53813fe8d3fad078f78d0dba9bf6f85 Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Fri, 30 Nov 2018 11:15:33 -0800 Subject: [PATCH 02/15] C++: Treat all `Convert` instructions as dataflow The AST dataflow library essentially ignores conversions, which is probably the right behavior. Converting an `int` to a `long` preserves the value, even if the bit pattern might be different. It's arguable whether narrowing conversions should be treated as dataflow, but we'll do so for now. We can revisit that if we see it cause problems. --- .../src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 7b90906ef76b..9881a2ef1cb5 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -131,7 +131,9 @@ UninitializedNode uninitializedNode(LocalVariable v) { */ predicate localFlowStep(Node nodeFrom, Node nodeTo) { nodeTo.(CopyInstruction).getSourceValue() = nodeFrom or - nodeTo.(PhiInstruction).getAnOperand().getDefinitionInstruction() = nodeFrom + nodeTo.(PhiInstruction).getAnOperand().getDefinitionInstruction() = nodeFrom or + // Treat all conversions as flow, even conversions between different numeric types. + nodeTo.(ConvertInstruction).getOperand() = nodeFrom } /** From af443569d9390ab89c479aa8ea5f18581baa742b Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Fri, 30 Nov 2018 12:02:05 -0800 Subject: [PATCH 03/15] C++: Fix handling of accesses to escaped variables in Aliased SSA This fixes a subtle bug in the construction of aliased SSA. `getResultMemoryAccess` was failing to return a `MemoryAccess` for a store to a variable whose address escaped. This is because no `VirtualIRVariable` was being created for such variables. 
The code was assuming that any access to such a variable would be via `UnknownMemoryAccess`. The result is that accesses to such variables were not being modeled in SSA at all. Instead, the way to handle this is to have a `VariableMemoryAccess` even when the variable being accessed has escaped, and to have `VariableMemoryAccess::getVirtualVariable()` return the `UnknownVirtualVariable` for escaped variables. In the future, this will also let us be less conservative about inserting `Chi` nodes, because we'll be able to determine that there's an exact overlap between two accesses to the same escaped variable in some cases. --- .../aliased_ssa/internal/AliasedSSA.qll | 27 +- .../aliased_ssa/internal/PrintAliasedSSA.qll | 12 + .../ir/ir/aliased_ssa_ir.expected | 494 +++++++++--------- 3 files changed, 287 insertions(+), 246 deletions(-) create mode 100644 cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/PrintAliasedSSA.qll diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasedSSA.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasedSSA.qll index 37a14222ea33..70f196d1a0b4 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasedSSA.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasedSSA.qll @@ -19,6 +19,10 @@ private VirtualIRVariable getVirtualVariable(IRVariable var) { result.getIRVariable() = var } +private UnknownVirtualVariable getUnknownVirtualVariable(FunctionIR f) { + result.getFunctionIR() = f +} + class VirtualVariable extends TVirtualVariable { string toString() { none() @@ -83,10 +87,10 @@ class UnknownVirtualVariable extends VirtualVariable, TUnknownVirtualVariable { } private newtype TMemoryAccess = - TVariableMemoryAccess(VirtualIRVariable vvar, IntValue offset, IntValue size) { + TVariableMemoryAccess(IRVariable var, IntValue offset, IntValue size) { exists(Instruction instr | exists(MemoryAccessKind mak | 
instr.getResultMemoryAccess() = mak and not mak instanceof PhiMemoryAccess) and - resultPointsTo(instr.getAnOperand().(AddressOperand).getDefinitionInstruction(), vvar.getIRVariable(), offset) and + resultPointsTo(instr.getAnOperand().(AddressOperand).getDefinitionInstruction(), var, offset) and if exists(instr.getResultSize()) then instr.getResultSize() = size else size = Ints::unknown() @@ -97,7 +101,7 @@ private newtype TMemoryAccess = TTotalUnknownMemoryAccess(UnknownVirtualVariable uvv) private VariableMemoryAccess getVariableMemoryAccess(IRVariable var, IntValue offset, IntValue size) { - result.getVirtualVariable() = getVirtualVariable(var) and + result.getVariable() = var and result.getOffset() = offset and result.getSize() = size } @@ -117,20 +121,21 @@ class MemoryAccess extends TMemoryAccess { } class VariableMemoryAccess extends TVariableMemoryAccess, MemoryAccess { - VirtualIRVariable vvar; + IRVariable var; IntValue offset; IntValue size; VariableMemoryAccess() { - this = TVariableMemoryAccess(vvar, offset, size) + this = TVariableMemoryAccess(var, offset, size) } override final string toString() { - result = vvar.toString() + result = var.toString() + "[" + offset.toString() + ".." 
+ (offset + size - 1).toString() + "]" } final override VirtualVariable getVirtualVariable() { - result = vvar + result = getVirtualVariable(var) or + not exists(getVirtualVariable(var)) and result = getUnknownVirtualVariable(var.getFunctionIR()) } IntValue getOffset() { @@ -141,10 +146,15 @@ class VariableMemoryAccess extends TVariableMemoryAccess, MemoryAccess { result = size } + final IRVariable getVariable() { + result = var + } + final override predicate isPartialMemoryAccess() { + not exists(getVirtualVariable(var)) or getOffset() != 0 or - getSize() != vvar.getType().getSize() + getSize() != var.getType().getSize() } } @@ -166,6 +176,7 @@ class UnknownMemoryAccess extends TUnknownMemoryAccess, MemoryAccess { final override predicate isPartialMemoryAccess() { any() } + Type getType() { result instanceof UnknownType } diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/PrintAliasedSSA.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/PrintAliasedSSA.qll new file mode 100644 index 000000000000..d3889ad8d0e1 --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/PrintAliasedSSA.qll @@ -0,0 +1,12 @@ +private import semmle.code.cpp.ir.implementation.unaliased_ssa.IR +private import AliasedSSA + +/** + * Property provide that dumps the memory access of each result. Useful for debugging SSA + * construction. 
+ */ +class PropertyProvider extends IRPropertyProvider { + override string getInstructionProperty(Instruction instruction, string key) { + key = "ResultMemoryAccess" and result = getResultMemoryAccess(instruction).toString() + } +} diff --git a/cpp/ql/test/library-tests/ir/ir/aliased_ssa_ir.expected b/cpp/ql/test/library-tests/ir/ir/aliased_ssa_ir.expected index 2eb23da7a69b..805a1e565e2a 100644 --- a/cpp/ql/test/library-tests/ir/ir/aliased_ssa_ir.expected +++ b/cpp/ql/test/library-tests/ir/ir/aliased_ssa_ir.expected @@ -1,24 +1,26 @@ bad_asts.cpp: # 14| Bad::CallBadMemberFunction() -> void # 14| Block 0 -# 14| v0_0(void) = EnterFunction : -# 14| m0_1(unknown) = AliasedDefinition : -# 14| mu0_2(unknown) = UnmodeledDefinition : -# 15| r0_3(glval) = VariableAddress[s] : -# 15| mu0_4(S) = Uninitialized : r0_3 -# 15| r0_5(glval) = FieldAddress[x] : r0_3 -# 15| r0_6(int) = Constant[0] : -# 15| mu0_7(int) = Store : r0_5, r0_6 -# 16| r0_8(glval) = VariableAddress[s] : -# 16| r0_9(glval) = FunctionAddress[MemberFunction] : -# 16| r0_10(int) = Constant[1] : -# 16| r0_11(int) = Call : r0_9, this:r0_8, r0_10 -# 16| m0_12(unknown) = ^CallSideEffect : m0_1 -# 16| m0_13(unknown) = Chi : m0_1, m0_12 -# 17| v0_14(void) = NoOp : -# 14| v0_15(void) = ReturnVoid : -# 14| v0_16(void) = UnmodeledUse : mu* -# 14| v0_17(void) = ExitFunction : +# 14| v0_0(void) = EnterFunction : +# 14| m0_1(unknown) = AliasedDefinition : +# 14| mu0_2(unknown) = UnmodeledDefinition : +# 15| r0_3(glval) = VariableAddress[s] : +# 15| m0_4(S) = Uninitialized : r0_3 +# 15| m0_5(unknown) = Chi : m0_1, m0_4 +# 15| r0_6(glval) = FieldAddress[x] : r0_3 +# 15| r0_7(int) = Constant[0] : +# 15| m0_8(int) = Store : r0_6, r0_7 +# 15| m0_9(unknown) = Chi : m0_5, m0_8 +# 16| r0_10(glval) = VariableAddress[s] : +# 16| r0_11(glval) = FunctionAddress[MemberFunction] : +# 16| r0_12(int) = Constant[1] : +# 16| r0_13(int) = Call : r0_11, this:r0_10, r0_12 +# 16| m0_14(unknown) = ^CallSideEffect : m0_9 +# 16| m0_15(unknown) = 
Chi : m0_9, m0_14 +# 17| v0_16(void) = NoOp : +# 14| v0_17(void) = ReturnVoid : +# 14| v0_18(void) = UnmodeledUse : mu* +# 14| v0_19(void) = ExitFunction : # 22| Bad::Point::Point() -> void # 22| Block 0 @@ -2175,13 +2177,15 @@ ir.cpp: # 486| r0_3(glval) = VariableAddress[a] : # 486| m0_4(bool) = InitializeParameter[a] : r0_3 # 487| r0_5(glval) = VariableAddress[x] : -# 487| mu0_6(int) = Uninitialized : r0_5 -# 488| r0_7(glval) = VariableAddress[y] : -# 488| mu0_8(int) = Uninitialized : r0_7 -# 489| r0_9(int) = Constant[5] : -# 489| r0_10(glval) = VariableAddress[a] : -# 489| r0_11(bool) = Load : r0_10, m0_4 -# 489| v0_12(void) = ConditionalBranch : r0_11 +# 487| m0_6(int) = Uninitialized : r0_5 +# 487| m0_7(unknown) = Chi : m0_1, m0_6 +# 488| r0_8(glval) = VariableAddress[y] : +# 488| m0_9(int) = Uninitialized : r0_8 +# 488| m0_10(unknown) = Chi : m0_7, m0_9 +# 489| r0_11(int) = Constant[5] : +# 489| r0_12(glval) = VariableAddress[a] : +# 489| r0_13(bool) = Load : r0_12, m0_4 +# 489| v0_14(void) = ConditionalBranch : r0_13 #-----| False -> Block 3 #-----| True -> Block 2 @@ -2189,8 +2193,8 @@ ir.cpp: # 489| m1_0(int) = Phi : from 2:m2_2, from 3:m3_2 # 489| r1_1(glval) = VariableAddress[#temp489:6] : # 489| r1_2(glval) = Load : r1_1, m1_0 -# 489| m1_3(int) = Store : r1_2, r0_9 -# 489| m1_4(unknown) = Chi : m0_1, m1_3 +# 489| m1_3(int) = Store : r1_2, r0_11 +# 489| m1_4(unknown) = Chi : m0_10, m1_3 # 490| v1_5(void) = NoOp : # 486| v1_6(void) = ReturnVoid : # 486| v1_7(void) = UnmodeledUse : mu* @@ -2417,49 +2421,52 @@ ir.cpp: # 519| r0_5(glval) = VariableAddress[f] : # 519| m0_6(float) = InitializeParameter[f] : r0_5 # 520| r0_7(glval) = VariableAddress[a1] : -# 520| mu0_8(int[3]) = Uninitialized : r0_7 -# 520| r0_9(int) = Constant[0] : -# 520| r0_10(glval) = PointerAdd : r0_7, r0_9 -# 520| r0_11(unknown[12]) = Constant[0] : -# 520| m0_12(unknown[12]) = Store : r0_10, r0_11 -# 520| m0_13(unknown) = Chi : m0_1, m0_12 -# 521| r0_14(glval) = VariableAddress[a2] : -# 
521| mu0_15(int[3]) = Uninitialized : r0_14 -# 521| r0_16(int) = Constant[0] : -# 521| r0_17(glval) = PointerAdd : r0_14, r0_16 -# 521| r0_18(glval) = VariableAddress[x] : -# 521| r0_19(int) = Load : r0_18, m0_4 -# 521| m0_20(int) = Store : r0_17, r0_19 -# 521| m0_21(unknown) = Chi : m0_13, m0_20 -# 521| r0_22(int) = Constant[1] : -# 521| r0_23(glval) = PointerAdd : r0_14, r0_22 -# 521| r0_24(glval) = VariableAddress[f] : -# 521| r0_25(float) = Load : r0_24, m0_6 -# 521| r0_26(int) = Convert : r0_25 -# 521| m0_27(int) = Store : r0_23, r0_26 -# 521| m0_28(unknown) = Chi : m0_21, m0_27 -# 521| r0_29(int) = Constant[2] : -# 521| r0_30(glval) = PointerAdd : r0_14, r0_29 -# 521| r0_31(int) = Constant[0] : -# 521| m0_32(int) = Store : r0_30, r0_31 -# 521| m0_33(unknown) = Chi : m0_28, m0_32 -# 522| r0_34(glval) = VariableAddress[a3] : -# 522| mu0_35(int[3]) = Uninitialized : r0_34 -# 522| r0_36(int) = Constant[0] : -# 522| r0_37(glval) = PointerAdd : r0_34, r0_36 -# 522| r0_38(glval) = VariableAddress[x] : -# 522| r0_39(int) = Load : r0_38, m0_4 -# 522| m0_40(int) = Store : r0_37, r0_39 -# 522| m0_41(unknown) = Chi : m0_33, m0_40 -# 522| r0_42(int) = Constant[1] : -# 522| r0_43(glval) = PointerAdd : r0_34, r0_42 -# 522| r0_44(unknown[8]) = Constant[0] : -# 522| m0_45(unknown[8]) = Store : r0_43, r0_44 -# 522| m0_46(unknown) = Chi : m0_41, m0_45 -# 523| v0_47(void) = NoOp : -# 519| v0_48(void) = ReturnVoid : -# 519| v0_49(void) = UnmodeledUse : mu* -# 519| v0_50(void) = ExitFunction : +# 520| m0_8(int[3]) = Uninitialized : r0_7 +# 520| m0_9(unknown) = Chi : m0_1, m0_8 +# 520| r0_10(int) = Constant[0] : +# 520| r0_11(glval) = PointerAdd : r0_7, r0_10 +# 520| r0_12(unknown[12]) = Constant[0] : +# 520| m0_13(unknown[12]) = Store : r0_11, r0_12 +# 520| m0_14(unknown) = Chi : m0_9, m0_13 +# 521| r0_15(glval) = VariableAddress[a2] : +# 521| m0_16(int[3]) = Uninitialized : r0_15 +# 521| m0_17(unknown) = Chi : m0_14, m0_16 +# 521| r0_18(int) = Constant[0] : +# 521| r0_19(glval) = 
PointerAdd : r0_15, r0_18 +# 521| r0_20(glval) = VariableAddress[x] : +# 521| r0_21(int) = Load : r0_20, m0_4 +# 521| m0_22(int) = Store : r0_19, r0_21 +# 521| m0_23(unknown) = Chi : m0_17, m0_22 +# 521| r0_24(int) = Constant[1] : +# 521| r0_25(glval) = PointerAdd : r0_15, r0_24 +# 521| r0_26(glval) = VariableAddress[f] : +# 521| r0_27(float) = Load : r0_26, m0_6 +# 521| r0_28(int) = Convert : r0_27 +# 521| m0_29(int) = Store : r0_25, r0_28 +# 521| m0_30(unknown) = Chi : m0_23, m0_29 +# 521| r0_31(int) = Constant[2] : +# 521| r0_32(glval) = PointerAdd : r0_15, r0_31 +# 521| r0_33(int) = Constant[0] : +# 521| m0_34(int) = Store : r0_32, r0_33 +# 521| m0_35(unknown) = Chi : m0_30, m0_34 +# 522| r0_36(glval) = VariableAddress[a3] : +# 522| m0_37(int[3]) = Uninitialized : r0_36 +# 522| m0_38(unknown) = Chi : m0_35, m0_37 +# 522| r0_39(int) = Constant[0] : +# 522| r0_40(glval) = PointerAdd : r0_36, r0_39 +# 522| r0_41(glval) = VariableAddress[x] : +# 522| r0_42(int) = Load : r0_41, m0_4 +# 522| m0_43(int) = Store : r0_40, r0_42 +# 522| m0_44(unknown) = Chi : m0_38, m0_43 +# 522| r0_45(int) = Constant[1] : +# 522| r0_46(glval) = PointerAdd : r0_36, r0_45 +# 522| r0_47(unknown[8]) = Constant[0] : +# 522| m0_48(unknown[8]) = Store : r0_46, r0_47 +# 522| m0_49(unknown) = Chi : m0_44, m0_48 +# 523| v0_50(void) = NoOp : +# 519| v0_51(void) = ReturnVoid : +# 519| v0_52(void) = UnmodeledUse : mu* +# 519| v0_53(void) = ExitFunction : # 530| UnionInit(int, float) -> void # 530| Block 0 @@ -2630,69 +2637,74 @@ ir.cpp: # 572| r0_3(glval) = VariableAddress[a_pad] : # 572| r0_4(glval) = StringConstant[""] : # 572| r0_5(char[1]) = Load : r0_4, m0_1 -# 572| mu0_6(char[1]) = Store : r0_3, r0_5 -# 572| r0_7(unknown[31]) = Constant[0] : -# 572| r0_8(int) = Constant[1] : -# 572| r0_9(glval) = PointerAdd : r0_3, r0_8 -# 572| m0_10(unknown[31]) = Store : r0_9, r0_7 -# 572| m0_11(unknown) = Chi : m0_1, m0_10 -# 573| r0_12(glval) = VariableAddress[a_nopad] : -# 573| r0_13(glval) = 
StringConstant["foo"] : -# 573| r0_14(char[4]) = Load : r0_13, m0_11 -# 573| m0_15(char[4]) = Store : r0_12, r0_14 -# 574| r0_16(glval) = VariableAddress[a_infer] : -# 574| r0_17(glval) = StringConstant["blah"] : -# 574| r0_18(char[5]) = Load : r0_17, m0_11 -# 574| m0_19(char[5]) = Store : r0_16, r0_18 -# 575| r0_20(glval) = VariableAddress[b] : -# 575| m0_21(char[2]) = Uninitialized : r0_20 -# 576| r0_22(glval) = VariableAddress[c] : -# 576| mu0_23(char[2]) = Uninitialized : r0_22 -# 576| r0_24(int) = Constant[0] : -# 576| r0_25(glval) = PointerAdd : r0_22, r0_24 -# 576| r0_26(unknown[2]) = Constant[0] : -# 576| m0_27(unknown[2]) = Store : r0_25, r0_26 -# 576| m0_28(unknown) = Chi : m0_11, m0_27 -# 577| r0_29(glval) = VariableAddress[d] : -# 577| mu0_30(char[2]) = Uninitialized : r0_29 -# 577| r0_31(int) = Constant[0] : -# 577| r0_32(glval) = PointerAdd : r0_29, r0_31 -# 577| r0_33(char) = Constant[0] : -# 577| m0_34(char) = Store : r0_32, r0_33 -# 577| m0_35(unknown) = Chi : m0_28, m0_34 -# 577| r0_36(int) = Constant[1] : -# 577| r0_37(glval) = PointerAdd : r0_29, r0_36 -# 577| r0_38(char) = Constant[0] : -# 577| m0_39(char) = Store : r0_37, r0_38 -# 577| m0_40(unknown) = Chi : m0_35, m0_39 -# 578| r0_41(glval) = VariableAddress[e] : -# 578| mu0_42(char[2]) = Uninitialized : r0_41 -# 578| r0_43(int) = Constant[0] : -# 578| r0_44(glval) = PointerAdd : r0_41, r0_43 -# 578| r0_45(char) = Constant[0] : -# 578| m0_46(char) = Store : r0_44, r0_45 -# 578| m0_47(unknown) = Chi : m0_40, m0_46 -# 578| r0_48(int) = Constant[1] : -# 578| r0_49(glval) = PointerAdd : r0_41, r0_48 -# 578| r0_50(char) = Constant[1] : -# 578| m0_51(char) = Store : r0_49, r0_50 -# 578| m0_52(unknown) = Chi : m0_47, m0_51 -# 579| r0_53(glval) = VariableAddress[f] : -# 579| mu0_54(char[3]) = Uninitialized : r0_53 -# 579| r0_55(int) = Constant[0] : -# 579| r0_56(glval) = PointerAdd : r0_53, r0_55 -# 579| r0_57(char) = Constant[0] : -# 579| m0_58(char) = Store : r0_56, r0_57 -# 579| m0_59(unknown) = 
Chi : m0_52, m0_58 -# 579| r0_60(int) = Constant[1] : -# 579| r0_61(glval) = PointerAdd : r0_53, r0_60 -# 579| r0_62(unknown[2]) = Constant[0] : -# 579| m0_63(unknown[2]) = Store : r0_61, r0_62 +# 572| m0_6(char[1]) = Store : r0_3, r0_5 +# 572| m0_7(unknown) = Chi : m0_1, m0_6 +# 572| r0_8(unknown[31]) = Constant[0] : +# 572| r0_9(int) = Constant[1] : +# 572| r0_10(glval) = PointerAdd : r0_3, r0_9 +# 572| m0_11(unknown[31]) = Store : r0_10, r0_8 +# 572| m0_12(unknown) = Chi : m0_7, m0_11 +# 573| r0_13(glval) = VariableAddress[a_nopad] : +# 573| r0_14(glval) = StringConstant["foo"] : +# 573| r0_15(char[4]) = Load : r0_14, m0_12 +# 573| m0_16(char[4]) = Store : r0_13, r0_15 +# 574| r0_17(glval) = VariableAddress[a_infer] : +# 574| r0_18(glval) = StringConstant["blah"] : +# 574| r0_19(char[5]) = Load : r0_18, m0_12 +# 574| m0_20(char[5]) = Store : r0_17, r0_19 +# 575| r0_21(glval) = VariableAddress[b] : +# 575| m0_22(char[2]) = Uninitialized : r0_21 +# 576| r0_23(glval) = VariableAddress[c] : +# 576| m0_24(char[2]) = Uninitialized : r0_23 +# 576| m0_25(unknown) = Chi : m0_12, m0_24 +# 576| r0_26(int) = Constant[0] : +# 576| r0_27(glval) = PointerAdd : r0_23, r0_26 +# 576| r0_28(unknown[2]) = Constant[0] : +# 576| m0_29(unknown[2]) = Store : r0_27, r0_28 +# 576| m0_30(unknown) = Chi : m0_25, m0_29 +# 577| r0_31(glval) = VariableAddress[d] : +# 577| m0_32(char[2]) = Uninitialized : r0_31 +# 577| m0_33(unknown) = Chi : m0_30, m0_32 +# 577| r0_34(int) = Constant[0] : +# 577| r0_35(glval) = PointerAdd : r0_31, r0_34 +# 577| r0_36(char) = Constant[0] : +# 577| m0_37(char) = Store : r0_35, r0_36 +# 577| m0_38(unknown) = Chi : m0_33, m0_37 +# 577| r0_39(int) = Constant[1] : +# 577| r0_40(glval) = PointerAdd : r0_31, r0_39 +# 577| r0_41(char) = Constant[0] : +# 577| m0_42(char) = Store : r0_40, r0_41 +# 577| m0_43(unknown) = Chi : m0_38, m0_42 +# 578| r0_44(glval) = VariableAddress[e] : +# 578| m0_45(char[2]) = Uninitialized : r0_44 +# 578| m0_46(unknown) = Chi : m0_43, m0_45 
+# 578| r0_47(int) = Constant[0] : +# 578| r0_48(glval) = PointerAdd : r0_44, r0_47 +# 578| r0_49(char) = Constant[0] : +# 578| m0_50(char) = Store : r0_48, r0_49 +# 578| m0_51(unknown) = Chi : m0_46, m0_50 +# 578| r0_52(int) = Constant[1] : +# 578| r0_53(glval) = PointerAdd : r0_44, r0_52 +# 578| r0_54(char) = Constant[1] : +# 578| m0_55(char) = Store : r0_53, r0_54 +# 578| m0_56(unknown) = Chi : m0_51, m0_55 +# 579| r0_57(glval) = VariableAddress[f] : +# 579| m0_58(char[3]) = Uninitialized : r0_57 +# 579| m0_59(unknown) = Chi : m0_56, m0_58 +# 579| r0_60(int) = Constant[0] : +# 579| r0_61(glval) = PointerAdd : r0_57, r0_60 +# 579| r0_62(char) = Constant[0] : +# 579| m0_63(char) = Store : r0_61, r0_62 # 579| m0_64(unknown) = Chi : m0_59, m0_63 -# 580| v0_65(void) = NoOp : -# 571| v0_66(void) = ReturnVoid : -# 571| v0_67(void) = UnmodeledUse : mu* -# 571| v0_68(void) = ExitFunction : +# 579| r0_65(int) = Constant[1] : +# 579| r0_66(glval) = PointerAdd : r0_57, r0_65 +# 579| r0_67(unknown[2]) = Constant[0] : +# 579| m0_68(unknown[2]) = Store : r0_66, r0_67 +# 579| m0_69(unknown) = Chi : m0_64, m0_68 +# 580| v0_70(void) = NoOp : +# 571| v0_71(void) = ReturnVoid : +# 571| v0_72(void) = UnmodeledUse : mu* +# 571| v0_73(void) = ExitFunction : # 584| VarArgs() -> void # 584| Block 0 @@ -2780,31 +2792,32 @@ ir.cpp: # 622| r0_5(glval) = VariableAddress[p] : # 622| m0_6(String *) = InitializeParameter[p] : r0_5 # 622| r0_7(glval) = VariableAddress[s] : -# 622| mu0_8(String) = InitializeParameter[s] : r0_7 -# 623| r0_9(glval) = VariableAddress[r] : -# 623| r0_10(String &) = Load : r0_9, m0_4 -# 623| r0_11(glval) = Convert : r0_10 -# 623| r0_12(glval) = FunctionAddress[c_str] : -# 623| r0_13(char *) = Call : r0_12, this:r0_11 -# 623| m0_14(unknown) = ^CallSideEffect : m0_1 -# 623| m0_15(unknown) = Chi : m0_1, m0_14 -# 624| r0_16(glval) = VariableAddress[p] : -# 624| r0_17(String *) = Load : r0_16, m0_6 -# 624| r0_18(String *) = Convert : r0_17 -# 624| r0_19(glval) = 
FunctionAddress[c_str] : -# 624| r0_20(char *) = Call : r0_19, this:r0_18 -# 624| m0_21(unknown) = ^CallSideEffect : m0_15 -# 624| m0_22(unknown) = Chi : m0_15, m0_21 -# 625| r0_23(glval) = VariableAddress[s] : -# 625| r0_24(glval) = Convert : r0_23 -# 625| r0_25(glval) = FunctionAddress[c_str] : -# 625| r0_26(char *) = Call : r0_25, this:r0_24 -# 625| m0_27(unknown) = ^CallSideEffect : m0_22 -# 625| m0_28(unknown) = Chi : m0_22, m0_27 -# 626| v0_29(void) = NoOp : -# 622| v0_30(void) = ReturnVoid : -# 622| v0_31(void) = UnmodeledUse : mu* -# 622| v0_32(void) = ExitFunction : +# 622| m0_8(String) = InitializeParameter[s] : r0_7 +# 622| m0_9(unknown) = Chi : m0_1, m0_8 +# 623| r0_10(glval) = VariableAddress[r] : +# 623| r0_11(String &) = Load : r0_10, m0_4 +# 623| r0_12(glval) = Convert : r0_11 +# 623| r0_13(glval) = FunctionAddress[c_str] : +# 623| r0_14(char *) = Call : r0_13, this:r0_12 +# 623| m0_15(unknown) = ^CallSideEffect : m0_9 +# 623| m0_16(unknown) = Chi : m0_9, m0_15 +# 624| r0_17(glval) = VariableAddress[p] : +# 624| r0_18(String *) = Load : r0_17, m0_6 +# 624| r0_19(String *) = Convert : r0_18 +# 624| r0_20(glval) = FunctionAddress[c_str] : +# 624| r0_21(char *) = Call : r0_20, this:r0_19 +# 624| m0_22(unknown) = ^CallSideEffect : m0_16 +# 624| m0_23(unknown) = Chi : m0_16, m0_22 +# 625| r0_24(glval) = VariableAddress[s] : +# 625| r0_25(glval) = Convert : r0_24 +# 625| r0_26(glval) = FunctionAddress[c_str] : +# 625| r0_27(char *) = Call : r0_26, this:r0_25 +# 625| m0_28(unknown) = ^CallSideEffect : m0_23 +# 625| m0_29(unknown) = Chi : m0_23, m0_28 +# 626| v0_30(void) = NoOp : +# 622| v0_31(void) = ReturnVoid : +# 622| v0_32(void) = UnmodeledUse : mu* +# 622| v0_33(void) = ExitFunction : # 630| C::StaticMemberFunction(int) -> int # 630| Block 0 @@ -4071,43 +4084,46 @@ ir.cpp: # 888| m0_1(unknown) = AliasedDefinition : # 888| mu0_2(unknown) = UnmodeledDefinition : # 888| r0_3(glval) = VariableAddress[x] : -# 888| mu0_4(int) = InitializeParameter[x] : r0_3 
-# 889| r0_5(glval<__va_list_tag[1]>) = VariableAddress[args] : -# 889| mu0_6(__va_list_tag[1]) = Uninitialized : r0_5 -# 891| r0_7(glval<__va_list_tag[1]>) = VariableAddress[args] : -# 891| r0_8(__va_list_tag *) = Convert : r0_7 -# 891| r0_9(glval) = VariableAddress[x] : -# 891| v0_10(void) = VarArgsStart : r0_8, r0_9 -# 892| r0_11(glval<__va_list_tag[1]>) = VariableAddress[args2] : -# 892| mu0_12(__va_list_tag[1]) = Uninitialized : r0_11 -# 893| r0_13(glval<__va_list_tag[1]>) = VariableAddress[args2] : -# 893| r0_14(__va_list_tag *) = Convert : r0_13 -# 893| r0_15(glval<__va_list_tag[1]>) = VariableAddress[args] : -# 893| r0_16(__va_list_tag *) = Convert : r0_15 -# 893| v0_17(void) = VarArgsStart : r0_14, r0_16 -# 894| r0_18(glval) = VariableAddress[d] : -# 894| r0_19(glval<__va_list_tag[1]>) = VariableAddress[args] : -# 894| r0_20(__va_list_tag *) = Convert : r0_19 -# 894| r0_21(glval) = VarArg : r0_20 -# 894| r0_22(double) = Load : r0_21, m0_1 -# 894| m0_23(double) = Store : r0_18, r0_22 -# 895| r0_24(glval) = VariableAddress[f] : -# 895| r0_25(glval<__va_list_tag[1]>) = VariableAddress[args] : -# 895| r0_26(__va_list_tag *) = Convert : r0_25 -# 895| r0_27(glval) = VarArg : r0_26 -# 895| r0_28(double) = Load : r0_27, m0_1 -# 895| r0_29(float) = Convert : r0_28 -# 895| m0_30(float) = Store : r0_24, r0_29 -# 896| r0_31(glval<__va_list_tag[1]>) = VariableAddress[args] : -# 896| r0_32(__va_list_tag *) = Convert : r0_31 -# 896| v0_33(void) = VarArgsEnd : r0_32 -# 897| r0_34(glval<__va_list_tag[1]>) = VariableAddress[args2] : -# 897| r0_35(__va_list_tag *) = Convert : r0_34 -# 897| v0_36(void) = VarArgsEnd : r0_35 -# 898| v0_37(void) = NoOp : -# 888| v0_38(void) = ReturnVoid : -# 888| v0_39(void) = UnmodeledUse : mu* -# 888| v0_40(void) = ExitFunction : +# 888| m0_4(int) = InitializeParameter[x] : r0_3 +# 888| m0_5(unknown) = Chi : m0_1, m0_4 +# 889| r0_6(glval<__va_list_tag[1]>) = VariableAddress[args] : +# 889| m0_7(__va_list_tag[1]) = Uninitialized : r0_6 +# 889| 
m0_8(unknown) = Chi : m0_5, m0_7 +# 891| r0_9(glval<__va_list_tag[1]>) = VariableAddress[args] : +# 891| r0_10(__va_list_tag *) = Convert : r0_9 +# 891| r0_11(glval) = VariableAddress[x] : +# 891| v0_12(void) = VarArgsStart : r0_10, r0_11 +# 892| r0_13(glval<__va_list_tag[1]>) = VariableAddress[args2] : +# 892| m0_14(__va_list_tag[1]) = Uninitialized : r0_13 +# 892| m0_15(unknown) = Chi : m0_8, m0_14 +# 893| r0_16(glval<__va_list_tag[1]>) = VariableAddress[args2] : +# 893| r0_17(__va_list_tag *) = Convert : r0_16 +# 893| r0_18(glval<__va_list_tag[1]>) = VariableAddress[args] : +# 893| r0_19(__va_list_tag *) = Convert : r0_18 +# 893| v0_20(void) = VarArgsStart : r0_17, r0_19 +# 894| r0_21(glval) = VariableAddress[d] : +# 894| r0_22(glval<__va_list_tag[1]>) = VariableAddress[args] : +# 894| r0_23(__va_list_tag *) = Convert : r0_22 +# 894| r0_24(glval) = VarArg : r0_23 +# 894| r0_25(double) = Load : r0_24, m0_15 +# 894| m0_26(double) = Store : r0_21, r0_25 +# 895| r0_27(glval) = VariableAddress[f] : +# 895| r0_28(glval<__va_list_tag[1]>) = VariableAddress[args] : +# 895| r0_29(__va_list_tag *) = Convert : r0_28 +# 895| r0_30(glval) = VarArg : r0_29 +# 895| r0_31(double) = Load : r0_30, m0_15 +# 895| r0_32(float) = Convert : r0_31 +# 895| m0_33(float) = Store : r0_27, r0_32 +# 896| r0_34(glval<__va_list_tag[1]>) = VariableAddress[args] : +# 896| r0_35(__va_list_tag *) = Convert : r0_34 +# 896| v0_36(void) = VarArgsEnd : r0_35 +# 897| r0_37(glval<__va_list_tag[1]>) = VariableAddress[args2] : +# 897| r0_38(__va_list_tag *) = Convert : r0_37 +# 897| v0_39(void) = VarArgsEnd : r0_38 +# 898| v0_40(void) = NoOp : +# 888| v0_41(void) = ReturnVoid : +# 888| v0_42(void) = UnmodeledUse : mu* +# 888| v0_43(void) = ExitFunction : # 900| CastToVoid(int) -> void # 900| Block 0 @@ -4115,13 +4131,14 @@ ir.cpp: # 900| m0_1(unknown) = AliasedDefinition : # 900| mu0_2(unknown) = UnmodeledDefinition : # 900| r0_3(glval) = VariableAddress[x] : -# 900| mu0_4(int) = InitializeParameter[x] : 
r0_3 -# 901| r0_5(glval) = VariableAddress[x] : -# 901| v0_6(void) = Convert : r0_5 -# 902| v0_7(void) = NoOp : -# 900| v0_8(void) = ReturnVoid : -# 900| v0_9(void) = UnmodeledUse : mu* -# 900| v0_10(void) = ExitFunction : +# 900| m0_4(int) = InitializeParameter[x] : r0_3 +# 900| m0_5(unknown) = Chi : m0_1, m0_4 +# 901| r0_6(glval) = VariableAddress[x] : +# 901| v0_7(void) = Convert : r0_6 +# 902| v0_8(void) = NoOp : +# 900| v0_9(void) = ReturnVoid : +# 900| v0_10(void) = UnmodeledUse : mu* +# 900| v0_11(void) = ExitFunction : # 904| ConstantConditions(int) -> void # 904| Block 0 @@ -4330,43 +4347,44 @@ ir.cpp: # 961| m0_1(unknown) = AliasedDefinition : # 961| mu0_2(unknown) = UnmodeledDefinition : # 962| r0_3(glval) = VariableAddress[a1] : -# 962| mu0_4(int[1000]) = Uninitialized : r0_3 -# 962| r0_5(int) = Constant[0] : -# 962| r0_6(glval) = PointerAdd : r0_3, r0_5 -# 962| r0_7(unknown[8]) = Constant[0] : -# 962| m0_8(unknown[8]) = Store : r0_6, r0_7 -# 962| m0_9(unknown) = Chi : m0_1, m0_8 -# 962| r0_10(int) = Constant[2] : -# 962| r0_11(glval) = PointerAdd : r0_3, r0_10 -# 962| r0_12(int) = Constant[10002] : -# 962| m0_13(int) = Store : r0_11, r0_12 -# 962| m0_14(unknown) = Chi : m0_9, m0_13 -# 962| r0_15(int) = Constant[3] : -# 962| r0_16(glval) = PointerAdd : r0_3, r0_15 -# 962| r0_17(unknown[3588]) = Constant[0] : -# 962| m0_18(unknown[3588]) = Store : r0_16, r0_17 -# 962| m0_19(unknown) = Chi : m0_14, m0_18 -# 962| r0_20(int) = Constant[900] : -# 962| r0_21(glval) = PointerAdd : r0_3, r0_20 -# 962| r0_22(int) = Constant[10900] : -# 962| m0_23(int) = Store : r0_21, r0_22 -# 962| m0_24(unknown) = Chi : m0_19, m0_23 -# 962| r0_25(int) = Constant[901] : -# 962| r0_26(glval) = PointerAdd : r0_3, r0_25 -# 962| r0_27(unknown[396]) = Constant[0] : -# 962| m0_28(unknown[396]) = Store : r0_26, r0_27 -# 962| m0_29(unknown) = Chi : m0_24, m0_28 -# 963| r0_30(glval) = VariableAddress[#return] : -# 963| r0_31(glval) = VariableAddress[a1] : -# 963| r0_32(int *) = Convert : 
r0_31 -# 963| r0_33(int) = Constant[900] : -# 963| r0_34(int *) = PointerAdd[4] : r0_32, r0_33 -# 963| r0_35(int) = Load : r0_34, mu0_2 -# 963| m0_36(int) = Store : r0_30, r0_35 -# 961| r0_37(glval) = VariableAddress[#return] : -# 961| v0_38(void) = ReturnValue : r0_37, m0_36 -# 961| v0_39(void) = UnmodeledUse : mu* -# 961| v0_40(void) = ExitFunction : +# 962| m0_4(int[1000]) = Uninitialized : r0_3 +# 962| m0_5(unknown) = Chi : m0_1, m0_4 +# 962| r0_6(int) = Constant[0] : +# 962| r0_7(glval) = PointerAdd : r0_3, r0_6 +# 962| r0_8(unknown[8]) = Constant[0] : +# 962| m0_9(unknown[8]) = Store : r0_7, r0_8 +# 962| m0_10(unknown) = Chi : m0_5, m0_9 +# 962| r0_11(int) = Constant[2] : +# 962| r0_12(glval) = PointerAdd : r0_3, r0_11 +# 962| r0_13(int) = Constant[10002] : +# 962| m0_14(int) = Store : r0_12, r0_13 +# 962| m0_15(unknown) = Chi : m0_10, m0_14 +# 962| r0_16(int) = Constant[3] : +# 962| r0_17(glval) = PointerAdd : r0_3, r0_16 +# 962| r0_18(unknown[3588]) = Constant[0] : +# 962| m0_19(unknown[3588]) = Store : r0_17, r0_18 +# 962| m0_20(unknown) = Chi : m0_15, m0_19 +# 962| r0_21(int) = Constant[900] : +# 962| r0_22(glval) = PointerAdd : r0_3, r0_21 +# 962| r0_23(int) = Constant[10900] : +# 962| m0_24(int) = Store : r0_22, r0_23 +# 962| m0_25(unknown) = Chi : m0_20, m0_24 +# 962| r0_26(int) = Constant[901] : +# 962| r0_27(glval) = PointerAdd : r0_3, r0_26 +# 962| r0_28(unknown[396]) = Constant[0] : +# 962| m0_29(unknown[396]) = Store : r0_27, r0_28 +# 962| m0_30(unknown) = Chi : m0_25, m0_29 +# 963| r0_31(glval) = VariableAddress[#return] : +# 963| r0_32(glval) = VariableAddress[a1] : +# 963| r0_33(int *) = Convert : r0_32 +# 963| r0_34(int) = Constant[900] : +# 963| r0_35(int *) = PointerAdd[4] : r0_33, r0_34 +# 963| r0_36(int) = Load : r0_35, mu0_2 +# 963| m0_37(int) = Store : r0_31, r0_36 +# 961| r0_38(glval) = VariableAddress[#return] : +# 961| v0_39(void) = ReturnValue : r0_38, m0_37 +# 961| v0_40(void) = UnmodeledUse : mu* +# 961| v0_41(void) = ExitFunction 
: # 966| IfStmtWithDeclaration(int, int) -> void # 966| Block 0 From 309b703e47c93858979635df65cd7f28387f199b Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Fri, 30 Nov 2018 12:06:19 -0800 Subject: [PATCH 04/15] C++: Models for side-effect-free functions This commit adds a new model interface that describes the known side effects (or lack thereof) of a library function. Does it read memory, does it write memory, and do any of its parameters escape? Initially, we have models for just two Standard Library functions: `std::move` and `std::forward`, which neither read nor write memory, and do not escape their parameter. IR construction has been updated to insert the correct side effect instruction (or no side effect instruction) based on the model. --- .../raw/internal/TranslatedCall.qll | 40 ++++++++++- cpp/ql/src/semmle/code/cpp/models/Models.qll | 1 + .../implementations/IdentityFunction.qll | 35 ++++++++++ .../models/interfaces/SideEffectFunction.qll | 68 +++++++++++++++++++ 4 files changed, 141 insertions(+), 3 deletions(-) create mode 100644 cpp/ql/src/semmle/code/cpp/models/implementations/IdentityFunction.qll create mode 100644 cpp/ql/src/semmle/code/cpp/models/interfaces/SideEffectFunction.qll diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCall.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCall.qll index 5f831993755b..91260e2d3052 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCall.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCall.qll @@ -1,6 +1,7 @@ import cpp private import semmle.code.cpp.ir.implementation.Opcode private import semmle.code.cpp.ir.internal.OperandTag +private import semmle.code.cpp.models.interfaces.SideEffectFunction private import InstructionTag private import TranslatedElement private import TranslatedExpr @@ -37,9 +38,18 @@ abstract class TranslatedCall extends TranslatedExpr { isGLValue = false ) or ( + 
hasSideEffect() and tag = CallSideEffectTag() and - opcode instanceof Opcode::CallSideEffect and - resultType instanceof UnknownType and + ( + if hasWriteSideEffect() then ( + opcode instanceof Opcode::CallSideEffect and + resultType instanceof UnknownType + ) + else ( + opcode instanceof Opcode::CallReadSideEffect and + resultType instanceof VoidType + ) + ) and isGLValue = false ) } @@ -68,9 +78,13 @@ abstract class TranslatedCall extends TranslatedExpr { ( ( tag = CallTag() and - result = getInstruction(CallSideEffectTag()) + if hasSideEffect() then + result = getInstruction(CallSideEffectTag()) + else + result = getParent().getChildSuccessor(this) ) or ( + hasSideEffect() and tag = CallSideEffectTag() and result = getParent().getChildSuccessor(this) ) @@ -183,6 +197,18 @@ abstract class TranslatedCall extends TranslatedExpr { * Holds if the call has any arguments, not counting the `this` argument. */ abstract predicate hasArguments(); + + predicate hasReadSideEffect() { + any() + } + + predicate hasWriteSideEffect() { + any() + } + + private predicate hasSideEffect() { + hasReadSideEffect() or hasWriteSideEffect() + } } /** @@ -280,6 +306,14 @@ class TranslatedFunctionCall extends TranslatedCallExpr, TranslatedDirectCall { override Function getInstructionFunction(InstructionTag tag) { tag = CallTargetTag() and result = funcCall.getTarget() } + + override predicate hasReadSideEffect() { + functionReadsMemory(funcCall.getTarget()) + } + + override predicate hasWriteSideEffect() { + functionWritesMemory(funcCall.getTarget()) + } } /** diff --git a/cpp/ql/src/semmle/code/cpp/models/Models.qll b/cpp/ql/src/semmle/code/cpp/models/Models.qll index 672354c9625c..71526b8fca27 100644 --- a/cpp/ql/src/semmle/code/cpp/models/Models.qll +++ b/cpp/ql/src/semmle/code/cpp/models/Models.qll @@ -1,3 +1,4 @@ +private import implementations.IdentityFunction private import implementations.Inet private import implementations.Memcpy private import implementations.Printf diff --git 
a/cpp/ql/src/semmle/code/cpp/models/implementations/IdentityFunction.qll b/cpp/ql/src/semmle/code/cpp/models/implementations/IdentityFunction.qll new file mode 100644 index 000000000000..1399e0e8a397 --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/models/implementations/IdentityFunction.qll @@ -0,0 +1,35 @@ +import semmle.code.cpp.Function +import semmle.code.cpp.models.interfaces.DataFlow +import semmle.code.cpp.models.interfaces.SideEffectFunction + +/** + * The standard function templates `std::move` and `std::forward` + */ +class IdentityFunction extends DataFlowFunction, SideEffectFunction { + IdentityFunction() { + this.getNamespace().getParentNamespace() instanceof GlobalNamespace and + this.getNamespace().getName() = "std" and + ( + this.getName() = "move" or + this.getName() = "forward" + ) + } + + override predicate readsMemory() { + none() + } + + override predicate writesMemory() { + none() + } + + override predicate parameterEscapes(int index) { + // Note that returning the value of the parameter does not count as escaping. + none() + } + + override predicate hasDataFlow(FunctionInput input, FunctionOutput output) { + // These functions simply return the argument value. + input.isInParameter(0) and output.isOutReturnValue() + } +} diff --git a/cpp/ql/src/semmle/code/cpp/models/interfaces/SideEffectFunction.qll b/cpp/ql/src/semmle/code/cpp/models/interfaces/SideEffectFunction.qll new file mode 100644 index 000000000000..134f170cf19f --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/models/interfaces/SideEffectFunction.qll @@ -0,0 +1,68 @@ +/** + * Provides an abstract class for accurate side-effect modeling of library + * functions when source code is not available. To use this QL library, + * create a QL class extending `SideEffectFunction` with a characteristic + * predicate that selects the function or set of functions you are modeling.
+ * Within that class, override the predicates provided by `SideEffectFunction` + * to match the side effects of that function. + */ + +import semmle.code.cpp.Function +import semmle.code.cpp.models.Models + +/** + * Models the side effects of a library function. + */ +abstract class SideEffectFunction extends Function { + /** + * Holds if the function may read from memory that was defined before entry to the function. This + * memory could be from global variables, or from other memory that was reachable from a pointer + * that was passed into the function. + */ + abstract predicate readsMemory(); + + /** + * Holds if the function may write to memory that remains allocated after the function returns. + * This memory could be from global variables, or from other memory that was reachable from a + * pointer that was passed into the function. + */ + abstract predicate writesMemory(); + + /** + * Holds if any address passed to the parameter at the specified index is retained after the + * function returns. + */ + abstract predicate parameterEscapes(int index); +} + +/** + * Holds if the function `f` may read from memory that was defined before entry to the function. + * This memory could be from global variables, or from other memory that was reachable from a + * pointer that was passed into the function. + */ +predicate functionReadsMemory(Function f) { + not exists(SideEffectFunction sideEffect | + sideEffect = f and not sideEffect.readsMemory() + ) +} + +/** +* Holds if the function `f` may write to memory that remains allocated after the function returns. +* This memory could be from global variables, or from other memory that was reachable from a +* pointer that was passed into the function. +*/ +predicate functionWritesMemory(Function f) { + not exists(SideEffectFunction sideEffect | + sideEffect = f and not sideEffect.writesMemory() + ) +} + +/** + * Holds if any address passed to the parameter at the specified index is retained after the + * function returns.
+ */ +predicate functionParameterEscapes(Function f, int index) { + not exists(SideEffectFunction sideEffect | + exists(f.getParameter(index)) and sideEffect = f and not sideEffect.parameterEscapes(index) + ) +} From 7eb47f3f826c1644bf4e2401327c7c4a5edd81d8 Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Fri, 30 Nov 2018 16:51:14 -0800 Subject: [PATCH 05/15] C++: A few more IR dataflow tweaks Made `Node::getType()`, `Node::asParameter()`, and `Node::asUninitialized()` operate directly on the IR. This actually fixed several diffs compared to the AST dataflow, because `getType()` wasn't holding for nodes that weren't `Exprs`. Made `Uninitialized` a `VariableInstruction`. This makes it consistent with `InitializeParameter`. --- .../cpp/ir/dataflow/internal/DataFlowUtil.qll | 10 +- .../aliased_ssa/Instruction.qll | 9 +- .../cpp/ir/implementation/raw/Instruction.qll | 9 +- .../internal/TranslatedDeclarationEntry.qll | 5 +- .../unaliased_ssa/Instruction.qll | 9 +- .../ir/ir/aliased_ssa_ir.expected | 92 +++++++++---------- .../test/library-tests/ir/ir/raw_ir.expected | 92 +++++++++---------- .../ir/ir/unaliased_ssa_ir.expected | 92 +++++++++---------- .../GlobalValueNumbering/ir_gvn.expected | 28 +++--- 9 files changed, 183 insertions(+), 163 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 9881a2ef1cb5..dfacd48a68fa 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -21,23 +21,21 @@ class Node extends Instruction { /** Gets the type of this node. */ Type getType() { - result = this.asExpr().getType() - or - result = this.getAST().(Variable).getType() + result = this.getResultType() } /** Gets the expression corresponding to this node, if any. 
*/ Expr asExpr() { result = this.getUnconvertedResultExpression() } /** Gets the parameter corresponding to this node, if any. */ - Parameter asParameter() { result = this.(ParameterNode).getParameter() } + Parameter asParameter() { result = this.(InitializeParameterInstruction).getParameter() } /** * Gets the uninitialized local variable corresponding to this node, if * any. */ LocalVariable asUninitialized() { - result = this.(UninitializedNode).getLocalVariable() + result = this.(UninitializedInstruction).getLocalVariable() } /** @@ -73,8 +71,6 @@ class ParameterNode extends Node, InitializeParameterInstruction { * flow graph. */ class UninitializedNode extends Node, UninitializedInstruction { - /** Gets the uninitialized local variable corresponding to this node. */ - LocalVariable getLocalVariable() { result = this.getAST().(VariableDeclarationEntry).getDeclaration()} } /** diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll index 36722d4372ae..7fc7f69ec74b 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll @@ -595,7 +595,7 @@ class FieldAddressInstruction extends FieldInstruction { } } -class UninitializedInstruction extends Instruction { +class UninitializedInstruction extends VariableInstruction { UninitializedInstruction() { opcode instanceof Opcode::Uninitialized } @@ -603,6 +603,13 @@ class UninitializedInstruction extends Instruction { override final MemoryAccessKind getResultMemoryAccess() { result instanceof IndirectMemoryAccess } + + /** + * Gets the `LocalVariable` that is uninitialized. 
+ */ + final LocalVariable getLocalVariable() { + result = var.(IRUserVariable).getVariable() + } } class NoOpInstruction extends Instruction { diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll index 36722d4372ae..7fc7f69ec74b 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll @@ -595,7 +595,7 @@ class FieldAddressInstruction extends FieldInstruction { } } -class UninitializedInstruction extends Instruction { +class UninitializedInstruction extends VariableInstruction { UninitializedInstruction() { opcode instanceof Opcode::Uninitialized } @@ -603,6 +603,13 @@ class UninitializedInstruction extends Instruction { override final MemoryAccessKind getResultMemoryAccess() { result instanceof IndirectMemoryAccess } + + /** + * Gets the `LocalVariable` that is uninitialized. + */ + final LocalVariable getLocalVariable() { + result = var.(IRUserVariable).getVariable() + } } class NoOpInstruction extends Instruction { diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedDeclarationEntry.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedDeclarationEntry.qll index da9a3f1c2ddf..6b2fb15549e3 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedDeclarationEntry.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedDeclarationEntry.qll @@ -142,7 +142,10 @@ abstract class TranslatedVariableDeclaration extends TranslatedElement, Initiali } override IRVariable getInstructionVariable(InstructionTag tag) { - tag = InitializerVariableAddressTag() and + ( + tag = InitializerVariableAddressTag() or + hasUninitializedInstruction() and tag = InitializerStoreTag() + ) and result = getIRUserVariable(getFunction(), getVariable()) } diff --git 
a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll index 36722d4372ae..7fc7f69ec74b 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll @@ -595,7 +595,7 @@ class FieldAddressInstruction extends FieldInstruction { } } -class UninitializedInstruction extends Instruction { +class UninitializedInstruction extends VariableInstruction { UninitializedInstruction() { opcode instanceof Opcode::Uninitialized } @@ -603,6 +603,13 @@ class UninitializedInstruction extends Instruction { override final MemoryAccessKind getResultMemoryAccess() { result instanceof IndirectMemoryAccess } + + /** + * Gets the `LocalVariable` that is uninitialized. + */ + final LocalVariable getLocalVariable() { + result = var.(IRUserVariable).getVariable() + } } class NoOpInstruction extends Instruction { diff --git a/cpp/ql/test/library-tests/ir/ir/aliased_ssa_ir.expected b/cpp/ql/test/library-tests/ir/ir/aliased_ssa_ir.expected index 805a1e565e2a..082f902fb23a 100644 --- a/cpp/ql/test/library-tests/ir/ir/aliased_ssa_ir.expected +++ b/cpp/ql/test/library-tests/ir/ir/aliased_ssa_ir.expected @@ -5,7 +5,7 @@ bad_asts.cpp: # 14| m0_1(unknown) = AliasedDefinition : # 14| mu0_2(unknown) = UnmodeledDefinition : # 15| r0_3(glval) = VariableAddress[s] : -# 15| m0_4(S) = Uninitialized : r0_3 +# 15| m0_4(S) = Uninitialized[s] : r0_3 # 15| m0_5(unknown) = Chi : m0_1, m0_4 # 15| r0_6(glval) = FieldAddress[x] : r0_3 # 15| r0_7(int) = Constant[0] : @@ -189,7 +189,7 @@ ir.cpp: # 50| r0_5(glval) = VariableAddress[y] : # 50| m0_6(int) = InitializeParameter[y] : r0_5 # 51| r0_7(glval) = VariableAddress[z] : -# 51| m0_8(int) = Uninitialized : r0_7 +# 51| m0_8(int) = Uninitialized[z] : r0_7 # 53| r0_9(glval) = VariableAddress[x] : # 53| r0_10(int) = Load : r0_9, m0_4 # 53| r0_11(glval) = 
VariableAddress[y] : @@ -362,7 +362,7 @@ ir.cpp: # 87| r0_5(glval) = VariableAddress[y] : # 87| m0_6(int) = InitializeParameter[y] : r0_5 # 88| r0_7(glval) = VariableAddress[b] : -# 88| m0_8(bool) = Uninitialized : r0_7 +# 88| m0_8(bool) = Uninitialized[b] : r0_7 # 90| r0_9(glval) = VariableAddress[x] : # 90| r0_10(int) = Load : r0_9, m0_4 # 90| r0_11(glval) = VariableAddress[y] : @@ -418,7 +418,7 @@ ir.cpp: # 98| r0_3(glval) = VariableAddress[x] : # 98| m0_4(int) = InitializeParameter[x] : r0_3 # 99| r0_5(glval) = VariableAddress[y] : -# 99| m0_6(int) = Uninitialized : r0_5 +# 99| m0_6(int) = Uninitialized[y] : r0_5 # 101| r0_7(glval) = VariableAddress[x] : # 101| r0_8(int) = Load : r0_7, m0_4 # 101| r0_9(int) = Constant[1] : @@ -460,7 +460,7 @@ ir.cpp: # 107| r0_3(glval) = VariableAddress[x] : # 107| m0_4(int) = InitializeParameter[x] : r0_3 # 108| r0_5(glval) = VariableAddress[p] : -# 108| m0_6(int *) = Uninitialized : r0_5 +# 108| m0_6(int *) = Uninitialized[p] : r0_5 # 110| r0_7(glval) = VariableAddress[x] : # 110| r0_8(int) = Load : r0_7, mu0_2 # 110| r0_9(int) = Constant[1] : @@ -490,7 +490,7 @@ ir.cpp: # 114| r0_5(glval) = VariableAddress[y] : # 114| m0_6(double) = InitializeParameter[y] : r0_5 # 115| r0_7(glval) = VariableAddress[z] : -# 115| m0_8(double) = Uninitialized : r0_7 +# 115| m0_8(double) = Uninitialized[z] : r0_7 # 117| r0_9(glval) = VariableAddress[x] : # 117| r0_10(double) = Load : r0_9, m0_4 # 117| r0_11(glval) = VariableAddress[y] : @@ -572,7 +572,7 @@ ir.cpp: # 133| r0_5(glval) = VariableAddress[y] : # 133| m0_6(double) = InitializeParameter[y] : r0_5 # 134| r0_7(glval) = VariableAddress[b] : -# 134| m0_8(bool) = Uninitialized : r0_7 +# 134| m0_8(bool) = Uninitialized[b] : r0_7 # 136| r0_9(glval) = VariableAddress[x] : # 136| r0_10(double) = Load : r0_9, m0_4 # 136| r0_11(glval) = VariableAddress[y] : @@ -628,7 +628,7 @@ ir.cpp: # 144| r0_3(glval) = VariableAddress[x] : # 144| m0_4(float) = InitializeParameter[x] : r0_3 # 145| r0_5(glval) = 
VariableAddress[y] : -# 145| m0_6(float) = Uninitialized : r0_5 +# 145| m0_6(float) = Uninitialized[y] : r0_5 # 147| r0_7(glval) = VariableAddress[x] : # 147| r0_8(float) = Load : r0_7, m0_4 # 147| r0_9(float) = Constant[1.0] : @@ -672,9 +672,9 @@ ir.cpp: # 153| r0_5(glval) = VariableAddress[i] : # 153| m0_6(int) = InitializeParameter[i] : r0_5 # 154| r0_7(glval) = VariableAddress[q] : -# 154| m0_8(int *) = Uninitialized : r0_7 +# 154| m0_8(int *) = Uninitialized[q] : r0_7 # 155| r0_9(glval) = VariableAddress[b] : -# 155| m0_10(bool) = Uninitialized : r0_9 +# 155| m0_10(bool) = Uninitialized[b] : r0_9 # 157| r0_11(glval) = VariableAddress[p] : # 157| r0_12(int *) = Load : r0_11, m0_4 # 157| r0_13(glval) = VariableAddress[i] : @@ -748,7 +748,7 @@ ir.cpp: # 171| r0_5(glval) = VariableAddress[i] : # 171| m0_6(int) = InitializeParameter[i] : r0_5 # 172| r0_7(glval) = VariableAddress[x] : -# 172| m0_8(int) = Uninitialized : r0_7 +# 172| m0_8(int) = Uninitialized[x] : r0_7 # 174| r0_9(glval) = VariableAddress[p] : # 174| r0_10(int *) = Load : r0_9, m0_4 # 174| r0_11(glval) = VariableAddress[i] : @@ -784,7 +784,7 @@ ir.cpp: # 178| m0_41(int) = Store : r0_40, r0_35 # 178| m0_42(unknown) = Chi : m0_33, m0_41 # 180| r0_43(glval) = VariableAddress[a] : -# 180| m0_44(int[10]) = Uninitialized : r0_43 +# 180| m0_44(int[10]) = Uninitialized[a] : r0_43 # 181| r0_45(glval) = VariableAddress[a] : # 181| r0_46(int *) = Convert : r0_45 # 181| r0_47(glval) = VariableAddress[i] : @@ -867,7 +867,7 @@ ir.cpp: # 193| r0_5(glval) = VariableAddress[q] : # 193| m0_6(int *) = InitializeParameter[q] : r0_5 # 194| r0_7(glval) = VariableAddress[b] : -# 194| m0_8(bool) = Uninitialized : r0_7 +# 194| m0_8(bool) = Uninitialized[b] : r0_7 # 196| r0_9(glval) = VariableAddress[p] : # 196| r0_10(int *) = Load : r0_9, m0_4 # 196| r0_11(glval) = VariableAddress[q] : @@ -923,7 +923,7 @@ ir.cpp: # 204| r0_3(glval) = VariableAddress[p] : # 204| m0_4(int *) = InitializeParameter[p] : r0_3 # 205| r0_5(glval) = 
VariableAddress[q] : -# 205| m0_6(int *) = Uninitialized : r0_5 +# 205| m0_6(int *) = Uninitialized[q] : r0_5 # 207| r0_7(glval) = VariableAddress[p] : # 207| r0_8(int *) = Load : r0_7, m0_4 # 207| r0_9(int) = Constant[1] : @@ -1007,7 +1007,7 @@ ir.cpp: # 230| m0_1(unknown) = AliasedDefinition : # 230| mu0_2(unknown) = UnmodeledDefinition : # 231| r0_3(glval) = VariableAddress[x] : -# 231| m0_4(int) = Uninitialized : r0_3 +# 231| m0_4(int) = Uninitialized[x] : r0_3 # 232| r0_5(glval) = VariableAddress[y] : # 232| r0_6(glval) = VariableAddress[x] : # 232| r0_7(int) = Load : r0_6, m0_4 @@ -1170,7 +1170,7 @@ ir.cpp: # 265| m0_1(unknown) = AliasedDefinition : # 265| mu0_2(unknown) = UnmodeledDefinition : # 266| r0_3(glval) = VariableAddress[j] : -# 266| m0_4(int) = Uninitialized : r0_3 +# 266| m0_4(int) = Uninitialized[j] : r0_3 #-----| Goto -> Block 2 # 265| Block 1 @@ -1721,7 +1721,7 @@ ir.cpp: # 384| r0_3(glval) = VariableAddress[x] : # 384| m0_4(int) = InitializeParameter[x] : r0_3 # 385| r0_5(glval) = VariableAddress[y] : -# 385| m0_6(int) = Uninitialized : r0_5 +# 385| m0_6(int) = Uninitialized[y] : r0_5 # 386| r0_7(glval) = VariableAddress[x] : # 386| r0_8(int) = Load : r0_7, m0_4 # 386| v0_9(void) = Switch : r0_8 @@ -1816,7 +1816,7 @@ ir.cpp: # 426| m0_1(unknown) = AliasedDefinition : # 426| mu0_2(unknown) = UnmodeledDefinition : # 427| r0_3(glval) = VariableAddress[pt] : -# 427| m0_4(Point) = Uninitialized : r0_3 +# 427| m0_4(Point) = Uninitialized[pt] : r0_3 # 428| r0_5(int) = Constant[5] : # 428| r0_6(glval) = VariableAddress[pt] : # 428| r0_7(glval) = FieldAddress[x] : r0_6 @@ -1848,7 +1848,7 @@ ir.cpp: # 433| r0_5(glval) = VariableAddress[b] : # 433| m0_6(bool) = InitializeParameter[b] : r0_5 # 434| r0_7(glval) = VariableAddress[x] : -# 434| m0_8(int) = Uninitialized : r0_7 +# 434| m0_8(int) = Uninitialized[x] : r0_7 # 435| r0_9(glval) = VariableAddress[a] : # 435| r0_10(bool) = Load : r0_9, m0_4 # 435| v0_11(void) = ConditionalBranch : r0_10 @@ -1910,7 
+1910,7 @@ ir.cpp: # 447| r0_5(glval) = VariableAddress[b] : # 447| m0_6(bool) = InitializeParameter[b] : r0_5 # 448| r0_7(glval) = VariableAddress[x] : -# 448| m0_8(int) = Uninitialized : r0_7 +# 448| m0_8(int) = Uninitialized[x] : r0_7 # 449| r0_9(glval) = VariableAddress[a] : # 449| r0_10(bool) = Load : r0_9, m0_4 # 449| v0_11(void) = ConditionalBranch : r0_10 @@ -1972,7 +1972,7 @@ ir.cpp: # 461| r0_5(glval) = VariableAddress[b] : # 461| m0_6(bool) = InitializeParameter[b] : r0_5 # 462| r0_7(glval) = VariableAddress[x] : -# 462| m0_8(int) = Uninitialized : r0_7 +# 462| m0_8(int) = Uninitialized[x] : r0_7 # 463| r0_9(glval) = VariableAddress[a] : # 463| r0_10(bool) = Load : r0_9, m0_4 # 463| v0_11(void) = ConditionalBranch : r0_10 @@ -2027,7 +2027,7 @@ ir.cpp: # 475| r0_5(glval) = VariableAddress[b] : # 475| m0_6(bool) = InitializeParameter[b] : r0_5 # 476| r0_7(glval) = VariableAddress[x] : -# 476| m0_8(bool) = Uninitialized : r0_7 +# 476| m0_8(bool) = Uninitialized[x] : r0_7 # 477| r0_9(glval) = VariableAddress[a] : # 477| r0_10(bool) = Load : r0_9, m0_4 # 477| v0_11(void) = ConditionalBranch : r0_10 @@ -2177,10 +2177,10 @@ ir.cpp: # 486| r0_3(glval) = VariableAddress[a] : # 486| m0_4(bool) = InitializeParameter[a] : r0_3 # 487| r0_5(glval) = VariableAddress[x] : -# 487| m0_6(int) = Uninitialized : r0_5 +# 487| m0_6(int) = Uninitialized[x] : r0_5 # 487| m0_7(unknown) = Chi : m0_1, m0_6 # 488| r0_8(glval) = VariableAddress[y] : -# 488| m0_9(int) = Uninitialized : r0_8 +# 488| m0_9(int) = Uninitialized[y] : r0_8 # 488| m0_10(unknown) = Chi : m0_7, m0_9 # 489| r0_11(int) = Constant[5] : # 489| r0_12(glval) = VariableAddress[a] : @@ -2277,7 +2277,7 @@ ir.cpp: # 503| r0_5(glval) = VariableAddress[f] : # 503| m0_6(float) = InitializeParameter[f] : r0_5 # 504| r0_7(glval) = VariableAddress[pt1] : -# 504| m0_8(Point) = Uninitialized : r0_7 +# 504| m0_8(Point) = Uninitialized[pt1] : r0_7 # 504| r0_9(glval) = FieldAddress[x] : r0_7 # 504| r0_10(glval) = 
VariableAddress[x] : # 504| r0_11(int) = Load : r0_10, m0_4 @@ -2290,7 +2290,7 @@ ir.cpp: # 504| m0_18(int) = Store : r0_14, r0_17 # 504| m0_19(Point) = Chi : m0_13, m0_18 # 505| r0_20(glval) = VariableAddress[pt2] : -# 505| m0_21(Point) = Uninitialized : r0_20 +# 505| m0_21(Point) = Uninitialized[pt2] : r0_20 # 505| r0_22(glval) = FieldAddress[x] : r0_20 # 505| r0_23(glval) = VariableAddress[x] : # 505| r0_24(int) = Load : r0_23, m0_4 @@ -2301,7 +2301,7 @@ ir.cpp: # 505| m0_29(int) = Store : r0_27, r0_28 # 505| m0_30(Point) = Chi : m0_26, m0_29 # 506| r0_31(glval) = VariableAddress[pt3] : -# 506| m0_32(Point) = Uninitialized : r0_31 +# 506| m0_32(Point) = Uninitialized[pt3] : r0_31 # 506| r0_33(glval) = FieldAddress[x] : r0_31 # 506| r0_34(int) = Constant[0] : # 506| m0_35(int) = Store : r0_33, r0_34 @@ -2331,7 +2331,7 @@ ir.cpp: # 512| r0_5(glval) = VariableAddress[f] : # 512| m0_6(float) = InitializeParameter[f] : r0_5 # 513| r0_7(glval) = VariableAddress[r1] : -# 513| m0_8(Rect) = Uninitialized : r0_7 +# 513| m0_8(Rect) = Uninitialized[r1] : r0_7 # 513| r0_9(glval) = FieldAddress[topLeft] : r0_7 # 513| r0_10(Point) = Constant[0] : # 513| m0_11(Point) = Store : r0_9, r0_10 @@ -2341,7 +2341,7 @@ ir.cpp: # 513| m0_15(Point) = Store : r0_13, r0_14 # 513| m0_16(Rect) = Chi : m0_12, m0_15 # 514| r0_17(glval) = VariableAddress[r2] : -# 514| m0_18(Rect) = Uninitialized : r0_17 +# 514| m0_18(Rect) = Uninitialized[r2] : r0_17 # 514| r0_19(glval) = FieldAddress[topLeft] : r0_17 # 514| r0_20(glval) = FieldAddress[x] : r0_19 # 514| r0_21(glval) = VariableAddress[x] : @@ -2359,7 +2359,7 @@ ir.cpp: # 514| m0_33(Point) = Store : r0_31, r0_32 # 514| m0_34(Rect) = Chi : m0_30, m0_33 # 515| r0_35(glval) = VariableAddress[r3] : -# 515| m0_36(Rect) = Uninitialized : r0_35 +# 515| m0_36(Rect) = Uninitialized[r3] : r0_35 # 515| r0_37(glval) = FieldAddress[topLeft] : r0_35 # 515| r0_38(glval) = FieldAddress[x] : r0_37 # 515| r0_39(glval) = VariableAddress[x] : @@ -2385,7 +2385,7 @@ 
ir.cpp: # 515| m0_59(int) = Store : r0_55, r0_58 # 515| m0_60(Rect) = Chi : m0_54, m0_59 # 516| r0_61(glval) = VariableAddress[r4] : -# 516| m0_62(Rect) = Uninitialized : r0_61 +# 516| m0_62(Rect) = Uninitialized[r4] : r0_61 # 516| r0_63(glval) = FieldAddress[topLeft] : r0_61 # 516| r0_64(glval) = FieldAddress[x] : r0_63 # 516| r0_65(glval) = VariableAddress[x] : @@ -2421,7 +2421,7 @@ ir.cpp: # 519| r0_5(glval) = VariableAddress[f] : # 519| m0_6(float) = InitializeParameter[f] : r0_5 # 520| r0_7(glval) = VariableAddress[a1] : -# 520| m0_8(int[3]) = Uninitialized : r0_7 +# 520| m0_8(int[3]) = Uninitialized[a1] : r0_7 # 520| m0_9(unknown) = Chi : m0_1, m0_8 # 520| r0_10(int) = Constant[0] : # 520| r0_11(glval) = PointerAdd : r0_7, r0_10 @@ -2429,7 +2429,7 @@ ir.cpp: # 520| m0_13(unknown[12]) = Store : r0_11, r0_12 # 520| m0_14(unknown) = Chi : m0_9, m0_13 # 521| r0_15(glval) = VariableAddress[a2] : -# 521| m0_16(int[3]) = Uninitialized : r0_15 +# 521| m0_16(int[3]) = Uninitialized[a2] : r0_15 # 521| m0_17(unknown) = Chi : m0_14, m0_16 # 521| r0_18(int) = Constant[0] : # 521| r0_19(glval) = PointerAdd : r0_15, r0_18 @@ -2450,7 +2450,7 @@ ir.cpp: # 521| m0_34(int) = Store : r0_32, r0_33 # 521| m0_35(unknown) = Chi : m0_30, m0_34 # 522| r0_36(glval) = VariableAddress[a3] : -# 522| m0_37(int[3]) = Uninitialized : r0_36 +# 522| m0_37(int[3]) = Uninitialized[a3] : r0_36 # 522| m0_38(unknown) = Chi : m0_35, m0_37 # 522| r0_39(int) = Constant[0] : # 522| r0_40(glval) = PointerAdd : r0_36, r0_39 @@ -2478,7 +2478,7 @@ ir.cpp: # 530| r0_5(glval) = VariableAddress[f] : # 530| m0_6(float) = InitializeParameter[f] : r0_5 # 531| r0_7(glval) = VariableAddress[u1] : -# 531| m0_8(U) = Uninitialized : r0_7 +# 531| m0_8(U) = Uninitialized[u1] : r0_7 # 531| r0_9(glval) = FieldAddress[d] : r0_7 # 531| r0_10(glval) = VariableAddress[f] : # 531| r0_11(float) = Load : r0_10, m0_6 @@ -2653,9 +2653,9 @@ ir.cpp: # 574| r0_19(char[5]) = Load : r0_18, m0_12 # 574| m0_20(char[5]) = Store : r0_17, 
r0_19 # 575| r0_21(glval) = VariableAddress[b] : -# 575| m0_22(char[2]) = Uninitialized : r0_21 +# 575| m0_22(char[2]) = Uninitialized[b] : r0_21 # 576| r0_23(glval) = VariableAddress[c] : -# 576| m0_24(char[2]) = Uninitialized : r0_23 +# 576| m0_24(char[2]) = Uninitialized[c] : r0_23 # 576| m0_25(unknown) = Chi : m0_12, m0_24 # 576| r0_26(int) = Constant[0] : # 576| r0_27(glval) = PointerAdd : r0_23, r0_26 @@ -2663,7 +2663,7 @@ ir.cpp: # 576| m0_29(unknown[2]) = Store : r0_27, r0_28 # 576| m0_30(unknown) = Chi : m0_25, m0_29 # 577| r0_31(glval) = VariableAddress[d] : -# 577| m0_32(char[2]) = Uninitialized : r0_31 +# 577| m0_32(char[2]) = Uninitialized[d] : r0_31 # 577| m0_33(unknown) = Chi : m0_30, m0_32 # 577| r0_34(int) = Constant[0] : # 577| r0_35(glval) = PointerAdd : r0_31, r0_34 @@ -2676,7 +2676,7 @@ ir.cpp: # 577| m0_42(char) = Store : r0_40, r0_41 # 577| m0_43(unknown) = Chi : m0_38, m0_42 # 578| r0_44(glval) = VariableAddress[e] : -# 578| m0_45(char[2]) = Uninitialized : r0_44 +# 578| m0_45(char[2]) = Uninitialized[e] : r0_44 # 578| m0_46(unknown) = Chi : m0_43, m0_45 # 578| r0_47(int) = Constant[0] : # 578| r0_48(glval) = PointerAdd : r0_44, r0_47 @@ -2689,7 +2689,7 @@ ir.cpp: # 578| m0_55(char) = Store : r0_53, r0_54 # 578| m0_56(unknown) = Chi : m0_51, m0_55 # 579| r0_57(glval) = VariableAddress[f] : -# 579| m0_58(char[3]) = Uninitialized : r0_57 +# 579| m0_58(char[3]) = Uninitialized[f] : r0_57 # 579| m0_59(unknown) = Chi : m0_56, m0_58 # 579| r0_60(int) = Constant[0] : # 579| r0_61(glval) = PointerAdd : r0_57, r0_60 @@ -2891,7 +2891,7 @@ ir.cpp: # 645| m0_17(int) = Store : r0_16, r0_14 # 645| m0_18(unknown) = Chi : m0_13, m0_17 # 646| r0_19(glval) = VariableAddress[x] : -# 646| m0_20(int) = Uninitialized : r0_19 +# 646| m0_20(int) = Uninitialized[x] : r0_19 # 647| r0_21(C *) = CopyValue : r0_3 # 647| r0_22(glval) = FieldAddress[m_a] : r0_21 # 647| r0_23(int) = Load : r0_22, m0_18 @@ -3038,7 +3038,7 @@ ir.cpp: # 691| m0_1(unknown) = AliasedDefinition 
: # 691| mu0_2(unknown) = UnmodeledDefinition : # 692| r0_3(glval) = VariableAddress[a] : -# 692| m0_4(int[10]) = Uninitialized : r0_3 +# 692| m0_4(int[10]) = Uninitialized[a] : r0_3 # 693| r0_5(glval) = VariableAddress[ra] : # 693| r0_6(glval) = VariableAddress[a] : # 693| m0_7(int(&)[10]) = Store : r0_5, r0_6 @@ -4013,7 +4013,7 @@ ir.cpp: # 871| m0_1(unknown) = AliasedDefinition : # 871| mu0_2(unknown) = UnmodeledDefinition : # 872| r0_3(glval) = VariableAddress[a] : -# 872| m0_4(char[5]) = Uninitialized : r0_3 +# 872| m0_4(char[5]) = Uninitialized[a] : r0_3 # 873| r0_5(glval) = VariableAddress[p] : # 873| r0_6(glval) = VariableAddress[a] : # 873| r0_7(char *) = Convert : r0_6 @@ -4087,14 +4087,14 @@ ir.cpp: # 888| m0_4(int) = InitializeParameter[x] : r0_3 # 888| m0_5(unknown) = Chi : m0_1, m0_4 # 889| r0_6(glval<__va_list_tag[1]>) = VariableAddress[args] : -# 889| m0_7(__va_list_tag[1]) = Uninitialized : r0_6 +# 889| m0_7(__va_list_tag[1]) = Uninitialized[args] : r0_6 # 889| m0_8(unknown) = Chi : m0_5, m0_7 # 891| r0_9(glval<__va_list_tag[1]>) = VariableAddress[args] : # 891| r0_10(__va_list_tag *) = Convert : r0_9 # 891| r0_11(glval) = VariableAddress[x] : # 891| v0_12(void) = VarArgsStart : r0_10, r0_11 # 892| r0_13(glval<__va_list_tag[1]>) = VariableAddress[args2] : -# 892| m0_14(__va_list_tag[1]) = Uninitialized : r0_13 +# 892| m0_14(__va_list_tag[1]) = Uninitialized[args2] : r0_13 # 892| m0_15(unknown) = Chi : m0_8, m0_14 # 893| r0_16(glval<__va_list_tag[1]>) = VariableAddress[args2] : # 893| r0_17(__va_list_tag *) = Convert : r0_16 @@ -4347,7 +4347,7 @@ ir.cpp: # 961| m0_1(unknown) = AliasedDefinition : # 961| mu0_2(unknown) = UnmodeledDefinition : # 962| r0_3(glval) = VariableAddress[a1] : -# 962| m0_4(int[1000]) = Uninitialized : r0_3 +# 962| m0_4(int[1000]) = Uninitialized[a1] : r0_3 # 962| m0_5(unknown) = Chi : m0_1, m0_4 # 962| r0_6(int) = Constant[0] : # 962| r0_7(glval) = PointerAdd : r0_3, r0_6 diff --git 
a/cpp/ql/test/library-tests/ir/ir/raw_ir.expected b/cpp/ql/test/library-tests/ir/ir/raw_ir.expected index 6ad6fd5a6853..ccaf2a9969f4 100644 --- a/cpp/ql/test/library-tests/ir/ir/raw_ir.expected +++ b/cpp/ql/test/library-tests/ir/ir/raw_ir.expected @@ -5,7 +5,7 @@ bad_asts.cpp: # 14| mu0_1(unknown) = AliasedDefinition : # 14| mu0_2(unknown) = UnmodeledDefinition : # 15| r0_3(glval) = VariableAddress[s] : -# 15| mu0_4(S) = Uninitialized : r0_3 +# 15| mu0_4(S) = Uninitialized[s] : r0_3 # 15| r0_5(glval) = FieldAddress[x] : r0_3 # 15| r0_6(int) = Constant[0] : # 15| mu0_7(int) = Store : r0_5, r0_6 @@ -186,7 +186,7 @@ ir.cpp: # 50| r0_5(glval) = VariableAddress[y] : # 50| mu0_6(int) = InitializeParameter[y] : r0_5 # 51| r0_7(glval) = VariableAddress[z] : -# 51| mu0_8(int) = Uninitialized : r0_7 +# 51| mu0_8(int) = Uninitialized[z] : r0_7 # 53| r0_9(glval) = VariableAddress[x] : # 53| r0_10(int) = Load : r0_9, mu0_2 # 53| r0_11(glval) = VariableAddress[y] : @@ -359,7 +359,7 @@ ir.cpp: # 87| r0_5(glval) = VariableAddress[y] : # 87| mu0_6(int) = InitializeParameter[y] : r0_5 # 88| r0_7(glval) = VariableAddress[b] : -# 88| mu0_8(bool) = Uninitialized : r0_7 +# 88| mu0_8(bool) = Uninitialized[b] : r0_7 # 90| r0_9(glval) = VariableAddress[x] : # 90| r0_10(int) = Load : r0_9, mu0_2 # 90| r0_11(glval) = VariableAddress[y] : @@ -415,7 +415,7 @@ ir.cpp: # 98| r0_3(glval) = VariableAddress[x] : # 98| mu0_4(int) = InitializeParameter[x] : r0_3 # 99| r0_5(glval) = VariableAddress[y] : -# 99| mu0_6(int) = Uninitialized : r0_5 +# 99| mu0_6(int) = Uninitialized[y] : r0_5 # 101| r0_7(glval) = VariableAddress[x] : # 101| r0_8(int) = Load : r0_7, mu0_2 # 101| r0_9(int) = Constant[1] : @@ -457,7 +457,7 @@ ir.cpp: # 107| r0_3(glval) = VariableAddress[x] : # 107| mu0_4(int) = InitializeParameter[x] : r0_3 # 108| r0_5(glval) = VariableAddress[p] : -# 108| mu0_6(int *) = Uninitialized : r0_5 +# 108| mu0_6(int *) = Uninitialized[p] : r0_5 # 110| r0_7(glval) = VariableAddress[x] : # 110| 
r0_8(int) = Load : r0_7, mu0_2 # 110| r0_9(int) = Constant[1] : @@ -487,7 +487,7 @@ ir.cpp: # 114| r0_5(glval) = VariableAddress[y] : # 114| mu0_6(double) = InitializeParameter[y] : r0_5 # 115| r0_7(glval) = VariableAddress[z] : -# 115| mu0_8(double) = Uninitialized : r0_7 +# 115| mu0_8(double) = Uninitialized[z] : r0_7 # 117| r0_9(glval) = VariableAddress[x] : # 117| r0_10(double) = Load : r0_9, mu0_2 # 117| r0_11(glval) = VariableAddress[y] : @@ -569,7 +569,7 @@ ir.cpp: # 133| r0_5(glval) = VariableAddress[y] : # 133| mu0_6(double) = InitializeParameter[y] : r0_5 # 134| r0_7(glval) = VariableAddress[b] : -# 134| mu0_8(bool) = Uninitialized : r0_7 +# 134| mu0_8(bool) = Uninitialized[b] : r0_7 # 136| r0_9(glval) = VariableAddress[x] : # 136| r0_10(double) = Load : r0_9, mu0_2 # 136| r0_11(glval) = VariableAddress[y] : @@ -625,7 +625,7 @@ ir.cpp: # 144| r0_3(glval) = VariableAddress[x] : # 144| mu0_4(float) = InitializeParameter[x] : r0_3 # 145| r0_5(glval) = VariableAddress[y] : -# 145| mu0_6(float) = Uninitialized : r0_5 +# 145| mu0_6(float) = Uninitialized[y] : r0_5 # 147| r0_7(glval) = VariableAddress[x] : # 147| r0_8(float) = Load : r0_7, mu0_2 # 147| r0_9(float) = Constant[1.0] : @@ -669,9 +669,9 @@ ir.cpp: # 153| r0_5(glval) = VariableAddress[i] : # 153| mu0_6(int) = InitializeParameter[i] : r0_5 # 154| r0_7(glval) = VariableAddress[q] : -# 154| mu0_8(int *) = Uninitialized : r0_7 +# 154| mu0_8(int *) = Uninitialized[q] : r0_7 # 155| r0_9(glval) = VariableAddress[b] : -# 155| mu0_10(bool) = Uninitialized : r0_9 +# 155| mu0_10(bool) = Uninitialized[b] : r0_9 # 157| r0_11(glval) = VariableAddress[p] : # 157| r0_12(int *) = Load : r0_11, mu0_2 # 157| r0_13(glval) = VariableAddress[i] : @@ -745,7 +745,7 @@ ir.cpp: # 171| r0_5(glval) = VariableAddress[i] : # 171| mu0_6(int) = InitializeParameter[i] : r0_5 # 172| r0_7(glval) = VariableAddress[x] : -# 172| mu0_8(int) = Uninitialized : r0_7 +# 172| mu0_8(int) = Uninitialized[x] : r0_7 # 174| r0_9(glval) = 
VariableAddress[p] : # 174| r0_10(int *) = Load : r0_9, mu0_2 # 174| r0_11(glval) = VariableAddress[i] : @@ -779,7 +779,7 @@ ir.cpp: # 178| r0_39(int *) = PointerAdd[4] : r0_36, r0_38 # 178| mu0_40(int) = Store : r0_39, r0_34 # 180| r0_41(glval) = VariableAddress[a] : -# 180| mu0_42(int[10]) = Uninitialized : r0_41 +# 180| mu0_42(int[10]) = Uninitialized[a] : r0_41 # 181| r0_43(glval) = VariableAddress[a] : # 181| r0_44(int *) = Convert : r0_43 # 181| r0_45(glval) = VariableAddress[i] : @@ -860,7 +860,7 @@ ir.cpp: # 193| r0_5(glval) = VariableAddress[q] : # 193| mu0_6(int *) = InitializeParameter[q] : r0_5 # 194| r0_7(glval) = VariableAddress[b] : -# 194| mu0_8(bool) = Uninitialized : r0_7 +# 194| mu0_8(bool) = Uninitialized[b] : r0_7 # 196| r0_9(glval) = VariableAddress[p] : # 196| r0_10(int *) = Load : r0_9, mu0_2 # 196| r0_11(glval) = VariableAddress[q] : @@ -916,7 +916,7 @@ ir.cpp: # 204| r0_3(glval) = VariableAddress[p] : # 204| mu0_4(int *) = InitializeParameter[p] : r0_3 # 205| r0_5(glval) = VariableAddress[q] : -# 205| mu0_6(int *) = Uninitialized : r0_5 +# 205| mu0_6(int *) = Uninitialized[q] : r0_5 # 207| r0_7(glval) = VariableAddress[p] : # 207| r0_8(int *) = Load : r0_7, mu0_2 # 207| r0_9(int) = Constant[1] : @@ -1000,7 +1000,7 @@ ir.cpp: # 230| mu0_1(unknown) = AliasedDefinition : # 230| mu0_2(unknown) = UnmodeledDefinition : # 231| r0_3(glval) = VariableAddress[x] : -# 231| mu0_4(int) = Uninitialized : r0_3 +# 231| mu0_4(int) = Uninitialized[x] : r0_3 # 232| r0_5(glval) = VariableAddress[y] : # 232| r0_6(glval) = VariableAddress[x] : # 232| r0_7(int) = Load : r0_6, mu0_2 @@ -1160,7 +1160,7 @@ ir.cpp: # 265| mu0_1(unknown) = AliasedDefinition : # 265| mu0_2(unknown) = UnmodeledDefinition : # 266| r0_3(glval) = VariableAddress[j] : -# 266| mu0_4(int) = Uninitialized : r0_3 +# 266| mu0_4(int) = Uninitialized[j] : r0_3 #-----| Goto -> Block 2 # 265| Block 1 @@ -1697,7 +1697,7 @@ ir.cpp: # 384| r0_3(glval) = VariableAddress[x] : # 384| mu0_4(int) = 
InitializeParameter[x] : r0_3 # 385| r0_5(glval) = VariableAddress[y] : -# 385| mu0_6(int) = Uninitialized : r0_5 +# 385| mu0_6(int) = Uninitialized[y] : r0_5 # 386| r0_7(glval) = VariableAddress[x] : # 386| r0_8(int) = Load : r0_7, mu0_2 # 386| v0_9(void) = Switch : r0_8 @@ -1792,7 +1792,7 @@ ir.cpp: # 426| mu0_1(unknown) = AliasedDefinition : # 426| mu0_2(unknown) = UnmodeledDefinition : # 427| r0_3(glval) = VariableAddress[pt] : -# 427| mu0_4(Point) = Uninitialized : r0_3 +# 427| mu0_4(Point) = Uninitialized[pt] : r0_3 # 428| r0_5(int) = Constant[5] : # 428| r0_6(glval) = VariableAddress[pt] : # 428| r0_7(glval) = FieldAddress[x] : r0_6 @@ -1822,7 +1822,7 @@ ir.cpp: # 433| r0_5(glval) = VariableAddress[b] : # 433| mu0_6(bool) = InitializeParameter[b] : r0_5 # 434| r0_7(glval) = VariableAddress[x] : -# 434| mu0_8(int) = Uninitialized : r0_7 +# 434| mu0_8(int) = Uninitialized[x] : r0_7 # 435| r0_9(glval) = VariableAddress[a] : # 435| r0_10(bool) = Load : r0_9, mu0_2 # 435| v0_11(void) = ConditionalBranch : r0_10 @@ -1884,7 +1884,7 @@ ir.cpp: # 447| r0_5(glval) = VariableAddress[b] : # 447| mu0_6(bool) = InitializeParameter[b] : r0_5 # 448| r0_7(glval) = VariableAddress[x] : -# 448| mu0_8(int) = Uninitialized : r0_7 +# 448| mu0_8(int) = Uninitialized[x] : r0_7 # 449| r0_9(glval) = VariableAddress[a] : # 449| r0_10(bool) = Load : r0_9, mu0_2 # 449| v0_11(void) = ConditionalBranch : r0_10 @@ -1946,7 +1946,7 @@ ir.cpp: # 461| r0_5(glval) = VariableAddress[b] : # 461| mu0_6(bool) = InitializeParameter[b] : r0_5 # 462| r0_7(glval) = VariableAddress[x] : -# 462| mu0_8(int) = Uninitialized : r0_7 +# 462| mu0_8(int) = Uninitialized[x] : r0_7 # 463| r0_9(glval) = VariableAddress[a] : # 463| r0_10(bool) = Load : r0_9, mu0_2 # 463| v0_11(void) = ConditionalBranch : r0_10 @@ -2001,7 +2001,7 @@ ir.cpp: # 475| r0_5(glval) = VariableAddress[b] : # 475| mu0_6(bool) = InitializeParameter[b] : r0_5 # 476| r0_7(glval) = VariableAddress[x] : -# 476| mu0_8(bool) = Uninitialized : r0_7 
+# 476| mu0_8(bool) = Uninitialized[x] : r0_7 # 477| r0_9(glval) = VariableAddress[a] : # 477| r0_10(bool) = Load : r0_9, mu0_2 # 477| v0_11(void) = ConditionalBranch : r0_10 @@ -2147,9 +2147,9 @@ ir.cpp: # 486| r0_3(glval) = VariableAddress[a] : # 486| mu0_4(bool) = InitializeParameter[a] : r0_3 # 487| r0_5(glval) = VariableAddress[x] : -# 487| mu0_6(int) = Uninitialized : r0_5 +# 487| mu0_6(int) = Uninitialized[x] : r0_5 # 488| r0_7(glval) = VariableAddress[y] : -# 488| mu0_8(int) = Uninitialized : r0_7 +# 488| mu0_8(int) = Uninitialized[y] : r0_7 # 489| r0_9(int) = Constant[5] : # 489| r0_10(glval) = VariableAddress[a] : # 489| r0_11(bool) = Load : r0_10, mu0_2 @@ -2241,7 +2241,7 @@ ir.cpp: # 503| r0_5(glval) = VariableAddress[f] : # 503| mu0_6(float) = InitializeParameter[f] : r0_5 # 504| r0_7(glval) = VariableAddress[pt1] : -# 504| mu0_8(Point) = Uninitialized : r0_7 +# 504| mu0_8(Point) = Uninitialized[pt1] : r0_7 # 504| r0_9(glval) = FieldAddress[x] : r0_7 # 504| r0_10(glval) = VariableAddress[x] : # 504| r0_11(int) = Load : r0_10, mu0_2 @@ -2252,7 +2252,7 @@ ir.cpp: # 504| r0_16(int) = Convert : r0_15 # 504| mu0_17(int) = Store : r0_13, r0_16 # 505| r0_18(glval) = VariableAddress[pt2] : -# 505| mu0_19(Point) = Uninitialized : r0_18 +# 505| mu0_19(Point) = Uninitialized[pt2] : r0_18 # 505| r0_20(glval) = FieldAddress[x] : r0_18 # 505| r0_21(glval) = VariableAddress[x] : # 505| r0_22(int) = Load : r0_21, mu0_2 @@ -2261,7 +2261,7 @@ ir.cpp: # 505| r0_25(int) = Constant[0] : # 505| mu0_26(int) = Store : r0_24, r0_25 # 506| r0_27(glval) = VariableAddress[pt3] : -# 506| mu0_28(Point) = Uninitialized : r0_27 +# 506| mu0_28(Point) = Uninitialized[pt3] : r0_27 # 506| r0_29(glval) = FieldAddress[x] : r0_27 # 506| r0_30(int) = Constant[0] : # 506| mu0_31(int) = Store : r0_29, r0_30 @@ -2289,7 +2289,7 @@ ir.cpp: # 512| r0_5(glval) = VariableAddress[f] : # 512| mu0_6(float) = InitializeParameter[f] : r0_5 # 513| r0_7(glval) = VariableAddress[r1] : -# 513| mu0_8(Rect) = 
Uninitialized : r0_7 +# 513| mu0_8(Rect) = Uninitialized[r1] : r0_7 # 513| r0_9(glval) = FieldAddress[topLeft] : r0_7 # 513| r0_10(Point) = Constant[0] : # 513| mu0_11(Point) = Store : r0_9, r0_10 @@ -2297,7 +2297,7 @@ ir.cpp: # 513| r0_13(Point) = Constant[0] : # 513| mu0_14(Point) = Store : r0_12, r0_13 # 514| r0_15(glval) = VariableAddress[r2] : -# 514| mu0_16(Rect) = Uninitialized : r0_15 +# 514| mu0_16(Rect) = Uninitialized[r2] : r0_15 # 514| r0_17(glval) = FieldAddress[topLeft] : r0_15 # 514| r0_18(glval) = FieldAddress[x] : r0_17 # 514| r0_19(glval) = VariableAddress[x] : @@ -2312,7 +2312,7 @@ ir.cpp: # 514| r0_28(Point) = Constant[0] : # 514| mu0_29(Point) = Store : r0_27, r0_28 # 515| r0_30(glval) = VariableAddress[r3] : -# 515| mu0_31(Rect) = Uninitialized : r0_30 +# 515| mu0_31(Rect) = Uninitialized[r3] : r0_30 # 515| r0_32(glval) = FieldAddress[topLeft] : r0_30 # 515| r0_33(glval) = FieldAddress[x] : r0_32 # 515| r0_34(glval) = VariableAddress[x] : @@ -2334,7 +2334,7 @@ ir.cpp: # 515| r0_50(int) = Convert : r0_49 # 515| mu0_51(int) = Store : r0_47, r0_50 # 516| r0_52(glval) = VariableAddress[r4] : -# 516| mu0_53(Rect) = Uninitialized : r0_52 +# 516| mu0_53(Rect) = Uninitialized[r4] : r0_52 # 516| r0_54(glval) = FieldAddress[topLeft] : r0_52 # 516| r0_55(glval) = FieldAddress[x] : r0_54 # 516| r0_56(glval) = VariableAddress[x] : @@ -2366,13 +2366,13 @@ ir.cpp: # 519| r0_5(glval) = VariableAddress[f] : # 519| mu0_6(float) = InitializeParameter[f] : r0_5 # 520| r0_7(glval) = VariableAddress[a1] : -# 520| mu0_8(int[3]) = Uninitialized : r0_7 +# 520| mu0_8(int[3]) = Uninitialized[a1] : r0_7 # 520| r0_9(int) = Constant[0] : # 520| r0_10(glval) = PointerAdd : r0_7, r0_9 # 520| r0_11(unknown[12]) = Constant[0] : # 520| mu0_12(unknown[12]) = Store : r0_10, r0_11 # 521| r0_13(glval) = VariableAddress[a2] : -# 521| mu0_14(int[3]) = Uninitialized : r0_13 +# 521| mu0_14(int[3]) = Uninitialized[a2] : r0_13 # 521| r0_15(int) = Constant[0] : # 521| r0_16(glval) = 
PointerAdd : r0_13, r0_15 # 521| r0_17(glval) = VariableAddress[x] : @@ -2389,7 +2389,7 @@ ir.cpp: # 521| r0_28(int) = Constant[0] : # 521| mu0_29(int) = Store : r0_27, r0_28 # 522| r0_30(glval) = VariableAddress[a3] : -# 522| mu0_31(int[3]) = Uninitialized : r0_30 +# 522| mu0_31(int[3]) = Uninitialized[a3] : r0_30 # 522| r0_32(int) = Constant[0] : # 522| r0_33(glval) = PointerAdd : r0_30, r0_32 # 522| r0_34(glval) = VariableAddress[x] : @@ -2414,7 +2414,7 @@ ir.cpp: # 530| r0_5(glval) = VariableAddress[f] : # 530| mu0_6(float) = InitializeParameter[f] : r0_5 # 531| r0_7(glval) = VariableAddress[u1] : -# 531| mu0_8(U) = Uninitialized : r0_7 +# 531| mu0_8(U) = Uninitialized[u1] : r0_7 # 531| r0_9(glval) = FieldAddress[d] : r0_7 # 531| r0_10(glval) = VariableAddress[f] : # 531| r0_11(float) = Load : r0_10, mu0_2 @@ -2584,15 +2584,15 @@ ir.cpp: # 574| r0_17(char[5]) = Load : r0_16, mu0_2 # 574| mu0_18(char[5]) = Store : r0_15, r0_17 # 575| r0_19(glval) = VariableAddress[b] : -# 575| mu0_20(char[2]) = Uninitialized : r0_19 +# 575| mu0_20(char[2]) = Uninitialized[b] : r0_19 # 576| r0_21(glval) = VariableAddress[c] : -# 576| mu0_22(char[2]) = Uninitialized : r0_21 +# 576| mu0_22(char[2]) = Uninitialized[c] : r0_21 # 576| r0_23(int) = Constant[0] : # 576| r0_24(glval) = PointerAdd : r0_21, r0_23 # 576| r0_25(unknown[2]) = Constant[0] : # 576| mu0_26(unknown[2]) = Store : r0_24, r0_25 # 577| r0_27(glval) = VariableAddress[d] : -# 577| mu0_28(char[2]) = Uninitialized : r0_27 +# 577| mu0_28(char[2]) = Uninitialized[d] : r0_27 # 577| r0_29(int) = Constant[0] : # 577| r0_30(glval) = PointerAdd : r0_27, r0_29 # 577| r0_31(char) = Constant[0] : @@ -2602,7 +2602,7 @@ ir.cpp: # 577| r0_35(char) = Constant[0] : # 577| mu0_36(char) = Store : r0_34, r0_35 # 578| r0_37(glval) = VariableAddress[e] : -# 578| mu0_38(char[2]) = Uninitialized : r0_37 +# 578| mu0_38(char[2]) = Uninitialized[e] : r0_37 # 578| r0_39(int) = Constant[0] : # 578| r0_40(glval) = PointerAdd : r0_37, r0_39 # 578| 
r0_41(char) = Constant[0] : @@ -2612,7 +2612,7 @@ ir.cpp: # 578| r0_45(char) = Constant[1] : # 578| mu0_46(char) = Store : r0_44, r0_45 # 579| r0_47(glval) = VariableAddress[f] : -# 579| mu0_48(char[3]) = Uninitialized : r0_47 +# 579| mu0_48(char[3]) = Uninitialized[f] : r0_47 # 579| r0_49(int) = Constant[0] : # 579| r0_50(glval) = PointerAdd : r0_47, r0_49 # 579| r0_51(char) = Constant[0] : @@ -2799,7 +2799,7 @@ ir.cpp: # 645| r0_14(glval) = FieldAddress[m_a] : r0_13 # 645| mu0_15(int) = Store : r0_14, r0_12 # 646| r0_16(glval) = VariableAddress[x] : -# 646| mu0_17(int) = Uninitialized : r0_16 +# 646| mu0_17(int) = Uninitialized[x] : r0_16 # 647| r0_18(C *) = CopyValue : r0_3 # 647| r0_19(glval) = FieldAddress[m_a] : r0_18 # 647| r0_20(int) = Load : r0_19, mu0_2 @@ -2937,7 +2937,7 @@ ir.cpp: # 691| mu0_1(unknown) = AliasedDefinition : # 691| mu0_2(unknown) = UnmodeledDefinition : # 692| r0_3(glval) = VariableAddress[a] : -# 692| mu0_4(int[10]) = Uninitialized : r0_3 +# 692| mu0_4(int[10]) = Uninitialized[a] : r0_3 # 693| r0_5(glval) = VariableAddress[ra] : # 693| r0_6(glval) = VariableAddress[a] : # 693| mu0_7(int(&)[10]) = Store : r0_5, r0_6 @@ -3851,7 +3851,7 @@ ir.cpp: # 871| mu0_1(unknown) = AliasedDefinition : # 871| mu0_2(unknown) = UnmodeledDefinition : # 872| r0_3(glval) = VariableAddress[a] : -# 872| mu0_4(char[5]) = Uninitialized : r0_3 +# 872| mu0_4(char[5]) = Uninitialized[a] : r0_3 # 873| r0_5(glval) = VariableAddress[p] : # 873| r0_6(glval) = VariableAddress[a] : # 873| r0_7(char *) = Convert : r0_6 @@ -3924,13 +3924,13 @@ ir.cpp: # 888| r0_3(glval) = VariableAddress[x] : # 888| mu0_4(int) = InitializeParameter[x] : r0_3 # 889| r0_5(glval<__va_list_tag[1]>) = VariableAddress[args] : -# 889| mu0_6(__va_list_tag[1]) = Uninitialized : r0_5 +# 889| mu0_6(__va_list_tag[1]) = Uninitialized[args] : r0_5 # 891| r0_7(glval<__va_list_tag[1]>) = VariableAddress[args] : # 891| r0_8(__va_list_tag *) = Convert : r0_7 # 891| r0_9(glval) = VariableAddress[x] : # 
891| v0_10(void) = VarArgsStart : r0_8, r0_9 # 892| r0_11(glval<__va_list_tag[1]>) = VariableAddress[args2] : -# 892| mu0_12(__va_list_tag[1]) = Uninitialized : r0_11 +# 892| mu0_12(__va_list_tag[1]) = Uninitialized[args2] : r0_11 # 893| r0_13(glval<__va_list_tag[1]>) = VariableAddress[args2] : # 893| r0_14(__va_list_tag *) = Convert : r0_13 # 893| r0_15(glval<__va_list_tag[1]>) = VariableAddress[args] : @@ -4161,7 +4161,7 @@ ir.cpp: # 961| mu0_1(unknown) = AliasedDefinition : # 961| mu0_2(unknown) = UnmodeledDefinition : # 962| r0_3(glval) = VariableAddress[a1] : -# 962| mu0_4(int[1000]) = Uninitialized : r0_3 +# 962| mu0_4(int[1000]) = Uninitialized[a1] : r0_3 # 962| r0_5(int) = Constant[0] : # 962| r0_6(glval) = PointerAdd : r0_3, r0_5 # 962| r0_7(unknown[8]) = Constant[0] : diff --git a/cpp/ql/test/library-tests/ir/ir/unaliased_ssa_ir.expected b/cpp/ql/test/library-tests/ir/ir/unaliased_ssa_ir.expected index a7a282ac5584..4f0432879133 100644 --- a/cpp/ql/test/library-tests/ir/ir/unaliased_ssa_ir.expected +++ b/cpp/ql/test/library-tests/ir/ir/unaliased_ssa_ir.expected @@ -5,7 +5,7 @@ bad_asts.cpp: # 14| mu0_1(unknown) = AliasedDefinition : # 14| mu0_2(unknown) = UnmodeledDefinition : # 15| r0_3(glval) = VariableAddress[s] : -# 15| mu0_4(S) = Uninitialized : r0_3 +# 15| mu0_4(S) = Uninitialized[s] : r0_3 # 15| r0_5(glval) = FieldAddress[x] : r0_3 # 15| r0_6(int) = Constant[0] : # 15| mu0_7(int) = Store : r0_5, r0_6 @@ -186,7 +186,7 @@ ir.cpp: # 50| r0_5(glval) = VariableAddress[y] : # 50| m0_6(int) = InitializeParameter[y] : r0_5 # 51| r0_7(glval) = VariableAddress[z] : -# 51| m0_8(int) = Uninitialized : r0_7 +# 51| m0_8(int) = Uninitialized[z] : r0_7 # 53| r0_9(glval) = VariableAddress[x] : # 53| r0_10(int) = Load : r0_9, m0_4 # 53| r0_11(glval) = VariableAddress[y] : @@ -359,7 +359,7 @@ ir.cpp: # 87| r0_5(glval) = VariableAddress[y] : # 87| m0_6(int) = InitializeParameter[y] : r0_5 # 88| r0_7(glval) = VariableAddress[b] : -# 88| m0_8(bool) = Uninitialized : 
r0_7 +# 88| m0_8(bool) = Uninitialized[b] : r0_7 # 90| r0_9(glval) = VariableAddress[x] : # 90| r0_10(int) = Load : r0_9, m0_4 # 90| r0_11(glval) = VariableAddress[y] : @@ -415,7 +415,7 @@ ir.cpp: # 98| r0_3(glval) = VariableAddress[x] : # 98| m0_4(int) = InitializeParameter[x] : r0_3 # 99| r0_5(glval) = VariableAddress[y] : -# 99| m0_6(int) = Uninitialized : r0_5 +# 99| m0_6(int) = Uninitialized[y] : r0_5 # 101| r0_7(glval) = VariableAddress[x] : # 101| r0_8(int) = Load : r0_7, m0_4 # 101| r0_9(int) = Constant[1] : @@ -457,7 +457,7 @@ ir.cpp: # 107| r0_3(glval) = VariableAddress[x] : # 107| mu0_4(int) = InitializeParameter[x] : r0_3 # 108| r0_5(glval) = VariableAddress[p] : -# 108| m0_6(int *) = Uninitialized : r0_5 +# 108| m0_6(int *) = Uninitialized[p] : r0_5 # 110| r0_7(glval) = VariableAddress[x] : # 110| r0_8(int) = Load : r0_7, mu0_2 # 110| r0_9(int) = Constant[1] : @@ -487,7 +487,7 @@ ir.cpp: # 114| r0_5(glval) = VariableAddress[y] : # 114| m0_6(double) = InitializeParameter[y] : r0_5 # 115| r0_7(glval) = VariableAddress[z] : -# 115| m0_8(double) = Uninitialized : r0_7 +# 115| m0_8(double) = Uninitialized[z] : r0_7 # 117| r0_9(glval) = VariableAddress[x] : # 117| r0_10(double) = Load : r0_9, m0_4 # 117| r0_11(glval) = VariableAddress[y] : @@ -569,7 +569,7 @@ ir.cpp: # 133| r0_5(glval) = VariableAddress[y] : # 133| m0_6(double) = InitializeParameter[y] : r0_5 # 134| r0_7(glval) = VariableAddress[b] : -# 134| m0_8(bool) = Uninitialized : r0_7 +# 134| m0_8(bool) = Uninitialized[b] : r0_7 # 136| r0_9(glval) = VariableAddress[x] : # 136| r0_10(double) = Load : r0_9, m0_4 # 136| r0_11(glval) = VariableAddress[y] : @@ -625,7 +625,7 @@ ir.cpp: # 144| r0_3(glval) = VariableAddress[x] : # 144| m0_4(float) = InitializeParameter[x] : r0_3 # 145| r0_5(glval) = VariableAddress[y] : -# 145| m0_6(float) = Uninitialized : r0_5 +# 145| m0_6(float) = Uninitialized[y] : r0_5 # 147| r0_7(glval) = VariableAddress[x] : # 147| r0_8(float) = Load : r0_7, m0_4 # 147| r0_9(float) = 
Constant[1.0] : @@ -669,9 +669,9 @@ ir.cpp: # 153| r0_5(glval) = VariableAddress[i] : # 153| m0_6(int) = InitializeParameter[i] : r0_5 # 154| r0_7(glval) = VariableAddress[q] : -# 154| m0_8(int *) = Uninitialized : r0_7 +# 154| m0_8(int *) = Uninitialized[q] : r0_7 # 155| r0_9(glval) = VariableAddress[b] : -# 155| m0_10(bool) = Uninitialized : r0_9 +# 155| m0_10(bool) = Uninitialized[b] : r0_9 # 157| r0_11(glval) = VariableAddress[p] : # 157| r0_12(int *) = Load : r0_11, m0_4 # 157| r0_13(glval) = VariableAddress[i] : @@ -745,7 +745,7 @@ ir.cpp: # 171| r0_5(glval) = VariableAddress[i] : # 171| m0_6(int) = InitializeParameter[i] : r0_5 # 172| r0_7(glval) = VariableAddress[x] : -# 172| m0_8(int) = Uninitialized : r0_7 +# 172| m0_8(int) = Uninitialized[x] : r0_7 # 174| r0_9(glval) = VariableAddress[p] : # 174| r0_10(int *) = Load : r0_9, m0_4 # 174| r0_11(glval) = VariableAddress[i] : @@ -779,7 +779,7 @@ ir.cpp: # 178| r0_39(int *) = PointerAdd[4] : r0_36, r0_38 # 178| mu0_40(int) = Store : r0_39, r0_34 # 180| r0_41(glval) = VariableAddress[a] : -# 180| m0_42(int[10]) = Uninitialized : r0_41 +# 180| m0_42(int[10]) = Uninitialized[a] : r0_41 # 181| r0_43(glval) = VariableAddress[a] : # 181| r0_44(int *) = Convert : r0_43 # 181| r0_45(glval) = VariableAddress[i] : @@ -860,7 +860,7 @@ ir.cpp: # 193| r0_5(glval) = VariableAddress[q] : # 193| m0_6(int *) = InitializeParameter[q] : r0_5 # 194| r0_7(glval) = VariableAddress[b] : -# 194| m0_8(bool) = Uninitialized : r0_7 +# 194| m0_8(bool) = Uninitialized[b] : r0_7 # 196| r0_9(glval) = VariableAddress[p] : # 196| r0_10(int *) = Load : r0_9, m0_4 # 196| r0_11(glval) = VariableAddress[q] : @@ -916,7 +916,7 @@ ir.cpp: # 204| r0_3(glval) = VariableAddress[p] : # 204| m0_4(int *) = InitializeParameter[p] : r0_3 # 205| r0_5(glval) = VariableAddress[q] : -# 205| m0_6(int *) = Uninitialized : r0_5 +# 205| m0_6(int *) = Uninitialized[q] : r0_5 # 207| r0_7(glval) = VariableAddress[p] : # 207| r0_8(int *) = Load : r0_7, m0_4 # 207| 
r0_9(int) = Constant[1] : @@ -1000,7 +1000,7 @@ ir.cpp: # 230| mu0_1(unknown) = AliasedDefinition : # 230| mu0_2(unknown) = UnmodeledDefinition : # 231| r0_3(glval) = VariableAddress[x] : -# 231| m0_4(int) = Uninitialized : r0_3 +# 231| m0_4(int) = Uninitialized[x] : r0_3 # 232| r0_5(glval) = VariableAddress[y] : # 232| r0_6(glval) = VariableAddress[x] : # 232| r0_7(int) = Load : r0_6, m0_4 @@ -1163,7 +1163,7 @@ ir.cpp: # 265| mu0_1(unknown) = AliasedDefinition : # 265| mu0_2(unknown) = UnmodeledDefinition : # 266| r0_3(glval) = VariableAddress[j] : -# 266| m0_4(int) = Uninitialized : r0_3 +# 266| m0_4(int) = Uninitialized[j] : r0_3 #-----| Goto -> Block 2 # 265| Block 1 @@ -1709,7 +1709,7 @@ ir.cpp: # 384| r0_3(glval) = VariableAddress[x] : # 384| m0_4(int) = InitializeParameter[x] : r0_3 # 385| r0_5(glval) = VariableAddress[y] : -# 385| m0_6(int) = Uninitialized : r0_5 +# 385| m0_6(int) = Uninitialized[y] : r0_5 # 386| r0_7(glval) = VariableAddress[x] : # 386| r0_8(int) = Load : r0_7, m0_4 # 386| v0_9(void) = Switch : r0_8 @@ -1804,7 +1804,7 @@ ir.cpp: # 426| mu0_1(unknown) = AliasedDefinition : # 426| mu0_2(unknown) = UnmodeledDefinition : # 427| r0_3(glval) = VariableAddress[pt] : -# 427| mu0_4(Point) = Uninitialized : r0_3 +# 427| mu0_4(Point) = Uninitialized[pt] : r0_3 # 428| r0_5(int) = Constant[5] : # 428| r0_6(glval) = VariableAddress[pt] : # 428| r0_7(glval) = FieldAddress[x] : r0_6 @@ -1834,7 +1834,7 @@ ir.cpp: # 433| r0_5(glval) = VariableAddress[b] : # 433| m0_6(bool) = InitializeParameter[b] : r0_5 # 434| r0_7(glval) = VariableAddress[x] : -# 434| m0_8(int) = Uninitialized : r0_7 +# 434| m0_8(int) = Uninitialized[x] : r0_7 # 435| r0_9(glval) = VariableAddress[a] : # 435| r0_10(bool) = Load : r0_9, m0_4 # 435| v0_11(void) = ConditionalBranch : r0_10 @@ -1896,7 +1896,7 @@ ir.cpp: # 447| r0_5(glval) = VariableAddress[b] : # 447| m0_6(bool) = InitializeParameter[b] : r0_5 # 448| r0_7(glval) = VariableAddress[x] : -# 448| m0_8(int) = Uninitialized : r0_7 
+# 448| m0_8(int) = Uninitialized[x] : r0_7 # 449| r0_9(glval) = VariableAddress[a] : # 449| r0_10(bool) = Load : r0_9, m0_4 # 449| v0_11(void) = ConditionalBranch : r0_10 @@ -1958,7 +1958,7 @@ ir.cpp: # 461| r0_5(glval) = VariableAddress[b] : # 461| m0_6(bool) = InitializeParameter[b] : r0_5 # 462| r0_7(glval) = VariableAddress[x] : -# 462| m0_8(int) = Uninitialized : r0_7 +# 462| m0_8(int) = Uninitialized[x] : r0_7 # 463| r0_9(glval) = VariableAddress[a] : # 463| r0_10(bool) = Load : r0_9, m0_4 # 463| v0_11(void) = ConditionalBranch : r0_10 @@ -2013,7 +2013,7 @@ ir.cpp: # 475| r0_5(glval) = VariableAddress[b] : # 475| m0_6(bool) = InitializeParameter[b] : r0_5 # 476| r0_7(glval) = VariableAddress[x] : -# 476| m0_8(bool) = Uninitialized : r0_7 +# 476| m0_8(bool) = Uninitialized[x] : r0_7 # 477| r0_9(glval) = VariableAddress[a] : # 477| r0_10(bool) = Load : r0_9, m0_4 # 477| v0_11(void) = ConditionalBranch : r0_10 @@ -2163,9 +2163,9 @@ ir.cpp: # 486| r0_3(glval) = VariableAddress[a] : # 486| m0_4(bool) = InitializeParameter[a] : r0_3 # 487| r0_5(glval) = VariableAddress[x] : -# 487| mu0_6(int) = Uninitialized : r0_5 +# 487| mu0_6(int) = Uninitialized[x] : r0_5 # 488| r0_7(glval) = VariableAddress[y] : -# 488| mu0_8(int) = Uninitialized : r0_7 +# 488| mu0_8(int) = Uninitialized[y] : r0_7 # 489| r0_9(int) = Constant[5] : # 489| r0_10(glval) = VariableAddress[a] : # 489| r0_11(bool) = Load : r0_10, m0_4 @@ -2258,7 +2258,7 @@ ir.cpp: # 503| r0_5(glval) = VariableAddress[f] : # 503| m0_6(float) = InitializeParameter[f] : r0_5 # 504| r0_7(glval) = VariableAddress[pt1] : -# 504| m0_8(Point) = Uninitialized : r0_7 +# 504| m0_8(Point) = Uninitialized[pt1] : r0_7 # 504| r0_9(glval) = FieldAddress[x] : r0_7 # 504| r0_10(glval) = VariableAddress[x] : # 504| r0_11(int) = Load : r0_10, m0_4 @@ -2269,7 +2269,7 @@ ir.cpp: # 504| r0_16(int) = Convert : r0_15 # 504| mu0_17(int) = Store : r0_13, r0_16 # 505| r0_18(glval) = VariableAddress[pt2] : -# 505| m0_19(Point) = Uninitialized : 
r0_18 +# 505| m0_19(Point) = Uninitialized[pt2] : r0_18 # 505| r0_20(glval) = FieldAddress[x] : r0_18 # 505| r0_21(glval) = VariableAddress[x] : # 505| r0_22(int) = Load : r0_21, m0_4 @@ -2278,7 +2278,7 @@ ir.cpp: # 505| r0_25(int) = Constant[0] : # 505| mu0_26(int) = Store : r0_24, r0_25 # 506| r0_27(glval) = VariableAddress[pt3] : -# 506| m0_28(Point) = Uninitialized : r0_27 +# 506| m0_28(Point) = Uninitialized[pt3] : r0_27 # 506| r0_29(glval) = FieldAddress[x] : r0_27 # 506| r0_30(int) = Constant[0] : # 506| m0_31(int) = Store : r0_29, r0_30 @@ -2306,7 +2306,7 @@ ir.cpp: # 512| r0_5(glval) = VariableAddress[f] : # 512| m0_6(float) = InitializeParameter[f] : r0_5 # 513| r0_7(glval) = VariableAddress[r1] : -# 513| m0_8(Rect) = Uninitialized : r0_7 +# 513| m0_8(Rect) = Uninitialized[r1] : r0_7 # 513| r0_9(glval) = FieldAddress[topLeft] : r0_7 # 513| r0_10(Point) = Constant[0] : # 513| m0_11(Point) = Store : r0_9, r0_10 @@ -2314,7 +2314,7 @@ ir.cpp: # 513| r0_13(Point) = Constant[0] : # 513| mu0_14(Point) = Store : r0_12, r0_13 # 514| r0_15(glval) = VariableAddress[r2] : -# 514| m0_16(Rect) = Uninitialized : r0_15 +# 514| m0_16(Rect) = Uninitialized[r2] : r0_15 # 514| r0_17(glval) = FieldAddress[topLeft] : r0_15 # 514| r0_18(glval) = FieldAddress[x] : r0_17 # 514| r0_19(glval) = VariableAddress[x] : @@ -2329,7 +2329,7 @@ ir.cpp: # 514| r0_28(Point) = Constant[0] : # 514| mu0_29(Point) = Store : r0_27, r0_28 # 515| r0_30(glval) = VariableAddress[r3] : -# 515| m0_31(Rect) = Uninitialized : r0_30 +# 515| m0_31(Rect) = Uninitialized[r3] : r0_30 # 515| r0_32(glval) = FieldAddress[topLeft] : r0_30 # 515| r0_33(glval) = FieldAddress[x] : r0_32 # 515| r0_34(glval) = VariableAddress[x] : @@ -2351,7 +2351,7 @@ ir.cpp: # 515| r0_50(int) = Convert : r0_49 # 515| mu0_51(int) = Store : r0_47, r0_50 # 516| r0_52(glval) = VariableAddress[r4] : -# 516| m0_53(Rect) = Uninitialized : r0_52 +# 516| m0_53(Rect) = Uninitialized[r4] : r0_52 # 516| r0_54(glval) = FieldAddress[topLeft] : 
r0_52 # 516| r0_55(glval) = FieldAddress[x] : r0_54 # 516| r0_56(glval) = VariableAddress[x] : @@ -2383,13 +2383,13 @@ ir.cpp: # 519| r0_5(glval) = VariableAddress[f] : # 519| m0_6(float) = InitializeParameter[f] : r0_5 # 520| r0_7(glval) = VariableAddress[a1] : -# 520| mu0_8(int[3]) = Uninitialized : r0_7 +# 520| mu0_8(int[3]) = Uninitialized[a1] : r0_7 # 520| r0_9(int) = Constant[0] : # 520| r0_10(glval) = PointerAdd : r0_7, r0_9 # 520| r0_11(unknown[12]) = Constant[0] : # 520| mu0_12(unknown[12]) = Store : r0_10, r0_11 # 521| r0_13(glval) = VariableAddress[a2] : -# 521| mu0_14(int[3]) = Uninitialized : r0_13 +# 521| mu0_14(int[3]) = Uninitialized[a2] : r0_13 # 521| r0_15(int) = Constant[0] : # 521| r0_16(glval) = PointerAdd : r0_13, r0_15 # 521| r0_17(glval) = VariableAddress[x] : @@ -2406,7 +2406,7 @@ ir.cpp: # 521| r0_28(int) = Constant[0] : # 521| mu0_29(int) = Store : r0_27, r0_28 # 522| r0_30(glval) = VariableAddress[a3] : -# 522| mu0_31(int[3]) = Uninitialized : r0_30 +# 522| mu0_31(int[3]) = Uninitialized[a3] : r0_30 # 522| r0_32(int) = Constant[0] : # 522| r0_33(glval) = PointerAdd : r0_30, r0_32 # 522| r0_34(glval) = VariableAddress[x] : @@ -2431,7 +2431,7 @@ ir.cpp: # 530| r0_5(glval) = VariableAddress[f] : # 530| m0_6(float) = InitializeParameter[f] : r0_5 # 531| r0_7(glval) = VariableAddress[u1] : -# 531| m0_8(U) = Uninitialized : r0_7 +# 531| m0_8(U) = Uninitialized[u1] : r0_7 # 531| r0_9(glval) = FieldAddress[d] : r0_7 # 531| r0_10(glval) = VariableAddress[f] : # 531| r0_11(float) = Load : r0_10, m0_6 @@ -2603,15 +2603,15 @@ ir.cpp: # 574| r0_17(char[5]) = Load : r0_16, mu0_2 # 574| m0_18(char[5]) = Store : r0_15, r0_17 # 575| r0_19(glval) = VariableAddress[b] : -# 575| m0_20(char[2]) = Uninitialized : r0_19 +# 575| m0_20(char[2]) = Uninitialized[b] : r0_19 # 576| r0_21(glval) = VariableAddress[c] : -# 576| mu0_22(char[2]) = Uninitialized : r0_21 +# 576| mu0_22(char[2]) = Uninitialized[c] : r0_21 # 576| r0_23(int) = Constant[0] : # 576| 
r0_24(glval) = PointerAdd : r0_21, r0_23 # 576| r0_25(unknown[2]) = Constant[0] : # 576| mu0_26(unknown[2]) = Store : r0_24, r0_25 # 577| r0_27(glval) = VariableAddress[d] : -# 577| mu0_28(char[2]) = Uninitialized : r0_27 +# 577| mu0_28(char[2]) = Uninitialized[d] : r0_27 # 577| r0_29(int) = Constant[0] : # 577| r0_30(glval) = PointerAdd : r0_27, r0_29 # 577| r0_31(char) = Constant[0] : @@ -2621,7 +2621,7 @@ ir.cpp: # 577| r0_35(char) = Constant[0] : # 577| mu0_36(char) = Store : r0_34, r0_35 # 578| r0_37(glval) = VariableAddress[e] : -# 578| mu0_38(char[2]) = Uninitialized : r0_37 +# 578| mu0_38(char[2]) = Uninitialized[e] : r0_37 # 578| r0_39(int) = Constant[0] : # 578| r0_40(glval) = PointerAdd : r0_37, r0_39 # 578| r0_41(char) = Constant[0] : @@ -2631,7 +2631,7 @@ ir.cpp: # 578| r0_45(char) = Constant[1] : # 578| mu0_46(char) = Store : r0_44, r0_45 # 579| r0_47(glval) = VariableAddress[f] : -# 579| mu0_48(char[3]) = Uninitialized : r0_47 +# 579| mu0_48(char[3]) = Uninitialized[f] : r0_47 # 579| r0_49(int) = Constant[0] : # 579| r0_50(glval) = PointerAdd : r0_47, r0_49 # 579| r0_51(char) = Constant[0] : @@ -2818,7 +2818,7 @@ ir.cpp: # 645| r0_14(glval) = FieldAddress[m_a] : r0_13 # 645| mu0_15(int) = Store : r0_14, r0_12 # 646| r0_16(glval) = VariableAddress[x] : -# 646| m0_17(int) = Uninitialized : r0_16 +# 646| m0_17(int) = Uninitialized[x] : r0_16 # 647| r0_18(C *) = CopyValue : r0_3 # 647| r0_19(glval) = FieldAddress[m_a] : r0_18 # 647| r0_20(int) = Load : r0_19, mu0_2 @@ -2956,7 +2956,7 @@ ir.cpp: # 691| mu0_1(unknown) = AliasedDefinition : # 691| mu0_2(unknown) = UnmodeledDefinition : # 692| r0_3(glval) = VariableAddress[a] : -# 692| mu0_4(int[10]) = Uninitialized : r0_3 +# 692| mu0_4(int[10]) = Uninitialized[a] : r0_3 # 693| r0_5(glval) = VariableAddress[ra] : # 693| r0_6(glval) = VariableAddress[a] : # 693| m0_7(int(&)[10]) = Store : r0_5, r0_6 @@ -3871,7 +3871,7 @@ ir.cpp: # 871| mu0_1(unknown) = AliasedDefinition : # 871| mu0_2(unknown) = 
UnmodeledDefinition : # 872| r0_3(glval) = VariableAddress[a] : -# 872| mu0_4(char[5]) = Uninitialized : r0_3 +# 872| mu0_4(char[5]) = Uninitialized[a] : r0_3 # 873| r0_5(glval) = VariableAddress[p] : # 873| r0_6(glval) = VariableAddress[a] : # 873| r0_7(char *) = Convert : r0_6 @@ -3944,13 +3944,13 @@ ir.cpp: # 888| r0_3(glval) = VariableAddress[x] : # 888| mu0_4(int) = InitializeParameter[x] : r0_3 # 889| r0_5(glval<__va_list_tag[1]>) = VariableAddress[args] : -# 889| mu0_6(__va_list_tag[1]) = Uninitialized : r0_5 +# 889| mu0_6(__va_list_tag[1]) = Uninitialized[args] : r0_5 # 891| r0_7(glval<__va_list_tag[1]>) = VariableAddress[args] : # 891| r0_8(__va_list_tag *) = Convert : r0_7 # 891| r0_9(glval) = VariableAddress[x] : # 891| v0_10(void) = VarArgsStart : r0_8, r0_9 # 892| r0_11(glval<__va_list_tag[1]>) = VariableAddress[args2] : -# 892| mu0_12(__va_list_tag[1]) = Uninitialized : r0_11 +# 892| mu0_12(__va_list_tag[1]) = Uninitialized[args2] : r0_11 # 893| r0_13(glval<__va_list_tag[1]>) = VariableAddress[args2] : # 893| r0_14(__va_list_tag *) = Convert : r0_13 # 893| r0_15(glval<__va_list_tag[1]>) = VariableAddress[args] : @@ -4182,7 +4182,7 @@ ir.cpp: # 961| mu0_1(unknown) = AliasedDefinition : # 961| mu0_2(unknown) = UnmodeledDefinition : # 962| r0_3(glval) = VariableAddress[a1] : -# 962| mu0_4(int[1000]) = Uninitialized : r0_3 +# 962| mu0_4(int[1000]) = Uninitialized[a1] : r0_3 # 962| r0_5(int) = Constant[0] : # 962| r0_6(glval) = PointerAdd : r0_3, r0_5 # 962| r0_7(unknown[8]) = Constant[0] : diff --git a/cpp/ql/test/library-tests/valuenumbering/GlobalValueNumbering/ir_gvn.expected b/cpp/ql/test/library-tests/valuenumbering/GlobalValueNumbering/ir_gvn.expected index c823ee3eac73..dd3c96a23adf 100644 --- a/cpp/ql/test/library-tests/valuenumbering/GlobalValueNumbering/ir_gvn.expected +++ b/cpp/ql/test/library-tests/valuenumbering/GlobalValueNumbering/ir_gvn.expected @@ -16,15 +16,15 @@ test.cpp: # 1| valnum = m0_6 # 2| r0_7(glval) = VariableAddress[x] : # 2| 
valnum = r0_7 -# 2| m0_8(int) = Uninitialized : r0_7 +# 2| m0_8(int) = Uninitialized[x] : r0_7 # 2| valnum = unique # 2| r0_9(glval) = VariableAddress[y] : # 2| valnum = r0_9 -# 2| m0_10(int) = Uninitialized : r0_9 +# 2| m0_10(int) = Uninitialized[y] : r0_9 # 2| valnum = unique # 3| r0_11(glval) = VariableAddress[b] : # 3| valnum = unique -# 3| m0_12(unsigned char) = Uninitialized : r0_11 +# 3| m0_12(unsigned char) = Uninitialized[b] : r0_11 # 3| valnum = unique # 5| r0_13(glval) = VariableAddress[p0] : # 5| valnum = r0_3 @@ -86,15 +86,15 @@ test.cpp: # 12| valnum = m0_6 # 13| r0_7(glval) = VariableAddress[x] : # 13| valnum = r0_7 -# 13| m0_8(int) = Uninitialized : r0_7 +# 13| m0_8(int) = Uninitialized[x] : r0_7 # 13| valnum = unique # 13| r0_9(glval) = VariableAddress[y] : # 13| valnum = r0_9 -# 13| m0_10(int) = Uninitialized : r0_9 +# 13| m0_10(int) = Uninitialized[y] : r0_9 # 13| valnum = unique # 14| r0_11(glval) = VariableAddress[b] : # 14| valnum = unique -# 14| m0_12(unsigned char) = Uninitialized : r0_11 +# 14| m0_12(unsigned char) = Uninitialized[b] : r0_11 # 14| valnum = unique # 16| r0_13(glval) = VariableAddress[p0] : # 16| valnum = r0_3 @@ -168,15 +168,15 @@ test.cpp: # 25| valnum = m0_6 # 26| r0_7(glval) = VariableAddress[x] : # 26| valnum = r0_7 -# 26| m0_8(int) = Uninitialized : r0_7 +# 26| m0_8(int) = Uninitialized[x] : r0_7 # 26| valnum = unique # 26| r0_9(glval) = VariableAddress[y] : # 26| valnum = r0_9 -# 26| m0_10(int) = Uninitialized : r0_9 +# 26| m0_10(int) = Uninitialized[y] : r0_9 # 26| valnum = unique # 27| r0_11(glval) = VariableAddress[b] : # 27| valnum = unique -# 27| m0_12(unsigned char) = Uninitialized : r0_11 +# 27| m0_12(unsigned char) = Uninitialized[b] : r0_11 # 27| valnum = unique # 29| r0_13(glval) = VariableAddress[p0] : # 29| valnum = r0_3 @@ -261,15 +261,15 @@ test.cpp: # 39| valnum = m0_8 # 40| r0_9(glval) = VariableAddress[x] : # 40| valnum = r0_9 -# 40| m0_10(int) = Uninitialized : r0_9 +# 40| m0_10(int) = 
Uninitialized[x] : r0_9 # 40| valnum = unique # 40| r0_11(glval) = VariableAddress[y] : # 40| valnum = r0_11 -# 40| m0_12(int) = Uninitialized : r0_11 +# 40| m0_12(int) = Uninitialized[y] : r0_11 # 40| valnum = unique # 41| r0_13(glval) = VariableAddress[b] : # 41| valnum = unique -# 41| m0_14(unsigned char) = Uninitialized : r0_13 +# 41| m0_14(unsigned char) = Uninitialized[b] : r0_13 # 41| valnum = unique # 43| r0_15(glval) = VariableAddress[p0] : # 43| valnum = r0_3 @@ -353,7 +353,7 @@ test.cpp: # 49| valnum = m0_6 # 50| r0_7(glval) = VariableAddress[ptr] : # 50| valnum = r0_7 -# 50| m0_8(char *) = Uninitialized : r0_7 +# 50| m0_8(char *) = Uninitialized[ptr] : r0_7 # 50| valnum = unique # 51| r0_9(glval) = VariableAddress[result] : # 51| valnum = r0_9 @@ -601,7 +601,7 @@ test.cpp: # 84| valnum = m0_8 # 86| r0_9(glval) = VariableAddress[v] : # 86| valnum = r0_9 -# 86| m0_10(int) = Uninitialized : r0_9 +# 86| m0_10(int) = Uninitialized[v] : r0_9 # 86| valnum = unique # 88| r0_11(glval) = VariableAddress[p] : # 88| valnum = r0_7 From 2822d145882b483375f3c1d560fe4c13d6c60032 Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Sun, 2 Dec 2018 22:22:34 -0800 Subject: [PATCH 06/15] C++: Add missing changes to test_ir.expected --- .../dataflow/dataflow-tests/test_ir.expected | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected index 3e1beebf2799..fe7b875e7732 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected @@ -7,15 +7,23 @@ | test.cpp:30:8:30:8 | Load: t | test.cpp:35:10:35:15 | Call: call to source | | test.cpp:31:8:31:8 | Load: c | test.cpp:36:13:36:18 | Call: call to source | | test.cpp:58:10:58:10 | Load: t | test.cpp:50:14:50:19 | Call: call to source | +| test.cpp:76:8:76:9 | Load: u1 | test.cpp:75:7:75:8 | 
Uninitialized: definition of u1 | +| test.cpp:84:8:84:18 | Load: ... ? ... : ... | test.cpp:83:7:83:8 | Uninitialized: definition of u2 | +| test.cpp:86:8:86:9 | Load: i1 | test.cpp:83:7:83:8 | Uninitialized: definition of u2 | | test.cpp:90:8:90:14 | Load: source1 | test.cpp:89:28:89:34 | InitializeParameter: source1 | | test.cpp:92:8:92:14 | Load: source1 | test.cpp:89:28:89:34 | InitializeParameter: source1 | | test.cpp:132:22:132:23 | Load: m1 | test.cpp:122:18:122:30 | InitializeParameter: sourceStruct1 | | test.cpp:140:22:140:23 | Load: m1 | test.cpp:122:18:122:30 | InitializeParameter: sourceStruct1 | +| test.cpp:188:8:188:8 | Load: y | test.cpp:186:27:186:32 | Call: call to source | | test.cpp:192:8:192:8 | Load: s | test.cpp:199:33:199:38 | Call: call to source | +| test.cpp:200:8:200:8 | Load: y | test.cpp:199:33:199:38 | Call: call to source | | test.cpp:205:8:205:8 | Load: x | test.cpp:212:34:212:39 | Call: call to source | +| test.cpp:213:8:213:8 | Load: y | test.cpp:212:34:212:39 | Call: call to source | | test.cpp:226:8:226:8 | Load: y | test.cpp:219:11:219:16 | Call: call to source | | test.cpp:308:12:308:12 | Load: x | test.cpp:293:14:293:19 | Call: call to source | +| test.cpp:314:12:314:12 | Load: x | test.cpp:313:22:313:27 | Call: call to source | | test.cpp:337:14:337:14 | Load: x | test.cpp:353:17:353:22 | Call: call to source | +| test.cpp:366:7:366:7 | Load: x | test.cpp:362:4:362:9 | Call: call to source | | true_upon_entry.cpp:13:8:13:8 | Load: x | true_upon_entry.cpp:9:11:9:16 | Call: call to source | | true_upon_entry.cpp:21:8:21:8 | Load: x | true_upon_entry.cpp:17:11:17:16 | Call: call to source | | true_upon_entry.cpp:29:8:29:8 | Load: x | true_upon_entry.cpp:27:9:27:14 | Call: call to source | From e11b4b6c401f819727c11090d79cf429689c3273 Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Tue, 4 Dec 2018 07:31:13 -0800 Subject: [PATCH 07/15] C++: Fix IR Dataflow PR feedback --- .../semmle/code/cpp/ir/dataflow/DataFlow.qll | 10 +- 
.../code/cpp/ir/dataflow/TaintTracking.qll | 189 ------------------ .../ir/dataflow/internal/DataFlowPrivate.qll | 4 +- .../dataflow-tests/test_diff.expected | 14 ++ .../dataflow/dataflow-tests/test_diff.ql | 37 ++++ 5 files changed, 58 insertions(+), 196 deletions(-) delete mode 100644 cpp/ql/src/semmle/code/cpp/ir/dataflow/TaintTracking.qll create mode 100644 cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected create mode 100644 cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.ql diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow.qll index a2c9a905f589..3d5a077b0d15 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DataFlow.qll @@ -1,13 +1,15 @@ /** * Provides a library for local (intra-procedural) and global (inter-procedural) * data flow analysis: deciding whether data can flow from a _source_ to a - * _sink_. + * _sink_. This library differs from the one in `semmle.code.cpp.dataflow` in that + * this library uses the IR (Intermediate Representation) library, which provides + * a more precise semantic representation of the program, whereas the other dataflow + * library uses the more syntax-oriented ASTs. This library should provide more accurate + * results than the AST-based library in most scenarios. * * Unless configured otherwise, _flow_ means that the exact value of * the source may reach the sink. We do not track flow across pointer - * dereferences or array indexing. To track these types of flow, where the - * exact value may not be preserved, import - * `semmle.code.cpp.dataflow.TaintTracking`. + * dereferences or array indexing. * * To use global (interprocedural) data flow, extend the class * `DataFlow::Configuration` as documented on that class. 
To use local diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/TaintTracking.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/TaintTracking.qll deleted file mode 100644 index 068b35fc12a2..000000000000 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/TaintTracking.qll +++ /dev/null @@ -1,189 +0,0 @@ -/** - * Provides classes for performing local (intra-procedural) and - * global (inter-procedural) taint-tracking analyses. - * - * We define _taint propagation_ informally to mean that a substantial part of - * the information from the source is preserved at the sink. For example, taint - * propagates from `x` to `x + 100`, but it does not propagate from `x` to `x > - * 100` since we consider a single bit of information to be too little. - */ -import semmle.code.cpp.ir.dataflow.DataFlow -import semmle.code.cpp.ir.dataflow.DataFlow2 -private import semmle.code.cpp.ir.IR - -module TaintTracking { - - /** - * A configuration of interprocedural taint tracking analysis. This defines - * sources, sinks, and any other configurable aspect of the analysis. Each - * use of the taint tracking library must define its own unique extension of - * this abstract class. - * - * A taint-tracking configuration is a special data flow configuration - * (`DataFlow::Configuration`) that allows for flow through nodes that do not - * necessarily preserve values but are still relevant from a taint-tracking - * perspective. (For example, string concatenation, where one of the operands - * is tainted.) - * - * To create a configuration, extend this class with a subclass whose - * characteristic predicate is a unique singleton string. For example, write - * - * ``` - * class MyAnalysisConfiguration extends TaintTracking::Configuration { - * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } - * // Override `isSource` and `isSink`. - * // Optionally override `isSanitizer`. - * // Optionally override `isAdditionalTaintStep`. 
- * } - * ``` - * - * Then, to query whether there is flow between some `source` and `sink`, - * write - * - * ``` - * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) - * ``` - * - * Multiple configurations can coexist, but it is unsupported to depend on a - * `TaintTracking::Configuration` or a `DataFlow::Configuration` in the - * overridden predicates that define sources, sinks, or additional steps. - * Instead, the dependency should go to a `TaintTracking::Configuration2` or - * a `DataFlow{2,3,4}::Configuration`. - */ - abstract class Configuration extends DataFlow::Configuration { - bindingset[this] - Configuration() { any() } - - /** Holds if `source` is a taint source. */ - // overridden to provide taint-tracking specific qldoc - abstract override predicate isSource(DataFlow::Node source); - - /** Holds if `sink` is a taint sink. */ - // overridden to provide taint-tracking specific qldoc - abstract override predicate isSink(DataFlow::Node sink); - - /** - * Holds if taint should not flow into `node`. - */ - predicate isSanitizer(DataFlow::Node node) { none() } - - /** - * Holds if the additional taint propagation step - * from `source` to `target` must be taken into account in the analysis. - * This step will only be followed if `target` is not in the `isSanitizer` - * predicate. - */ - predicate isAdditionalTaintStep(DataFlow::Node source, - DataFlow::Node target) - { none() } - - final override - predicate isBarrier(DataFlow::Node node) { isSanitizer(node) } - - final override - predicate isAdditionalFlowStep(DataFlow::Node source, DataFlow::Node target) { - this.isAdditionalTaintStep(source, target) - or - localTaintStep(source, target) - } - } - - /** - * A taint-tracking configuration that is backed by the `DataFlow2` library - * instead of `DataFlow`. Use this class when taint-tracking configurations - * or data-flow configurations must depend on each other. - * - * See `TaintTracking::Configuration` for the full documentation. 
- */ - abstract class Configuration2 extends DataFlow2::Configuration { - bindingset[this] - Configuration2() { any() } - - /** Holds if `source` is a taint source. */ - // overridden to provide taint-tracking specific qldoc - abstract override predicate isSource(DataFlow::Node source); - - /** Holds if `sink` is a taint sink. */ - // overridden to provide taint-tracking specific qldoc - abstract override predicate isSink(DataFlow::Node sink); - - /** - * Holds if taint should not flow into `node`. - */ - predicate isSanitizer(DataFlow::Node node) { none() } - - /** - * Holds if the additional taint propagation step - * from `source` to `target` must be taken into account in the analysis. - * This step will only be followed if `target` is not in the `isSanitizer` - * predicate. - */ - predicate isAdditionalTaintStep(DataFlow::Node source, - DataFlow::Node target) - { none() } - - final override - predicate isBarrier(DataFlow::Node node) { isSanitizer(node) } - - final override - predicate isAdditionalFlowStep(DataFlow::Node source, DataFlow::Node target) { - this.isAdditionalTaintStep(source, target) - or - localTaintStep(source, target) - } - } - - /** - * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local - * (intra-procedural) step. - */ - predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // Taint can flow into using ordinary data flow. - DataFlow::localFlowStep(nodeFrom, nodeTo) - or - // Taint can flow through expressions that alter the value but preserve - // more than one bit of it _or_ expressions that follow data through - // pointer indirections. - not nodeTo instanceof CompareInstruction and - not nodeTo instanceof InvokeInstruction and - nodeTo.getAnOperand() = nodeFrom - } - - /** - * Holds if taint may propagate from `source` to `sink` in zero or more local - * (intra-procedural) steps. 
- */ - predicate localTaint(DataFlow::Node source, DataFlow::Node sink) { - localTaintStep*(source, sink) - } - - /** - * Holds if we do not propagate taint from `fromExpr` to `toExpr` - * even though `toExpr` is the AST parent of `fromExpr`. - */ - private predicate noParentExprFlow(Expr fromExpr, Expr toExpr) { - fromExpr = toExpr.(ConditionalExpr).getCondition() - or - fromExpr = toExpr.(CommaExpr).getLeftOperand() - or - fromExpr = toExpr.(AssignExpr).getLValue() // LHS of `=` - } - - /** - * Holds if we do not propagate taint from a child of `e` to `e` itself. - */ - private predicate noFlowFromChildExpr(Expr e) { - e instanceof ComparisonOperation - or - e instanceof LogicalAndExpr - or - e instanceof LogicalOrExpr - or - e instanceof Call - or - e instanceof SizeofOperator - or - e instanceof AlignofOperator - } - -} \ No newline at end of file diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index e9c1247d41c3..2d29f1b47d91 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -3,9 +3,7 @@ private import DataFlowUtil /** * A data flow node that occurs as the argument of a call and is passed as-is - * to the callable. Arguments that are wrapped in an implicit varargs array - * creation are not included, but the implicitly created array is. - * Instance arguments are also included. + * to the callable. Instance arguments (`this` pointer) are also included. 
*/ class ArgumentNode extends Node { ArgumentNode() { diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected new file mode 100644 index 000000000000..bd2e2be14d78 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected @@ -0,0 +1,14 @@ +| test.cpp:6:12:6:17 | test.cpp:21:8:21:9 | IR only | +| test.cpp:66:30:66:36 | test.cpp:71:8:71:9 | AST only | +| test.cpp:89:28:89:34 | test.cpp:92:8:92:14 | IR only | +| test.cpp:100:13:100:18 | test.cpp:103:10:103:12 | AST only | +| test.cpp:120:9:120:20 | test.cpp:126:8:126:19 | AST only | +| test.cpp:122:18:122:30 | test.cpp:132:22:132:23 | IR only | +| test.cpp:122:18:122:30 | test.cpp:140:22:140:23 | IR only | +| test.cpp:136:27:136:32 | test.cpp:137:27:137:28 | AST only | +| test.cpp:136:27:136:32 | test.cpp:140:22:140:23 | AST only | +| test.cpp:395:17:395:22 | test.cpp:397:10:397:18 | AST only | +| test.cpp:421:13:421:18 | test.cpp:423:10:423:14 | AST only | +| true_upon_entry.cpp:9:11:9:16 | true_upon_entry.cpp:13:8:13:8 | IR only | +| true_upon_entry.cpp:62:11:62:16 | true_upon_entry.cpp:66:8:66:8 | IR only | +| true_upon_entry.cpp:98:11:98:16 | true_upon_entry.cpp:105:8:105:8 | IR only | diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.ql b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.ql new file mode 100644 index 000000000000..659a75528bb4 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.ql @@ -0,0 +1,37 @@ +import cpp +import DataflowTestCommon as ASTCommon +import IRDataflowTestCommon as IRCommon +import semmle.code.cpp.dataflow.DataFlow as ASTDataFlow +import semmle.code.cpp.ir.dataflow.DataFlow as IRDataFlow + +predicate astFlow(Location sourceLocation, Location sinkLocation) { + exists(ASTDataFlow::DataFlow::Node source, ASTDataFlow::DataFlow::Node sink, + ASTCommon::TestAllocationConfig cfg | + cfg.hasFlow(source, 
sink) and + sourceLocation = source.getLocation() and + sinkLocation = sink.getLocation() + ) +} + +predicate irFlow(Location sourceLocation, Location sinkLocation) { + exists(IRDataFlow::DataFlow::Node source, IRDataFlow::DataFlow::Node sink, + IRCommon::TestAllocationConfig cfg | + cfg.hasFlow(source, sink) and + sourceLocation = source.getLocation() and + sinkLocation = sink.getLocation() + ) +} + +from Location sourceLocation, Location sinkLocation, string note +where + ( + astFlow(sourceLocation, sinkLocation) and + not irFlow(sourceLocation, sinkLocation) and + note = "AST only" + ) or + ( + irFlow(sourceLocation, sinkLocation) and + not astFlow(sourceLocation, sinkLocation) and + note = "IR only" + ) +select sourceLocation.toString(), sinkLocation.toString(), note From 65360b23f99fa23a7435529e1666ea301a482961 Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Wed, 5 Dec 2018 10:58:46 -0800 Subject: [PATCH 08/15] C++: Change model API based on feedback I've separated the model interface for memory side effects from the model for escaped addresses. It will be fairly common for a given model to extend both interfaces, but they are used for two different purposes. I've also put each model interface and the non-member predicates that query it into a named module, which seemed cleaner than having predicates named `functionModelReadsMemory()` and `getFunctionModelParameterAliasBehavior()`. 
--- .../raw/internal/TranslatedCall.qll | 7 +- .../implementations/IdentityFunction.qll | 19 +++- .../code/cpp/models/interfaces/Alias.qll | 101 ++++++++++++++++++ .../interfaces/FunctionInputsAndOutputs.qll | 2 +- .../code/cpp/models/interfaces/SideEffect.qll | 54 ++++++++++ .../models/interfaces/SideEffectFunction.qll | 68 ------------ 6 files changed, 173 insertions(+), 78 deletions(-) create mode 100644 cpp/ql/src/semmle/code/cpp/models/interfaces/Alias.qll create mode 100644 cpp/ql/src/semmle/code/cpp/models/interfaces/SideEffect.qll delete mode 100644 cpp/ql/src/semmle/code/cpp/models/interfaces/SideEffectFunction.qll diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCall.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCall.qll index 91260e2d3052..3df91fd423e7 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCall.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCall.qll @@ -1,7 +1,7 @@ import cpp private import semmle.code.cpp.ir.implementation.Opcode private import semmle.code.cpp.ir.internal.OperandTag -private import semmle.code.cpp.models.interfaces.SideEffectFunction +private import semmle.code.cpp.models.interfaces.SideEffect private import InstructionTag private import TranslatedElement private import TranslatedExpr @@ -308,11 +308,11 @@ class TranslatedFunctionCall extends TranslatedCallExpr, TranslatedDirectCall { } override predicate hasReadSideEffect() { - functionReadsMemory(funcCall.getTarget()) + SideEffectModel::functionReadsMemory(funcCall.getTarget()) } override predicate hasWriteSideEffect() { - functionWritesMemory(funcCall.getTarget()) + SideEffectModel::functionWritesMemory(funcCall.getTarget()) } } @@ -336,4 +336,3 @@ class TranslatedStructorCall extends TranslatedFunctionCall { any() } } - diff --git a/cpp/ql/src/semmle/code/cpp/models/implementations/IdentityFunction.qll 
b/cpp/ql/src/semmle/code/cpp/models/implementations/IdentityFunction.qll index 1399e0e8a397..3edd20ef40b6 100644 --- a/cpp/ql/src/semmle/code/cpp/models/implementations/IdentityFunction.qll +++ b/cpp/ql/src/semmle/code/cpp/models/implementations/IdentityFunction.qll @@ -1,11 +1,13 @@ import semmle.code.cpp.Function +import semmle.code.cpp.models.interfaces.Alias import semmle.code.cpp.models.interfaces.DataFlow -import semmle.code.cpp.models.interfaces.SideEffectFunction +import semmle.code.cpp.models.interfaces.SideEffect /** * The standard function templates `std::move` and `std::identity` */ -class IdentityFunction extends DataFlowFunction, SideEffectFunction { +class IdentityFunction extends DataFlowFunction, SideEffectModel::SideEffectFunction, + AliasModel::AliasFunction { IdentityFunction() { this.getNamespace().getParentNamespace() instanceof GlobalNamespace and this.getNamespace().getName() = "std" and @@ -23,9 +25,16 @@ class IdentityFunction extends DataFlowFunction, SideEffectFunction { none() } - override predicate parameterEscapes(int index) { - // Note that returning the value of the parameter does not count as escaping. - none() + override AliasModel::ParameterEscape getParameterEscapeBehavior(int index) { + exists(getParameter(index)) and + if index = 0 then + result instanceof AliasModel::EscapesOnlyViaReturn + else + result instanceof AliasModel::DoesNotEscape + } + + override predicate parameterIsAlwaysReturned(int index) { + index = 0 } override predicate hasDataFlow(FunctionInput input, FunctionOutput output) { diff --git a/cpp/ql/src/semmle/code/cpp/models/interfaces/Alias.qll b/cpp/ql/src/semmle/code/cpp/models/interfaces/Alias.qll new file mode 100644 index 000000000000..43e4ecd2a9a4 --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/models/interfaces/Alias.qll @@ -0,0 +1,101 @@ +/** + * Provides an abstract class for accurate alias modeling of library + * functions when source code is not available. 
To use this QL library, + * create a QL class extending `AliasFunction` with a characteristic + * predicate that selects the function or set of functions you are modeling. + * Within that class, override the predicates provided by `AliasFunction` + * to match the flow within that function. + */ + +import semmle.code.cpp.Function +import semmle.code.cpp.models.Models + +module AliasModel { + private newtype TParameterEscape = + TDoesNotEscape() or + TEscapesOnlyViaReturn() or + TEscapes() + + class ParameterEscape extends TParameterEscape { + string toString() { + result = "Unknown" + } + } + + class DoesNotEscape extends ParameterEscape, TDoesNotEscape { + override string toString() { + result = "DoesNotEscape" + } + } + + class EscapesOnlyViaReturn extends ParameterEscape, TEscapesOnlyViaReturn { + override string toString() { + result = "EscapesOnlyViaReturn" + } + } + + class Escapes extends ParameterEscape, TEscapes { + override string toString() { + result = "Escapes" + } + } + + /** + * Models the aliasing behavior of a library function. + */ + abstract class AliasFunction extends Function { + /** + * Specifies whether the address passed to the parameter at the specified index is retained after + * the function returns. The result is given as a `ParameterEscape` object. See the comments for + * that class and its subclasses for a description of each possible result. + * + * Example: + * ``` + * int* g; + * int* func(int* p, int* q, int* r, int* s, int n) { + * *s = 1; // `s` does not escape. + * g = p; // Stored in global. `p` escapes. + * if (rand()) { + * return q; // `q` escapes via the return value. + * } + * else { + * return r + n; // `r` escapes via the return value, even though an offset has been added. 
+ * } + * } + * ``` + * + * For the above function, the following terms hold: + * - `getParameterEscapeBehavior(0) instanceof Escapes` + * - `getParameterEscapeBehavior(1) instanceof EscapesOnlyViaReturn` + * - `getParameterEscapeBehavior(2) instanceof EscapesOnlyViaReturn` + * - `getParameterEscapeBehavior(3) instanceof DoesNotEscape` + */ + abstract ParameterEscape getParameterEscapeBehavior(int index); + + /** + * Holds if the function always returns the value of the parameter at the specified index. + */ + abstract predicate parameterIsAlwaysReturned(int index); + } + + /** + * Specifies whether the address passed to the parameter at the specified index is retained after + * the function returns. The result is given as a `ParameterEscape` object. See the comments for + * that class and its subclasses for a description of each possible result. + */ + ParameterEscape getParameterEscapeBehavior(Function f, int index) { + result = f.(AliasFunction).getParameterEscapeBehavior(index) or + ( + not f instanceof AliasFunction and + exists(f.getParameter(index)) and + result instanceof Escapes + ) + } + + /** + * Holds if the function always returns the value of the parameter at the specified index. 
+ */ + predicate parameterIsAlwaysReturned(Function f, int index) { + f.(AliasFunction).parameterIsAlwaysReturned(index) + } +} \ No newline at end of file diff --git a/cpp/ql/src/semmle/code/cpp/models/interfaces/FunctionInputsAndOutputs.qll b/cpp/ql/src/semmle/code/cpp/models/interfaces/FunctionInputsAndOutputs.qll index db4e5a0faa01..4ee4dc962d23 100644 --- a/cpp/ql/src/semmle/code/cpp/models/interfaces/FunctionInputsAndOutputs.qll +++ b/cpp/ql/src/semmle/code/cpp/models/interfaces/FunctionInputsAndOutputs.qll @@ -1,5 +1,5 @@ /** - * Provides a set of QL clcasses for indicating dataflows through a particular + * Provides a set of QL classes for indicating dataflows through a particular * parameter, return value, or qualifier, as well as flows at one level of * pointer indirection. */ diff --git a/cpp/ql/src/semmle/code/cpp/models/interfaces/SideEffect.qll b/cpp/ql/src/semmle/code/cpp/models/interfaces/SideEffect.qll new file mode 100644 index 000000000000..447417c4750e --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/models/interfaces/SideEffect.qll @@ -0,0 +1,54 @@ +/** + * Provides an abstract class for accurate dataflow modeling of library + * functions when source code is not available. To use this QL library, + * create a QL class extending `SideEffectFunction` with a characteristic + * predicate that selects the function or set of functions you are modeling. + * Within that class, override the predicates provided by `SideEffectFunction` + * to match the flow within that function. + */ + +import semmle.code.cpp.Function +import semmle.code.cpp.models.Models + +module SideEffectModel { + /** + * Models the side effects of a library function. + */ + abstract class SideEffectFunction extends Function { + /** + * Holds if the function may read from memory that was defined before entry to the function. This + * memory could be from global variables, or from other memory that was reachable from a pointer + * that was passed into the function. 
+ */ + abstract predicate readsMemory(); + + /** + * Holds if the function may write to memory that remains allocated after the function returns. + * This memory could be from global variables, or from other memory that was reachable from a + * pointer that was passed into the function. + */ + abstract predicate writesMemory(); + } + + /** + * Holds if the function `f` may read from memory that was defined before entry to the function. + * This memory could be from global variables, or from other memory that was reachable from a + * pointer that was passed into the function. + */ + predicate functionReadsMemory(Function f) { + not exists(SideEffectFunction sideEffect | + sideEffect = f and not sideEffect.readsMemory() + ) + } + + /** + * Holds if the function `f` may write to memory that remains allocated after the function returns. + * This memory could be from global variables, or from other memory that was reachable from a + * pointer that was passed into the function. + */ + predicate functionWritesMemory(Function f) { + not exists(SideEffectFunction sideEffect | + sideEffect = f and not sideEffect.writesMemory() + ) + } +} diff --git a/cpp/ql/src/semmle/code/cpp/models/interfaces/SideEffectFunction.qll b/cpp/ql/src/semmle/code/cpp/models/interfaces/SideEffectFunction.qll deleted file mode 100644 index 134f170cf19f..000000000000 --- a/cpp/ql/src/semmle/code/cpp/models/interfaces/SideEffectFunction.qll +++ /dev/null @@ -1,68 +0,0 @@ -/** - * Provides an abstract class for accurate dataflow modeling of library - * functions when source code is not available. To use this QL library, - * create a QL class extending `SideEffectFunction` with a characteristic - * predicate that selects the function or set of functions you are modeling. - * Within that class, override the predicates provided by `SideEffectFunction` - * to match the flow within that function. 
- */ - -import semmle.code.cpp.Function -import semmle.code.cpp.models.Models - -/** - * Models the side effects of a library function. - */ -abstract class SideEffectFunction extends Function { - /** - * Holds if the function may read from memory that was defined before entry to the function. This - * memory could be from global variables, or from other memory that was reachable from a pointer - * that was passed into the function. - */ - abstract predicate readsMemory(); - - /** - * Holds if the function may write to memory that remains allocated after the function returns. - * This memory could be from global variables, or from other memory that was reachable from a - * pointer that was passed into the function. - */ - abstract predicate writesMemory(); - - /** - * Holds if any address passed to the parameter at the specified index is retained after the - * function returns. - */ - abstract predicate parameterEscapes(int index); -} - -/** - * Holds if the function `f` may read from memory that was defined before entry to the function. - * This memory could be from global variables, or from other memory that was reachable from a - * pointer that was passed into the function. - */ -predicate functionReadsMemory(Function f) { - not exists(SideEffectFunction sideEffect | - sideEffect = f and not sideEffect.readsMemory() - ) -} - -/** -* Holds if the function `f` may write to memory that remains allocated after the function returns. -* This memory could be from global variables, or from other memory that was reachable from a -* pointer that was passed into the function. -*/ -predicate functionWritesMemory(Function f) { - not exists(SideEffectFunction sideEffect | - sideEffect = f and not sideEffect.writesMemory() - ) -} - -/** - * Holds if any address passed to the parameter at the specified index is retained after the - * function returns. 
- */ -predicate functionParameterEscapes(Function f, int index) { - not exists(SideEffectFunction sideEffect | - exists(f.getParameter(index)) and sideEffect = f and not sideEffect.parameterEscapes(index) - ) -} From e8efb321568fb7e3c29a21d513644e68f5a49e49 Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Wed, 5 Dec 2018 11:33:48 -0800 Subject: [PATCH 09/15] C++: Remove `StoreDestinationAsPostUpdateNode` --- .../code/cpp/ir/dataflow/internal/DataFlowUtil.qll | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index dfacd48a68fa..a6f398989383 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -84,6 +84,9 @@ class UninitializedNode extends Node, UninitializedInstruction { * Nodes corresponding to AST elements, for example `ExprNode`, usually refer * to the value before the update with the exception of `ClassInstanceExpr`, * which represents the value after the constructor has run. + * + * This class exists to match the interface used by Java. There are currently no non-abstract + * classes that extend it. When we implement field flow, we can revisit this. */ abstract class PostUpdateNode extends Node { /** @@ -92,17 +95,6 @@ abstract class PostUpdateNode extends Node { abstract Node getPreUpdateNode(); } -class StoreDestinationAsPostUpdateNode extends PostUpdateNode { - StoreInstruction si; - StoreDestinationAsPostUpdateNode() { - this = si.getDestinationAddress() - } - - override Node getPreUpdateNode() { - result = si.getDestinationAddress() - } -} - /** * Gets the `Node` corresponding to `e`. 
*/ From 2b80aee55721c3366923ab28ae8f6b8a52119451 Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Wed, 5 Dec 2018 12:34:44 -0800 Subject: [PATCH 10/15] C++: Use `getConvertedResultExpr` in IR-based dataflow This sort of fixes one FP and causes a new FN, but for the wrong reasons. The IR dataflow is tracking the reference itself, rather than the referred-to object. Once we can better model indirections, we can make this work correctly. This change is still the right thing to do, because it ensures that the dataflow is looking at actual expression being computed by the instruction. --- .../semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll | 8 +++++--- .../dataflow/dataflow-tests/IRDataflowTestCommon.qll | 2 +- .../dataflow/dataflow-tests/test_diff.expected | 2 +- .../dataflow/dataflow-tests/test_ir.expected | 2 -- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index a6f398989383..5c7f1a20c946 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -25,7 +25,7 @@ class Node extends Instruction { } /** Gets the expression corresponding to this node, if any. */ - Expr asExpr() { result = this.getUnconvertedResultExpression() } + Expr asExpr() { result = this.getConvertedResultExpression() } /** Gets the parameter corresponding to this node, if any. */ Parameter asParameter() { result = this.(InitializeParameterInstruction).getParameter() } @@ -48,8 +48,10 @@ class Node extends Instruction { * An expression, viewed as a node in a data flow graph. 
*/ class ExprNode extends Node { - ExprNode() { getAST() instanceof Expr } - Expr getExpr() { result = getAST() } + Expr expr; + + ExprNode() { expr = this.getConvertedResultExpression() } + Expr getExpr() { result = expr } } /** diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/IRDataflowTestCommon.qll b/cpp/ql/test/library-tests/dataflow/dataflow-tests/IRDataflowTestCommon.qll index ce2eb1ac97ad..ec972d40039c 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/IRDataflowTestCommon.qll +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/IRDataflowTestCommon.qll @@ -19,7 +19,7 @@ class TestAllocationConfig extends DataFlow::Configuration { override predicate isSink(DataFlow::Node sink) { exists(FunctionCall call | call.getTarget().getName() = "sink" and - sink.asExpr() = call.getAnArgument() + sink.asExpr() = call.getAnArgument().getFullyConverted() ) } diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected index bd2e2be14d78..720e00df1d43 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected @@ -1,6 +1,6 @@ | test.cpp:6:12:6:17 | test.cpp:21:8:21:9 | IR only | | test.cpp:66:30:66:36 | test.cpp:71:8:71:9 | AST only | -| test.cpp:89:28:89:34 | test.cpp:92:8:92:14 | IR only | +| test.cpp:89:28:89:34 | test.cpp:90:8:90:14 | AST only | | test.cpp:100:13:100:18 | test.cpp:103:10:103:12 | AST only | | test.cpp:120:9:120:20 | test.cpp:126:8:126:19 | AST only | | test.cpp:122:18:122:30 | test.cpp:132:22:132:23 | IR only | diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected index fe7b875e7732..b990760a2a5d 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected +++ 
b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected @@ -10,8 +10,6 @@ | test.cpp:76:8:76:9 | Load: u1 | test.cpp:75:7:75:8 | Uninitialized: definition of u1 | | test.cpp:84:8:84:18 | Load: ... ? ... : ... | test.cpp:83:7:83:8 | Uninitialized: definition of u2 | | test.cpp:86:8:86:9 | Load: i1 | test.cpp:83:7:83:8 | Uninitialized: definition of u2 | -| test.cpp:90:8:90:14 | Load: source1 | test.cpp:89:28:89:34 | InitializeParameter: source1 | -| test.cpp:92:8:92:14 | Load: source1 | test.cpp:89:28:89:34 | InitializeParameter: source1 | | test.cpp:132:22:132:23 | Load: m1 | test.cpp:122:18:122:30 | InitializeParameter: sourceStruct1 | | test.cpp:140:22:140:23 | Load: m1 | test.cpp:122:18:122:30 | InitializeParameter: sourceStruct1 | | test.cpp:188:8:188:8 | Load: y | test.cpp:186:27:186:32 | Call: call to source | From 84b39bf999be985671a90250e83f1695438f239c Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Thu, 6 Dec 2018 12:35:33 -0800 Subject: [PATCH 11/15] C++: Simplify models for side effects and alias info. 
--- .../raw/internal/TranslatedCall.qll | 4 +- .../implementations/IdentityFunction.qll | 23 ++-- .../code/cpp/models/interfaces/Alias.qll | 117 ++++++------------ .../code/cpp/models/interfaces/SideEffect.qll | 46 ++----- 4 files changed, 60 insertions(+), 130 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCall.qll b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCall.qll index 3df91fd423e7..f8c55464de7e 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCall.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/TranslatedCall.qll @@ -308,11 +308,11 @@ class TranslatedFunctionCall extends TranslatedCallExpr, TranslatedDirectCall { } override predicate hasReadSideEffect() { - SideEffectModel::functionReadsMemory(funcCall.getTarget()) + not funcCall.getTarget().(SideEffectFunction).neverReadsMemory() } override predicate hasWriteSideEffect() { - SideEffectModel::functionWritesMemory(funcCall.getTarget()) + not funcCall.getTarget().(SideEffectFunction).neverWritesMemory() } } diff --git a/cpp/ql/src/semmle/code/cpp/models/implementations/IdentityFunction.qll b/cpp/ql/src/semmle/code/cpp/models/implementations/IdentityFunction.qll index 3edd20ef40b6..169d5903b56b 100644 --- a/cpp/ql/src/semmle/code/cpp/models/implementations/IdentityFunction.qll +++ b/cpp/ql/src/semmle/code/cpp/models/implementations/IdentityFunction.qll @@ -6,8 +6,7 @@ import semmle.code.cpp.models.interfaces.SideEffect /** * The standard function templates `std::move` and `std::identity` */ -class IdentityFunction extends DataFlowFunction, SideEffectModel::SideEffectFunction, - AliasModel::AliasFunction { +class IdentityFunction extends DataFlowFunction, SideEffectFunction, AliasFunction { IdentityFunction() { this.getNamespace().getParentNamespace() instanceof GlobalNamespace and this.getNamespace().getName() = "std" and @@ -17,23 +16,25 @@ class IdentityFunction extends DataFlowFunction, 
SideEffectModel::SideEffectFunc ) } - override predicate readsMemory() { - none() + override predicate neverReadsMemory() { + any() + } + + override predicate neverWritesMemory() { + any() } - override predicate writesMemory() { + override predicate parameterNeverEscapes(int index) { none() } - override AliasModel::ParameterEscape getParameterEscapeBehavior(int index) { - exists(getParameter(index)) and - if index = 0 then - result instanceof AliasModel::EscapesOnlyViaReturn - else - result instanceof AliasModel::DoesNotEscape + override predicate parameterEscapesOnlyViaReturn(int index) { + // These functions simply return the argument value. + index = 0 } override predicate parameterIsAlwaysReturned(int index) { + // These functions simply return the argument value. index = 0 } diff --git a/cpp/ql/src/semmle/code/cpp/models/interfaces/Alias.qll b/cpp/ql/src/semmle/code/cpp/models/interfaces/Alias.qll index 43e4ecd2a9a4..0d489afa9582 100644 --- a/cpp/ql/src/semmle/code/cpp/models/interfaces/Alias.qll +++ b/cpp/ql/src/semmle/code/cpp/models/interfaces/Alias.qll @@ -10,92 +10,45 @@ import semmle.code.cpp.Function import semmle.code.cpp.models.Models -module AliasModel { - private newtype TParameterEscape = - TDoesNotEscape() or - TEscapesOnlyViaReturn() or - TEscapes() - - class ParameterEscape extends TParameterEscape { - string toString() { - result = "Unknown" - } - } - - class DoesNotEscape extends ParameterEscape, TDoesNotEscape { - override string toString() { - result = "DoesNotEscape" - } - } - - class EscapesOnlyViaReturn extends ParameterEscape, TEscapesOnlyViaReturn { - override string toString() { - result = "EscapesOnlyViaReturn" - } - } - - class Escapes extends ParameterEscape, TEscapes { - override string toString() { - result = "Escapes" - } - } - +/** + * Models the aliasing behavior of a library function. + */ +abstract class AliasFunction extends Function { /** - * Models the aliasing behavior of a library function. 
- */ - abstract class AliasFunction extends Function { - /** - * Specifies whether the address passed to the parameter at the specified index is retained after - * the function returns. The result is given as a `ParameterEscape` object. See the comments for - * that class and its subclasses for a description of each possible result. - * - * Example: - * ``` - * int* g; - * int* func(int* p, int* q, int* r, int* s, int n) { - * *s = 1; // `s` does not escape. - * g = p; // Stored in global. `p` escapes. - * if (rand()) { - * return q; // `q` escapes via the return value. - * } - * else { - * return r + n; // `r` escapes via the return value, even though an offset has been added. - * } - * } - * ``` - * - * For the above function, the following terms hold: - * - `getParameterEscapeBehavior(0) instanceof Escapes` - * - `getParameterEscapeBehavior(1) instanceof EscapesOnlyViaReturn` - * - `getParameterEscapeBehavior(2) instanceof EscapesOnlyViaReturn` - * - `getParameterEscapeBehavior(3) instanceof DoesNotEscape` - */ - abstract ParameterEscape getParameterEscapeBehavior(int index); - - /** - * Holds if the function always returns the value of the parameter at the specified index. - */ - abstract predicate parameterIsAlwaysReturned(int index); - } + * Holds if the address passed to the parameter at the specified index is never retained after + * the function returns. + * + * Example: + * ``` + * int* g; + * int* func(int* p, int* q, int* r, int* s, int n) { + * *s = 1; // `s` does not escape. + * g = p; // Stored in global. `p` escapes. + * if (rand()) { + * return q; // `q` escapes via the return value. + * } + * else { + * return r + n; // `r` escapes via the return value, even though an offset has been added. 
+ * } + * } + * ``` + * + * For the above function, the following terms hold: + * - `parameterEscapesOnlyViaReturn(1)` + * - `parameterEscapesOnlyViaReturn(2)` + * - `parameterNeverEscapes(3)` + */ + abstract predicate parameterNeverEscapes(int index); /** - * Specifies whether the address passed to the parameter at the specified index is retained after - * the function returns. The result is given as a `ParameterEscape` object. See the comments for - * that class and its subclasses for a description of each possible result. - */ - ParameterEscape getParameterEscapeBehavior(Function f, int index) { - result = f.(AliasFunction).getParameterEscapeBehavior(index) or - ( - not f instanceof AliasFunction and - exists(f.getParameter(index)) and - result instanceof Escapes - ) - } + * Holds if the address passed to the parameter at the specified index escapes via the return + * value of the function, but does not otherwise escape. See the comment for + * `parameterNeverEscapes` for an example. + */ + abstract predicate parameterEscapesOnlyViaReturn(int index); /** * Holds if the function always returns the value of the parameter at the specified index. */ - predicate parameterIsAlwaysReturned(Function f, int index) { - f.(AliasFunction).parameterIsAlwaysReturned(index) - } -} \ No newline at end of file + abstract predicate parameterIsAlwaysReturned(int index); +} diff --git a/cpp/ql/src/semmle/code/cpp/models/interfaces/SideEffect.qll b/cpp/ql/src/semmle/code/cpp/models/interfaces/SideEffect.qll index 447417c4750e..4d358089896a 100644 --- a/cpp/ql/src/semmle/code/cpp/models/interfaces/SideEffect.qll +++ b/cpp/ql/src/semmle/code/cpp/models/interfaces/SideEffect.qll @@ -10,45 +10,21 @@ import semmle.code.cpp.Function import semmle.code.cpp.models.Models -module SideEffectModel { - /** - * Models the side effects of a library function. 
- */ - abstract class SideEffectFunction extends Function { - /** - * Holds if the function may read from memory that was defined before entry to the function. This - * memory could be from global variables, or from other memory that was reachable from a pointer - * that was passed into the function. - */ - abstract predicate readsMemory(); - - /** - * Holds if the function may write to memory that remains allocated after the function returns. - * This memory could be from global variables, or from other memory that was reachable from a - * pointer that was passed into the function. - */ - abstract predicate writesMemory(); - } - +/** + * Models the side effects of a library function. + */ +abstract class SideEffectFunction extends Function { /** - * Holds if the function `f` may read from memory that was defined before entry to the function. + * Holds if the function never reads from memory that was defined before entry to the function. * This memory could be from global variables, or from other memory that was reachable from a * pointer that was passed into the function. */ - predicate functionReadsMemory(Function f) { - not exists(SideEffectFunction sideEffect | - sideEffect = f and not sideEffect.readsMemory() - ) - } + abstract predicate neverReadsMemory(); /** - * Holds if the function `f` may write to memory that remains allocated after the function returns. - * This memory could be from global variables, or from other memory that was reachable from a - * pointer that was passed into the function. - */ - predicate functionWritesMemory(Function f) { - not exists(SideEffectFunction sideEffect | - sideEffect = f and not sideEffect.writesMemory() - ) - } + * Holds if the function never writes to memory that remains allocated after the function + * returns. This memory could be from global variables, or from other memory that was reachable + * from a pointer that was passed into the function. 
+ */ + abstract predicate neverWritesMemory(); } From ebbd701188c80e66498bbf8050e17a3f6ffc6eb9 Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Thu, 6 Dec 2018 12:35:43 -0800 Subject: [PATCH 12/15] C++: Fix PR feedback --- .../src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 5c7f1a20c946..cd9b874781bd 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -50,7 +50,7 @@ class Node extends Instruction { class ExprNode extends Node { Expr expr; - ExprNode() { expr = this.getConvertedResultExpression() } + ExprNode() { expr = this.asExpr() } Expr getExpr() { result = expr } } From df882a9e72122557542af440b3b8f922b552fb68 Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Mon, 10 Dec 2018 10:09:42 -0800 Subject: [PATCH 13/15] C++: Avoid creating `ExprNode`s for `Conversion`s --- .../code/cpp/ir/dataflow/internal/DataFlowUtil.qll | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index cd9b874781bd..e9e62fedfc6b 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -25,7 +25,12 @@ class Node extends Instruction { } /** Gets the expression corresponding to this node, if any. */ - Expr asExpr() { result = this.getConvertedResultExpression() } + Expr asExpr() { + result = this.getConvertedResultExpression() and + // Ignore conversions. The AST-based library does have an `ExprNode` for each `Conversion`, but + // there is no flow involving those nodes. 
+ not result instanceof Conversion + } /** Gets the parameter corresponding to this node, if any. */ Parameter asParameter() { result = this.(InitializeParameterInstruction).getParameter() } @@ -100,7 +105,9 @@ abstract class PostUpdateNode extends Node { /** * Gets the `Node` corresponding to `e`. */ -ExprNode exprNode(Expr e) { result.getExpr() = e } +ExprNode exprNode(Expr e) { + result.getExpr() = e +} /** * Gets the `Node` corresponding to the value of `p` at function entry. From 23993710d15d87511024d30868ee9e63cf3e914a Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Mon, 10 Dec 2018 15:06:29 -0800 Subject: [PATCH 14/15] Revert "C++: Avoid creating `ExprNode`s for `Conversion`s" This reverts commit df882a9e72122557542af440b3b8f922b552fb68. --- .../code/cpp/ir/dataflow/internal/DataFlowUtil.qll | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index e9e62fedfc6b..cd9b874781bd 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -25,12 +25,7 @@ class Node extends Instruction { } /** Gets the expression corresponding to this node, if any. */ - Expr asExpr() { - result = this.getConvertedResultExpression() and - // Ignore conversions. The AST-based library does have an `ExprNode` for each `Conversion`, but - // there is no flow involving those nodes. - not result instanceof Conversion - } + Expr asExpr() { result = this.getConvertedResultExpression() } /** Gets the parameter corresponding to this node, if any. */ Parameter asParameter() { result = this.(InitializeParameterInstruction).getParameter() } @@ -105,9 +100,7 @@ abstract class PostUpdateNode extends Node { /** * Gets the `Node` corresponding to `e`. 
*/ -ExprNode exprNode(Expr e) { - result.getExpr() = e -} +ExprNode exprNode(Expr e) { result.getExpr() = e } /** * Gets the `Node` corresponding to the value of `p` at function entry. From 78e5b3ad6349b61c0c26f7f9f0907fadd87c0b4f Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Mon, 10 Dec 2018 15:09:49 -0800 Subject: [PATCH 15/15] C++: Add IR dataflow to ImportAdditionalQueries.ql --- cpp/ql/src/filters/ImportAdditionalLibraries.ql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/ql/src/filters/ImportAdditionalLibraries.ql b/cpp/ql/src/filters/ImportAdditionalLibraries.ql index 261ee3de0ca3..ca4b9856585f 100644 --- a/cpp/ql/src/filters/ImportAdditionalLibraries.ql +++ b/cpp/ql/src/filters/ImportAdditionalLibraries.ql @@ -14,6 +14,10 @@ import semmle.code.cpp.dataflow.DataFlow2 import semmle.code.cpp.dataflow.DataFlow3 import semmle.code.cpp.dataflow.DataFlow4 import semmle.code.cpp.dataflow.TaintTracking +import semmle.code.cpp.ir.dataflow.DataFlow +import semmle.code.cpp.ir.dataflow.DataFlow2 +import semmle.code.cpp.ir.dataflow.DataFlow3 +import semmle.code.cpp.ir.dataflow.DataFlow4 import semmle.code.cpp.valuenumbering.HashCons from File f, string tag