From f4d0c5e905f9448812d659eecc3eac641d751657 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Wed, 15 Jan 2020 13:19:06 +0100 Subject: [PATCH 1/3] C++ IR: Support for global virtual dispatch The IR data flow library now supports virtual dispatch with a library that's similar to `security.TaintTracking`. In particular, it should have the same performance characteristics. The main difference is that non-recursive callers of `flowsFrom` now pass `_` instead of `true` for `boolean allowFromArg`. This change allows flow through `return` to actually work. --- .../ir/dataflow/internal/DataFlowDispatch.qll | 161 +++++++++++++----- .../dataflow/dataflow-tests/dispatch.cpp | 63 +++++++ .../dataflow-tests/test_diff.expected | 6 + .../dataflow/dataflow-tests/test_ir.expected | 6 + 4 files changed, 190 insertions(+), 46 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll index 9572639de595..ff0d9eb0028d 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll @@ -1,6 +1,7 @@ private import cpp private import semmle.code.cpp.ir.IR private import semmle.code.cpp.ir.dataflow.DataFlow +private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate Function viableImpl(CallInstruction call) { result = viableCallable(call) } @@ -22,57 +23,125 @@ Function viableCallable(CallInstruction call) { strictcount(Function other | functionSignatureWithBody(qualifiedName, nparams, other)) = 1 ) or - // Rudimentary virtual dispatch support. It's essentially local data flow - // where the source is a derived-to-base conversion and the target is the - // qualifier of a call. - exists(Class derived, DataFlow::Node thisArgument | - nodeMayHaveClass(derived, thisArgument) and - overrideMayAffectCall(derived, thisArgument, _, result, call) - ) + // Virtual dispatch + result = call.(VirtualDispatch::DataSensitiveCall).resolve() } /** - * Holds if `call` is a virtual function call with qualifier `thisArgument` in - * `enclosingFunction`, whose static target is overridden by - * `overridingFunction` in `overridingClass`. + * Provides virtual dispatch support compatible with the original + * implementation of `semmle.code.cpp.security.TaintTracking`. */ -pragma[noinline] -private predicate overrideMayAffectCall( - Class overridingClass, DataFlow::Node thisArgument, Function enclosingFunction, - MemberFunction overridingFunction, CallInstruction call -) { - call.getEnclosingFunction() = enclosingFunction and - overridingFunction.getAnOverriddenFunction+() = call.getStaticCallTarget() and - overridingFunction.getDeclaringType() = overridingClass and - thisArgument = DataFlow::instructionNode(call.getThisArgument()) -} +private module VirtualDispatch { + /** A call that may dispatch differently depending on the qualifier value. */ + abstract class DataSensitiveCall extends DataFlowCall { + abstract DataFlow::Node getSrc(); -/** - * Holds if `node` may have dynamic class `derived`, where `derived` is a class - * that may affect virtual dispatch within the enclosing function. - * - * For the sake of performance, this recursion is written out manually to make - * it a relation on `Class x Node` rather than `Node x Node` or `MemberFunction - * x Node`, both of which would be larger. It's a forward search since there - * should usually be fewer classes than calls. - * - * If a value is cast several classes up in the hierarchy, that will be modeled - * as a chain of `ConvertToBaseInstruction`s and will cause the search to start - * from each of them and pass through subsequent ones. There might be - * performance to gain by stopping before a second upcast and reconstructing - * the full chain in a "big-step" recursion after this one. - */ -private predicate nodeMayHaveClass(Class derived, DataFlow::Node node) { - exists(ConvertToBaseInstruction toBase | - derived = toBase.getDerivedClass() and - overrideMayAffectCall(derived, _, toBase.getEnclosingFunction(), _, _) and - node.asInstruction() = toBase - ) - or - exists(DataFlow::Node prev | - nodeMayHaveClass(derived, prev) and - DataFlow::localFlowStep(prev, node) - ) + /** Gets a candidate target for this call. */ + cached + abstract Function resolve(); + + /** + * Whether `src` can flow to this call. + * + * Searches backwards from `getSrc()` to `src`. The `allowFromArg` + * parameter is true when the search is allowed to continue backwards into + * a parameter; non-recursive callers should pass `_` for `allowFromArg`. + */ + predicate flowsFrom(DataFlow::Node src, boolean allowFromArg) { + src = this.getSrc() and allowFromArg = true + or + exists(DataFlow::Node other, boolean allowOtherFromArg | + this.flowsFrom(other, allowOtherFromArg) + | + // Call argument + exists(DataFlowCall call, int i | + other.(DataFlow::ParameterNode).isParameterOf(call.getStaticCallTarget(), i) and + src.(ArgumentNode).argumentOf(call, i) + ) and + allowOtherFromArg = true and + allowFromArg = true + or + // Call return + exists(DataFlowCall call, ReturnKind returnKind | + other = getAnOutNode(call, returnKind) and + src.(ReturnNode).getKind() = returnKind and + call.getStaticCallTarget() = src.getEnclosingCallable() + ) and + allowFromArg = false + or + // Local flow + DataFlow::localFlowStep(src, other) and + allowFromArg = allowOtherFromArg + ) + or + // Flow through global variable + exists(StoreInstruction store, Variable var | + store = src.asInstruction() and + var = store.getDestinationAddress().(VariableAddressInstruction).getASTVariable() and + this.flowsFromGlobal(var) and + allowFromArg = true + ) + } + + private predicate flowsFromGlobal(GlobalOrNamespaceVariable var) { + exists(LoadInstruction load | + this.flowsFrom(DataFlow::instructionNode(load), _) and + load.getSourceAddress().(VariableAddressInstruction).getASTVariable() = var + ) + } + } + + /** Call through a function pointer. */ + private class DataSensitiveExprCall extends DataSensitiveCall { + DataSensitiveExprCall() { not exists(this.getStaticCallTarget()) } + + override DataFlow::Node getSrc() { result.asInstruction() = this.getCallTarget() } + + override Function resolve() { + exists(FunctionInstruction fi | + this.flowsFrom(DataFlow::instructionNode(fi), _) and + result = fi.getFunctionSymbol() + ) + } + } + + /** Call to a virtual function. */ + private class DataSensitiveOverriddenFunctionCall extends DataSensitiveCall { + DataSensitiveOverriddenFunctionCall() { + exists(this.getStaticCallTarget().(VirtualFunction).getAnOverridingFunction()) + } + + override DataFlow::Node getSrc() { result.asInstruction() = this.getThisArgument() } + + override MemberFunction resolve() { + exists(Class overridingClass | + this.overrideMayAffectCall(overridingClass, result) and + this.hasFlowFromCastFrom(overridingClass) + ) + } + + /** + * Holds if `this` is a virtual function call whose static target is + * overridden by `overridingFunction` in `overridingClass`. + */ + pragma[noinline] + private predicate overrideMayAffectCall(Class overridingClass, MemberFunction overridingFunction) { + overridingFunction.getAnOverriddenFunction+() = this.getStaticCallTarget().(VirtualFunction) and + overridingFunction.getDeclaringType() = overridingClass + } + + /** + * Holds if the qualifier of `this` has flow from an upcast from + * `derivedClass`. + */ + pragma[noinline] + private predicate hasFlowFromCastFrom(Class derivedClass) { + exists(ConvertToBaseInstruction toBase | + this.flowsFrom(DataFlow::instructionNode(toBase), _) and + derivedClass = toBase.getDerivedClass() + ) + } + } } /** diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp index 5e4f2f97f465..543d9c805032 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp @@ -44,3 +44,66 @@ void VirtualDispatch(Bottom *bottomPtr, Bottom &bottomRef) { sink(topRef.notSource2()); // no flow [FALSE POSITIVE] topRef.notSink(source()); // no flow [FALSE POSITIVE] } + +Top *globalBottom, *globalMiddle; + +Top *readGlobalBottom() { + return globalBottom; +} + +void DispatchThroughGlobal() { + sink(globalBottom->isSource1()); // flow [NOT DETECTED by AST] + sink(globalMiddle->isSource1()); // no flow + + sink(readGlobalBottom()->isSource1()); // flow [NOT DETECTED by AST] + + globalBottom = new Bottom(); + globalMiddle = new Middle(); +} + +Top *allocateBottom() { + return new Bottom(); +} + +void callSinkByPointer(Top *top) { + top->isSink(source()); // flow [NOT DETECTED by AST] +} + +void callSinkByReference(Top &top) { + top.isSink(source()); // flow [NOT DETECTED by AST] +} + +void globalVirtualDispatch() { + callSinkByPointer(allocateBottom()); + callSinkByReference(*allocateBottom()); + + Top *x = allocateBottom(); + x->isSink(source()); // flow [NOT DETECTED by AST] +} + +Top *identity(Top *top) { + return top; +} + +void callIdentityFunctions(Top *top, Bottom *bottom) { + identity(bottom)->isSink(source()); // flow [NOT DETECTED] + identity(top)->isSink(source()); // now flow +} + +using SinkFunctionType = void (*)(int); + +void callSink(int x) { + sink(x); +} + +SinkFunctionType returnCallSink() { + return callSink; +} + +void testFunctionPointer(SinkFunctionType maybeCallSink, SinkFunctionType dontCallSink, bool b) { + if (b) { + maybeCallSink = returnCallSink(); + } + maybeCallSink(source()); // flow [NOT DETECTED by AST] + dontCallSink(source()); // no flow +} diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected index 7a2728229c6c..fdfddb07ed6b 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected @@ -10,8 +10,14 @@ | dispatch.cpp:16:37:16:42 | dispatch.cpp:40:15:40:23 | IR only | | dispatch.cpp:22:37:22:42 | dispatch.cpp:31:16:31:24 | IR only | | dispatch.cpp:22:37:22:42 | dispatch.cpp:39:15:39:23 | IR only | +| dispatch.cpp:22:37:22:42 | dispatch.cpp:55:22:55:30 | IR only | +| dispatch.cpp:22:37:22:42 | dispatch.cpp:58:28:58:36 | IR only | | dispatch.cpp:33:18:33:23 | dispatch.cpp:23:38:23:38 | IR only | | dispatch.cpp:41:17:41:22 | dispatch.cpp:23:38:23:38 | IR only | +| dispatch.cpp:69:15:69:20 | dispatch.cpp:23:38:23:38 | IR only | +| dispatch.cpp:73:14:73:19 | dispatch.cpp:23:38:23:38 | IR only | +| dispatch.cpp:81:13:81:18 | dispatch.cpp:23:38:23:38 | IR only | +| dispatch.cpp:107:17:107:22 | dispatch.cpp:96:8:96:8 | IR only | | lambdas.cpp:8:10:8:15 | lambdas.cpp:14:3:14:6 | AST only | | lambdas.cpp:8:10:8:15 | lambdas.cpp:18:8:18:8 | AST only | | lambdas.cpp:8:10:8:15 | lambdas.cpp:21:3:21:6 | AST only | diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected index 9de0724ec387..ea980b3b1a0d 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected @@ -14,6 +14,9 @@ | dispatch.cpp:11:38:11:38 | x | dispatch.cpp:45:18:45:23 | call to source | | dispatch.cpp:23:38:23:38 | x | dispatch.cpp:33:18:33:23 | call to source | | dispatch.cpp:23:38:23:38 | x | dispatch.cpp:41:17:41:22 | call to source | +| dispatch.cpp:23:38:23:38 | x | dispatch.cpp:69:15:69:20 | call to source | +| dispatch.cpp:23:38:23:38 | x | dispatch.cpp:73:14:73:19 | call to source | +| dispatch.cpp:23:38:23:38 | x | dispatch.cpp:81:13:81:18 | call to source | | dispatch.cpp:31:16:31:24 | call to isSource1 | dispatch.cpp:22:37:22:42 | call to source | | dispatch.cpp:32:16:32:24 | call to isSource2 | dispatch.cpp:16:37:16:42 | call to source | | dispatch.cpp:35:16:35:25 | call to notSource1 | dispatch.cpp:9:37:9:42 | call to source | @@ -22,6 +25,9 @@ | dispatch.cpp:40:15:40:23 | call to isSource2 | dispatch.cpp:16:37:16:42 | call to source | | dispatch.cpp:43:15:43:24 | call to notSource1 | dispatch.cpp:9:37:9:42 | call to source | | dispatch.cpp:44:15:44:24 | call to notSource2 | dispatch.cpp:10:37:10:42 | call to source | +| dispatch.cpp:55:22:55:30 | call to isSource1 | dispatch.cpp:22:37:22:42 | call to source | +| dispatch.cpp:58:28:58:36 | call to isSource1 | dispatch.cpp:22:37:22:42 | call to source | +| dispatch.cpp:96:8:96:8 | x | dispatch.cpp:107:17:107:22 | call to source | | test.cpp:7:8:7:9 | t1 | test.cpp:6:12:6:17 | call to source | | test.cpp:9:8:9:9 | t1 | test.cpp:6:12:6:17 | call to source | | test.cpp:10:8:10:9 | t2 | test.cpp:6:12:6:17 | call to source | From 2a0fc31b6854f20e8fe74d7d093feb3bffb446ad Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Mon, 20 Jan 2020 11:03:03 +0100 Subject: [PATCH 2/3] C++: Comment and rename getSrc -> getDispatchValue Better clarity was requested in the PR review. --- .../ir/dataflow/internal/DataFlowDispatch.qll | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll index ff0d9eb0028d..eaf472d0652f 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll @@ -34,7 +34,15 @@ Function viableCallable(CallInstruction call) { private module VirtualDispatch { /** A call that may dispatch differently depending on the qualifier value. */ abstract class DataSensitiveCall extends DataFlowCall { - abstract DataFlow::Node getSrc(); + /** + * Gets the node whose value determines the target of this call. This node + * could be the qualifier of a virtual dispatch or the function-pointer + * expression in a call to a function pointer. What they have in common is + * that we need to find out which data flows there, and then it's up to the + * `resolve` predicate to stitch that information together and resolve the + * call. + */ + abstract DataFlow::Node getDispatchValue(); /** Gets a candidate target for this call. */ cached @@ -43,12 +51,12 @@ private module VirtualDispatch { /** * Whether `src` can flow to this call. * - * Searches backwards from `getSrc()` to `src`. The `allowFromArg` + * Searches backwards from `getDispatchValue()` to `src`. The `allowFromArg` * parameter is true when the search is allowed to continue backwards into * a parameter; non-recursive callers should pass `_` for `allowFromArg`. */ predicate flowsFrom(DataFlow::Node src, boolean allowFromArg) { - src = this.getSrc() and allowFromArg = true + src = this.getDispatchValue() and allowFromArg = true or exists(DataFlow::Node other, boolean allowOtherFromArg | this.flowsFrom(other, allowOtherFromArg) @@ -95,7 +103,7 @@ private module VirtualDispatch { private class DataSensitiveExprCall extends DataSensitiveCall { DataSensitiveExprCall() { not exists(this.getStaticCallTarget()) } - override DataFlow::Node getSrc() { result.asInstruction() = this.getCallTarget() } + override DataFlow::Node getDispatchValue() { result.asInstruction() = this.getCallTarget() } override Function resolve() { exists(FunctionInstruction fi | @@ -111,7 +119,7 @@ private module VirtualDispatch { exists(this.getStaticCallTarget().(VirtualFunction).getAnOverridingFunction()) } - override DataFlow::Node getSrc() { result.asInstruction() = this.getThisArgument() } + override DataFlow::Node getDispatchValue() { result.asInstruction() = this.getThisArgument() } override MemberFunction resolve() { exists(Class overridingClass | From 391b80eac4c999582f6692dea1a9ea46b99ec60c Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Mon, 20 Jan 2020 11:17:44 +0100 Subject: [PATCH 3/3] C++: Show virtual inheritance problem in vdispatch --- .../dataflow/dataflow-tests/dispatch.cpp | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp index 543d9c805032..e304aafe27f4 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp @@ -107,3 +107,26 @@ void testFunctionPointer(SinkFunctionType maybeCallSink, SinkFunctionType dontCa maybeCallSink(source()); // flow [NOT DETECTED by AST] dontCallSink(source()); // no flow } + +namespace virtual_inheritance { + struct Top { + virtual int isSource() { return 0; } + }; + + struct Middle : virtual Top { + int isSource() override { return source(); } + }; + + struct Bottom : Middle { + }; + + void VirtualDispatch(Bottom *bottomPtr, Bottom &bottomRef) { + // Because the inheritance from `Top` is virtual, the following casts go + // directly from `Bottom` to `Top`, skipping `Middle`. That means we don't + // get flow from a `Middle` value to the call qualifier. + Top *topPtr = bottomPtr, &topRef = bottomRef; + + sink(topPtr->isSource()); // flow [NOT DETECTED] + sink(topRef.isSource()); // flow [NOT DETECTED] + } +}