From d7681bf122cab8c4bcb0d396cc8e574beb535e0d Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Tue, 20 Aug 2019 15:06:26 +0200 Subject: [PATCH 1/3] C++: Don't use definitionByReference for data flow The data flow library conflates pointers and objects enough for the `definitionByReference` predicate to be too strict in some cases. It was too permissive in other cases that are now (or will be) handled better by field flow. See also the change note entry. --- change-notes/1.23/analysis-cpp.md | 4 +++ .../code/cpp/dataflow/internal/FlowVar.qll | 32 ++++++++++++++++--- .../test/library-tests/dataflow/fields/A.cpp | 4 +-- .../dataflow/fields/flow.expected | 19 ++++++----- .../dataflow/taint-tests/localTaint.expected | 1 + 5 files changed, 44 insertions(+), 16 deletions(-) diff --git a/change-notes/1.23/analysis-cpp.md b/change-notes/1.23/analysis-cpp.md index e883f885f0b6..a8131207840f 100644 --- a/change-notes/1.23/analysis-cpp.md +++ b/change-notes/1.23/analysis-cpp.md @@ -25,3 +25,7 @@ The following changes in version 1.23 affect C/C++ analysis in all applications. picture of the partial flow paths from a given source. The feature is disabled by default and can be enabled for individual configurations by overriding `int explorationLimit()`. +* The `DataFlow::DefinitionByReferenceNode` class now considers `f(x)` to be a + definition of `x` when `x` is a variable of pointer type. It no longer + considers deep paths such as `f(&x.myField)` to be definitions of `x`. These + changes are in line with the user expectations we've observed. diff --git a/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll b/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll index bea733b45be1..bc5b4e3a53ed 100644 --- a/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll +++ b/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll @@ -100,7 +100,7 @@ private module PartialDefinitions { ) } or TExplicitCallQualifier(Expr qualifier, Call call) { qualifier = call.getQualifier() } or - TReferenceArgument(Expr arg, VariableAccess va) { definitionByReference(va, arg) } + TReferenceArgument(Expr arg, VariableAccess va) { referenceArgument(va, arg) } private predicate isInstanceFieldWrite(FieldAccess fa, ControlFlowNode node) { not fa.getTarget().isStatic() and @@ -138,18 +138,42 @@ private module PartialDefinitions { } /** - * A partial definition that's a definition by reference (in the sense of the - * `definitionByReference` predicate). + * A partial definition that's a definition by reference. */ class DefinitionByReference extends PartialDefinition, TReferenceArgument { VariableAccess va; - DefinitionByReference() { definitionByReference(va, definedExpr) } + DefinitionByReference() { referenceArgument(va, definedExpr) } VariableAccess getVariableAccess() { result = va } override predicate partiallyDefines(Variable v) { va = v.getAnAccess() } } + + private predicate referenceArgument(VariableAccess va, Expr argument) { + argument = any(Call c).getAnArgument() and + exists(Type argumentType | + argumentType = argument.getFullyConverted().getType().stripTopLevelSpecifiers() + | + argumentType instanceof ReferenceType and + not argumentType.(ReferenceType).getBaseType().isConst() and + va = argument + or + argumentType instanceof PointerType and + not argumentType.(PointerType).getBaseType().isConst() and + ( + // f(variable) + va = argument + or + // f(&variable) + va = argument.(AddressOfExpr).getOperand() + or + // f(&array[0]) + va.getType().getUnspecifiedType() instanceof ArrayType and + va = argument.(AddressOfExpr).getOperand().(ArrayExpr).getArrayBase() + ) + ) + } } import PartialDefinitions private import FlowVar_internal diff --git a/cpp/ql/test/library-tests/dataflow/fields/A.cpp b/cpp/ql/test/library-tests/dataflow/fields/A.cpp index 7668db1fbbcc..fd49e1e85ee2 100644 --- a/cpp/ql/test/library-tests/dataflow/fields/A.cpp +++ b/cpp/ql/test/library-tests/dataflow/fields/A.cpp @@ -129,7 +129,7 @@ class A { B *b = new B(); f7(b); - sink(b->c); // flow [NOT DETECTED] + sink(b->c); // flow } class D @@ -151,7 +151,7 @@ class A D *d = new D(b, r()); sink(d->b); // flow x2 sink(d->b->c); // flow - sink(b->c); // flow [NOT DETECTED] + sink(b->c); // flow } void f10() diff --git a/cpp/ql/test/library-tests/dataflow/fields/flow.expected b/cpp/ql/test/library-tests/dataflow/fields/flow.expected index 5406b0da40ef..78068216d3c5 100644 --- a/cpp/ql/test/library-tests/dataflow/fields/flow.expected +++ b/cpp/ql/test/library-tests/dataflow/fields/flow.expected @@ -16,15 +16,12 @@ edges | A.cpp:73:10:73:19 | call to setOnBWrap [c, ... (1)] | A.cpp:75:10:75:11 | b2 [c, ... (1)] | | A.cpp:73:25:73:32 | new [void] | A.cpp:73:10:73:19 | call to setOnBWrap [c, ... (1)] | | A.cpp:75:10:75:11 | b2 [c, ... (1)] | A.cpp:75:14:75:14 | c | -| A.cpp:98:12:98:18 | new [void] | A.cpp:100:5:100:13 | ... = ... [void] | -| A.cpp:100:5:100:6 | c1 [post update] [a, ... (1)] | A.cpp:101:8:101:9 | c1 [a, ... (1)] | -| A.cpp:100:5:100:13 | ... = ... [void] | A.cpp:100:5:100:6 | c1 [post update] [a, ... (1)] | -| A.cpp:101:8:101:9 | c1 [a, ... (1)] | A.cpp:103:14:103:14 | c [a, ... (1)] | -| A.cpp:103:14:103:14 | c [a, ... (1)] | A.cpp:107:12:107:13 | c1 [a, ... (1)] | -| A.cpp:103:14:103:14 | c [a, ... (1)] | A.cpp:120:12:120:13 | c1 [a, ... (1)] | -| A.cpp:107:12:107:13 | c1 [a, ... (1)] | A.cpp:107:16:107:16 | a | -| A.cpp:120:12:120:13 | c1 [a, ... (1)] | A.cpp:120:16:120:16 | a | +| A.cpp:126:5:126:5 | b [post update] [c, ... (1)] | A.cpp:131:8:131:8 | ref arg b [c, ... (1)] | +| A.cpp:126:12:126:18 | new [void] | A.cpp:126:5:126:5 | b [post update] [c, ... (1)] | +| A.cpp:131:8:131:8 | ref arg b [c, ... (1)] | A.cpp:132:10:132:10 | b [c, ... (1)] | +| A.cpp:132:10:132:10 | b [c, ... (1)] | A.cpp:132:13:132:13 | c | | A.cpp:142:7:142:7 | b [post update] [c, ... (1)] | A.cpp:143:7:143:31 | ... = ... [c, ... (1)] | +| A.cpp:142:7:142:7 | b [post update] [c, ... (1)] | A.cpp:151:18:151:18 | ref arg b [c, ... (1)] | | A.cpp:142:7:142:20 | ... = ... [void] | A.cpp:142:7:142:7 | b [post update] [c, ... (1)] | | A.cpp:142:14:142:20 | new [void] | A.cpp:142:7:142:20 | ... = ... [void] | | A.cpp:143:7:143:10 | this [post update] [b, ... (1)] | A.cpp:151:12:151:24 | call to D [b, ... (1)] | @@ -36,9 +33,11 @@ edges | A.cpp:151:12:151:24 | call to D [b, ... (1)] | A.cpp:152:10:152:10 | d [b, ... (1)] | | A.cpp:151:12:151:24 | call to D [b, ... (2)] | A.cpp:153:10:153:10 | d [b, ... (2)] | | A.cpp:151:18:151:18 | b [void] | A.cpp:151:12:151:24 | call to D [b, ... (1)] | +| A.cpp:151:18:151:18 | ref arg b [c, ... (1)] | A.cpp:154:10:154:10 | b [c, ... (1)] | | A.cpp:152:10:152:10 | d [b, ... (1)] | A.cpp:152:13:152:13 | b | | A.cpp:153:10:153:10 | d [b, ... (2)] | A.cpp:153:13:153:13 | b [c, ... (1)] | | A.cpp:153:13:153:13 | b [c, ... (1)] | A.cpp:153:16:153:16 | c | +| A.cpp:154:10:154:10 | b [c, ... (1)] | A.cpp:154:13:154:13 | c | | A.cpp:159:12:159:18 | new [void] | A.cpp:160:29:160:29 | b [void] | | A.cpp:160:18:160:60 | call to MyList [head, ... (1)] | A.cpp:161:38:161:39 | l1 [head, ... (1)] | | A.cpp:160:29:160:29 | b [void] | A.cpp:160:18:160:60 | call to MyList [head, ... (1)] | @@ -170,11 +169,11 @@ edges | A.cpp:57:28:57:30 | call to get | A.cpp:57:17:57:23 | new [void] | A.cpp:57:28:57:30 | call to get | call to get flows from $@ | A.cpp:57:17:57:23 | new [void] | new [void] | | A.cpp:66:14:66:14 | c | A.cpp:64:21:64:28 | new [void] | A.cpp:66:14:66:14 | c | c flows from $@ | A.cpp:64:21:64:28 | new [void] | new [void] | | A.cpp:75:14:75:14 | c | A.cpp:73:25:73:32 | new [void] | A.cpp:75:14:75:14 | c | c flows from $@ | A.cpp:73:25:73:32 | new [void] | new [void] | -| A.cpp:107:16:107:16 | a | A.cpp:98:12:98:18 | new [void] | A.cpp:107:16:107:16 | a | a flows from $@ | A.cpp:98:12:98:18 | new [void] | new [void] | -| A.cpp:120:16:120:16 | a | A.cpp:98:12:98:18 | new [void] | A.cpp:120:16:120:16 | a | a flows from $@ | A.cpp:98:12:98:18 | new [void] | new [void] | +| A.cpp:132:13:132:13 | c | A.cpp:126:12:126:18 | new [void] | A.cpp:132:13:132:13 | c | c flows from $@ | A.cpp:126:12:126:18 | new [void] | new [void] | | A.cpp:152:13:152:13 | b | A.cpp:143:25:143:31 | new [void] | A.cpp:152:13:152:13 | b | b flows from $@ | A.cpp:143:25:143:31 | new [void] | new [void] | | A.cpp:152:13:152:13 | b | A.cpp:150:12:150:18 | new [void] | A.cpp:152:13:152:13 | b | b flows from $@ | A.cpp:150:12:150:18 | new [void] | new [void] | | A.cpp:153:16:153:16 | c | A.cpp:142:14:142:20 | new [void] | A.cpp:153:16:153:16 | c | c flows from $@ | A.cpp:142:14:142:20 | new [void] | new [void] | +| A.cpp:154:13:154:13 | c | A.cpp:142:14:142:20 | new [void] | A.cpp:154:13:154:13 | c | c flows from $@ | A.cpp:142:14:142:20 | new [void] | new [void] | | A.cpp:165:26:165:29 | head | A.cpp:159:12:159:18 | new [void] | A.cpp:165:26:165:29 | head | head flows from $@ | A.cpp:159:12:159:18 | new [void] | new [void] | | A.cpp:169:15:169:18 | head | A.cpp:159:12:159:18 | new [void] | A.cpp:169:15:169:18 | head | head flows from $@ | A.cpp:159:12:159:18 | new [void] | new [void] | | B.cpp:9:20:9:24 | elem1 | B.cpp:6:15:6:24 | new [void] | B.cpp:9:20:9:24 | elem1 | elem1 flows from $@ | B.cpp:6:15:6:24 | new [void] | new [void] | diff --git a/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected b/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected index ab23866fe0e6..1e9530653f57 100644 --- a/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected +++ b/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected @@ -133,6 +133,7 @@ | taint.cpp:165:22:165:25 | {...} | taint.cpp:172:10:172:15 | buffer | | | taint.cpp:165:22:165:25 | {...} | taint.cpp:173:8:173:13 | buffer | | | taint.cpp:165:24:165:24 | 0 | taint.cpp:165:22:165:25 | {...} | TAINT | +| taint.cpp:168:8:168:14 | ref arg tainted | taint.cpp:172:18:172:24 | tainted | | | taint.cpp:170:10:170:15 | buffer | taint.cpp:170:3:170:8 | call to strcpy | | | taint.cpp:170:10:170:15 | ref arg buffer | taint.cpp:170:3:170:8 | call to strcpy | | | taint.cpp:170:10:170:15 | ref arg buffer | taint.cpp:171:8:171:13 | buffer | | From 067c55adb948f9ef66150775eb24c159d7d9868b Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Wed, 4 Sep 2019 09:31:03 +0200 Subject: [PATCH 2/3] C++: Fix ConditionDeclExpr data flow Data flow probably never worked when a variable declared in a `ConditionDeclExpr` was modeled with `BlockVar`. That combination did not come up in testing before the last commit. --- .../code/cpp/dataflow/internal/FlowVar.qll | 17 ++++++++++------- .../library-tests/dataflow/fields/flow.expected | 10 ++++++++++ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll b/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll index bc5b4e3a53ed..0409d5d92cbe 100644 --- a/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll +++ b/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll @@ -244,7 +244,7 @@ module FlowVar_internal { not v instanceof Field and // Fields are interprocedural data flow, not local reachable(sbb) and ( - initializer(sbb.getANode(), v, _) + initializer(v, sbb.getANode()) or assignmentLikeOperation(sbb, v, _, _) or @@ -361,7 +361,12 @@ module FlowVar_internal { assignmentLikeOperation(node, v, _, e) and node = sbb or - initializer(node, v, e) and + // We pick the defining `ControlFlowNode` of an `Initializer` to be its + // expression rather than the `Initializer` itself. That's because the + // `Initializer` of a `ConditionDeclExpr` is for historical reasons not + // part of the CFG and therefore ends up in the wrong basic block. + initializer(v, e) and + node = e and node = sbb.getANode() } @@ -719,13 +724,11 @@ module FlowVar_internal { } /** - * Holds if `v` is initialized by `init` to have value `assignedExpr`. + * Holds if `v` is initialized to have value `assignedExpr`. */ - predicate initializer( - Initializer init, LocalVariable v, Expr assignedExpr) + predicate initializer(LocalVariable v, Expr assignedExpr) { - v = init.getDeclaration() and - assignedExpr = init.getExpr() + assignedExpr = v.getInitializer().getExpr() } /** diff --git a/cpp/ql/test/library-tests/dataflow/fields/flow.expected b/cpp/ql/test/library-tests/dataflow/fields/flow.expected index 78068216d3c5..4052b89ab693 100644 --- a/cpp/ql/test/library-tests/dataflow/fields/flow.expected +++ b/cpp/ql/test/library-tests/dataflow/fields/flow.expected @@ -16,6 +16,14 @@ edges | A.cpp:73:10:73:19 | call to setOnBWrap [c, ... (1)] | A.cpp:75:10:75:11 | b2 [c, ... (1)] | | A.cpp:73:25:73:32 | new [void] | A.cpp:73:10:73:19 | call to setOnBWrap [c, ... (1)] | | A.cpp:75:10:75:11 | b2 [c, ... (1)] | A.cpp:75:14:75:14 | c | +| A.cpp:98:12:98:18 | new [void] | A.cpp:100:5:100:13 | ... = ... [void] | +| A.cpp:100:5:100:6 | c1 [post update] [a, ... (1)] | A.cpp:101:8:101:9 | c1 [a, ... (1)] | +| A.cpp:100:5:100:13 | ... = ... [void] | A.cpp:100:5:100:6 | c1 [post update] [a, ... (1)] | +| A.cpp:101:8:101:9 | c1 [a, ... (1)] | A.cpp:103:14:103:14 | c [a, ... (1)] | +| A.cpp:103:14:103:14 | c [a, ... (1)] | A.cpp:107:12:107:13 | c1 [a, ... (1)] | +| A.cpp:103:14:103:14 | c [a, ... (1)] | A.cpp:120:12:120:13 | c1 [a, ... (1)] | +| A.cpp:107:12:107:13 | c1 [a, ... (1)] | A.cpp:107:16:107:16 | a | +| A.cpp:120:12:120:13 | c1 [a, ... (1)] | A.cpp:120:16:120:16 | a | | A.cpp:126:5:126:5 | b [post update] [c, ... (1)] | A.cpp:131:8:131:8 | ref arg b [c, ... (1)] | | A.cpp:126:12:126:18 | new [void] | A.cpp:126:5:126:5 | b [post update] [c, ... (1)] | | A.cpp:131:8:131:8 | ref arg b [c, ... (1)] | A.cpp:132:10:132:10 | b [c, ... (1)] | @@ -169,6 +177,8 @@ edges | A.cpp:57:28:57:30 | call to get | A.cpp:57:17:57:23 | new [void] | A.cpp:57:28:57:30 | call to get | call to get flows from $@ | A.cpp:57:17:57:23 | new [void] | new [void] | | A.cpp:66:14:66:14 | c | A.cpp:64:21:64:28 | new [void] | A.cpp:66:14:66:14 | c | c flows from $@ | A.cpp:64:21:64:28 | new [void] | new [void] | | A.cpp:75:14:75:14 | c | A.cpp:73:25:73:32 | new [void] | A.cpp:75:14:75:14 | c | c flows from $@ | A.cpp:73:25:73:32 | new [void] | new [void] | +| A.cpp:107:16:107:16 | a | A.cpp:98:12:98:18 | new [void] | A.cpp:107:16:107:16 | a | a flows from $@ | A.cpp:98:12:98:18 | new [void] | new [void] | +| A.cpp:120:16:120:16 | a | A.cpp:98:12:98:18 | new [void] | A.cpp:120:16:120:16 | a | a flows from $@ | A.cpp:98:12:98:18 | new [void] | new [void] | | A.cpp:132:13:132:13 | c | A.cpp:126:12:126:18 | new [void] | A.cpp:132:13:132:13 | c | c flows from $@ | A.cpp:126:12:126:18 | new [void] | new [void] | | A.cpp:152:13:152:13 | b | A.cpp:143:25:143:31 | new [void] | A.cpp:152:13:152:13 | b | b flows from $@ | A.cpp:143:25:143:31 | new [void] | new [void] | | A.cpp:152:13:152:13 | b | A.cpp:150:12:150:18 | new [void] | A.cpp:152:13:152:13 | b | b flows from $@ | A.cpp:150:12:150:18 | new [void] | new [void] | From 79c713bd87612f7ca6190056ec867a722b0a8128 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Thu, 5 Sep 2019 09:36:46 +0200 Subject: [PATCH 3/3] C++: Remark in DefinitionByReference charpred --- cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll b/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll index 0e453349fd7c..74e96d8d0633 100644 --- a/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll +++ b/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll @@ -159,7 +159,13 @@ private module PartialDefinitions { class DefinitionByReference extends PartialDefinition, TReferenceArgument { VariableAccess va; - DefinitionByReference() { referenceArgument(va, definedExpr) } + DefinitionByReference() { + // `this` is not restricted in this charpred. That's because the full + // extent of this class includes the charpred of the superclass, which + // relates `this` to `definedExpr`, and `va` is functionally determined + // by `definedExpr`. + referenceArgument(va, definedExpr) + } VariableAccess getVariableAccess() { result = va }