From 6fca23bc8bbb9650c3b893d769a674e87546daad Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Mon, 20 Apr 2020 08:50:31 +0200 Subject: [PATCH 1/5] C++: Demonstrate lack of flow through single-field structs --- .../library-tests/dataflow/fields/flow.expected | 10 ++++++++++ .../test/library-tests/dataflow/fields/simple.cpp | 14 ++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/cpp/ql/test/library-tests/dataflow/fields/flow.expected b/cpp/ql/test/library-tests/dataflow/fields/flow.expected index ca851dc89746..650b5dcc0735 100644 --- a/cpp/ql/test/library-tests/dataflow/fields/flow.expected +++ b/cpp/ql/test/library-tests/dataflow/fields/flow.expected @@ -221,6 +221,10 @@ edges | simple.cpp:48:9:48:9 | g [b_] | simple.cpp:26:15:26:15 | f [b_] | | simple.cpp:51:9:51:9 | h [a_] | simple.cpp:26:15:26:15 | f [a_] | | simple.cpp:51:9:51:9 | h [b_] | simple.cpp:26:15:26:15 | f [b_] | +| simple.cpp:65:5:65:5 | a [post update] [i] | simple.cpp:67:10:67:11 | a2 [i] | +| simple.cpp:65:5:65:22 | ... = ... | simple.cpp:65:5:65:5 | a [post update] [i] | +| simple.cpp:65:11:65:20 | call to user_input | simple.cpp:65:5:65:22 | ... = ... | +| simple.cpp:67:10:67:11 | a2 [i] | simple.cpp:67:13:67:13 | i | | struct_init.c:14:24:14:25 | ab [a] | struct_init.c:15:8:15:9 | ab [a] | | struct_init.c:15:8:15:9 | ab [a] | struct_init.c:15:12:15:12 | a | | struct_init.c:20:17:20:36 | {...} [a] | struct_init.c:22:8:22:9 | ab [a] | @@ -504,6 +508,11 @@ nodes | simple.cpp:48:9:48:9 | g [b_] | semmle.label | g [b_] | | simple.cpp:51:9:51:9 | h [a_] | semmle.label | h [a_] | | simple.cpp:51:9:51:9 | h [b_] | semmle.label | h [b_] | +| simple.cpp:65:5:65:5 | a [post update] [i] | semmle.label | a [post update] [i] | +| simple.cpp:65:5:65:22 | ... = ... | semmle.label | ... = ... | +| simple.cpp:65:11:65:20 | call to user_input | semmle.label | call to user_input | +| simple.cpp:67:10:67:11 | a2 [i] | semmle.label | a2 [i] | +| simple.cpp:67:13:67:13 | i | semmle.label | i | | struct_init.c:14:24:14:25 | ab [a] | semmle.label | ab [a] | | struct_init.c:15:8:15:9 | ab [a] | semmle.label | ab [a] | | struct_init.c:15:12:15:12 | a | semmle.label | a | @@ -580,6 +589,7 @@ nodes | simple.cpp:28:12:28:12 | call to a | simple.cpp:41:12:41:21 | call to user_input | simple.cpp:28:12:28:12 | call to a | call to a flows from $@ | simple.cpp:41:12:41:21 | call to user_input | call to user_input | | simple.cpp:29:12:29:12 | call to b | simple.cpp:40:12:40:21 | call to user_input | simple.cpp:29:12:29:12 | call to b | call to b flows from $@ | simple.cpp:40:12:40:21 | call to user_input | call to user_input | | simple.cpp:29:12:29:12 | call to b | simple.cpp:42:12:42:21 | call to user_input | simple.cpp:29:12:29:12 | call to b | call to b flows from $@ | simple.cpp:42:12:42:21 | call to user_input | call to user_input | +| simple.cpp:67:13:67:13 | i | simple.cpp:65:11:65:20 | call to user_input | simple.cpp:67:13:67:13 | i | i flows from $@ | simple.cpp:65:11:65:20 | call to user_input | call to user_input | | struct_init.c:15:12:15:12 | a | struct_init.c:20:20:20:29 | call to user_input | struct_init.c:15:12:15:12 | a | a flows from $@ | struct_init.c:20:20:20:29 | call to user_input | call to user_input | | struct_init.c:15:12:15:12 | a | struct_init.c:27:7:27:16 | call to user_input | struct_init.c:15:12:15:12 | a | a flows from $@ | struct_init.c:27:7:27:16 | call to user_input | call to user_input | | struct_init.c:15:12:15:12 | a | struct_init.c:40:20:40:29 | call to user_input | struct_init.c:15:12:15:12 | a | a flows from $@ | struct_init.c:40:20:40:29 | call to user_input | call to user_input | diff --git a/cpp/ql/test/library-tests/dataflow/fields/simple.cpp b/cpp/ql/test/library-tests/dataflow/fields/simple.cpp index 6f36fa6551df..64b6748d0d55 100644 --- a/cpp/ql/test/library-tests/dataflow/fields/simple.cpp +++ b/cpp/ql/test/library-tests/dataflow/fields/simple.cpp @@ -53,4 +53,18 @@ void foo() // Nothing should alert bar(i); } + +struct A +{ + int i; +}; + +void single_field_test() +{ + A a; + a.i = user_input(); + A a2 = a; + sink(a2.i); +} + } // namespace Simple From a6e619ce5b4235c809dca60b12c43043eaa80991 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Mon, 20 Apr 2020 08:52:46 +0200 Subject: [PATCH 2/5] C++: Add field flow through single-field structs and accept tests --- .../ir/dataflow/internal/DataFlowPrivate.qll | 26 ++++++++++++++----- .../cpp/ir/dataflow/internal/DataFlowUtil.qll | 22 ++++++++++++++++ .../fields/dataflow-ir-consistency.expected | 1 + .../dataflow/fields/ir-flow.expected | 8 ++++++ 4 files changed, 51 insertions(+), 6 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index 51290bc32931..531fcdfd3681 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -180,12 +180,16 @@ private class ArrayContent extends Content, TArrayContent { override Type getType() { none() } } -/** - * Holds if data can flow from `node1` to `node2` via an assignment to `f`. - * Thus, `node2` references an object with a field `f` that contains the - * value of `node1`. - */ -predicate storeStep(Node node1, Content f, PostUpdateNode node2) { +private predicate storeStepNoChi(Node node1, Content f, PostUpdateNode node2) { + exists(FieldAddressInstruction fa, StoreInstruction store | + store = node2.asInstruction() and + store.getDestinationAddress() = fa and + store.getSourceValue() = node1.asInstruction() and + f.(FieldContent).getField() = fa.getField() + ) +} + +private predicate storeStepChi(Node node1, Content f, PostUpdateNode node2) { exists(FieldAddressInstruction fa, StoreInstruction store | node1.asInstruction() = store and store.getDestinationAddress() = fa and @@ -194,6 +198,16 @@ predicate storeStep(Node node1, Content f, PostUpdateNode node2) { ) } +/** + * Holds if data can flow from `node1` to `node2` via an assignment to `f`. + * Thus, `node2` references an object with a field `f` that contains the + * value of `node1`. + */ +predicate storeStep(Node node1, Content f, PostUpdateNode node2) { + storeStepNoChi(node1, f, node2) or + storeStepChi(node1, f, node2) +} + /** * Holds if data can flow from `node1` to `node2` via a read of `f`. * Thus, `node1` references an object with a field `f` whose value ends up in diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 8b0ade838dd2..ac0d7fa6ce38 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -270,6 +270,19 @@ private class ExplicitFieldStoreQualifierNode extends PartialDefinitionNode { override Node getPreUpdateNode() { result.asInstruction() = instr.getTotal() } } +private class ExplicitSingleFieldStoreQualifierNode extends PartialDefinitionNode { + override StoreInstruction instr; + + ExplicitSingleFieldStoreQualifierNode() { + exists(FieldAddressInstruction field | + field = instr.getDestinationAddress() and + not exists(ChiInstruction chi | chi.getPartial() = instr) + ) + } + + override Node getPreUpdateNode() { none() } +} + /** * A node that represents the value of a variable after a function call that * may have changed the variable because it's passed by reference. @@ -404,6 +417,8 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { simpleInstructionLocalFlowStep(nodeFrom.asInstruction(), nodeTo.asInstruction()) } +private predicate hasSize(Type t, int size) { t.getSize() = size } + cached private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction iTo) { iTo.(CopyInstruction).getSourceValue() = iFrom @@ -452,6 +467,13 @@ private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction iTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = chi ) or + iTo.(CopyInstruction).getSourceValueOperand().getAnyDef() = iFrom and + exists(Class c, int size | + c = iTo.getResultType() and + hasSize(c, size) and + hasSize(iFrom.getResultType(), size) + ) + or // Flow through modeled functions modelFlow(iFrom, iTo) } diff --git a/cpp/ql/test/library-tests/dataflow/fields/dataflow-ir-consistency.expected b/cpp/ql/test/library-tests/dataflow/fields/dataflow-ir-consistency.expected index 1714355138c8..227023fda166 100644 --- a/cpp/ql/test/library-tests/dataflow/fields/dataflow-ir-consistency.expected +++ b/cpp/ql/test/library-tests/dataflow/fields/dataflow-ir-consistency.expected @@ -19,6 +19,7 @@ unreachableNodeCCtx localCallNodes postIsNotPre postHasUniquePre +| simple.cpp:65:5:65:22 | Store | PostUpdateNode should have one pre-update node but has 0. | uniquePostUpdate postIsInSameCallable reverseRead diff --git a/cpp/ql/test/library-tests/dataflow/fields/ir-flow.expected b/cpp/ql/test/library-tests/dataflow/fields/ir-flow.expected index 7a6432e29f36..4286d556cb9a 100644 --- a/cpp/ql/test/library-tests/dataflow/fields/ir-flow.expected +++ b/cpp/ql/test/library-tests/dataflow/fields/ir-flow.expected @@ -25,6 +25,9 @@ edges | aliasing.cpp:79:11:79:20 | call to user_input | aliasing.cpp:80:12:80:13 | m1 | | aliasing.cpp:86:10:86:19 | call to user_input | aliasing.cpp:87:12:87:13 | m1 | | aliasing.cpp:92:12:92:21 | call to user_input | aliasing.cpp:93:12:93:13 | m1 | +| simple.cpp:65:5:65:22 | Store [i] | simple.cpp:66:12:66:12 | Store [i] | +| simple.cpp:65:11:65:20 | call to user_input | simple.cpp:65:5:65:22 | Store [i] | +| simple.cpp:66:12:66:12 | Store [i] | simple.cpp:67:13:67:13 | i | | struct_init.c:20:20:20:29 | call to user_input | struct_init.c:22:11:22:11 | a | | struct_init.c:27:7:27:16 | call to user_input | struct_init.c:31:23:31:23 | a | nodes @@ -63,6 +66,10 @@ nodes | aliasing.cpp:87:12:87:13 | m1 | semmle.label | m1 | | aliasing.cpp:92:12:92:21 | call to user_input | semmle.label | call to user_input | | aliasing.cpp:93:12:93:13 | m1 | semmle.label | m1 | +| simple.cpp:65:5:65:22 | Store [i] | semmle.label | Store [i] | +| simple.cpp:65:11:65:20 | call to user_input | semmle.label | call to user_input | +| simple.cpp:66:12:66:12 | Store [i] | semmle.label | Store [i] | +| simple.cpp:67:13:67:13 | i | semmle.label | i | | struct_init.c:20:20:20:29 | call to user_input | semmle.label | call to user_input | | struct_init.c:22:11:22:11 | a | semmle.label | a | | struct_init.c:27:7:27:16 | call to user_input | semmle.label | call to user_input | @@ -78,5 +85,6 @@ nodes | aliasing.cpp:80:12:80:13 | m1 | aliasing.cpp:79:11:79:20 | call to user_input | aliasing.cpp:80:12:80:13 | m1 | m1 flows from $@ | aliasing.cpp:79:11:79:20 | call to user_input | call to user_input | | aliasing.cpp:87:12:87:13 | m1 | aliasing.cpp:86:10:86:19 | call to user_input | aliasing.cpp:87:12:87:13 | m1 | m1 flows from $@ | aliasing.cpp:86:10:86:19 | call to user_input | call to user_input | | aliasing.cpp:93:12:93:13 | m1 | aliasing.cpp:92:12:92:21 | call to user_input | aliasing.cpp:93:12:93:13 | m1 | m1 flows from $@ | aliasing.cpp:92:12:92:21 | call to user_input | call to user_input | +| simple.cpp:67:13:67:13 | i | simple.cpp:65:11:65:20 | call to user_input | simple.cpp:67:13:67:13 | i | i flows from $@ | simple.cpp:65:11:65:20 | call to user_input | call to user_input | | struct_init.c:22:11:22:11 | a | struct_init.c:20:20:20:29 | call to user_input | struct_init.c:22:11:22:11 | a | a flows from $@ | struct_init.c:20:20:20:29 | call to user_input | call to user_input | | struct_init.c:31:23:31:23 | a | struct_init.c:27:7:27:16 | call to user_input | struct_init.c:31:23:31:23 | a | a flows from $@ | struct_init.c:27:7:27:16 | call to user_input | call to user_input | From e0cd595d54abcc5e6c122e85e9d2bdb9d1e95cc8 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Mon, 20 Apr 2020 11:46:10 +0200 Subject: [PATCH 3/5] C++: Reduce intermediate tuple counts --- .../code/cpp/ir/dataflow/internal/DataFlowUtil.qll | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index ac0d7fa6ce38..4a87ed503137 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -417,8 +417,6 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { simpleInstructionLocalFlowStep(nodeFrom.asInstruction(), nodeTo.asInstruction()) } -private predicate hasSize(Type t, int size) { t.getSize() = size } - cached private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction iTo) { iTo.(CopyInstruction).getSourceValue() = iFrom @@ -467,11 +465,12 @@ private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction iTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = chi ) or - iTo.(CopyInstruction).getSourceValueOperand().getAnyDef() = iFrom and - exists(Class c, int size | + iTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = iFrom.(StoreInstruction) and + exists(Class c, Type t | c = iTo.getResultType() and - hasSize(c, size) and - hasSize(iFrom.getResultType(), size) + t = iFrom.getResultType() and + c.getAField().getUnspecifiedType() = t and + c.getSize() = t.getSize() ) or // Flow through modeled functions From 8be1bfe8d0667e1099887481ef6fc70ae07af6d4 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Mon, 20 Apr 2020 14:13:12 +0200 Subject: [PATCH 4/5] C++: Add comments and accept expected dataflow sanity failures --- .../semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll | 7 +++++++ .../syntax-zoo/dataflow-ir-consistency.expected | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 4a87ed503137..87193fe19f3d 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -270,6 +270,12 @@ private class ExplicitFieldStoreQualifierNode extends PartialDefinitionNode { override Node getPreUpdateNode() { result.asInstruction() = instr.getTotal() } } +/** + * Not every store instruction generates a chi instruction that we can attach a PostUpdateNode to. + * For instance, an update to a field of a struct containing only one field. For these cases we + * attach the PostUpdateNode to the store instruction. There's no obvious pre update node for this case + * (as the entire memory is updated), so `getPreUpdateNode` is implemented as `none()`. + */ private class ExplicitSingleFieldStoreQualifierNode extends PartialDefinitionNode { override StoreInstruction instr; @@ -465,6 +471,7 @@ private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction iTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = chi ) or + // Flow from stores to structs with a single field to a load of that field. iTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = iFrom.(StoreInstruction) and exists(Class c, Type t | c = iTo.getResultType() and diff --git a/cpp/ql/test/library-tests/syntax-zoo/dataflow-ir-consistency.expected b/cpp/ql/test/library-tests/syntax-zoo/dataflow-ir-consistency.expected index f3e74c759b6e..42ed7144f477 100644 --- a/cpp/ql/test/library-tests/syntax-zoo/dataflow-ir-consistency.expected +++ b/cpp/ql/test/library-tests/syntax-zoo/dataflow-ir-consistency.expected @@ -788,6 +788,10 @@ unreachableNodeCCtx localCallNodes postIsNotPre postHasUniquePre +| assignexpr.cpp:9:2:9:12 | Store | PostUpdateNode should have one pre-update node but has 0. | +| bad_asts.cpp:15:10:15:12 | Store | PostUpdateNode should have one pre-update node but has 0. | +| file://:0:0:0:0 | Store | PostUpdateNode should have one pre-update node but has 0. | +| ir.cpp:531:14:531:14 | Store | PostUpdateNode should have one pre-update node but has 0. | uniquePostUpdate postIsInSameCallable reverseRead From a49d22e6e4a794032cf20f1954a229622fe5e29e Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Tue, 21 Apr 2020 13:25:06 +0200 Subject: [PATCH 5/5] C++: Fix join ordering --- .../cpp/ir/dataflow/internal/DataFlowUtil.qll | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 87193fe19f3d..9f9659a506e3 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -423,6 +423,15 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { simpleInstructionLocalFlowStep(nodeFrom.asInstruction(), nodeTo.asInstruction()) } +pragma[noinline] +private predicate getFieldSizeOfClass(Class c, Type type, int size) { + exists(Field f | + f.getDeclaringType() = c and + f.getType() = type and + type.getSize() = size + ) +} + cached private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction iTo) { iTo.(CopyInstruction).getSourceValue() = iFrom @@ -472,12 +481,11 @@ private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction ) or // Flow from stores to structs with a single field to a load of that field. - iTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = iFrom.(StoreInstruction) and - exists(Class c, Type t | - c = iTo.getResultType() and - t = iFrom.getResultType() and - c.getAField().getUnspecifiedType() = t and - c.getSize() = t.getSize() + iTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = iFrom and + exists(int size, Type type | + type = iFrom.getResultType() and + iTo.getResultType().getSize() = size and + getFieldSizeOfClass(iTo.getResultType(), type, size) ) or // Flow through modeled functions