From 2b720b332b1f9060d456ddd11cff4611b12ff20c Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Thu, 27 Aug 2020 09:39:47 +0200 Subject: [PATCH 1/3] C++: Fix join order in reachesWithoutAssignment The negation in this predicate did not get pulled into an `#antijoin_rhs` predicate but got materialized as part of each iteration, which meant that the temporary `ControlFlowNode` column did not get projected away. The tuple counts looked like this on kamailio/kamailio (iteration 20): 5724 ~13% {3} r9 = JOIN r8 WITH BasicBlocks::Cached::bb_successor_cached#ff@staged_ext AS R ON FIRST 2 OUTPUT r8.<2>, r8.<3>, r8.<1> 5724 ~12% {3} r10 = JOIN r8 WITH BasicBlocks::Cached::bb_successor_cached#ff@staged_ext AS R ON FIRST 2 OUTPUT r8.<3>, r8.<2>, r8.<1> 124717061 ~11% {4} r11 = JOIN r10 WITH project#FlowVar::FlowVar_internal::assignmentLikeOperation#ffff_10#join_rhs AS R ON FIRST 1 OUTPUT R.<1>, r10.<2>, r10.<1>, r10.<0> 66 ~0% {3} r12 = JOIN r11 WITH project#BasicBlocks::Cached::basic_block_member AS R ON FIRST 2 OUTPUT r11.<2>, r11.<3>, r11.<1> 66 {3} r13 = MATERIALIZE r12 AS antijoin_rhs 5658 ~14% {3} r14 = r9 AND NOT r13(r9.<0>, r9.<1>, r9.<2>) After manually pulling out the join inside the negation, the time per iteration drops from ~30 to <1s. The pipeline above is replaced with 892394 ~0% {4} r6 = r5 AND NOT FlowVar::FlowVar_internal::assignsToVar#fb AS R(r5.<3>, r5.<2>) 892394 ~0% {4} r7 = SCAN r6 OUTPUT r6.<1>, r6.<3>, r6.<0>, r6.<2> 5658 ~11% {3} r8 = JOIN r7 WITH BasicBlocks::Cached::bb_successor_cached#ff@staged_ext AS R ON FIRST 2 OUTPUT r7.<2>, r7.<1>, r7.<3> --- .../src/semmle/code/cpp/dataflow/internal/FlowVar.qll | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll b/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll index 72033b0f72df..4b94e8af5ef7 100644 --- a/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll +++ b/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll @@ -427,7 +427,7 @@ module FlowVar_internal { /** * Gets a variable that is assigned in this loop and read outside the loop. */ - private Variable getARelevantVariable() { + Variable getARelevantVariable() { result = this.getAVariableAssignedInLoop() and exists(VariableAccess va | va.getTarget() = result and @@ -472,10 +472,16 @@ module FlowVar_internal { reachesWithoutAssignment(bb.getAPredecessor(), v) and this.bbInLoop(bb) ) and - not assignmentLikeOperation(bb.getANode(), v, _, _) + not assignsToVar(bb, v) } } + pragma[noinline] + private predicate assignsToVar(BasicBlock bb, Variable v) { + assignmentLikeOperation(bb.getANode(), v, _, _) and + exists(AlwaysTrueUponEntryLoop loop | v = loop.getARelevantVariable()) + } + /** * Holds if `loop` always assigns to `v` before leaving through an edge * from `bbInside` in its condition to `bbOutside` outside the loop. Also, From f3e98c3beaed0a7b10bebd1db832153d26659b83 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Thu, 27 Aug 2020 09:25:45 +0200 Subject: [PATCH 2/3] C++: Fix join order of FlowVar::definedPartiallyAt This predicate was very slow on kamailio/kamailio: (696s) Tuple counts for FlowVar::FlowVar::definedPartiallyAt_dispred#ff: 703569 ~3% {3} r1 = SCAN FlowVar::FlowVar_internal::TBlockVar#fff AS I OUTPUT I.<1>, I.<0>, I.<2> 7679540588 ~3% {3} r2 = JOIN r1 WITH FlowVar::PartialDefinitions::PartialDefinition::partiallyDefines_dispred#ff_10#join_rhs AS R ON FIRST 1 OUTPUT R.<1>, r1.<1>, r1.<2> 567217 ~2% {2} r3 = JOIN r2 WITH project#FlowVar::PartialDefinitions::PartialDefinition#class#fff#2 AS R ON FIRST 2 OUTPUT r2.<2>, r2.<0> return r3 After this change, the predicate takes no time at all: (22s) Tuple counts for FlowVar::FlowVar::definedPartiallyAt_dispred#ff: 703569 ~3% {3} r1 = SCAN FlowVar::FlowVar_internal::TBlockVar#fff AS I OUTPUT I.<1>, I.<0>, I.<2> 567217 ~2% {2} r2 = JOIN r1 WITH FlowVar::PartialDefinitions::PartialDefinition::partiallyDefinesVariableAt#fff_120#join_rhs AS R ON FIRST 2 OUTPUT r1.<2>, R.<2> return r2 Looking at the code, it turned out that the predicates `partiallyDefines` and `getSubBasicBlockStart` were almost always used together and could therefore be merged into a single predicate to get better join orderings. The predicate `partiallyDefinesThis` was never used. --- .../code/cpp/dataflow/internal/FlowVar.qll | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll b/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll index 4b94e8af5ef7..0869b1152083 100644 --- a/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll +++ b/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll @@ -120,14 +120,19 @@ private module PartialDefinitions { ) } - predicate partiallyDefines(Variable v) { innerDefinedExpr = v.getAnAccess() } + deprecated predicate partiallyDefines(Variable v) { innerDefinedExpr = v.getAnAccess() } - predicate partiallyDefinesThis(ThisExpr e) { innerDefinedExpr = e } + deprecated predicate partiallyDefinesThis(ThisExpr e) { innerDefinedExpr = e } /** - * Gets the subBasicBlock where this `PartialDefinition` is defined. + * Holds if this `PartialDefinition` defines variable `v` at control-flow + * node `cfn`. */ - ControlFlowNode getSubBasicBlockStart() { result = node } + pragma[noinline] + predicate partiallyDefinesVariableAt(Variable v, ControlFlowNode cfn) { + innerDefinedExpr = v.getAnAccess() and + cfn = node + } /** * Holds if this partial definition may modify `inner` (or what it points @@ -188,7 +193,7 @@ module FlowVar_internal { predicate fullySupportedSsaVariable(Variable v) { v = any(SsaDefinition def).getAVariable() and // A partially-defined variable is handled using the partial definitions logic. - not any(PartialDefinition p).partiallyDefines(v) and + not any(PartialDefinition p).partiallyDefinesVariableAt(v, _) and // SSA variables do not exist before their first assignment, but one // feature of this data flow library is to track where uninitialized data // ends up. @@ -232,7 +237,7 @@ module FlowVar_internal { or assignmentLikeOperation(sbb, v, _, _) or - sbb = any(PartialDefinition p | p.partiallyDefines(v)).getSubBasicBlockStart() + exists(PartialDefinition p | p.partiallyDefinesVariableAt(v, sbb)) or blockVarDefinedByVariable(sbb, v) ) @@ -363,8 +368,7 @@ module FlowVar_internal { override predicate definedPartiallyAt(Expr e) { exists(PartialDefinition p | - p.partiallyDefines(v) and - sbb = p.getSubBasicBlockStart() and + p.partiallyDefinesVariableAt(v, sbb) and p.definesExpressions(_, e) ) } @@ -742,7 +746,7 @@ module FlowVar_internal { exists(Variable v | not fullySupportedSsaVariable(v) | assignmentLikeOperation(this, v, _, _) or - this = any(PartialDefinition p | p.partiallyDefines(v)).getSubBasicBlockStart() + exists(PartialDefinition p | p.partiallyDefinesVariableAt(v, this)) // It is not necessary to cut the basic blocks at `Initializer` nodes // because the affected variable can have no _other_ value before its // initializer. It is not necessary to cut basic blocks at procedure From e949c167fa6ff0b32ba9e6925fafa6c12cdfbf4a Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Thu, 27 Aug 2020 11:14:47 +0200 Subject: [PATCH 3/3] C++: Add back `getSubBasicBlockStart` It turns out this predicate was used in a test, and that use can't be replaced with the new `partiallyDefinesVariableAt` predicate since `partiallyDefinesVariableAt` doesn't hold for a `PartialDefinition` that defines something other than a variable. --- cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll b/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll index 0869b1152083..6dc5da2f20bd 100644 --- a/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll +++ b/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll @@ -124,6 +124,11 @@ private module PartialDefinitions { deprecated predicate partiallyDefinesThis(ThisExpr e) { innerDefinedExpr = e } + /** + * Gets the subBasicBlock where this `PartialDefinition` is defined. + */ + ControlFlowNode getSubBasicBlockStart() { result = node } + /** * Holds if this `PartialDefinition` defines variable `v` at control-flow * node `cfn`.