diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index a8860bf6cb377c..3e1236cd26566b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -403,7 +403,8 @@ public class Rewriter extends AbstractBatchJobExecutor { new EliminateFilter(), new PushDownFilterThroughProject(), new MergeProjects(), - new PruneOlapScanTablet() + new PruneOlapScanTablet(), + new AdjustAggregateNullableForEmptySet() ), custom(RuleType.COLUMN_PRUNING, ColumnPruning::new), bottomUp(RuleSet.PUSH_DOWN_FILTERS), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java index a79953dc71ee81..89e61ab86d222b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java @@ -22,16 +22,20 @@ import org.apache.doris.nereids.properties.PhysicalProperties; import org.apache.doris.nereids.properties.RequireProperties; import org.apache.doris.nereids.properties.RequirePropertiesSupplier; +import org.apache.doris.nereids.trees.expressions.AggregateExpression; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateParam; +import org.apache.doris.nereids.trees.expressions.functions.agg.NullableAggregateFunction; import org.apache.doris.nereids.trees.plans.AggMode; import 
org.apache.doris.nereids.trees.plans.AggPhase; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.PlanType; import org.apache.doris.nereids.trees.plans.algebra.Aggregate; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.nereids.util.Utils; import org.apache.doris.statistics.Statistics; @@ -91,8 +95,9 @@ public PhysicalHashAggregate(List groupByExpressions, List groupByExpressions, List resetLogicalProperties() { requireProperties, physicalProperties, statistics, child()); } + + /** + * sql: select sum(distinct c1) from t; + * assume c1 is not null, because there is no group by + * sum(distinct c1)'s nullable is alwaysNullable in rewritten phase. + * But in implementation phase, we may create 3 phase agg with group by key c1. + * And the sum(distinct c1)'s nullability should be changed depending on whether there are any group by expressions. + * This PR updates the agg function's nullability accordingly + */ + private List adjustNullableForOutputs(List outputs, boolean alwaysNullable) { + return ExpressionUtils.rewriteDownShortCircuit(outputs, output -> { + if (output instanceof AggregateExpression) { + AggregateFunction function = ((AggregateExpression) output).getFunction(); + if (function instanceof NullableAggregateFunction + && ((NullableAggregateFunction) function).isAlwaysNullable() != alwaysNullable) { + AggregateParam param = ((AggregateExpression) output).getAggregateParam(); + Expression child = ((AggregateExpression) output).child(); + AggregateFunction newFunction = ((NullableAggregateFunction) function) + .withAlwaysNullable(alwaysNullable); + if (function == child) { + // function is also child + child = newFunction; + } + return new AggregateExpression(newFunction, param, child); + } + } + return output; + }); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/AggregateStrategiesTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/AggregateStrategiesTest.java index 34c16309181466..e1e03e64d9849a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/AggregateStrategiesTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/AggregateStrategiesTest.java @@ -29,8 +29,10 @@ import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.StatementScopeIdGenerator; +import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateParam; import org.apache.doris.nereids.trees.expressions.functions.agg.Count; +import org.apache.doris.nereids.trees.expressions.functions.agg.NullableAggregateFunction; import org.apache.doris.nereids.trees.expressions.functions.agg.Sum; import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral; import org.apache.doris.nereids.trees.plans.AggMode; @@ -54,6 +56,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Optional; +import java.util.Set; @TestInstance(TestInstance.Lifecycle.PER_CLASS) public class AggregateStrategiesTest implements MemoPatternMatchSupported { @@ -138,7 +141,7 @@ public void globalAggregate() { Plan root = new LogicalAggregate<>(groupExpressionList, outputExpressionList, true, Optional.empty(), rStudent); - Sum localOutput0 = new Sum(rStudent.getOutput().get(0).toSlot()); + Sum localOutput0 = new Sum(false, true, rStudent.getOutput().get(0).toSlot()); PlanChecker.from(MemoTestUtils.createConnectContext(), root) .applyImplementation(twoPhaseAggregateWithoutDistinct()) @@ -380,6 +383,40 @@ public void distinctWithNormalAggregateFunctionApply4PhaseRule() { ); } + @Test + public void distinctApply4PhaseRuleNullableChange() { + Slot id = rStudent.getOutput().get(0).toSlot(); + List groupExpressionList = 
Lists.newArrayList(); + List outputExpressionList = Lists.newArrayList( + new Alias(new Count(true, id), "count_id"), + new Alias(new Sum(id), "sum_id")); + Plan root = new LogicalAggregate<>(groupExpressionList, outputExpressionList, + true, Optional.empty(), rStudent); + + // select count(distinct id), sum(id) from t; + PlanChecker.from(MemoTestUtils.createConnectContext(), root) + .applyImplementation(fourPhaseAggregateWithDistinct()) + .matches( + physicalHashAggregate( + physicalHashAggregate( + physicalHashAggregate( + physicalHashAggregate() + .when(agg -> agg.getAggPhase().equals(AggPhase.LOCAL)) + .when(agg -> agg.getGroupByExpressions().get(0).equals(id)) + .when(agg -> verifyAlwaysNullableFlag( + agg.getAggregateFunctions(), false))) + .when(agg -> agg.getAggPhase().equals(AggPhase.GLOBAL)) + .when(agg -> agg.getGroupByExpressions().get(0).equals(id)) + .when(agg -> verifyAlwaysNullableFlag(agg.getAggregateFunctions(), + false))) + .when(agg -> agg.getAggPhase().equals(AggPhase.DISTINCT_LOCAL)) + .when(agg -> agg.getGroupByExpressions().isEmpty()) + .when(agg -> verifyAlwaysNullableFlag(agg.getAggregateFunctions(), true))) + .when(agg -> agg.getAggPhase().equals(AggPhase.DISTINCT_GLOBAL)) + .when(agg -> agg.getGroupByExpressions().isEmpty()) + .when(agg -> verifyAlwaysNullableFlag(agg.getAggregateFunctions(), true))); + } + private Rule twoPhaseAggregateWithoutDistinct() { return new AggregateStrategies().buildRules() .stream() @@ -400,8 +437,18 @@ private Rule twoPhaseAggregateWithDistinct() { private Rule fourPhaseAggregateWithDistinct() { return new AggregateStrategies().buildRules() .stream() - .filter(rule -> rule.getRuleType() == RuleType.TWO_PHASE_AGGREGATE_WITH_DISTINCT) + .filter(rule -> rule.getRuleType() == RuleType.FOUR_PHASE_AGGREGATE_WITH_DISTINCT) .findFirst() .get(); } + + private boolean verifyAlwaysNullableFlag(Set functions, boolean alwaysNullable) { + for (AggregateFunction f : functions) { + if (f instanceof NullableAggregateFunction 
+ && ((NullableAggregateFunction) f).isAlwaysNullable() != alwaysNullable) { + return false; + } + } + return true; + } }