diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java index 3e1f3943a73afd..24031bfb8924ee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java @@ -722,6 +722,16 @@ public static boolean containsAggregate(List in return false; } + public static void extractSlots(Expr root, Set slotIdSet) { + if (root instanceof SlotRef) { + slotIdSet.add(((SlotRef) root).getDesc().getId()); + return; + } + for (Expr child : root.getChildren()) { + extractSlots(child, slotIdSet); + } + } + /** * Returns an analyzed clone of 'this' with exprs substituted according to smap. * Removes implicit casts and analysis state while cloning/substituting exprs within diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index f1dacc32e63ce5..18c6774a32355a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -65,7 +65,6 @@ import org.apache.doris.nereids.stats.StatsErrorEstimator; import org.apache.doris.nereids.trees.UnaryNode; import org.apache.doris.nereids.trees.expressions.AggregateExpression; -import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.CTEId; import org.apache.doris.nereids.trees.expressions.EqualTo; import org.apache.doris.nereids.trees.expressions.ExprId; @@ -175,6 +174,7 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; +import org.apache.commons.collections.CollectionUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -1413,12 +1413,11 @@ public PlanFragment visitPhysicalProject(PhysicalProject project PlanFragment inputFragment = project.child(0).accept(this, context); - List execExprList = project.getProjects() + List projectionExprs = project.getProjects() .stream() .map(e -> ExpressionTranslator.translate(e, context)) .collect(Collectors.toList()); - // TODO: fix the project alias of an aliased relation. 
- List slotList = project.getProjects() + List slots = project.getProjects() .stream() .map(NamedExpression::toSlot) .collect(Collectors.toList()); @@ -1428,45 +1427,45 @@ public PlanFragment visitPhysicalProject(PhysicalProject project MultiCastDataSink multiCastDataSink = (MultiCastDataSink) inputFragment.getSink(); DataStreamSink dataStreamSink = multiCastDataSink.getDataStreamSinks().get( multiCastDataSink.getDataStreamSinks().size() - 1); - TupleDescriptor tupleDescriptor = generateTupleDesc(slotList, null, context); - dataStreamSink.setProjections(execExprList); - dataStreamSink.setOutputTupleDesc(tupleDescriptor); + TupleDescriptor projectionTuple = generateTupleDesc(slots, null, context); + dataStreamSink.setProjections(projectionExprs); + dataStreamSink.setOutputTupleDesc(projectionTuple); return inputFragment; } PlanNode inputPlanNode = inputFragment.getPlanRoot(); - List predicateList = inputPlanNode.getConjuncts(); + List conjuncts = inputPlanNode.getConjuncts(); Set requiredSlotIdSet = Sets.newHashSet(); - for (Expr expr : execExprList) { - extractExecSlot(expr, requiredSlotIdSet); + for (Expr expr : projectionExprs) { + Expr.extractSlots(expr, requiredSlotIdSet); } Set requiredByProjectSlotIdSet = Sets.newHashSet(requiredSlotIdSet); - for (Expr expr : predicateList) { - extractExecSlot(expr, requiredSlotIdSet); + for (Expr expr : conjuncts) { + Expr.extractSlots(expr, requiredSlotIdSet); } // For hash join node, use vSrcToOutputSMap to describe the expression calculation, use // vIntermediateTupleDescList as input, and set vOutputTupleDesc as the final output. // TODO: HashJoinNode's be implementation is not support projection yet, remove this after when supported. if (inputPlanNode instanceof JoinNodeBase) { - TupleDescriptor tupleDescriptor = generateTupleDesc(slotList, null, context); - JoinNodeBase hashJoinNode = (JoinNodeBase) inputPlanNode; - hashJoinNode.setvOutputTupleDesc(tupleDescriptor); - hashJoinNode.setvSrcToOutputSMap(execExprList); + TupleDescriptor tupleDescriptor = generateTupleDesc(slots, null, context); + JoinNodeBase joinNode = (JoinNodeBase) inputPlanNode; + joinNode.setvOutputTupleDesc(tupleDescriptor); + joinNode.setvSrcToOutputSMap(projectionExprs); // prune the hashOutputSlotIds - if (hashJoinNode instanceof HashJoinNode) { - ((HashJoinNode) hashJoinNode).getHashOutputSlotIds().clear(); + if (joinNode instanceof HashJoinNode) { + ((HashJoinNode) joinNode).getHashOutputSlotIds().clear(); Set requiredExprIds = Sets.newHashSet(); Set requiredOtherConjunctsSlotIdSet = Sets.newHashSet(); - List otherConjuncts = ((HashJoinNode) hashJoinNode).getOtherJoinConjuncts(); + List otherConjuncts = ((HashJoinNode) joinNode).getOtherJoinConjuncts(); for (Expr expr : otherConjuncts) { - extractExecSlot(expr, requiredOtherConjunctsSlotIdSet); + Expr.extractSlots(expr, requiredOtherConjunctsSlotIdSet); } requiredOtherConjunctsSlotIdSet.forEach(e -> requiredExprIds.add(context.findExprId(e))); requiredSlotIdSet.forEach(e -> requiredExprIds.add(context.findExprId(e))); for (ExprId exprId : requiredExprIds) { - SlotId slotId = ((HashJoinNode) hashJoinNode).getHashOutputExprSlotIdMap().get(exprId); + SlotId slotId = ((HashJoinNode) joinNode).getHashOutputExprSlotIdMap().get(exprId); Preconditions.checkState(slotId != null); - ((HashJoinNode) hashJoinNode).addSlotIdToHashOutputSlotIds(slotId); + ((HashJoinNode) joinNode).addSlotIdToHashOutputSlotIds(slotId); } } return inputFragment; @@ -1478,42 +1477,49 @@ public PlanFragment visitPhysicalProject(PhysicalProject project } 
if (inputPlanNode instanceof ScanNode) { - TupleDescriptor tupleDescriptor = null; + TupleDescriptor projectionTuple = null; + // slotIdsByOrder is used to ensure the ScanNode's output order is the same as the current Project's. + // If we changed the output order when translating the project, the upper node would receive a tuple in + // the wrong order, since it takes the order from project.getOutput(), not scan.getOutput(). + List slotIdsByOrder = Lists.newArrayList(); if (requiredByProjectSlotIdSet.size() != requiredSlotIdSet.size() - || new HashSet<>(execExprList).size() != execExprList.size() - || execExprList.stream().anyMatch(expr -> !(expr instanceof SlotRef))) { - tupleDescriptor = generateTupleDesc(slotList, null, context); - inputPlanNode.setProjectList(execExprList); - inputPlanNode.setOutputTupleDesc(tupleDescriptor); + || new HashSet<>(projectionExprs).size() != projectionExprs.size() + || projectionExprs.stream().anyMatch(expr -> !(expr instanceof SlotRef))) { + projectionTuple = generateTupleDesc(slots, null, context); + inputPlanNode.setProjectList(projectionExprs); + inputPlanNode.setOutputTupleDesc(projectionTuple); } else { - for (int i = 0; i < slotList.size(); ++i) { - context.addExprIdSlotRefPair(slotList.get(i).getExprId(), - (SlotRef) execExprList.get(i)); + for (int i = 0; i < slots.size(); ++i) { + context.addExprIdSlotRefPair(slots.get(i).getExprId(), + (SlotRef) projectionExprs.get(i)); + slotIdsByOrder.add(((SlotRef) projectionExprs.get(i)).getSlotId()); } } // TODO: this is a temporary scheme to support two phase read when has project. // we need to refactor all topn opt into rbo stage. if (inputPlanNode instanceof OlapScanNode) { - ArrayList slots = + ArrayList olapScanSlots = context.getTupleDesc(inputPlanNode.getTupleIds().get(0)).getSlots(); - SlotDescriptor lastSlot = slots.get(slots.size() - 1); + SlotDescriptor lastSlot = olapScanSlots.get(olapScanSlots.size() - 1); if (lastSlot.getColumn() != null && lastSlot.getColumn().getName().equals(Column.ROWID_COL)) { - if (tupleDescriptor != null) { - injectRowIdColumnSlot(tupleDescriptor); + if (projectionTuple != null) { + injectRowIdColumnSlot(projectionTuple); SlotRef slotRef = new SlotRef(lastSlot); inputPlanNode.getProjectList().add(slotRef); requiredByProjectSlotIdSet.add(lastSlot.getId()); + } else { + slotIdsByOrder.add(lastSlot.getId()); } requiredSlotIdSet.add(lastSlot.getId()); } } - updateChildSlotsMaterialization(inputPlanNode, requiredSlotIdSet, - requiredByProjectSlotIdSet, context); + updateScanSlotsMaterialization((ScanNode) inputPlanNode, requiredSlotIdSet, + requiredByProjectSlotIdSet, slotIdsByOrder, context); } else { - TupleDescriptor tupleDescriptor = generateTupleDesc(slotList, null, context); - inputPlanNode.setProjectList(execExprList); + TupleDescriptor tupleDescriptor = generateTupleDesc(slots, null, context); + inputPlanNode.setProjectList(projectionExprs); inputPlanNode.setOutputTupleDesc(tupleDescriptor); } return inputFragment; @@ -1837,34 +1843,24 @@ public PlanFragment visitPhysicalWindow(PhysicalWindow physicalW * ******************************************************************************************** */ private PartitionSortNode translatePartitionSortNode(PhysicalPartitionTopN partitionTopN, - PlanNode childNode, PlanTranslatorContext context) { - // Generate the SortInfo, similar to 'translateSortNode'.
- List oldOrderingExprList = Lists.newArrayList(); - List ascOrderList = Lists.newArrayList(); - List nullsFirstParamList = Lists.newArrayList(); - List orderKeyList = partitionTopN.getOrderKeys(); - // 1. Get previous slotRef - orderKeyList.forEach(k -> { - oldOrderingExprList.add(ExpressionTranslator.translate(k.getExpr(), context)); - ascOrderList.add(k.isAsc()); - nullsFirstParamList.add(k.isNullFirst()); + PlanNode childNode, PlanTranslatorContext context) { + TupleDescriptor sortTuple = generateTupleDesc(partitionTopN.child().getOutput(), null, context); + List orderingExprs = Lists.newArrayList(); + List ascOrders = Lists.newArrayList(); + List nullsFirstParams = Lists.newArrayList(); + List orderKeys = partitionTopN.getOrderKeys(); + orderKeys.forEach(k -> { + orderingExprs.add(ExpressionTranslator.translate(k.getExpr(), context)); + ascOrders.add(k.isAsc()); + nullsFirstParams.add(k.isNullFirst()); }); - List sortTupleOutputList = new ArrayList<>(); - List outputList = partitionTopN.getOutput(); - outputList.forEach(k -> sortTupleOutputList.add(ExpressionTranslator.translate(k, context))); List partitionExprs = partitionTopN.getPartitionKeys().stream() .map(e -> ExpressionTranslator.translate(e, context)) .collect(Collectors.toList()); - // 2. Generate new Tuple and get current slotRef for newOrderingExprList - List newOrderingExprList = Lists.newArrayList(); - TupleDescriptor tupleDesc = generateTupleDesc(outputList, orderKeyList, newOrderingExprList, context, null); - // 3. fill in SortInfo members - SortInfo sortInfo = new SortInfo(newOrderingExprList, ascOrderList, nullsFirstParamList, tupleDesc); - + SortInfo sortInfo = new SortInfo(orderingExprs, ascOrders, nullsFirstParams, sortTuple); PartitionSortNode partitionSortNode = new PartitionSortNode(context.nextPlanNodeId(), childNode, partitionTopN.getFunction(), partitionExprs, sortInfo, partitionTopN.hasGlobalLimit(), - partitionTopN.getPartitionLimit(), sortTupleOutputList, oldOrderingExprList); - + partitionTopN.getPartitionLimit()); if (partitionTopN.getStats() != null) { partitionSortNode.setCardinality((long) partitionTopN.getStats().getRowCount()); } @@ -1874,33 +1870,23 @@ private PartitionSortNode translatePartitionSortNode(PhysicalPartitionTopN sort, PlanNode childNode, PlanTranslatorContext context) { - List oldOrderingExprList = Lists.newArrayList(); - List ascOrderList = Lists.newArrayList(); - List nullsFirstParamList = Lists.newArrayList(); - List orderKeyList = sort.getOrderKeys(); - // 1. Get previous slotRef - orderKeyList.forEach(k -> { - oldOrderingExprList.add(ExpressionTranslator.translate(k.getExpr(), context)); - ascOrderList.add(k.isAsc()); - nullsFirstParamList.add(k.isNullFirst()); + TupleDescriptor sortTuple = generateTupleDesc(sort.child().getOutput(), null, context); + List orderingExprs = Lists.newArrayList(); + List ascOrders = Lists.newArrayList(); + List nullsFirstParams = Lists.newArrayList(); + List orderKeys = sort.getOrderKeys(); + orderKeys.forEach(k -> { + orderingExprs.add(ExpressionTranslator.translate(k.getExpr(), context)); + ascOrders.add(k.isAsc()); + nullsFirstParams.add(k.isNullFirst()); }); - List sortTupleOutputList = new ArrayList<>(); - List outputList = sort.getOutput(); - outputList.forEach(k -> sortTupleOutputList.add(ExpressionTranslator.translate(k, context))); - // 2. 
Generate new Tuple and get current slotRef for newOrderingExprList - List newOrderingExprList = Lists.newArrayList(); - TupleDescriptor tupleDesc = generateTupleDesc(outputList, orderKeyList, newOrderingExprList, context, null); - // 3. fill in SortInfo members - SortInfo sortInfo = new SortInfo(newOrderingExprList, ascOrderList, nullsFirstParamList, tupleDesc); - SortNode sortNode = new SortNode(context.nextPlanNodeId(), childNode, sortInfo, true); - sortNode.finalizeForNereids(tupleDesc, sortTupleOutputList, oldOrderingExprList); + SortInfo sortInfo = new SortInfo(orderingExprs, ascOrders, nullsFirstParams, sortTuple); + SortNode sortNode = new SortNode(context.nextPlanNodeId(), childNode, sortInfo, sort instanceof PhysicalTopN); if (sort.getMutableState(PhysicalTopN.TWO_PHASE_READ_OPT).isPresent()) { sortNode.setUseTwoPhaseReadOpt(true); sortNode.getSortInfo().setUseTwoPhaseRead(); injectRowIdColumnSlot(sortNode.getSortInfo().getSortTupleDescriptor()); - TupleDescriptor childTuple = childNode.getOutputTupleDesc() != null - ? childNode.getOutputTupleDesc() : context.getTupleDesc(childNode.getTupleIds().get(0)); - SlotDescriptor childRowIdDesc = childTuple.getSlots().get(childTuple.getSlots().size() - 1); + SlotDescriptor childRowIdDesc = sortTuple.getSlots().get(sortTuple.getSlots().size() - 1); sortNode.getResolvedTupleExprs().add(new SlotRef(childRowIdDesc)); } if (sort.getStats() != null) { @@ -1910,34 +1896,32 @@ private SortNode translateSortNode(AbstractPhysicalSort sort, Pl return sortNode; } - private void updateChildSlotsMaterialization(PlanNode execPlan, + private void updateScanSlotsMaterialization(ScanNode scanNode, Set requiredSlotIdSet, Set requiredByProjectSlotIdSet, - PlanTranslatorContext context) { - Set slotRefSet = new HashSet<>(); - for (Expr expr : execPlan.getConjuncts()) { - expr.collect(SlotRef.class, slotRefSet); - } - Set slotIdSet = slotRefSet.stream() - .map(SlotRef::getSlotId).collect(Collectors.toSet()); - slotIdSet.addAll(requiredSlotIdSet); - boolean noneMaterialized = execPlan.getTupleIds().stream() - .map(context::getTupleDesc) - .map(TupleDescriptor::getSlots) - .flatMap(List::stream) - .peek(s -> s.setIsMaterialized(slotIdSet.contains(s.getId()))) - .filter(SlotDescriptor::isMaterialized) - .count() == 0; - if (noneMaterialized) { - context.getDescTable() - .getTupleDesc(execPlan.getTupleIds().get(0)).getSlots().get(0).setIsMaterialized(true); - } - if (execPlan instanceof ScanNode) { - try { - ((ScanNode) execPlan).updateRequiredSlots(context, requiredByProjectSlotIdSet); - } catch (UserException e) { - Util.logAndThrowRuntimeException(LOG, - "User Exception while reset external file scan node contexts.", e); + List slotIdsByOrder, PlanTranslatorContext context) { + // TODO: use smallest slot if do not need any slot in upper node + SlotDescriptor smallest = scanNode.getTupleDesc().getSlots().get(0); + if (CollectionUtils.isNotEmpty(slotIdsByOrder)) { + // if we eliminate project above scan, we should ensure the slot order of scan's output is same with + // the projection's output. So, we need to reorder the output slot in scan's tuple. 
+ Map idToSlotDescMap = scanNode.getTupleDesc().getSlots().stream() + .filter(s -> requiredSlotIdSet.contains(s.getId())) + .collect(Collectors.toMap(SlotDescriptor::getId, s -> s)); + scanNode.getTupleDesc().getSlots().clear(); + for (SlotId slotId : slotIdsByOrder) { + scanNode.getTupleDesc().getSlots().add(idToSlotDescMap.get(slotId)); } + } else { + scanNode.getTupleDesc().getSlots().removeIf(s -> !requiredSlotIdSet.contains(s.getId())); + } + if (scanNode.getTupleDesc().getSlots().isEmpty()) { + scanNode.getTupleDesc().getSlots().add(smallest); + } + try { + scanNode.updateRequiredSlots(context, requiredByProjectSlotIdSet); + } catch (UserException e) { + Util.logAndThrowRuntimeException(LOG, + "User Exception while reset external file scan node contexts.", e); } } @@ -1950,16 +1934,6 @@ private void addConjunctsToPlanNode(PhysicalFilter filter, updateLegacyPlanIdToPhysicalPlan(planNode, filter); } - private void extractExecSlot(Expr root, Set slotIdList) { - if (root instanceof SlotRef) { - slotIdList.add(((SlotRef) root).getDesc().getId()); - return; - } - for (Expr child : root.getChildren()) { - extractExecSlot(child, slotIdList); - } - } - private TupleDescriptor generateTupleDesc(List slotList, TableIf table, Set deferredMaterializedExprIds, PlanTranslatorContext context) { TupleDescriptor tupleDescriptor = context.generateTupleDesc(); @@ -1982,39 +1956,6 @@ private TupleDescriptor generateTupleDesc(List slotList, TableIf table, Pl return tupleDescriptor; } - private TupleDescriptor generateTupleDesc(List slotList, List orderKeyList, - List newOrderingExprList, - PlanTranslatorContext context, Table table) { - TupleDescriptor tupleDescriptor = context.generateTupleDesc(); - Set alreadyExists = Sets.newHashSet(); - tupleDescriptor.setTable(table); - for (OrderKey orderKey : orderKeyList) { - SlotReference slotReference; - if (orderKey.getExpr() instanceof SlotReference) { - slotReference = (SlotReference) orderKey.getExpr(); - } else { - slotReference = (SlotReference) new Alias(orderKey.getExpr(), orderKey.getExpr().toString()).toSlot(); - } - // TODO: trick here, we need semanticEquals to remove redundant expression - if (alreadyExists.contains(slotReference.getExprId())) { - newOrderingExprList.add(context.findSlotRef(slotReference.getExprId())); - continue; - } - context.createSlotDesc(tupleDescriptor, slotReference); - newOrderingExprList.add(context.findSlotRef(slotReference.getExprId())); - alreadyExists.add(slotReference.getExprId()); - } - for (Slot slot : slotList) { - if (alreadyExists.contains(slot.getExprId())) { - continue; - } - context.createSlotDesc(tupleDescriptor, (SlotReference) slot); - alreadyExists.add(slot.getExprId()); - } - - return tupleDescriptor; - } - private PlanFragment connectJoinNode(HashJoinNode hashJoinNode, PlanFragment leftFragment, PlanFragment rightFragment, PlanTranslatorContext context, AbstractPlan join) { hashJoinNode.setChild(0, leftFragment.getPlanRoot()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NormalizeSort.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NormalizeSort.java index 9480a1bb835e20..5426b5af6a7a48 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NormalizeSort.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NormalizeSort.java @@ -17,33 +17,52 @@ package org.apache.doris.nereids.rules.rewrite; +import org.apache.doris.nereids.properties.OrderKey; import org.apache.doris.nereids.rules.Rule; import 
org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; -import java.util.stream.Collectors; +import java.util.List; +import java.util.stream.Stream; /** - * the sort node will create new slots for order by keys if the order by keys is not in the output - * so need create a project above sort node to prune the unnecessary order by keys. This means the - * Tuple slots size is difference to PhysicalSort.output.size. If not prune and hide the order key, - * the upper plan node will see the temporary slots and treat as output, and then translate failed. - * This is trick, we should add sort output tuple to ensure the tuple slot size is equals, but it - * has large workload. I think we should refactor the PhysicalPlanTranslator in the future, and - * process PhysicalProject(output)/PhysicalDistribute more general. + * SortNode on BE always outputs the order keys because BE needs them to do the merge sort. So we normalize + * LogicalSort into the form BE expects: a bottom project materializes the order keys before the sort, and a top + * project prunes the useless columns after the sort. */ public class NormalizeSort extends OneRewriteRuleFactory { @Override public Rule build() { - return logicalSort() - .when(sort -> !sort.isNormalized() && !sort.getOutputSet() - .containsAll(sort.getOrderKeys().stream() - .map(orderKey -> orderKey.getExpr()).collect(Collectors.toSet()))) + return logicalSort().whenNot(sort -> sort.getOrderKeys().stream() + .map(OrderKey::getExpr).allMatch(Slot.class::isInstance)) .then(sort -> { - return new LogicalProject(sort.getOutput(), ImmutableList.of(), false, - sort.withNormalize(true)); + List newProjects = Lists.newArrayList(); + List newOrderKeys = sort.getOrderKeys().stream() + .map(orderKey -> { + Expression expr = orderKey.getExpr(); + if (!(expr instanceof Slot)) { + Alias alias = new Alias(expr, expr.toSql()); + newProjects.add(alias); + expr = alias.toSlot(); + } + return orderKey.withExpression(expr); + }).collect(ImmutableList.toImmutableList()); + List bottomProjections = Stream.concat( + sort.child().getOutput().stream(), + newProjects.stream() + ).collect(ImmutableList.toImmutableList()); + List topProjections = sort.getOutput().stream() + .map(NamedExpression.class::cast) + .collect(ImmutableList.toImmutableList()); + return new LogicalProject<>(topProjections, sort.withOrderKeysAndChild(newOrderKeys, + new LogicalProject<>(bottomProjections, sort.child()))); }).toRule(RuleType.NORMALIZE_SORT); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSort.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSort.java index 5918422966fea2..f139656f01c925 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSort.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSort.java @@ -46,29 +46,17 @@ public class LogicalSort extends LogicalUnary orderKeys; - private final boolean normalized; - public LogicalSort(List orderKeys, CHILD_TYPE child) { this(orderKeys, Optional.empty(), Optional.empty(), child); } - public LogicalSort(List orderKeys, CHILD_TYPE child, boolean
normalized) { - this(orderKeys, Optional.empty(), Optional.empty(), child, normalized); - } - /** * Constructor for LogicalSort. */ public LogicalSort(List orderKeys, Optional groupExpression, Optional logicalProperties, CHILD_TYPE child) { - this(orderKeys, groupExpression, logicalProperties, child, false); - } - - public LogicalSort(List orderKeys, Optional groupExpression, - Optional logicalProperties, CHILD_TYPE child, boolean normalized) { super(PlanType.LOGICAL_SORT, groupExpression, logicalProperties, child); this.orderKeys = ImmutableList.copyOf(Objects.requireNonNull(orderKeys, "orderKeys can not be null")); - this.normalized = normalized; } @Override @@ -80,10 +68,6 @@ public List getOrderKeys() { return orderKeys; } - public boolean isNormalized() { - return normalized; - } - @Override public String toString() { return Utils.toSqlString("LogicalSort[" + id.asInt() + "]", @@ -98,7 +82,7 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) { return false; } - LogicalSort that = (LogicalSort) o; + LogicalSort that = (LogicalSort) o; return Objects.equals(orderKeys, that.orderKeys); } @@ -122,30 +106,27 @@ public List getExpressions() { @Override public LogicalSort withChildren(List children) { Preconditions.checkArgument(children.size() == 1); - return new LogicalSort<>(orderKeys, children.get(0), normalized); + return new LogicalSort<>(orderKeys, children.get(0)); } @Override public LogicalSort withGroupExpression(Optional groupExpression) { - return new LogicalSort<>(orderKeys, groupExpression, Optional.of(getLogicalProperties()), child(), - normalized); + return new LogicalSort<>(orderKeys, groupExpression, Optional.of(getLogicalProperties()), child()); } @Override - public Plan withGroupExprLogicalPropChildren(Optional groupExpression, + public LogicalSort withGroupExprLogicalPropChildren(Optional groupExpression, Optional logicalProperties, List children) { Preconditions.checkArgument(children.size() == 1); - return new LogicalSort<>(orderKeys, groupExpression, logicalProperties, children.get(0), - normalized); + return new LogicalSort<>(orderKeys, groupExpression, logicalProperties, children.get(0)); } public LogicalSort withOrderKeys(List orderKeys) { return new LogicalSort<>(orderKeys, Optional.empty(), - Optional.of(getLogicalProperties()), child(), false); + Optional.of(getLogicalProperties()), child()); } - public LogicalSort withNormalize(boolean orderKeysPruned) { - return new LogicalSort<>(orderKeys, groupExpression, Optional.of(getLogicalProperties()), child(), - orderKeysPruned); + public LogicalSort withOrderKeysAndChild(List orderKeys, Plan child) { + return new LogicalSort<>(orderKeys, child); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionSortNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionSortNode.java index ef198891ccd9df..9e14b4f267de99 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionSortNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionSortNode.java @@ -17,12 +17,8 @@ package org.apache.doris.planner; -import org.apache.doris.analysis.Analyzer; import org.apache.doris.analysis.Expr; -import org.apache.doris.analysis.SlotDescriptor; -import org.apache.doris.analysis.SlotId; import org.apache.doris.analysis.SortInfo; -import org.apache.doris.common.NotImplementedException; import org.apache.doris.nereids.trees.plans.WindowFuncType; import org.apache.doris.statistics.StatisticalType; import org.apache.doris.thrift.TExplainLevel; @@ 
-34,40 +30,29 @@ import com.google.common.base.Joiner; import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import java.util.ArrayList; -import java.util.HashSet; import java.util.Iterator; import java.util.List; -import java.util.Set; /** * PartitionSortNode. * PartitionSortNode is only used in the Nereids. */ public class PartitionSortNode extends PlanNode { - private static final Logger LOG = LogManager.getLogger(PartitionSortNode.class); - private List resolvedTupleExprs; private final WindowFuncType function; private final List partitionExprs; private final SortInfo info; private final boolean hasGlobalLimit; private final long partitionLimit; - private boolean isUnusedExprRemoved = false; - private ArrayList nullabilityChangedFlags = Lists.newArrayList(); - /** * Constructor. */ public PartitionSortNode(PlanNodeId id, PlanNode input, WindowFuncType function, List partitionExprs, - SortInfo info, boolean hasGlobalLimit, long partitionLimit, - List outputList, List orderingExpr) { + SortInfo info, boolean hasGlobalLimit, long partitionLimit) { super(id, "PartitionTopN", StatisticalType.PARTITION_TOPN_NODE); + Preconditions.checkArgument(info.getOrderingExprs().size() == info.getIsAscOrder().size()); this.function = function; this.partitionExprs = partitionExprs; this.info = info; @@ -77,38 +62,12 @@ public PartitionSortNode(PlanNodeId id, PlanNode input, WindowFuncType function, this.tblRefIds.addAll(Lists.newArrayList(info.getSortTupleDescriptor().getId())); this.nullableTupleIds.addAll(input.getNullableTupleIds()); this.children.add(input); - - List resolvedTupleExprs = new ArrayList<>(); - for (Expr order : orderingExpr) { - if (!resolvedTupleExprs.contains(order)) { - resolvedTupleExprs.add(order); - } - } - for (Expr output : outputList) { - if (!resolvedTupleExprs.contains(output)) { - resolvedTupleExprs.add(output); - } - } - this.resolvedTupleExprs = ImmutableList.copyOf(resolvedTupleExprs); - info.setSortTupleSlotExprs(resolvedTupleExprs); - - nullabilityChangedFlags.clear(); - for (int i = 0; i < resolvedTupleExprs.size(); i++) { - nullabilityChangedFlags.add(false); - } - Preconditions.checkArgument(info.getOrderingExprs().size() == info.getIsAscOrder().size()); } public SortInfo getSortInfo() { return info; } - @Override - public void getMaterializedIds(Analyzer analyzer, List ids) { - super.getMaterializedIds(analyzer, ids); - Expr.getIds(info.getOrderingExprs(), null, ids); - } - @Override public String getNodeExplainString(String prefix, TExplainLevel detailLevel) { if (detailLevel == TExplainLevel.BRIEF) { @@ -164,34 +123,12 @@ public String getNodeExplainString(String prefix, TExplainLevel detailLevel) { return output.toString(); } - private void removeUnusedExprs() { - if (!isUnusedExprRemoved) { - if (resolvedTupleExprs != null) { - List slotDescriptorList = this.info.getSortTupleDescriptor().getSlots(); - for (int i = slotDescriptorList.size() - 1; i >= 0; i--) { - if (!slotDescriptorList.get(i).isMaterialized()) { - resolvedTupleExprs.remove(i); - nullabilityChangedFlags.remove(i); - } - } - } - isUnusedExprRemoved = true; - } - } - @Override protected void toThrift(TPlanNode msg) { msg.node_type = TPlanNodeType.PARTITION_SORT_NODE; TSortInfo sortInfo = info.toThrift(); Preconditions.checkState(tupleIds.size() == 1, "Incorrect size for tupleIds in PartitionSortNode"); - removeUnusedExprs(); - if 
(resolvedTupleExprs != null) { - sortInfo.setSortTupleSlotExprs(Expr.treesToThrift(resolvedTupleExprs)); - // FIXME this is a bottom line solution for wrong nullability of resolvedTupleExprs - // remove the following line after nereids online - sortInfo.setSlotExprsNullabilityChangedFlags(nullabilityChangedFlags); - } TopNAlgorithm topNAlgorithm; if (function == WindowFuncType.ROW_NUMBER) { @@ -210,13 +147,4 @@ protected void toThrift(TPlanNode msg) { partitionSortNode.setPartitionInnerLimit(partitionLimit); msg.partition_sort_node = partitionSortNode; } - - @Override - public Set computeInputSlotIds(Analyzer analyzer) throws NotImplementedException { - removeUnusedExprs(); - List materializedTupleExprs = new ArrayList<>(resolvedTupleExprs); - List result = Lists.newArrayList(); - Expr.getIds(materializedTupleExprs, null, result); - return new HashSet<>(result); - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java index aab8f44186d7f1..c4dbe606444af1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java @@ -27,7 +27,6 @@ import org.apache.doris.analysis.SlotId; import org.apache.doris.analysis.SlotRef; import org.apache.doris.analysis.SortInfo; -import org.apache.doris.analysis.TupleDescriptor; import org.apache.doris.common.NotImplementedException; import org.apache.doris.common.UserException; import org.apache.doris.statistics.StatisticalType; @@ -329,31 +328,4 @@ public Set computeInputSlotIds(Analyzer analyzer) throws NotImplementedE Expr.getIds(materializedTupleExprs, null, result); return new HashSet<>(result); } - - /** - * Supplement the information needed by be for the sort node. - * TODO: currently we only process slotref, so when order key is a + 1, we will failed. 
- */ - public void finalizeForNereids(TupleDescriptor tupleDescriptor, - List outputList, List orderingExpr) { - resolvedTupleExprs = Lists.newArrayList(); - // TODO: should fix the duplicate order by exprs in nereids code later - for (Expr order : orderingExpr) { - if (!resolvedTupleExprs.contains(order)) { - resolvedTupleExprs.add(order); - } - } - for (Expr output : outputList) { - if (!resolvedTupleExprs.contains(output)) { - resolvedTupleExprs.add(output); - } - } - info.setSortTupleDesc(tupleDescriptor); - info.setSortTupleSlotExprs(resolvedTupleExprs); - - nullabilityChangedFlags.clear(); - for (int i = 0; i < resolvedTupleExprs.size(); i++) { - nullabilityChangedFlags.add(false); - } - } } diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query47.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query47.out index ed8bcb300fd4ed..7b27c69128a9b9 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query47.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query47.out @@ -34,13 +34,13 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------PhysicalDistribute --------PhysicalTopN ----------PhysicalProject -------------hashJoin[INNER_JOIN](s_store_name = v1_lead.s_store_name)(v1.i_category = v1_lead.i_category)(v1.i_brand = v1_lead.i_brand)(v1.s_company_name = v1_lead.s_company_name)(v1.rn = expr_(rn - 1)) +------------hashJoin[INNER_JOIN](v1.i_category = v1_lead.i_category)(v1.i_brand = v1_lead.i_brand)(v1.s_store_name = v1_lead.s_store_name)(v1.s_company_name = v1_lead.s_company_name)(v1.rn = expr_(rn - 1)) --------------PhysicalDistribute ----------------PhysicalProject ------------------PhysicalCteConsumer ( cteId=CTEId#0 ) --------------PhysicalDistribute ----------------PhysicalProject -------------------hashJoin[INNER_JOIN](s_store_name = v1_lag.s_store_name)(v1.i_category = v1_lag.i_category)(v1.i_brand = v1_lag.i_brand)(v1.s_company_name = v1_lag.s_company_name)(v1.rn = expr_(rn + 1)) +------------------hashJoin[INNER_JOIN](v1.i_category = v1_lag.i_category)(v1.i_brand = v1_lag.i_brand)(v1.s_store_name = v1_lag.s_store_name)(v1.s_company_name = v1_lag.s_company_name)(v1.rn = expr_(rn + 1)) --------------------PhysicalDistribute ----------------------PhysicalProject ------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query57.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query57.out index b54f9d5abca810..0e13df083b4ceb 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query57.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query57.out @@ -34,13 +34,13 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------PhysicalDistribute --------PhysicalTopN ----------PhysicalProject -------------hashJoin[INNER_JOIN](i_brand = v1_lead.i_brand)(v1.i_category = v1_lead.i_category)(v1.cc_name = v1_lead.cc_name)(v1.rn = expr_(rn - 1)) +------------hashJoin[INNER_JOIN](v1.i_category = v1_lead.i_category)(v1.i_brand = v1_lead.i_brand)(v1.cc_name = v1_lead.cc_name)(v1.rn = expr_(rn - 1)) --------------PhysicalDistribute ----------------PhysicalProject ------------------PhysicalCteConsumer ( cteId=CTEId#0 ) --------------PhysicalDistribute ----------------PhysicalProject -------------------hashJoin[INNER_JOIN](i_brand = v1_lag.i_brand)(v1.i_category = v1_lag.i_category)(v1.cc_name = v1_lag.cc_name)(v1.rn = expr_(rn + 1)) +------------------hashJoin[INNER_JOIN](v1.i_category = v1_lag.i_category)(v1.i_brand = 
v1_lag.i_brand)(v1.cc_name = v1_lag.cc_name)(v1.rn = expr_(rn + 1)) --------------------PhysicalDistribute ----------------------PhysicalProject ------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
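
Note on the NormalizeSort rewrite above: the rule leaves slot-valued order keys untouched, aliases every non-slot order key into a bottom projection so the SortNode only ever sorts on slots, and re-applies the sort's original output as a top projection so the helper slots never leak upward. Below is a minimal standalone sketch of that shape using hypothetical stand-in types (plain Java records, not the real Nereids Slot/Alias/OrderKey classes); it illustrates the idea only and is not the actual rule.

import java.util.ArrayList;
import java.util.List;

public class NormalizeSortSketch {
    // Stand-in for a named expression: its SQL text and whether it is already a plain slot.
    record Expr(String sql, boolean isSlot) {}

    // Result of the rewrite: bottom project -> sort on slots -> top project.
    record Normalized(List<Expr> bottomProjections, List<Expr> orderKeySlots, List<Expr> topProjections) {}

    static Normalized normalize(List<Expr> childOutput, List<Expr> orderKeys) {
        List<Expr> bottom = new ArrayList<>(childOutput);
        List<Expr> slotKeys = new ArrayList<>();
        for (Expr key : orderKeys) {
            if (key.isSlot()) {
                slotKeys.add(key);                          // already a slot, keep as-is
            } else {
                Expr aliasSlot = new Expr(key.sql(), true); // "new Alias(expr, expr.toSql()).toSlot()" in the rule
                bottom.add(aliasSlot);                      // materialize the expression below the sort
                slotKeys.add(aliasSlot);                    // sort now orders by the materialized slot
            }
        }
        // The top projection restores the sort's original output, pruning the helper slots.
        return new Normalized(bottom, slotKeys, childOutput);
    }

    public static void main(String[] args) {
        List<Expr> output = List.of(new Expr("k1", true), new Expr("v1", true));
        List<Expr> keys = List.of(new Expr("k1", true), new Expr("abs(v1)", false));
        Normalized n = normalize(output, keys);
        System.out.println(n.bottomProjections()); // k1, v1, plus the aliased abs(v1)
        System.out.println(n.orderKeySlots());     // k1 and the abs(v1) slot
        System.out.println(n.topProjections());    // k1, v1 only
    }
}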
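
Similarly, for updateScanSlotsMaterialization above: when the projection on top of a scan is eliminated, the scan tuple's slots are first filtered to the required ones and then reordered to follow the projection's output order (slotIdsByOrder), so upper nodes that derive their layout from project.getOutput() still see the expected order. A small self-contained sketch of that reordering, again with a hypothetical stand-in Slot record rather than Doris's SlotDescriptor/SlotId:

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

public class SlotReorderSketch {
    // Stand-in for SlotDescriptor: just an id and a column name.
    record Slot(int id, String name) {}

    // Keep only the slots named by slotIdsByOrder, in exactly that order.
    static List<Slot> reorder(List<Slot> scanSlots, List<Integer> slotIdsByOrder) {
        Map<Integer, Slot> idToSlot = scanSlots.stream()
                .collect(Collectors.toMap(Slot::id, Function.identity()));
        List<Slot> reordered = new ArrayList<>();
        for (Integer id : slotIdsByOrder) {
            reordered.add(idToSlot.get(id));  // projection order drives the scan's output order
        }
        return reordered;
    }

    public static void main(String[] args) {
        List<Slot> scanSlots = List.of(new Slot(1, "k1"), new Slot(2, "k2"), new Slot(3, "v1"));
        // The projection only needs v1 and k1, in that order.
        System.out.println(reorder(scanSlots, List.of(3, 1)));  // [Slot[id=3, name=v1], Slot[id=1, name=k1]]
    }
}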