From ee85f1cc16f4a423e12933318bc0cffd26140077 Mon Sep 17 00:00:00 2001 From: jianghaochen Date: Thu, 7 Apr 2022 17:08:59 +0800 Subject: [PATCH 1/4] [feature](statistics) Statistics derivation.Step 1:ScanNode implementation --- .../apache/doris/planner/OlapScanNode.java | 40 +++--- .../org/apache/doris/planner/PlanNode.java | 33 +++++ .../doris/planner/SingleNodePlanner.java | 10 +- .../doris/statistics/BaseStatsDerive.java | 122 ++++++++++++++++++ .../doris/statistics/ScanStatsDerive.java | 88 +++++++++++++ .../doris/statistics/StatsDeriveResult.java | 73 +++++++++++ .../statistics/StatsRecursiveDerive.java | 57 ++++++++ 7 files changed, 407 insertions(+), 16 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/ScanStatsDerive.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index f74666a18caf2e..561f993c4a6d68 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -134,6 +134,7 @@ public class OlapScanNode extends ScanNode { private int selectedPartitionNum = 0; private Collection selectedPartitionIds = Lists.newArrayList(); private long totalBytes = 0; + private long tmpRowCount = 0; // List of tablets will be scanned by current olap_scan_node private ArrayList scanTabletIds = Lists.newArrayList(); @@ -199,6 +200,11 @@ public void setSelectedPartitionIds(Collection selectedPartitionIds) { this.selectedPartitionIds = selectedPartitionIds; } + @Override + public NodeType getNodeType() { + return NodeType.OLAP_SCAN_NODE; + } + /** * The function is used to directly select the index id of the base table as the selectedIndexId. * It makes sure that the olap scan node must scan the base data rather than scan the materialized view data. @@ -338,16 +344,8 @@ public void init(Analyzer analyzer) throws UserException { computePartitionInfo(); computeTupleState(analyzer); - /** - * Compute InAccurate cardinality before mv selector and tablet pruning. - * - Accurate statistical information relies on the selector of materialized views and bucket reduction. - * - However, Those both processes occur after the reorder algorithm is completed. - * - When Join reorder is turned on, the cardinality must be calculated before the reorder algorithm. - * - So only an inaccurate cardinality can be calculated here. - */ - if (analyzer.safeIsEnableJoinReorderBasedCost()) { - computeInaccurateCardinality(); - } + // compute for incorrect cardinality + computeTmpRowCount(); } @Override @@ -411,16 +409,18 @@ protected void computeNumNodes() { * 2. Apply conjunct * 3. Apply limit */ - private void computeInaccurateCardinality() { + private void computeTmpRowCount() { // step1: Calculate how many rows were scanned - cardinality = 0; + tmpRowCount = 0; for (long selectedPartitionId : selectedPartitionIds) { final Partition partition = olapTable.getPartition(selectedPartitionId); final MaterializedIndex baseIndex = partition.getBaseIndex(); - cardinality += baseIndex.getRowCount(); + tmpRowCount += baseIndex.getRowCount(); } - applyConjunctsSelectivity(); - capCardinalityAtLimit(); + } + + public long getTmpRowCount() { + return tmpRowCount; } private Collection partitionPrune(PartitionInfo partitionInfo, PartitionNames partitionNames) throws AnalysisException { @@ -563,7 +563,17 @@ private void addScanRangeLocations(Partition partition, result.add(scanRangeLocations); } + // FIXME(dhc): we use cardinality here to simulate ndv + // update statsDeriveResult for real statistics + statsDeriveResult.setRowCount(cardinality); + for (Map.Entry entry : statsDeriveResult.getColumnToNdv().entrySet()) { + if (entry.getValue() > 0) { + cardinality = Math.min(cardinality, entry.getValue()); + } + } + statsDeriveResult.setCardinality(cardinality); + if (tablets.size() == 0) { desc.setCardinality(0); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java index 58a0019962d1eb..cae34431a93471 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java @@ -36,6 +36,7 @@ import org.apache.doris.common.TreeNode; import org.apache.doris.common.UserException; import org.apache.doris.common.util.VectorizedUtil; +import org.apache.doris.statistics.StatsDeriveResult; import org.apache.doris.thrift.TExplainLevel; import org.apache.doris.thrift.TFunctionBinaryType; import org.apache.doris.thrift.TPlan; @@ -135,6 +136,9 @@ abstract public class PlanNode extends TreeNode { protected List outputSlotIds; + private NodeType nodeType = NodeType.DEFAULT; + protected StatsDeriveResult statsDeriveResult = new StatsDeriveResult(); + protected PlanNode(PlanNodeId id, ArrayList tupleIds, String planNodeName) { this.id = id; this.limit = -1; @@ -173,12 +177,41 @@ protected PlanNode(PlanNodeId id, PlanNode node, String planNodeName) { this.planNodeName = VectorizedUtil.isVectorized() ? "V" + planNodeName : planNodeName; this.numInstances = 1; + this.nodeType = node.getNodeType(); + this.statsDeriveResult.set(node.getStatsDeriveResult()); + } + + public enum NodeType { + DEFAULT, + AGG_NODE, + OLAP_SCAN_NODE, + HASH_JOIN_NODE, + MERGE_NODE } public String getPlanNodeName() { return planNodeName; } + public StatsDeriveResult getStatsDeriveResult() { + if (statsDeriveResult == null) { + statsDeriveResult = new StatsDeriveResult(); + } + return statsDeriveResult; + } + + public NodeType getNodeType() { + return nodeType; + } + + public void setStatsDeriveResult(StatsDeriveResult statsDeriveResult) { + this.statsDeriveResult = statsDeriveResult; + } + + public void setNodeType(NodeType nodeType) { + this.nodeType = nodeType; + } + /** * Sets tblRefIds_, tupleIds_, and nullableTupleIds_. * The default implementation is a no-op. diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java index 8ad921bf0928b5..4e65ca75d83e4b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java @@ -71,6 +71,7 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; +import org.apache.doris.statistics.StatsRecursiveDerive; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -96,6 +97,7 @@ public class SingleNodePlanner { private final PlannerContext ctx_; private final ArrayList scanNodes = Lists.newArrayList(); private Map> selectStmtToScanNodes = Maps.newHashMap(); + private StatsRecursiveDerive statsRecursiveDerive; public SingleNodePlanner(PlannerContext ctx) { ctx_ = ctx; @@ -164,6 +166,8 @@ public PlanNode createSingleNodePlan() throws UserException, AnalysisException { if (LOG.isTraceEnabled()) { LOG.trace("desctbl: " + analyzer.getDescTbl().debugString()); } + statsRecursiveDerive = new StatsRecursiveDerive(); + statsRecursiveDerive.creteNodeTypeToDeriveMap(); PlanNode singleNodePlan = createQueryPlan(queryStmt, analyzer, ctx_.getQueryOptions().getDefaultOrderByLimit()); Preconditions.checkNotNull(singleNodePlan); @@ -1725,7 +1729,11 @@ private PlanNode createScanNode(Analyzer analyzer, TableRef tblRef, SelectStmt s scanNodeList.add(scanNode); scanNode.init(analyzer); - + if (analyzer.safeIsEnableJoinReorderBasedCost()) { + statsRecursiveDerive.statsRecursiveDerive(scanNode); + // Update the node's cardinality value for the current architecture + scanNode.cardinality = scanNode.getStatsDeriveResult().getCardinality(); + } return scanNode; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java new file mode 100644 index 00000000000000..4f40b451df651d --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java @@ -0,0 +1,122 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import org.apache.doris.analysis.Expr; +import org.apache.doris.planner.PlanNode; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public abstract class BaseStatsDerive { + // estimate of the output cardinality of this node; + // invalid: -1 + protected long cardinality; + protected long limit; + + protected List conjuncts = Lists.newArrayList(); + protected List childrenStatsResult = Lists.newArrayList(); + + protected BaseStatsDerive init(PlanNode node) { + cardinality = -1; + limit = node.getLimit(); + conjuncts.addAll(node.getConjuncts()); + + for (PlanNode childNode : node.getChildren()) { + childrenStatsResult.add(childNode.getStatsDeriveResult()); + } + return this; + } + + public abstract StatsDeriveResult deriveStats(); + + public boolean hasLimit() { + return limit > -1; + } + + protected void applyConjunctsSelectivity() { + if (cardinality == -1) { + return; + } + applySelectivity(); + } + + private void applySelectivity() { + double selectivity = computeSelectivity(); + Preconditions.checkState(cardinality >= 0); + long preConjunctCardinality = cardinality; + cardinality = Math.round(cardinality * selectivity); + // don't round cardinality down to zero for safety. + if (cardinality == 0 && preConjunctCardinality > 0) { + cardinality = 1; + } + } + + protected double computeSelectivity() { + for (Expr expr : conjuncts) { + expr.setSelectivity(); + } + return computeCombinedSelectivity(conjuncts); + } + + /** + * Returns the estimated combined selectivity of all conjuncts. Uses heuristics to + * address the following estimation challenges: + * 1. The individual selectivities of conjuncts may be unknown. + * 2. Two selectivities, whether known or unknown, could be correlated. Assuming + * independence can lead to significant underestimation. + *

+ * The first issue is addressed by using a single default selectivity that is + * representative of all conjuncts with unknown selectivities. + * The second issue is addressed by an exponential backoff when multiplying each + * additional selectivity into the final result. + */ + static protected double computeCombinedSelectivity(List conjuncts) { + // Collect all estimated selectivities. + List selectivities = new ArrayList<>(); + for (Expr e : conjuncts) { + if (e.hasSelectivity()) selectivities.add(e.getSelectivity()); + } + if (selectivities.size() != conjuncts.size()) { + // Some conjuncts have no estimated selectivity. Use a single default + // representative selectivity for all those conjuncts. + selectivities.add(Expr.DEFAULT_SELECTIVITY); + } + // Sort the selectivities to get a consistent estimate, regardless of the original + // conjunct order. Sort in ascending order such that the most selective conjunct + // is fully applied. + Collections.sort(selectivities); + double result = 1.0; + // selectivity = 1 * (s1)^(1/1) * (s2)^(1/2) * ... * (sn-1)^(1/(n-1)) * (sn)^(1/n) + for (int i = 0; i < selectivities.size(); ++i) { + // Exponential backoff for each selectivity multiplied into the final result. + result *= Math.pow(selectivities.get(i), 1.0 / (double) (i + 1)); + } + // Bound result in [0, 1] + return Math.max(0.0, Math.min(1.0, result)); + } + + protected void capCardinalityAtLimit() { + if (hasLimit()) { + cardinality = cardinality == -1 ? limit : Math.min(cardinality, limit); + } + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ScanStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ScanStatsDerive.java new file mode 100644 index 00000000000000..711d5a13a0abc9 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ScanStatsDerive.java @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import com.google.common.base.Preconditions; +import org.apache.doris.analysis.SlotDescriptor; +import org.apache.doris.planner.OlapScanNode; +import org.apache.doris.planner.PlanNode; + +import java.util.HashMap; +import java.util.Map; + +public class ScanStatsDerive extends BaseStatsDerive { + // Currently, due to the structure of doris, + // the selected materialized view is not determined when calculating the statistical information of scan, + // so baseIndex is used for calculation when generating Planner. + + // The tmpRowCount here is the temporary number of rows. + private long tmpRowCount; + private Map slotIdToDataSize; + private Map slotIdToNdv; + + @Override + public ScanStatsDerive init(PlanNode node) { + Preconditions.checkState(node instanceof OlapScanNode); + super.init(node); + tmpRowCount = ((OlapScanNode)node).getTmpRowCount(); + buildColumnToStats((OlapScanNode)node); + return this; + } + + @Override + public StatsDeriveResult deriveStats() { + /** + * Compute InAccurate cardinality before mv selector and tablet pruning. + * - Accurate statistical information relies on the selector of materialized views and bucket reduction. + * - However, Those both processes occur after the reorder algorithm is completed. + * - When Join reorder is turned on, the cardinality must be calculated before the reorder algorithm. + * - So only an inaccurate cardinality can be calculated here. + */ + cardinality = tmpRowCount; + applyConjunctsSelectivity(); + capCardinalityAtLimit(); + return new StatsDeriveResult(cardinality, tmpRowCount, slotIdToDataSize, slotIdToNdv); + } + + public void buildColumnToStats(OlapScanNode node) { + slotIdToDataSize = new HashMap<>(); + slotIdToNdv = new HashMap<>(); + for (SlotDescriptor slot : node.getTupleDesc().getSlots()) { + if (slot.getParent() != null + && slot.getParent().getTable() != null + && slot.getColumn() != null) { + long tableId = slot.getParent().getTable().getId(); + String columnName = slot.getColumn().getName(); + /*TODO:Implement the getStatistics interface + //now there is nothing in statistics, need to wait for collection finished + if (Catalog.getCurrentCatalog() + .getStatisticsManager() + .getStatistics() + .getColumnStats(tableId) != null) { + ndv = Catalog.getCurrentCatalog() + .getStatisticsManager() + .getStatistics() + .getColumnStats(tableId).get(columnName).getNdv(); + slotIdToNdv.put(slot.getId(), ndv); + //same as slotIdToDataSize + } + */ + } + } + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java new file mode 100644 index 00000000000000..0c81eb89412b61 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import com.google.common.collect.Maps; + +import java.util.Map; + +// This structure is maintained in each operator to store the statistical information results obtained by the operator. +public class StatsDeriveResult { + private long cardinality; + private long rowCount; + // The data size of the corresponding column in the operator + // The actual key is slotId + private final Map columnToDataSize = Maps.newHashMap(); + // The ndv of the corresponding column in the operator + // The actual key is slotId + private final Map columnToNdv = Maps.newHashMap(); + + public StatsDeriveResult() { + cardinality = -1; + rowCount = -1; + } + + public StatsDeriveResult(long cardinality, long rowCount, Map columnToDataSize, Map columnToNdv) { + this.cardinality = cardinality; + this.rowCount = rowCount; + this.columnToDataSize.putAll(columnToDataSize); + this.columnToNdv.putAll(columnToNdv); + } + + public void set(StatsDeriveResult statsDeriveResult) { + this.cardinality = statsDeriveResult.cardinality; + this.rowCount = statsDeriveResult.rowCount; + this.columnToDataSize.putAll(statsDeriveResult.columnToDataSize); + this.columnToNdv.putAll(statsDeriveResult.columnToNdv); + } + + public void setRowCount(long rowCount) { + this.rowCount = rowCount; + } + + public void setCardinality(long cardinality) { + this.cardinality = cardinality; + } + + public boolean isStatsDerived() { + return cardinality != -1 && rowCount != -1 && columnToDataSize.isEmpty() && columnToNdv.isEmpty(); + } + + public Map getColumnToNdv() { + return columnToNdv; + } + + public long getCardinality() { + return cardinality; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java new file mode 100644 index 00000000000000..f61972526318bd --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.planner.PlanNode; + +import java.util.HashMap; +import java.util.Map; + + +public class StatsRecursiveDerive { + Map typeToDerive = new HashMap<>(); + + /** + * Recursively complete the derivation of statistics for this node and all its children + * @param node + * This parameter is an input and output parameter, + * which will store the derivation result of statistical information in the corresponding node + */ + public void statsRecursiveDerive(PlanNode node) { + if (node.getStatsDeriveResult().isStatsDerived()) { + return; + } + for (PlanNode childNode : node.getChildren()) { + if (!childNode.getStatsDeriveResult().isStatsDerived()) { + statsRecursiveDerive(childNode); + } + } + + node.setStatsDeriveResult(typeToDerive.get(node.getNodeType()).init(node).deriveStats()); + } + + public void creteNodeTypeToDeriveMap() { + typeToDerive.put(PlanNode.NodeType.DEFAULT, new BaseStatsDerive() { + @Override + public StatsDeriveResult deriveStats() { + return new StatsDeriveResult(); + } + }); + typeToDerive.put(PlanNode.NodeType.OLAP_SCAN_NODE, new ScanStatsDerive()); + } +} From 9fbbecef0c95d6a7529f053f25439c60a79cd1d7 Mon Sep 17 00:00:00 2001 From: jianghaochen Date: Fri, 15 Apr 2022 10:50:15 +0800 Subject: [PATCH 2/4] Fix problems in review --- .../org/apache/doris/planner/EsScanNode.java | 2 +- .../apache/doris/planner/LoadScanNode.java | 2 +- .../apache/doris/planner/MysqlScanNode.java | 2 +- .../apache/doris/planner/OdbcScanNode.java | 2 +- .../apache/doris/planner/OlapScanNode.java | 66 +++++++++---------- .../org/apache/doris/planner/PlanNode.java | 29 ++++---- .../org/apache/doris/planner/ScanNode.java | 3 +- .../apache/doris/planner/SchemaScanNode.java | 2 +- .../doris/planner/SingleNodePlanner.java | 9 --- .../doris/statistics/BaseStatsDerive.java | 8 +-- .../doris/statistics/DeriveFactory.java | 41 ++++++++++++ .../doris/statistics/ScanStatsDerive.java | 16 +++-- .../apache/doris/statistics/Statistics.java | 12 ++++ .../doris/statistics/StatisticsManager.java | 4 ++ .../doris/statistics/StatsDeriveResult.java | 30 ++++----- .../statistics/StatsRecursiveDerive.java | 31 ++++----- .../apache/doris/statistics/TableStats.java | 12 ++++ 17 files changed, 165 insertions(+), 106 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java index 2ed1a4bde80859..96dbdbe934ec27 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java @@ -72,7 +72,7 @@ public class EsScanNode extends ScanNode { boolean isFinalized = false; public EsScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) { - super(id, desc, planNodeName); + super(id, desc, planNodeName, NodeType.ES_SCAN_NODE); table = (EsTable) (desc.getTable()); esTablePartitions = table.getEsTablePartitions(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java index b6dbb4f782ca1e..4479530941e854 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java @@ -54,7 +54,7 @@ public abstract class LoadScanNode extends ScanNode { protected LoadTask.MergeType mergeType = LoadTask.MergeType.APPEND; public LoadScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) { - super(id, desc, planNodeName); + super(id, desc, planNodeName, NodeType.LOAD_SCAN_NODE); } protected void initAndSetWhereExpr(Expr whereExpr, TupleDescriptor tupleDesc, Analyzer analyzer) throws UserException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java index fcc22125c99117..9235b1fc75cc4d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java @@ -56,7 +56,7 @@ public class MysqlScanNode extends ScanNode { * Constructs node to scan given data files of table 'tbl'. */ public MysqlScanNode(PlanNodeId id, TupleDescriptor desc, MysqlTable tbl) { - super(id, desc, "SCAN MYSQL"); + super(id, desc, "SCAN MYSQL", NodeType.MYSQL_SCAN_NODE); tblName = "`" + tbl.getMysqlTableName() + "`"; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java index 90f989d35c97e6..1f32b9e93823fe 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java @@ -73,7 +73,7 @@ private static boolean shouldPushDownConjunct(TOdbcTableType tableType, Expr exp * Constructs node to scan given data files of table 'tbl'. */ public OdbcScanNode(PlanNodeId id, TupleDescriptor desc, OdbcTable tbl) { - super(id, desc, "SCAN ODBC"); + super(id, desc, "SCAN ODBC", NodeType.ODBC_SCAN_NODE); connectString = tbl.getConnectString(); odbcType = tbl.getOdbcTableType(); tblName = OdbcTable.databaseProperName(odbcType, tbl.getOdbcTableName()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index 561f993c4a6d68..1fdd6ac01ebf64 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -53,6 +53,7 @@ import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.SessionVariable; import org.apache.doris.resource.Tag; +import org.apache.doris.statistics.StatsRecursiveDerive; import org.apache.doris.system.Backend; import org.apache.doris.thrift.TExplainLevel; import org.apache.doris.thrift.TNetworkAddress; @@ -134,7 +135,6 @@ public class OlapScanNode extends ScanNode { private int selectedPartitionNum = 0; private Collection selectedPartitionIds = Lists.newArrayList(); private long totalBytes = 0; - private long tmpRowCount = 0; // List of tablets will be scanned by current olap_scan_node private ArrayList scanTabletIds = Lists.newArrayList(); @@ -147,7 +147,7 @@ public class OlapScanNode extends ScanNode { // Constructs node to scan given data files of table 'tbl'. public OlapScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) { - super(id, desc, planNodeName); + super(id, desc, planNodeName, NodeType.OLAP_SCAN_NODE); olapTable = (OlapTable) desc.getTable(); } @@ -200,11 +200,6 @@ public void setSelectedPartitionIds(Collection selectedPartitionIds) { this.selectedPartitionIds = selectedPartitionIds; } - @Override - public NodeType getNodeType() { - return NodeType.OLAP_SCAN_NODE; - } - /** * The function is used to directly select the index id of the base table as the selectedIndexId. * It makes sure that the olap scan node must scan the base data rather than scan the materialized view data. @@ -344,8 +339,31 @@ public void init(Analyzer analyzer) throws UserException { computePartitionInfo(); computeTupleState(analyzer); - // compute for incorrect cardinality - computeTmpRowCount(); + /** + * Compute InAccurate cardinality before mv selector and tablet pruning. + * - Accurate statistical information relies on the selector of materialized views and bucket reduction. + * - However, Those both processes occur after the reorder algorithm is completed. + * - When Join reorder is turned on, the cardinality must be calculated before the reorder algorithm. + * - So only an inaccurate cardinality can be calculated here. + */ + if (analyzer.safeIsEnableJoinReorderBasedCost()) { + mockRowCountInStatistic(); + computeInaccurateCardinality(); + } + } + + /** + * Remove the method after statistics collection is working properly + */ + public void mockRowCountInStatistic() { + long tableId = desc.getTable().getId(); + cardinality = 0; + for (long selectedPartitionId : selectedPartitionIds) { + final Partition partition = olapTable.getPartition(selectedPartitionId); + final MaterializedIndex baseIndex = partition.getBaseIndex(); + cardinality += baseIndex.getRowCount(); + } + Catalog.getCurrentCatalog().getStatisticsManager().getStatistics().mockTableStatsWithRowCount(tableId, cardinality); } @Override @@ -395,32 +413,9 @@ protected void computeNumNodes() { numNodes = numNodes <= 0 ? 1 : numNodes; } - /** - * Calculate inaccurate cardinality. - * cardinality: the value of cardinality is the sum of rowcount which belongs to selectedPartitionIds - * The cardinality here is actually inaccurate, it will be greater than the actual value. - * There are two reasons - * 1. During the actual execution, not all tablets belonging to the selected partition will be scanned. - * Some tablets may have been pruned before execution. - * 2. The base index may eventually be replaced by mv index. - *

- * There are three steps to calculate cardinality - * 1. Calculate how many rows were scanned - * 2. Apply conjunct - * 3. Apply limit - */ - private void computeTmpRowCount() { - // step1: Calculate how many rows were scanned - tmpRowCount = 0; - for (long selectedPartitionId : selectedPartitionIds) { - final Partition partition = olapTable.getPartition(selectedPartitionId); - final MaterializedIndex baseIndex = partition.getBaseIndex(); - tmpRowCount += baseIndex.getRowCount(); - } - } - - public long getTmpRowCount() { - return tmpRowCount; + private void computeInaccurateCardinality() { + StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this); + cardinality = statsDeriveResult.getCardinality(); } private Collection partitionPrune(PartitionInfo partitionInfo, PartitionNames partitionNames) throws AnalysisException { @@ -566,6 +561,7 @@ private void addScanRangeLocations(Partition partition, // FIXME(dhc): we use cardinality here to simulate ndv // update statsDeriveResult for real statistics + // After statistics collection is complete, remove the logic statsDeriveResult.setRowCount(cardinality); for (Map.Entry entry : statsDeriveResult.getColumnToNdv().entrySet()) { if (entry.getValue() > 0) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java index cae34431a93471..39fe7a3a4d4d18 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java @@ -55,6 +55,7 @@ import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.Optional; import java.util.Set; /** @@ -136,7 +137,7 @@ abstract public class PlanNode extends TreeNode { protected List outputSlotIds; - private NodeType nodeType = NodeType.DEFAULT; + protected NodeType nodeType = NodeType.DEFAULT; protected StatsDeriveResult statsDeriveResult = new StatsDeriveResult(); protected PlanNode(PlanNodeId id, ArrayList tupleIds, String planNodeName) { @@ -178,26 +179,32 @@ protected PlanNode(PlanNodeId id, PlanNode node, String planNodeName) { "V" + planNodeName : planNodeName; this.numInstances = 1; this.nodeType = node.getNodeType(); - this.statsDeriveResult.set(node.getStatsDeriveResult()); + this.statsDeriveResult = new StatsDeriveResult( + node.getStatsDeriveResult().get().getCardinality(), + node.getStatsDeriveResult().get().getRowCount(), + node.getStatsDeriveResult().get().getColumnToDataSize(), + node.getStatsDeriveResult().get().getColumnToNdv()); } public enum NodeType { DEFAULT, AGG_NODE, - OLAP_SCAN_NODE, HASH_JOIN_NODE, - MERGE_NODE + MERGE_NODE, + ES_SCAN_NODE, + LOAD_SCAN_NODE, + MYSQL_SCAN_NODE, + ODBC_SCAN_NODE, + OLAP_SCAN_NODE, + SCHEMA_SCAN_NODE, } public String getPlanNodeName() { return planNodeName; } - public StatsDeriveResult getStatsDeriveResult() { - if (statsDeriveResult == null) { - statsDeriveResult = new StatsDeriveResult(); - } - return statsDeriveResult; + public Optional getStatsDeriveResult() { + return Optional.ofNullable(statsDeriveResult); } public NodeType getNodeType() { @@ -208,10 +215,6 @@ public void setStatsDeriveResult(StatsDeriveResult statsDeriveResult) { this.statsDeriveResult = statsDeriveResult; } - public void setNodeType(NodeType nodeType) { - this.nodeType = nodeType; - } - /** * Sets tblRefIds_, tupleIds_, and nullableTupleIds_. * The default implementation is a no-op. diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java index 8b8c52b5bcccd4..a891f7616f8007 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java @@ -65,8 +65,9 @@ abstract public class ScanNode extends PlanNode { protected String sortColumn = null; protected Analyzer analyzer; - public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) { + public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, NodeType nodeType) { super(id, desc.getId().asList(), planNodeName); + super.nodeType = nodeType; this.desc = desc; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java index c5692c38e8d25c..07152bfd5e262b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java @@ -57,7 +57,7 @@ public class SchemaScanNode extends ScanNode { * Constructs node to scan given data files of table 'tbl'. */ public SchemaScanNode(PlanNodeId id, TupleDescriptor desc) { - super(id, desc, "SCAN SCHEMA"); + super(id, desc, "SCAN SCHEMA", NodeType.SCHEMA_SCAN_NODE); this.tableName = desc.getTable().getName(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java index 4e65ca75d83e4b..a78435a3dd5880 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java @@ -71,7 +71,6 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; -import org.apache.doris.statistics.StatsRecursiveDerive; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -97,7 +96,6 @@ public class SingleNodePlanner { private final PlannerContext ctx_; private final ArrayList scanNodes = Lists.newArrayList(); private Map> selectStmtToScanNodes = Maps.newHashMap(); - private StatsRecursiveDerive statsRecursiveDerive; public SingleNodePlanner(PlannerContext ctx) { ctx_ = ctx; @@ -166,8 +164,6 @@ public PlanNode createSingleNodePlan() throws UserException, AnalysisException { if (LOG.isTraceEnabled()) { LOG.trace("desctbl: " + analyzer.getDescTbl().debugString()); } - statsRecursiveDerive = new StatsRecursiveDerive(); - statsRecursiveDerive.creteNodeTypeToDeriveMap(); PlanNode singleNodePlan = createQueryPlan(queryStmt, analyzer, ctx_.getQueryOptions().getDefaultOrderByLimit()); Preconditions.checkNotNull(singleNodePlan); @@ -1729,11 +1725,6 @@ private PlanNode createScanNode(Analyzer analyzer, TableRef tblRef, SelectStmt s scanNodeList.add(scanNode); scanNode.init(analyzer); - if (analyzer.safeIsEnableJoinReorderBasedCost()) { - statsRecursiveDerive.statsRecursiveDerive(scanNode); - // Update the node's cardinality value for the current architecture - scanNode.cardinality = scanNode.getStatsDeriveResult().getCardinality(); - } return scanNode; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java index 4f40b451df651d..95f5d77fb356d6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java @@ -25,18 +25,18 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Optional; public abstract class BaseStatsDerive { // estimate of the output cardinality of this node; // invalid: -1 - protected long cardinality; - protected long limit; + protected long cardinality = -1; + protected long limit = -1; protected List conjuncts = Lists.newArrayList(); - protected List childrenStatsResult = Lists.newArrayList(); + protected List> childrenStatsResult = Lists.newArrayList(); protected BaseStatsDerive init(PlanNode node) { - cardinality = -1; limit = node.getLimit(); conjuncts.addAll(node.getConjuncts()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java new file mode 100644 index 00000000000000..b9c085eb567049 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.planner.PlanNode; + +public class DeriveFactory { + + public BaseStatsDerive getStatsDerive(PlanNode.NodeType nodeType) { + switch (nodeType) { + case AGG_NODE: + case HASH_JOIN_NODE: + case MERGE_NODE: + break; + case OLAP_SCAN_NODE: + return new ScanStatsDerive(); + case DEFAULT: + } + return new BaseStatsDerive() { + @Override + public StatsDeriveResult deriveStats() { + return new StatsDeriveResult(); + } + }; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ScanStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ScanStatsDerive.java index 711d5a13a0abc9..8ec63c331e942e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ScanStatsDerive.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ScanStatsDerive.java @@ -19,6 +19,7 @@ import com.google.common.base.Preconditions; import org.apache.doris.analysis.SlotDescriptor; +import org.apache.doris.catalog.Catalog; import org.apache.doris.planner.OlapScanNode; import org.apache.doris.planner.PlanNode; @@ -30,8 +31,8 @@ public class ScanStatsDerive extends BaseStatsDerive { // the selected materialized view is not determined when calculating the statistical information of scan, // so baseIndex is used for calculation when generating Planner. - // The tmpRowCount here is the temporary number of rows. - private long tmpRowCount; + // The rowCount here is the number of rows. + private long rowCount = -1; private Map slotIdToDataSize; private Map slotIdToNdv; @@ -39,7 +40,6 @@ public class ScanStatsDerive extends BaseStatsDerive { public ScanStatsDerive init(PlanNode node) { Preconditions.checkState(node instanceof OlapScanNode); super.init(node); - tmpRowCount = ((OlapScanNode)node).getTmpRowCount(); buildColumnToStats((OlapScanNode)node); return this; } @@ -53,15 +53,21 @@ public StatsDeriveResult deriveStats() { * - When Join reorder is turned on, the cardinality must be calculated before the reorder algorithm. * - So only an inaccurate cardinality can be calculated here. */ - cardinality = tmpRowCount; + cardinality = rowCount; applyConjunctsSelectivity(); capCardinalityAtLimit(); - return new StatsDeriveResult(cardinality, tmpRowCount, slotIdToDataSize, slotIdToNdv); + return new StatsDeriveResult(cardinality, rowCount, slotIdToDataSize, slotIdToNdv); } public void buildColumnToStats(OlapScanNode node) { slotIdToDataSize = new HashMap<>(); slotIdToNdv = new HashMap<>(); + if (node.getTupleDesc() != null + && node.getTupleDesc().getTable() != null) { + long tableId = node.getTupleDesc().getTable().getId(); + rowCount = Catalog.getCurrentCatalog().getStatisticsManager() + .getStatistics().getTableStats(tableId).getRowCount(); + } for (SlotDescriptor slot : node.getTupleDesc().getSlots()) { if (slot.getParent() != null && slot.getParent().getTable() != null diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java index b17a37858d8793..72aaa1cee4f723 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java @@ -70,4 +70,16 @@ public Map getColumnStats(long tableId) { } return tableStats.getNameToColumnStats(); } + + public void mockTableStatsWithRowCount(long tableId, long rowCount) { + TableStats tableStats = idToTableStats.get(tableId); + if (tableStats == null) { + tableStats = new TableStats(); + idToTableStats.put(tableId, tableStats); + } + + if (tableStats.getRowCount() != rowCount) { + tableStats.setRowCount(rowCount); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java index e994cce2120460..b1c88e36a58c63 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java @@ -136,4 +136,8 @@ private Table validateTableName(TableName dbTableName) throws AnalysisException Table table = db.getTableOrAnalysisException(tableName); return table; } + + public Statistics getStatistics() { + return statistics; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java index 0c81eb89412b61..d14aae9ed62cf2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java @@ -23,8 +23,8 @@ // This structure is maintained in each operator to store the statistical information results obtained by the operator. public class StatsDeriveResult { - private long cardinality; - private long rowCount; + private long cardinality = -1; + private long rowCount = -1; // The data size of the corresponding column in the operator // The actual key is slotId private final Map columnToDataSize = Maps.newHashMap(); @@ -32,10 +32,7 @@ public class StatsDeriveResult { // The actual key is slotId private final Map columnToNdv = Maps.newHashMap(); - public StatsDeriveResult() { - cardinality = -1; - rowCount = -1; - } + public StatsDeriveResult() {} public StatsDeriveResult(long cardinality, long rowCount, Map columnToDataSize, Map columnToNdv) { this.cardinality = cardinality; @@ -44,13 +41,6 @@ public StatsDeriveResult(long cardinality, long rowCount, Map column this.columnToNdv.putAll(columnToNdv); } - public void set(StatsDeriveResult statsDeriveResult) { - this.cardinality = statsDeriveResult.cardinality; - this.rowCount = statsDeriveResult.rowCount; - this.columnToDataSize.putAll(statsDeriveResult.columnToDataSize); - this.columnToNdv.putAll(statsDeriveResult.columnToNdv); - } - public void setRowCount(long rowCount) { this.rowCount = rowCount; } @@ -60,14 +50,22 @@ public void setCardinality(long cardinality) { } public boolean isStatsDerived() { - return cardinality != -1 && rowCount != -1 && columnToDataSize.isEmpty() && columnToNdv.isEmpty(); + return cardinality != -1 && rowCount != -1 && !columnToDataSize.isEmpty() && !columnToNdv.isEmpty(); + } + + public long getCardinality() { + return cardinality; + } + + public long getRowCount() { + return rowCount; } public Map getColumnToNdv() { return columnToNdv; } - public long getCardinality() { - return cardinality; + public Map getColumnToDataSize() { + return columnToDataSize; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java index f61972526318bd..27cd7eca86b1d4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java @@ -19,12 +19,17 @@ import org.apache.doris.planner.PlanNode; -import java.util.HashMap; -import java.util.Map; - public class StatsRecursiveDerive { - Map typeToDerive = new HashMap<>(); + private StatsRecursiveDerive() {} + + public static StatsRecursiveDerive getStatsRecursiveDerive() { + return Inner.INSTANCE; + } + + private static class Inner { + private static final StatsRecursiveDerive INSTANCE = new StatsRecursiveDerive(); + } /** * Recursively complete the derivation of statistics for this node and all its children @@ -33,25 +38,15 @@ public class StatsRecursiveDerive { * which will store the derivation result of statistical information in the corresponding node */ public void statsRecursiveDerive(PlanNode node) { - if (node.getStatsDeriveResult().isStatsDerived()) { + if (node.getStatsDeriveResult().get().isStatsDerived()) { return; } for (PlanNode childNode : node.getChildren()) { - if (!childNode.getStatsDeriveResult().isStatsDerived()) { + if (!childNode.getStatsDeriveResult().get().isStatsDerived()) { statsRecursiveDerive(childNode); } } - - node.setStatsDeriveResult(typeToDerive.get(node.getNodeType()).init(node).deriveStats()); - } - - public void creteNodeTypeToDeriveMap() { - typeToDerive.put(PlanNode.NodeType.DEFAULT, new BaseStatsDerive() { - @Override - public StatsDeriveResult deriveStats() { - return new StatsDeriveResult(); - } - }); - typeToDerive.put(PlanNode.NodeType.OLAP_SCAN_NODE, new ScanStatsDerive()); + DeriveFactory deriveFactory = new DeriveFactory(); + node.setStatsDeriveResult(deriveFactory.getStatsDerive(node.getNodeType()).init(node).deriveStats()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java index efb35bbe10f491..ef494bd9f66126 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java @@ -94,4 +94,16 @@ public List getShowInfo() { public Map getNameToColumnStats() { return nameToColumnStats; } + + public long getRowCount() { + return rowCount; + } + + public long getDataSize() { + return dataSize; + } + + public void setRowCount(long rowCount) { + this.rowCount = rowCount; + } } From 1e4da1cd4dcc704221b0987e236a8f5d829f77cc Mon Sep 17 00:00:00 2001 From: jianghaochen Date: Thu, 21 Apr 2022 11:16:04 +0800 Subject: [PATCH 3/4] Modify the review question --- .../apache/doris/planner/BrokerScanNode.java | 7 ++ .../apache/doris/planner/OlapScanNode.java | 16 ++-- .../org/apache/doris/planner/PlanNode.java | 14 +--- .../doris/statistics/BaseStatsDerive.java | 73 ++++++++++++++----- .../doris/statistics/DeriveFactory.java | 9 +-- ...tsDerive.java => OlapScanStatsDerive.java} | 69 +++++++++++------- .../apache/doris/statistics/Statistics.java | 1 + .../doris/statistics/StatsDeriveResult.java | 18 +---- .../statistics/StatsRecursiveDerive.java | 12 ++- 9 files changed, 128 insertions(+), 91 deletions(-) rename fe/fe-core/src/main/java/org/apache/doris/statistics/{ScanStatsDerive.java => OlapScanStatsDerive.java} (63%) diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java index 7338d9bbb04e0c..fc05ac462d62bd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java @@ -143,6 +143,13 @@ public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planN this.filesAdded = filesAdded; } + public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName, + List> fileStatusesList, int filesAdded) { + super(id, destTupleDesc, planNodeName, NodeType.BROKER_SCAN_NODE); + this.fileStatusesList = fileStatusesList; + this.filesAdded = filesAdded; + } + @Override public void init(Analyzer analyzer) throws UserException { super.init(analyzer); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index 1fdd6ac01ebf64..139451da16897a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -413,9 +413,9 @@ protected void computeNumNodes() { numNodes = numNodes <= 0 ? 1 : numNodes; } - private void computeInaccurateCardinality() { + private void computeInaccurateCardinality() throws UserException { StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this); - cardinality = statsDeriveResult.getCardinality(); + cardinality = statsDeriveResult.getRowCount(); } private Collection partitionPrune(PartitionInfo partitionInfo, PartitionNames partitionNames) throws AnalysisException { @@ -562,13 +562,15 @@ private void addScanRangeLocations(Partition partition, // FIXME(dhc): we use cardinality here to simulate ndv // update statsDeriveResult for real statistics // After statistics collection is complete, remove the logic - statsDeriveResult.setRowCount(cardinality); - for (Map.Entry entry : statsDeriveResult.getColumnToNdv().entrySet()) { - if (entry.getValue() > 0) { - cardinality = Math.min(cardinality, entry.getValue()); + if (analyzer.safeIsEnableJoinReorderBasedCost()) { + statsDeriveResult.setRowCount(cardinality); + for (Map.Entry entry : statsDeriveResult.getColumnToNdv().entrySet()) { + if (entry.getValue() > 0) { + cardinality = Math.min(cardinality, entry.getValue()); + } } + statsDeriveResult.setRowCount(cardinality); } - statsDeriveResult.setCardinality(cardinality); if (tablets.size() == 0) { desc.setCardinality(0); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java index 39fe7a3a4d4d18..e8147f155f18c0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java @@ -55,7 +55,6 @@ import java.util.Collection; import java.util.Collections; import java.util.List; -import java.util.Optional; import java.util.Set; /** @@ -138,7 +137,7 @@ abstract public class PlanNode extends TreeNode { protected List outputSlotIds; protected NodeType nodeType = NodeType.DEFAULT; - protected StatsDeriveResult statsDeriveResult = new StatsDeriveResult(); + protected StatsDeriveResult statsDeriveResult; protected PlanNode(PlanNodeId id, ArrayList tupleIds, String planNodeName) { this.id = id; @@ -178,12 +177,7 @@ protected PlanNode(PlanNodeId id, PlanNode node, String planNodeName) { this.planNodeName = VectorizedUtil.isVectorized() ? "V" + planNodeName : planNodeName; this.numInstances = 1; - this.nodeType = node.getNodeType(); - this.statsDeriveResult = new StatsDeriveResult( - node.getStatsDeriveResult().get().getCardinality(), - node.getStatsDeriveResult().get().getRowCount(), - node.getStatsDeriveResult().get().getColumnToDataSize(), - node.getStatsDeriveResult().get().getColumnToNdv()); + this.nodeType = nodeType; } public enum NodeType { @@ -203,8 +197,8 @@ public String getPlanNodeName() { return planNodeName; } - public Optional getStatsDeriveResult() { - return Optional.ofNullable(statsDeriveResult); + public StatsDeriveResult getStatsDeriveResult() { + return statsDeriveResult; } public NodeType getNodeType() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java index 95f5d77fb356d6..b2ada3b5479be2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java @@ -20,40 +20,50 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import org.apache.doris.analysis.Expr; +import org.apache.doris.common.UserException; import org.apache.doris.planner.PlanNode; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; -import java.util.Optional; -public abstract class BaseStatsDerive { - // estimate of the output cardinality of this node; +public class BaseStatsDerive { + private static final Logger LOG = LogManager.getLogger(BaseStatsDerive.class); + // estimate of the output rowCount of this node; // invalid: -1 - protected long cardinality = -1; + protected long rowCount = -1; protected long limit = -1; protected List conjuncts = Lists.newArrayList(); - protected List> childrenStatsResult = Lists.newArrayList(); + protected List childrenStatsResult = Lists.newArrayList(); - protected BaseStatsDerive init(PlanNode node) { + protected void init(PlanNode node) throws UserException { limit = node.getLimit(); conjuncts.addAll(node.getConjuncts()); for (PlanNode childNode : node.getChildren()) { - childrenStatsResult.add(childNode.getStatsDeriveResult()); + StatsDeriveResult result = childNode.getStatsDeriveResult(); + if (result == null) { + throw new UserException("childNode statsDeriveResult is null, childNodeType is " + childNode.getNodeType() + + "parentNodeType is " + node.getNodeType()); + } + childrenStatsResult.add(result); } - return this; } - public abstract StatsDeriveResult deriveStats(); + public StatsDeriveResult deriveStats() { + return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv()); + } public boolean hasLimit() { return limit > -1; } protected void applyConjunctsSelectivity() { - if (cardinality == -1) { + if (rowCount == -1) { return; } applySelectivity(); @@ -61,12 +71,12 @@ protected void applyConjunctsSelectivity() { private void applySelectivity() { double selectivity = computeSelectivity(); - Preconditions.checkState(cardinality >= 0); - long preConjunctCardinality = cardinality; - cardinality = Math.round(cardinality * selectivity); - // don't round cardinality down to zero for safety. - if (cardinality == 0 && preConjunctCardinality > 0) { - cardinality = 1; + Preconditions.checkState(rowCount >= 0); + long preConjunctrowCount = rowCount; + rowCount = Math.round(rowCount * selectivity); + // don't round rowCount down to zero for safety. + if (rowCount == 0 && preConjunctrowCount > 0) { + rowCount = 1; } } @@ -89,7 +99,7 @@ protected double computeSelectivity() { * The second issue is addressed by an exponential backoff when multiplying each * additional selectivity into the final result. */ - static protected double computeCombinedSelectivity(List conjuncts) { + protected double computeCombinedSelectivity(List conjuncts) { // Collect all estimated selectivities. List selectivities = new ArrayList<>(); for (Expr e : conjuncts) { @@ -114,9 +124,34 @@ static protected double computeCombinedSelectivity(List conjuncts) { return Math.max(0.0, Math.min(1.0, result)); } - protected void capCardinalityAtLimit() { + protected void capRowCountAtLimit() { if (hasLimit()) { - cardinality = cardinality == -1 ? limit : Math.min(cardinality, limit); + rowCount = rowCount == -1 ? limit : Math.min(rowCount, limit); + } + } + + + // Currently it simply adds the number of rows of children + protected long deriveRowCount() { + applyConjunctsSelectivity(); + capRowCountAtLimit(); + return rowCount; + } + + + protected HashMap deriveColumnToDataSize() { + HashMap columnToDataSize = new HashMap<>(); + for (StatsDeriveResult child : childrenStatsResult) { + columnToDataSize.putAll(child.getColumnToDataSize()); + } + return columnToDataSize; + } + + protected HashMap deriveColumnToNdv() { + HashMap columnToNdv = new HashMap<>(); + for (StatsDeriveResult child : childrenStatsResult) { + columnToNdv.putAll(child.getColumnToNdv()); } + return columnToNdv; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java index b9c085eb567049..d663bf5e08c677 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java @@ -28,14 +28,9 @@ public BaseStatsDerive getStatsDerive(PlanNode.NodeType nodeType) { case MERGE_NODE: break; case OLAP_SCAN_NODE: - return new ScanStatsDerive(); + return new OlapScanStatsDerive(); case DEFAULT: } - return new BaseStatsDerive() { - @Override - public StatsDeriveResult deriveStats() { - return new StatsDeriveResult(); - } - }; + return new BaseStatsDerive(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ScanStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java similarity index 63% rename from fe/fe-core/src/main/java/org/apache/doris/statistics/ScanStatsDerive.java rename to fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java index 8ec63c331e942e..fe47adada68cb7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ScanStatsDerive.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java @@ -20,28 +20,28 @@ import com.google.common.base.Preconditions; import org.apache.doris.analysis.SlotDescriptor; import org.apache.doris.catalog.Catalog; +import org.apache.doris.common.UserException; import org.apache.doris.planner.OlapScanNode; import org.apache.doris.planner.PlanNode; import java.util.HashMap; import java.util.Map; -public class ScanStatsDerive extends BaseStatsDerive { +public class OlapScanStatsDerive extends BaseStatsDerive { // Currently, due to the structure of doris, // the selected materialized view is not determined when calculating the statistical information of scan, // so baseIndex is used for calculation when generating Planner. // The rowCount here is the number of rows. - private long rowCount = -1; + private long inputRowCount = -1; private Map slotIdToDataSize; private Map slotIdToNdv; @Override - public ScanStatsDerive init(PlanNode node) { + public void init(PlanNode node) throws UserException { Preconditions.checkState(node instanceof OlapScanNode); super.init(node); buildColumnToStats((OlapScanNode)node); - return this; } @Override @@ -53,10 +53,8 @@ public StatsDeriveResult deriveStats() { * - When Join reorder is turned on, the cardinality must be calculated before the reorder algorithm. * - So only an inaccurate cardinality can be calculated here. */ - cardinality = rowCount; - applyConjunctsSelectivity(); - capCardinalityAtLimit(); - return new StatsDeriveResult(cardinality, rowCount, slotIdToDataSize, slotIdToNdv); + rowCount = inputRowCount; + return super.deriveStats(); } public void buildColumnToStats(OlapScanNode node) { @@ -65,30 +63,45 @@ public void buildColumnToStats(OlapScanNode node) { if (node.getTupleDesc() != null && node.getTupleDesc().getTable() != null) { long tableId = node.getTupleDesc().getTable().getId(); - rowCount = Catalog.getCurrentCatalog().getStatisticsManager() + inputRowCount = Catalog.getCurrentCatalog().getStatisticsManager() .getStatistics().getTableStats(tableId).getRowCount(); } for (SlotDescriptor slot : node.getTupleDesc().getSlots()) { - if (slot.getParent() != null - && slot.getParent().getTable() != null - && slot.getColumn() != null) { - long tableId = slot.getParent().getTable().getId(); - String columnName = slot.getColumn().getName(); - /*TODO:Implement the getStatistics interface - //now there is nothing in statistics, need to wait for collection finished - if (Catalog.getCurrentCatalog() - .getStatisticsManager() - .getStatistics() - .getColumnStats(tableId) != null) { - ndv = Catalog.getCurrentCatalog() - .getStatisticsManager() - .getStatistics() - .getColumnStats(tableId).get(columnName).getNdv(); - slotIdToNdv.put(slot.getId(), ndv); - //same as slotIdToDataSize - } - */ + if (!slot.isMaterialized()) { + continue; } + + long tableId = slot.getParent().getTable().getId(); + String columnName = slot.getColumn().getName(); + /*TODO:Implement the getStatistics interface + //now there is nothing in statistics, need to wait for collection finished + long ndv = -1; + long dataSize = -1; + getNdvAndDataSizeFromStatistics(ndv, dataSize); + + slotIdToNdv.put(slot.getId(), ndv); + slotIdToDataSize.put(slot.getId(), dataSize); + */ } } + + public void getNdvAndDataSizeFromStatistics(long ndv, long dataSize) { + /* + ndv = -1; + dataSize = -1; + if (Catalog.getCurrentCatalog() + .getStatisticsManager() + .getStatistics() + .getColumnStats(tableId) != null) { + ndv = Catalog.getCurrentCatalog() + .getStatisticsManager() + .getStatistics() + .getColumnStats(tableId).get(columnName).getNdv(); + dataSize = Catalog.getCurrentCatalog() + .getStatisticsManager() + .getStatistics() + .getColumnStats(tableId).get(columnName).getDataSize(); + } + */ + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java index 72aaa1cee4f723..58003de90c06af 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java @@ -71,6 +71,7 @@ public Map getColumnStats(long tableId) { return tableStats.getNameToColumnStats(); } + // TODO: mock statistics need to be removed in the future public void mockTableStatsWithRowCount(long tableId, long rowCount) { TableStats tableStats = idToTableStats.get(tableId); if (tableStats == null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java index d14aae9ed62cf2..e5419ae54cb3b7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java @@ -23,7 +23,6 @@ // This structure is maintained in each operator to store the statistical information results obtained by the operator. public class StatsDeriveResult { - private long cardinality = -1; private long rowCount = -1; // The data size of the corresponding column in the operator // The actual key is slotId @@ -34,8 +33,7 @@ public class StatsDeriveResult { public StatsDeriveResult() {} - public StatsDeriveResult(long cardinality, long rowCount, Map columnToDataSize, Map columnToNdv) { - this.cardinality = cardinality; + public StatsDeriveResult(long rowCount, Map columnToDataSize, Map columnToNdv) { this.rowCount = rowCount; this.columnToDataSize.putAll(columnToDataSize); this.columnToNdv.putAll(columnToNdv); @@ -44,19 +42,7 @@ public StatsDeriveResult(long cardinality, long rowCount, Map column public void setRowCount(long rowCount) { this.rowCount = rowCount; } - - public void setCardinality(long cardinality) { - this.cardinality = cardinality; - } - - public boolean isStatsDerived() { - return cardinality != -1 && rowCount != -1 && !columnToDataSize.isEmpty() && !columnToNdv.isEmpty(); - } - - public long getCardinality() { - return cardinality; - } - + public long getRowCount() { return rowCount; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java index 27cd7eca86b1d4..e6159a594de6ba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java @@ -17,6 +17,7 @@ package org.apache.doris.statistics; +import org.apache.doris.common.UserException; import org.apache.doris.planner.PlanNode; @@ -37,16 +38,19 @@ private static class Inner { * This parameter is an input and output parameter, * which will store the derivation result of statistical information in the corresponding node */ - public void statsRecursiveDerive(PlanNode node) { - if (node.getStatsDeriveResult().get().isStatsDerived()) { + public void statsRecursiveDerive(PlanNode node) throws UserException { + if (node.getStatsDeriveResult() != null) { return; } for (PlanNode childNode : node.getChildren()) { - if (!childNode.getStatsDeriveResult().get().isStatsDerived()) { + if (childNode.getStatsDeriveResult() == null) { statsRecursiveDerive(childNode); } } DeriveFactory deriveFactory = new DeriveFactory(); - node.setStatsDeriveResult(deriveFactory.getStatsDerive(node.getNodeType()).init(node).deriveStats()); + BaseStatsDerive deriveStats = deriveFactory.getStatsDerive(node.getNodeType()); + deriveStats.init(node); + StatsDeriveResult result = deriveStats.deriveStats(); + node.setStatsDeriveResult(result); } } From 25bd40790e53e2e26ae3218926461decaa765a17 Mon Sep 17 00:00:00 2001 From: jianghaochen Date: Thu, 21 Apr 2022 16:55:35 +0800 Subject: [PATCH 4/4] Fix the ut problem introduced by #9047 and fixed the review problem --- .../apache/doris/planner/BrokerScanNode.java | 6 +-- .../apache/doris/planner/HiveScanNode.java | 2 +- .../apache/doris/planner/IcebergScanNode.java | 2 +- .../apache/doris/planner/LoadScanNode.java | 4 ++ .../apache/doris/planner/OlapScanNode.java | 19 +++----- .../org/apache/doris/planner/PlanNode.java | 3 ++ .../org/apache/doris/qe/StmtExecutor.java | 8 ---- .../doris/statistics/BaseStatsDerive.java | 12 +++-- .../doris/statistics/OlapScanStatsDerive.java | 45 ++++++++++--------- .../doris/statistics/StatsDeriveResult.java | 15 +++---- .../apache/doris/analysis/ExplainTest.java | 13 ++++-- .../apache/doris/planner/QueryPlanTest.java | 7 +-- 12 files changed, 72 insertions(+), 64 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java index fc05ac462d62bd..73aa1fb04d3cf9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java @@ -138,14 +138,14 @@ private static class ParamCreateContext { public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName, List> fileStatusesList, int filesAdded) { - super(id, destTupleDesc, planNodeName); + super(id, destTupleDesc, planNodeName, NodeType.BROKER_SCAN_NODE); this.fileStatusesList = fileStatusesList; this.filesAdded = filesAdded; } public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName, - List> fileStatusesList, int filesAdded) { - super(id, destTupleDesc, planNodeName, NodeType.BROKER_SCAN_NODE); + List> fileStatusesList, int filesAdded, NodeType nodeType) { + super(id, destTupleDesc, planNodeName, nodeType); this.fileStatusesList = fileStatusesList; this.filesAdded = filesAdded; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java index 76cf53431316e1..c18533d3023f65 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java @@ -100,7 +100,7 @@ public List getPartitionKeys() { public HiveScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName, List> fileStatusesList, int filesAdded) { - super(id, destTupleDesc, planNodeName, fileStatusesList, filesAdded); + super(id, destTupleDesc, planNodeName, fileStatusesList, filesAdded, NodeType.HIVE_SCAN_NODE); this.hiveTable = (HiveTable) destTupleDesc.getTable(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java index 5428c9ee555ec8..5c7dd1fad00625 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java @@ -47,7 +47,7 @@ public class IcebergScanNode extends BrokerScanNode { public IcebergScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, List> fileStatusesList, int filesAdded) { - super(id, desc, planNodeName, fileStatusesList, filesAdded); + super(id, desc, planNodeName, fileStatusesList, filesAdded, NodeType.ICEBREG_SCAN_NODE); icebergTable = (IcebergTable) desc.getTable(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java index 4479530941e854..82299e2ea6ebf3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java @@ -57,6 +57,10 @@ public LoadScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) { super(id, desc, planNodeName, NodeType.LOAD_SCAN_NODE); } + public LoadScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, NodeType nodeType) { + super(id, desc, planNodeName, nodeType); + } + protected void initAndSetWhereExpr(Expr whereExpr, TupleDescriptor tupleDesc, Analyzer analyzer) throws UserException { Expr newWhereExpr = initWhereExpr(whereExpr, tupleDesc, analyzer); if (newWhereExpr != null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index 139451da16897a..6a1fc49b03911d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -402,6 +402,12 @@ public void computeStats(Analyzer analyzer) { } // when node scan has no data, cardinality should be 0 instead of a invalid value after computeStats() cardinality = cardinality == -1 ? 0 : cardinality; + + // update statsDeriveResult for real statistics + // After statistics collection is complete, remove the logic + if (analyzer.safeIsEnableJoinReorderBasedCost()) { + statsDeriveResult.setRowCount(cardinality); + } } @Override @@ -559,19 +565,6 @@ private void addScanRangeLocations(Partition partition, result.add(scanRangeLocations); } - // FIXME(dhc): we use cardinality here to simulate ndv - // update statsDeriveResult for real statistics - // After statistics collection is complete, remove the logic - if (analyzer.safeIsEnableJoinReorderBasedCost()) { - statsDeriveResult.setRowCount(cardinality); - for (Map.Entry entry : statsDeriveResult.getColumnToNdv().entrySet()) { - if (entry.getValue() > 0) { - cardinality = Math.min(cardinality, entry.getValue()); - } - } - statsDeriveResult.setRowCount(cardinality); - } - if (tablets.size() == 0) { desc.setCardinality(0); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java index e8147f155f18c0..13ac58c40fef83 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java @@ -183,9 +183,12 @@ protected PlanNode(PlanNodeId id, PlanNode node, String planNodeName) { public enum NodeType { DEFAULT, AGG_NODE, + BROKER_SCAN_NODE, HASH_JOIN_NODE, + HIVE_SCAN_NODE, MERGE_NODE, ES_SCAN_NODE, + ICEBREG_SCAN_NODE, LOAD_SCAN_NODE, MYSQL_SCAN_NODE, ODBC_SCAN_NODE, diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index e913f9cc96d51f..7195591a298829 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -699,14 +699,6 @@ private void analyzeAndGenerateQueryPlan(TQueryOptions tQueryOptions) throws Use } if (explainOptions != null) parsedStmt.setIsExplain(explainOptions); } - - if (parsedStmt instanceof InsertStmt && parsedStmt.isExplain()) { - if (ConnectContext.get() != null && - ConnectContext.get().getExecutor() != null && - ConnectContext.get().getExecutor().getParsedStmt() != null) { - ConnectContext.get().getExecutor().getParsedStmt().setIsExplain(new ExplainOptions(true, false)); - } - } } plannerProfile.setQueryAnalysisFinishTime(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java index b2ada3b5479be2..ccb58e92873d22 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java @@ -20,6 +20,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.SlotId; import org.apache.doris.common.UserException; import org.apache.doris.planner.PlanNode; import org.apache.logging.log4j.LogManager; @@ -133,22 +134,25 @@ protected void capRowCountAtLimit() { // Currently it simply adds the number of rows of children protected long deriveRowCount() { + for (StatsDeriveResult statsDeriveResult : childrenStatsResult) { + rowCount = Math.max(rowCount, statsDeriveResult.getRowCount()); + } applyConjunctsSelectivity(); capRowCountAtLimit(); return rowCount; } - protected HashMap deriveColumnToDataSize() { - HashMap columnToDataSize = new HashMap<>(); + protected HashMap deriveColumnToDataSize() { + HashMap columnToDataSize = new HashMap<>(); for (StatsDeriveResult child : childrenStatsResult) { columnToDataSize.putAll(child.getColumnToDataSize()); } return columnToDataSize; } - protected HashMap deriveColumnToNdv() { - HashMap columnToNdv = new HashMap<>(); + protected HashMap deriveColumnToNdv() { + HashMap columnToNdv = new HashMap<>(); for (StatsDeriveResult child : childrenStatsResult) { columnToNdv.putAll(child.getColumnToNdv()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java index fe47adada68cb7..ff514aa55e3b4c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java @@ -19,7 +19,9 @@ import com.google.common.base.Preconditions; import org.apache.doris.analysis.SlotDescriptor; +import org.apache.doris.analysis.SlotId; import org.apache.doris.catalog.Catalog; +import org.apache.doris.common.Pair; import org.apache.doris.common.UserException; import org.apache.doris.planner.OlapScanNode; import org.apache.doris.planner.PlanNode; @@ -34,14 +36,15 @@ public class OlapScanStatsDerive extends BaseStatsDerive { // The rowCount here is the number of rows. private long inputRowCount = -1; - private Map slotIdToDataSize; - private Map slotIdToNdv; + private Map slotIdToDataSize; + private Map slotIdToNdv; + private Map> slotIdToTableIdAndColumnName; @Override public void init(PlanNode node) throws UserException { Preconditions.checkState(node instanceof OlapScanNode); super.init(node); - buildColumnToStats((OlapScanNode)node); + buildStructure((OlapScanNode)node); } @Override @@ -54,10 +57,17 @@ public StatsDeriveResult deriveStats() { * - So only an inaccurate cardinality can be calculated here. */ rowCount = inputRowCount; - return super.deriveStats(); + for (Map.Entry> pairEntry : slotIdToTableIdAndColumnName.entrySet()) { + Pair ndvAndDataSize = getNdvAndDataSizeFromStatistics(pairEntry.getValue()); + long ndv = ndvAndDataSize.first; + float dataSize = ndvAndDataSize.second; + slotIdToNdv.put(pairEntry.getKey(), ndv); + slotIdToDataSize.put(pairEntry.getKey(), dataSize); + } + return new StatsDeriveResult(deriveRowCount(), slotIdToDataSize, slotIdToNdv); } - public void buildColumnToStats(OlapScanNode node) { + public void buildStructure(OlapScanNode node) { slotIdToDataSize = new HashMap<>(); slotIdToNdv = new HashMap<>(); if (node.getTupleDesc() != null @@ -73,35 +83,30 @@ public void buildColumnToStats(OlapScanNode node) { long tableId = slot.getParent().getTable().getId(); String columnName = slot.getColumn().getName(); - /*TODO:Implement the getStatistics interface - //now there is nothing in statistics, need to wait for collection finished - long ndv = -1; - long dataSize = -1; - getNdvAndDataSizeFromStatistics(ndv, dataSize); - - slotIdToNdv.put(slot.getId(), ndv); - slotIdToDataSize.put(slot.getId(), dataSize); - */ + slotIdToTableIdAndColumnName.put(slot.getId(), new Pair<>(tableId, columnName)); } } - public void getNdvAndDataSizeFromStatistics(long ndv, long dataSize) { + //TODO:Implement the getStatistics interface + //now there is nothing in statistics, need to wait for collection finished + public Pair getNdvAndDataSizeFromStatistics(Pair pair) { + long ndv = -1; + float dataSize = -1; /* - ndv = -1; - dataSize = -1; if (Catalog.getCurrentCatalog() .getStatisticsManager() .getStatistics() - .getColumnStats(tableId) != null) { + .getColumnStats(pair.first) != null) { ndv = Catalog.getCurrentCatalog() .getStatisticsManager() .getStatistics() - .getColumnStats(tableId).get(columnName).getNdv(); + .getColumnStats(pair.first).get(pair.second).getNdv(); dataSize = Catalog.getCurrentCatalog() .getStatisticsManager() .getStatistics() - .getColumnStats(tableId).get(columnName).getDataSize(); + .getColumnStats(pair.first).get(pair.second).getDataSize(); } */ + return new Pair<>(ndv, dataSize); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java index e5419ae54cb3b7..b9b0d8024e85f6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java @@ -18,6 +18,7 @@ package org.apache.doris.statistics; import com.google.common.collect.Maps; +import org.apache.doris.analysis.SlotId; import java.util.Map; @@ -26,14 +27,12 @@ public class StatsDeriveResult { private long rowCount = -1; // The data size of the corresponding column in the operator // The actual key is slotId - private final Map columnToDataSize = Maps.newHashMap(); + private final Map columnToDataSize = Maps.newHashMap(); // The ndv of the corresponding column in the operator // The actual key is slotId - private final Map columnToNdv = Maps.newHashMap(); + private final Map columnToNdv = Maps.newHashMap(); - public StatsDeriveResult() {} - - public StatsDeriveResult(long rowCount, Map columnToDataSize, Map columnToNdv) { + public StatsDeriveResult(long rowCount, Map columnToDataSize, Map columnToNdv) { this.rowCount = rowCount; this.columnToDataSize.putAll(columnToDataSize); this.columnToNdv.putAll(columnToNdv); @@ -42,16 +41,16 @@ public StatsDeriveResult(long rowCount, Map columnToDataSize, Map getColumnToNdv() { + public Map getColumnToNdv() { return columnToNdv; } - public Map getColumnToDataSize() { + public Map getColumnToDataSize() { return columnToDataSize; } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java index 3fca5643a5c4a5..f94e69b142a575 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java @@ -72,20 +72,27 @@ public void after() throws Exception { Assert.assertEquals(dropDbStmt.toSql(), dropSchemaStmt.toSql()); } - public void testExplainSelect() throws Exception { - String sql = "explain select * from test_explain.explain_t1 where dt = '1001';"; + public void testExplainInsertInto() throws Exception { + String sql = "explain insert into test_explain.explain_t1 select * from test_explain.explain_t2"; String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, false); System.out.println(explainString); Assert.assertFalse(explainString.contains("CAST")); } - public void testExplainInsertInto() throws Exception { + public void testExplainVerboseInsertInto() throws Exception { String sql = "explain verbose insert into test_explain.explain_t1 select * from test_explain.explain_t2"; String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, true); System.out.println(explainString); Assert.assertTrue(explainString.contains("CAST")); } + public void testExplainSelect() throws Exception { + String sql = "explain select * from test_explain.explain_t1 where dt = '1001';"; + String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, false); + System.out.println(explainString); + Assert.assertFalse(explainString.contains("CAST")); + } + public void testExplainVerboseSelect() throws Exception { String queryStr = "explain verbose select * from test_explain.explain_t1 where dt = '1001';"; String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, queryStr, true); diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java index 40d1541dda3403..3b3946aeb4cba9 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java @@ -1991,8 +1991,9 @@ public void testGroupingSetOutOfBoundError() throws Exception { public void testExplainInsertInto() throws Exception { ExplainTest explainTest = new ExplainTest(); explainTest.before(connectContext); - explainTest.testExplainSelect(); explainTest.testExplainInsertInto(); + explainTest.testExplainVerboseInsertInto(); + explainTest.testExplainSelect(); explainTest.testExplainVerboseSelect(); explainTest.testExplainConcatSelect(); explainTest.testExplainVerboseConcatSelect(); @@ -2088,7 +2089,7 @@ public void testResultExprs() throws Exception { "\"storage_medium\" = \"HDD\",\n" + "\"storage_format\" = \"V2\"\n" + ");\n"); - String queryStr = "EXPLAIN INSERT INTO result_exprs\n" + + String queryStr = "EXPLAIN VERBOSE INSERT INTO result_exprs\n" + "SELECT a.aid,\n" + " b.bid\n" + "FROM\n" + @@ -2098,7 +2099,7 @@ public void testResultExprs() throws Exception { String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, queryStr); Assert.assertFalse(explainString.contains("OUTPUT EXPRS:3 | 4")); System.out.println(explainString); - Assert.assertTrue(explainString.contains("OUTPUT EXPRS:`a`.`aid` | 4")); + Assert.assertTrue(explainString.contains("OUTPUT EXPRS:CAST(`a`.`aid` AS INT) | 4")); } @Test