diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java index 7338d9bbb04e0c..73aa1fb04d3cf9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java @@ -138,7 +138,14 @@ private static class ParamCreateContext { public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName, List> fileStatusesList, int filesAdded) { - super(id, destTupleDesc, planNodeName); + super(id, destTupleDesc, planNodeName, NodeType.BROKER_SCAN_NODE); + this.fileStatusesList = fileStatusesList; + this.filesAdded = filesAdded; + } + + public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName, + List> fileStatusesList, int filesAdded, NodeType nodeType) { + super(id, destTupleDesc, planNodeName, nodeType); this.fileStatusesList = fileStatusesList; this.filesAdded = filesAdded; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java index 2ed1a4bde80859..96dbdbe934ec27 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java @@ -72,7 +72,7 @@ public class EsScanNode extends ScanNode { boolean isFinalized = false; public EsScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) { - super(id, desc, planNodeName); + super(id, desc, planNodeName, NodeType.ES_SCAN_NODE); table = (EsTable) (desc.getTable()); esTablePartitions = table.getEsTablePartitions(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java index 76cf53431316e1..c18533d3023f65 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java @@ -100,7 +100,7 @@ public List getPartitionKeys() { public HiveScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName, List> fileStatusesList, int filesAdded) { - super(id, destTupleDesc, planNodeName, fileStatusesList, filesAdded); + super(id, destTupleDesc, planNodeName, fileStatusesList, filesAdded, NodeType.HIVE_SCAN_NODE); this.hiveTable = (HiveTable) destTupleDesc.getTable(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java index 5428c9ee555ec8..5c7dd1fad00625 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java @@ -47,7 +47,7 @@ public class IcebergScanNode extends BrokerScanNode { public IcebergScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, List> fileStatusesList, int filesAdded) { - super(id, desc, planNodeName, fileStatusesList, filesAdded); + super(id, desc, planNodeName, fileStatusesList, filesAdded, NodeType.ICEBREG_SCAN_NODE); icebergTable = (IcebergTable) desc.getTable(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java index b6dbb4f782ca1e..82299e2ea6ebf3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java @@ -54,7 +54,11 @@ public abstract class LoadScanNode extends ScanNode { protected LoadTask.MergeType mergeType = LoadTask.MergeType.APPEND; public LoadScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) { - super(id, desc, planNodeName); + super(id, desc, planNodeName, NodeType.LOAD_SCAN_NODE); + } + + public LoadScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, NodeType nodeType) { + super(id, desc, planNodeName, nodeType); } protected void initAndSetWhereExpr(Expr whereExpr, TupleDescriptor tupleDesc, Analyzer analyzer) throws UserException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java index fcc22125c99117..9235b1fc75cc4d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java @@ -56,7 +56,7 @@ public class MysqlScanNode extends ScanNode { * Constructs node to scan given data files of table 'tbl'. */ public MysqlScanNode(PlanNodeId id, TupleDescriptor desc, MysqlTable tbl) { - super(id, desc, "SCAN MYSQL"); + super(id, desc, "SCAN MYSQL", NodeType.MYSQL_SCAN_NODE); tblName = "`" + tbl.getMysqlTableName() + "`"; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java index 90f989d35c97e6..1f32b9e93823fe 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java @@ -73,7 +73,7 @@ private static boolean shouldPushDownConjunct(TOdbcTableType tableType, Expr exp * Constructs node to scan given data files of table 'tbl'. */ public OdbcScanNode(PlanNodeId id, TupleDescriptor desc, OdbcTable tbl) { - super(id, desc, "SCAN ODBC"); + super(id, desc, "SCAN ODBC", NodeType.ODBC_SCAN_NODE); connectString = tbl.getConnectString(); odbcType = tbl.getOdbcTableType(); tblName = OdbcTable.databaseProperName(odbcType, tbl.getOdbcTableName()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index f74666a18caf2e..6a1fc49b03911d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -53,6 +53,7 @@ import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.SessionVariable; import org.apache.doris.resource.Tag; +import org.apache.doris.statistics.StatsRecursiveDerive; import org.apache.doris.system.Backend; import org.apache.doris.thrift.TExplainLevel; import org.apache.doris.thrift.TNetworkAddress; @@ -146,7 +147,7 @@ public class OlapScanNode extends ScanNode { // Constructs node to scan given data files of table 'tbl'. public OlapScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) { - super(id, desc, planNodeName); + super(id, desc, planNodeName, NodeType.OLAP_SCAN_NODE); olapTable = (OlapTable) desc.getTable(); } @@ -346,10 +347,25 @@ public void init(Analyzer analyzer) throws UserException { * - So only an inaccurate cardinality can be calculated here. */ if (analyzer.safeIsEnableJoinReorderBasedCost()) { + mockRowCountInStatistic(); computeInaccurateCardinality(); } } + /** + * Remove the method after statistics collection is working properly + */ + public void mockRowCountInStatistic() { + long tableId = desc.getTable().getId(); + cardinality = 0; + for (long selectedPartitionId : selectedPartitionIds) { + final Partition partition = olapTable.getPartition(selectedPartitionId); + final MaterializedIndex baseIndex = partition.getBaseIndex(); + cardinality += baseIndex.getRowCount(); + } + Catalog.getCurrentCatalog().getStatisticsManager().getStatistics().mockTableStatsWithRowCount(tableId, cardinality); + } + @Override public void finalize(Analyzer analyzer) throws UserException { LOG.debug("OlapScanNode get scan range locations. Tuple: {}", desc); @@ -386,6 +402,12 @@ public void computeStats(Analyzer analyzer) { } // when node scan has no data, cardinality should be 0 instead of a invalid value after computeStats() cardinality = cardinality == -1 ? 0 : cardinality; + + // update statsDeriveResult for real statistics + // After statistics collection is complete, remove the logic + if (analyzer.safeIsEnableJoinReorderBasedCost()) { + statsDeriveResult.setRowCount(cardinality); + } } @Override @@ -397,30 +419,9 @@ protected void computeNumNodes() { numNodes = numNodes <= 0 ? 1 : numNodes; } - /** - * Calculate inaccurate cardinality. - * cardinality: the value of cardinality is the sum of rowcount which belongs to selectedPartitionIds - * The cardinality here is actually inaccurate, it will be greater than the actual value. - * There are two reasons - * 1. During the actual execution, not all tablets belonging to the selected partition will be scanned. - * Some tablets may have been pruned before execution. - * 2. The base index may eventually be replaced by mv index. - *

- * There are three steps to calculate cardinality - * 1. Calculate how many rows were scanned - * 2. Apply conjunct - * 3. Apply limit - */ - private void computeInaccurateCardinality() { - // step1: Calculate how many rows were scanned - cardinality = 0; - for (long selectedPartitionId : selectedPartitionIds) { - final Partition partition = olapTable.getPartition(selectedPartitionId); - final MaterializedIndex baseIndex = partition.getBaseIndex(); - cardinality += baseIndex.getRowCount(); - } - applyConjunctsSelectivity(); - capCardinalityAtLimit(); + private void computeInaccurateCardinality() throws UserException { + StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this); + cardinality = statsDeriveResult.getRowCount(); } private Collection partitionPrune(PartitionInfo partitionInfo, PartitionNames partitionNames) throws AnalysisException { @@ -563,7 +564,7 @@ private void addScanRangeLocations(Partition partition, result.add(scanRangeLocations); } - // FIXME(dhc): we use cardinality here to simulate ndv + if (tablets.size() == 0) { desc.setCardinality(0); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java index 58a0019962d1eb..13ac58c40fef83 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java @@ -36,6 +36,7 @@ import org.apache.doris.common.TreeNode; import org.apache.doris.common.UserException; import org.apache.doris.common.util.VectorizedUtil; +import org.apache.doris.statistics.StatsDeriveResult; import org.apache.doris.thrift.TExplainLevel; import org.apache.doris.thrift.TFunctionBinaryType; import org.apache.doris.thrift.TPlan; @@ -135,6 +136,9 @@ abstract public class PlanNode extends TreeNode { protected List outputSlotIds; + protected NodeType nodeType = NodeType.DEFAULT; + protected StatsDeriveResult statsDeriveResult; + protected PlanNode(PlanNodeId id, ArrayList tupleIds, String planNodeName) { this.id = id; this.limit = -1; @@ -173,12 +177,41 @@ protected PlanNode(PlanNodeId id, PlanNode node, String planNodeName) { this.planNodeName = VectorizedUtil.isVectorized() ? "V" + planNodeName : planNodeName; this.numInstances = 1; + this.nodeType = nodeType; + } + + public enum NodeType { + DEFAULT, + AGG_NODE, + BROKER_SCAN_NODE, + HASH_JOIN_NODE, + HIVE_SCAN_NODE, + MERGE_NODE, + ES_SCAN_NODE, + ICEBREG_SCAN_NODE, + LOAD_SCAN_NODE, + MYSQL_SCAN_NODE, + ODBC_SCAN_NODE, + OLAP_SCAN_NODE, + SCHEMA_SCAN_NODE, } public String getPlanNodeName() { return planNodeName; } + public StatsDeriveResult getStatsDeriveResult() { + return statsDeriveResult; + } + + public NodeType getNodeType() { + return nodeType; + } + + public void setStatsDeriveResult(StatsDeriveResult statsDeriveResult) { + this.statsDeriveResult = statsDeriveResult; + } + /** * Sets tblRefIds_, tupleIds_, and nullableTupleIds_. * The default implementation is a no-op. diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java index 8b8c52b5bcccd4..a891f7616f8007 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java @@ -65,8 +65,9 @@ abstract public class ScanNode extends PlanNode { protected String sortColumn = null; protected Analyzer analyzer; - public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) { + public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, NodeType nodeType) { super(id, desc.getId().asList(), planNodeName); + super.nodeType = nodeType; this.desc = desc; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java index c5692c38e8d25c..07152bfd5e262b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java @@ -57,7 +57,7 @@ public class SchemaScanNode extends ScanNode { * Constructs node to scan given data files of table 'tbl'. */ public SchemaScanNode(PlanNodeId id, TupleDescriptor desc) { - super(id, desc, "SCAN SCHEMA"); + super(id, desc, "SCAN SCHEMA", NodeType.SCHEMA_SCAN_NODE); this.tableName = desc.getTable().getName(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java index 8ad921bf0928b5..a78435a3dd5880 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java @@ -1725,7 +1725,6 @@ private PlanNode createScanNode(Analyzer analyzer, TableRef tblRef, SelectStmt s scanNodeList.add(scanNode); scanNode.init(analyzer); - return scanNode; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index e913f9cc96d51f..7195591a298829 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -699,14 +699,6 @@ private void analyzeAndGenerateQueryPlan(TQueryOptions tQueryOptions) throws Use } if (explainOptions != null) parsedStmt.setIsExplain(explainOptions); } - - if (parsedStmt instanceof InsertStmt && parsedStmt.isExplain()) { - if (ConnectContext.get() != null && - ConnectContext.get().getExecutor() != null && - ConnectContext.get().getExecutor().getParsedStmt() != null) { - ConnectContext.get().getExecutor().getParsedStmt().setIsExplain(new ExplainOptions(true, false)); - } - } } plannerProfile.setQueryAnalysisFinishTime(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java new file mode 100644 index 00000000000000..ccb58e92873d22 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java @@ -0,0 +1,161 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.SlotId; +import org.apache.doris.common.UserException; +import org.apache.doris.planner.PlanNode; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; + +public class BaseStatsDerive { + private static final Logger LOG = LogManager.getLogger(BaseStatsDerive.class); + // estimate of the output rowCount of this node; + // invalid: -1 + protected long rowCount = -1; + protected long limit = -1; + + protected List conjuncts = Lists.newArrayList(); + protected List childrenStatsResult = Lists.newArrayList(); + + protected void init(PlanNode node) throws UserException { + limit = node.getLimit(); + conjuncts.addAll(node.getConjuncts()); + + for (PlanNode childNode : node.getChildren()) { + StatsDeriveResult result = childNode.getStatsDeriveResult(); + if (result == null) { + throw new UserException("childNode statsDeriveResult is null, childNodeType is " + childNode.getNodeType() + + "parentNodeType is " + node.getNodeType()); + } + childrenStatsResult.add(result); + } + } + + public StatsDeriveResult deriveStats() { + return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv()); + } + + public boolean hasLimit() { + return limit > -1; + } + + protected void applyConjunctsSelectivity() { + if (rowCount == -1) { + return; + } + applySelectivity(); + } + + private void applySelectivity() { + double selectivity = computeSelectivity(); + Preconditions.checkState(rowCount >= 0); + long preConjunctrowCount = rowCount; + rowCount = Math.round(rowCount * selectivity); + // don't round rowCount down to zero for safety. + if (rowCount == 0 && preConjunctrowCount > 0) { + rowCount = 1; + } + } + + protected double computeSelectivity() { + for (Expr expr : conjuncts) { + expr.setSelectivity(); + } + return computeCombinedSelectivity(conjuncts); + } + + /** + * Returns the estimated combined selectivity of all conjuncts. Uses heuristics to + * address the following estimation challenges: + * 1. The individual selectivities of conjuncts may be unknown. + * 2. Two selectivities, whether known or unknown, could be correlated. Assuming + * independence can lead to significant underestimation. + *

+ * The first issue is addressed by using a single default selectivity that is + * representative of all conjuncts with unknown selectivities. + * The second issue is addressed by an exponential backoff when multiplying each + * additional selectivity into the final result. + */ + protected double computeCombinedSelectivity(List conjuncts) { + // Collect all estimated selectivities. + List selectivities = new ArrayList<>(); + for (Expr e : conjuncts) { + if (e.hasSelectivity()) selectivities.add(e.getSelectivity()); + } + if (selectivities.size() != conjuncts.size()) { + // Some conjuncts have no estimated selectivity. Use a single default + // representative selectivity for all those conjuncts. + selectivities.add(Expr.DEFAULT_SELECTIVITY); + } + // Sort the selectivities to get a consistent estimate, regardless of the original + // conjunct order. Sort in ascending order such that the most selective conjunct + // is fully applied. + Collections.sort(selectivities); + double result = 1.0; + // selectivity = 1 * (s1)^(1/1) * (s2)^(1/2) * ... * (sn-1)^(1/(n-1)) * (sn)^(1/n) + for (int i = 0; i < selectivities.size(); ++i) { + // Exponential backoff for each selectivity multiplied into the final result. + result *= Math.pow(selectivities.get(i), 1.0 / (double) (i + 1)); + } + // Bound result in [0, 1] + return Math.max(0.0, Math.min(1.0, result)); + } + + protected void capRowCountAtLimit() { + if (hasLimit()) { + rowCount = rowCount == -1 ? limit : Math.min(rowCount, limit); + } + } + + + // Currently it simply adds the number of rows of children + protected long deriveRowCount() { + for (StatsDeriveResult statsDeriveResult : childrenStatsResult) { + rowCount = Math.max(rowCount, statsDeriveResult.getRowCount()); + } + applyConjunctsSelectivity(); + capRowCountAtLimit(); + return rowCount; + } + + + protected HashMap deriveColumnToDataSize() { + HashMap columnToDataSize = new HashMap<>(); + for (StatsDeriveResult child : childrenStatsResult) { + columnToDataSize.putAll(child.getColumnToDataSize()); + } + return columnToDataSize; + } + + protected HashMap deriveColumnToNdv() { + HashMap columnToNdv = new HashMap<>(); + for (StatsDeriveResult child : childrenStatsResult) { + columnToNdv.putAll(child.getColumnToNdv()); + } + return columnToNdv; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java new file mode 100644 index 00000000000000..d663bf5e08c677 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.planner.PlanNode; + +public class DeriveFactory { + + public BaseStatsDerive getStatsDerive(PlanNode.NodeType nodeType) { + switch (nodeType) { + case AGG_NODE: + case HASH_JOIN_NODE: + case MERGE_NODE: + break; + case OLAP_SCAN_NODE: + return new OlapScanStatsDerive(); + case DEFAULT: + } + return new BaseStatsDerive(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java new file mode 100644 index 00000000000000..ff514aa55e3b4c --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java @@ -0,0 +1,112 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import com.google.common.base.Preconditions; +import org.apache.doris.analysis.SlotDescriptor; +import org.apache.doris.analysis.SlotId; +import org.apache.doris.catalog.Catalog; +import org.apache.doris.common.Pair; +import org.apache.doris.common.UserException; +import org.apache.doris.planner.OlapScanNode; +import org.apache.doris.planner.PlanNode; + +import java.util.HashMap; +import java.util.Map; + +public class OlapScanStatsDerive extends BaseStatsDerive { + // Currently, due to the structure of doris, + // the selected materialized view is not determined when calculating the statistical information of scan, + // so baseIndex is used for calculation when generating Planner. + + // The rowCount here is the number of rows. + private long inputRowCount = -1; + private Map slotIdToDataSize; + private Map slotIdToNdv; + private Map> slotIdToTableIdAndColumnName; + + @Override + public void init(PlanNode node) throws UserException { + Preconditions.checkState(node instanceof OlapScanNode); + super.init(node); + buildStructure((OlapScanNode)node); + } + + @Override + public StatsDeriveResult deriveStats() { + /** + * Compute InAccurate cardinality before mv selector and tablet pruning. + * - Accurate statistical information relies on the selector of materialized views and bucket reduction. + * - However, Those both processes occur after the reorder algorithm is completed. + * - When Join reorder is turned on, the cardinality must be calculated before the reorder algorithm. + * - So only an inaccurate cardinality can be calculated here. + */ + rowCount = inputRowCount; + for (Map.Entry> pairEntry : slotIdToTableIdAndColumnName.entrySet()) { + Pair ndvAndDataSize = getNdvAndDataSizeFromStatistics(pairEntry.getValue()); + long ndv = ndvAndDataSize.first; + float dataSize = ndvAndDataSize.second; + slotIdToNdv.put(pairEntry.getKey(), ndv); + slotIdToDataSize.put(pairEntry.getKey(), dataSize); + } + return new StatsDeriveResult(deriveRowCount(), slotIdToDataSize, slotIdToNdv); + } + + public void buildStructure(OlapScanNode node) { + slotIdToDataSize = new HashMap<>(); + slotIdToNdv = new HashMap<>(); + if (node.getTupleDesc() != null + && node.getTupleDesc().getTable() != null) { + long tableId = node.getTupleDesc().getTable().getId(); + inputRowCount = Catalog.getCurrentCatalog().getStatisticsManager() + .getStatistics().getTableStats(tableId).getRowCount(); + } + for (SlotDescriptor slot : node.getTupleDesc().getSlots()) { + if (!slot.isMaterialized()) { + continue; + } + + long tableId = slot.getParent().getTable().getId(); + String columnName = slot.getColumn().getName(); + slotIdToTableIdAndColumnName.put(slot.getId(), new Pair<>(tableId, columnName)); + } + } + + //TODO:Implement the getStatistics interface + //now there is nothing in statistics, need to wait for collection finished + public Pair getNdvAndDataSizeFromStatistics(Pair pair) { + long ndv = -1; + float dataSize = -1; + /* + if (Catalog.getCurrentCatalog() + .getStatisticsManager() + .getStatistics() + .getColumnStats(pair.first) != null) { + ndv = Catalog.getCurrentCatalog() + .getStatisticsManager() + .getStatistics() + .getColumnStats(pair.first).get(pair.second).getNdv(); + dataSize = Catalog.getCurrentCatalog() + .getStatisticsManager() + .getStatistics() + .getColumnStats(pair.first).get(pair.second).getDataSize(); + } + */ + return new Pair<>(ndv, dataSize); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java index b17a37858d8793..58003de90c06af 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java @@ -70,4 +70,17 @@ public Map getColumnStats(long tableId) { } return tableStats.getNameToColumnStats(); } + + // TODO: mock statistics need to be removed in the future + public void mockTableStatsWithRowCount(long tableId, long rowCount) { + TableStats tableStats = idToTableStats.get(tableId); + if (tableStats == null) { + tableStats = new TableStats(); + idToTableStats.put(tableId, tableStats); + } + + if (tableStats.getRowCount() != rowCount) { + tableStats.setRowCount(rowCount); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java index e994cce2120460..b1c88e36a58c63 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java @@ -136,4 +136,8 @@ private Table validateTableName(TableName dbTableName) throws AnalysisException Table table = db.getTableOrAnalysisException(tableName); return table; } + + public Statistics getStatistics() { + return statistics; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java new file mode 100644 index 00000000000000..b9b0d8024e85f6 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import com.google.common.collect.Maps; +import org.apache.doris.analysis.SlotId; + +import java.util.Map; + +// This structure is maintained in each operator to store the statistical information results obtained by the operator. +public class StatsDeriveResult { + private long rowCount = -1; + // The data size of the corresponding column in the operator + // The actual key is slotId + private final Map columnToDataSize = Maps.newHashMap(); + // The ndv of the corresponding column in the operator + // The actual key is slotId + private final Map columnToNdv = Maps.newHashMap(); + + public StatsDeriveResult(long rowCount, Map columnToDataSize, Map columnToNdv) { + this.rowCount = rowCount; + this.columnToDataSize.putAll(columnToDataSize); + this.columnToNdv.putAll(columnToNdv); + } + + public void setRowCount(long rowCount) { + this.rowCount = rowCount; + } + + public long getRowCount() { + return rowCount; + } + + public Map getColumnToNdv() { + return columnToNdv; + } + + public Map getColumnToDataSize() { + return columnToDataSize; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java new file mode 100644 index 00000000000000..e6159a594de6ba --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.common.UserException; +import org.apache.doris.planner.PlanNode; + + +public class StatsRecursiveDerive { + private StatsRecursiveDerive() {} + + public static StatsRecursiveDerive getStatsRecursiveDerive() { + return Inner.INSTANCE; + } + + private static class Inner { + private static final StatsRecursiveDerive INSTANCE = new StatsRecursiveDerive(); + } + + /** + * Recursively complete the derivation of statistics for this node and all its children + * @param node + * This parameter is an input and output parameter, + * which will store the derivation result of statistical information in the corresponding node + */ + public void statsRecursiveDerive(PlanNode node) throws UserException { + if (node.getStatsDeriveResult() != null) { + return; + } + for (PlanNode childNode : node.getChildren()) { + if (childNode.getStatsDeriveResult() == null) { + statsRecursiveDerive(childNode); + } + } + DeriveFactory deriveFactory = new DeriveFactory(); + BaseStatsDerive deriveStats = deriveFactory.getStatsDerive(node.getNodeType()); + deriveStats.init(node); + StatsDeriveResult result = deriveStats.deriveStats(); + node.setStatsDeriveResult(result); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java index efb35bbe10f491..ef494bd9f66126 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java @@ -94,4 +94,16 @@ public List getShowInfo() { public Map getNameToColumnStats() { return nameToColumnStats; } + + public long getRowCount() { + return rowCount; + } + + public long getDataSize() { + return dataSize; + } + + public void setRowCount(long rowCount) { + this.rowCount = rowCount; + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java index 3fca5643a5c4a5..f94e69b142a575 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java @@ -72,20 +72,27 @@ public void after() throws Exception { Assert.assertEquals(dropDbStmt.toSql(), dropSchemaStmt.toSql()); } - public void testExplainSelect() throws Exception { - String sql = "explain select * from test_explain.explain_t1 where dt = '1001';"; + public void testExplainInsertInto() throws Exception { + String sql = "explain insert into test_explain.explain_t1 select * from test_explain.explain_t2"; String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, false); System.out.println(explainString); Assert.assertFalse(explainString.contains("CAST")); } - public void testExplainInsertInto() throws Exception { + public void testExplainVerboseInsertInto() throws Exception { String sql = "explain verbose insert into test_explain.explain_t1 select * from test_explain.explain_t2"; String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, true); System.out.println(explainString); Assert.assertTrue(explainString.contains("CAST")); } + public void testExplainSelect() throws Exception { + String sql = "explain select * from test_explain.explain_t1 where dt = '1001';"; + String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, false); + System.out.println(explainString); + Assert.assertFalse(explainString.contains("CAST")); + } + public void testExplainVerboseSelect() throws Exception { String queryStr = "explain verbose select * from test_explain.explain_t1 where dt = '1001';"; String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, queryStr, true); diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java index 40d1541dda3403..3b3946aeb4cba9 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java @@ -1991,8 +1991,9 @@ public void testGroupingSetOutOfBoundError() throws Exception { public void testExplainInsertInto() throws Exception { ExplainTest explainTest = new ExplainTest(); explainTest.before(connectContext); - explainTest.testExplainSelect(); explainTest.testExplainInsertInto(); + explainTest.testExplainVerboseInsertInto(); + explainTest.testExplainSelect(); explainTest.testExplainVerboseSelect(); explainTest.testExplainConcatSelect(); explainTest.testExplainVerboseConcatSelect(); @@ -2088,7 +2089,7 @@ public void testResultExprs() throws Exception { "\"storage_medium\" = \"HDD\",\n" + "\"storage_format\" = \"V2\"\n" + ");\n"); - String queryStr = "EXPLAIN INSERT INTO result_exprs\n" + + String queryStr = "EXPLAIN VERBOSE INSERT INTO result_exprs\n" + "SELECT a.aid,\n" + " b.bid\n" + "FROM\n" + @@ -2098,7 +2099,7 @@ public void testResultExprs() throws Exception { String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, queryStr); Assert.assertFalse(explainString.contains("OUTPUT EXPRS:3 | 4")); System.out.println(explainString); - Assert.assertTrue(explainString.contains("OUTPUT EXPRS:`a`.`aid` | 4")); + Assert.assertTrue(explainString.contains("OUTPUT EXPRS:CAST(`a`.`aid` AS INT) | 4")); } @Test