From fba9e9541dc673ae721b027f91778bd59a5e0b9c Mon Sep 17 00:00:00 2001 From: Jibing Li Date: Sun, 28 Apr 2024 12:01:57 +0800 Subject: [PATCH] Use column update rows to decide min/max stats are valid or not. --- .../doris/nereids/stats/StatsCalculator.java | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 34248d5a55a87e..96d08e6f3a1219 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -19,6 +19,7 @@ import org.apache.doris.analysis.IntLiteral; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.FeConstants; import org.apache.doris.common.Pair; @@ -124,6 +125,7 @@ import org.apache.doris.nereids.util.PlanUtils; import org.apache.doris.qe.ConnectContext; import org.apache.doris.statistics.AnalysisManager; +import org.apache.doris.statistics.ColStatsMeta; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.ColumnStatisticBuilder; import org.apache.doris.statistics.Histogram; @@ -764,10 +766,10 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { Set slotSet = slotSetBuilder.build(); Map columnStatisticBuilderMap = new HashMap<>(); TableIf table = catalogRelation.getTable(); + boolean isOlapTable = table instanceof OlapTable; AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(table.getId()); - // rows newly updated after last analyze - long deltaRowCount = tableMeta == null ? 0 : tableMeta.updatedRows.get(); + long tableUpdatedRows = tableMeta == null ? 0 : tableMeta.updatedRows.get(); double rowCount = catalogRelation.getTable().getRowCountForNereids(); boolean hasUnknownCol = false; long idxId = -1; @@ -777,10 +779,6 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { idxId = olapScan.getSelectedIndexId(); } } - if (deltaRowCount > 0 && LOG.isDebugEnabled()) { - LOG.debug("{} is partially analyzed, clear min/max values in column stats", - catalogRelation.getTable().getName()); - } for (SlotReference slotReference : slotSet) { String colName = slotReference.getColumn().isPresent() ? slotReference.getColumn().get().getName() @@ -790,6 +788,13 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { if (colName == null) { throw new RuntimeException(String.format("Invalid slot: %s", slotReference.getExprId())); } + long deltaRowCount = 0; + if (isOlapTable) { + OlapTable olapTable = (OlapTable) table; + ColStatsMeta colMeta = tableMeta == null ? null : tableMeta.findColumnStatsMeta( + olapTable.getIndexNameById(idxId == -1 ? olapTable.getBaseIndexId() : idxId), colName); + deltaRowCount = colMeta == null ? 0 : tableUpdatedRows - colMeta.updatedRows; + } ColumnStatistic cache; if (!FeConstants.enableInternalSchemaDb || shouldIgnoreThisCol) { @@ -807,13 +812,20 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { hasUnknownCol = true; } if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableStats) { + // deltaRowCount > 0 indicates that + // new data is loaded to the table after this column was analyzed last time. + // In this case, need to eliminate min/max value for this column. if (deltaRowCount > 0) { // clear min-max to avoid error estimation // for example, after yesterday data loaded, user send query about yesterday immediately. // since yesterday data are not analyzed, the max date is before yesterday, and hence optimizer // estimates the filter result is zero colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY) - .setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY); + .setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY); + if (LOG.isDebugEnabled()) { + LOG.debug("{}.{} is partially analyzed, clear min/max values in column stats", + table.getName(), colName); + } } columnStatisticBuilderMap.put(slotReference, colStatsBuilder); } else {