Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,21 @@ public FilterEstimation(Set<Slot> aggSlots) {
/**
* This method will update the stats according to the selectivity.
*/
public Statistics estimate(Expression expression, Statistics statistics) {
public Statistics estimate(Expression expression, Statistics inputStats) {
// For a comparison predicate, only when it's left side is a slot and right side is a literal, we would
// consider is a valid predicate.
Statistics stats = expression.accept(this, new EstimationContext(statistics));
stats.enforceValid();
return stats;
Statistics outputStats = expression.accept(this, new EstimationContext(inputStats));
if (outputStats.getRowCount() == 0 && inputStats.getDeltaRowCount() > 0) {
StatisticsBuilder deltaStats = new StatisticsBuilder();
deltaStats.setDeltaRowCount(0);
deltaStats.setRowCount(inputStats.getDeltaRowCount());
for (Expression expr : inputStats.columnStatistics().keySet()) {
deltaStats.putColumnStatistics(expr, ColumnStatistic.UNKNOWN);
}
outputStats = expression.accept(this, new EstimationContext(deltaStats.build()));
}
outputStats.enforceValid();
return outputStats;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -646,10 +646,10 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
idxId = olapScan.getSelectedIndexId();
}
}
if (deltaRowCount > 0 && LOG.isDebugEnabled()) {
LOG.debug("{} is partially analyzed, clear min/max values in column stats",
catalogRelation.getTable().getName());
}
// if (deltaRowCount > 0 && LOG.isDebugEnabled()) {
// LOG.debug("{} is partially analyzed, clear min/max values in column stats",
// catalogRelation.getTable().getName());
// }
for (SlotReference slotReference : slotSet) {
String colName = slotReference.getColumn().isPresent()
? slotReference.getColumn().get().getName()
Expand All @@ -676,14 +676,14 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
hasUnknownCol = true;
}
if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableStats) {
if (deltaRowCount > 0) {
// clear min-max to avoid error estimation
// for example, after yesterday data loaded, user send query about yesterday immediately.
// since yesterday data are not analyzed, the max date is before yesterday, and hence optimizer
// estimates the filter result is zero
colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY)
.setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY);
}
// if (deltaRowCount > 0) {
// // clear min-max to avoid error estimation
// // for example, after yesterday data loaded, user send query about yesterday immediately.
// // since yesterday data are not analyzed, the max date is before yesterday, and hence optimizer
// // estimates the filter result is zero
// colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY)
// .setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY);
// }
columnStatisticBuilderMap.put(slotReference, colStatsBuilder);
} else {
columnStatisticBuilderMap.put(slotReference, new ColumnStatisticBuilder(ColumnStatistic.UNKNOWN));
Expand All @@ -693,17 +693,18 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
if (hasUnknownCol && ConnectContext.get() != null && ConnectContext.get().getStatementContext() != null) {
ConnectContext.get().getStatementContext().setHasUnknownColStats(true);
}
return normalizeCatalogRelationColumnStatsRowCount(rowCount, columnStatisticBuilderMap);
return normalizeCatalogRelationColumnStatsRowCount(rowCount, columnStatisticBuilderMap, deltaRowCount);
}

private Statistics normalizeCatalogRelationColumnStatsRowCount(double rowCount,
Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap) {
Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap,
double deltaRowCount) {
Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>();
for (Expression slot : columnStatisticBuilderMap.keySet()) {
columnStatisticMap.put(slot,
columnStatisticBuilderMap.get(slot).setCount(rowCount).build());
}
return new Statistics(rowCount, columnStatisticMap);
return new Statistics(rowCount, columnStatisticMap, deltaRowCount);
}

private Statistics computeTopN(TopN topN) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,23 @@ public class Statistics {
// the byte size of one tuple
private double tupleSize;

private double deltaRowCount = 0.0;

public Statistics(Statistics another) {
this.rowCount = another.rowCount;
this.expressionToColumnStats = new HashMap<>(another.expressionToColumnStats);
this.tupleSize = another.tupleSize;
this.deltaRowCount = another.getDeltaRowCount();
}

public Statistics(double rowCount, Map<Expression, ColumnStatistic> expressionToColumnStats) {
public Statistics(double rowCount, Map<Expression, ColumnStatistic> expressionToColumnStats, double deltaRowCount) {
this.rowCount = rowCount;
this.expressionToColumnStats = expressionToColumnStats;
this.deltaRowCount = deltaRowCount;
}

public Statistics(double rowCount, Map<Expression, ColumnStatistic> expressionToColumnStats) {
this(rowCount, expressionToColumnStats, 0);
}

public ColumnStatistic findColumnStatistics(Expression expression) {
Expand Down Expand Up @@ -150,7 +158,11 @@ public String toString() {
return "-Infinite";
}
DecimalFormat format = new DecimalFormat("#,###.##");
return format.format(rowCount);
String rows = format.format(rowCount);
if (deltaRowCount > 0) {
rows = rows + "(" + format.format(deltaRowCount) + ")";
}
return rows;
}

public int getBENumber() {
Expand Down Expand Up @@ -209,4 +221,12 @@ public Statistics normalizeByRatio(double originRowCount) {
}
return builder.build();
}

public double getDeltaRowCount() {
return deltaRowCount;
}

public void setDeltaRowCount(double deltaRowCount) {
this.deltaRowCount = deltaRowCount;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ public class StatisticsBuilder {

private final Map<Expression, ColumnStatistic> expressionToColumnStats;

private double deltaRowCount = 0.0;

public StatisticsBuilder() {
expressionToColumnStats = new HashMap<>();
}
Expand All @@ -36,6 +38,7 @@ public StatisticsBuilder(Statistics statistics) {
this.rowCount = statistics.getRowCount();
expressionToColumnStats = new HashMap<>();
expressionToColumnStats.putAll(statistics.columnStatistics());
deltaRowCount = statistics.getDeltaRowCount();
}

public StatisticsBuilder setRowCount(double rowCount) {
Expand All @@ -54,7 +57,12 @@ public StatisticsBuilder putColumnStatistics(Expression expression, ColumnStatis
return this;
}

public StatisticsBuilder setDeltaRowCount(double deltaRowCount) {
this.deltaRowCount = deltaRowCount;
return this;
}

public Statistics build() {
return new Statistics(rowCount, expressionToColumnStats);
return new Statistics(rowCount, expressionToColumnStats, deltaRowCount);
}
}
57 changes: 57 additions & 0 deletions regression-test/suites/nereids_p0/delta_row/delta_row.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

suite("delta_row") {
String database = context.config.getDbNameByFile(context.file)
sql """
drop database if exists ${database};
create database ${database};
use ${database};
CREATE TABLE IF NOT EXISTS t (
k int(11) null comment "",
v string replace null comment "",
) engine=olap
DISTRIBUTED BY HASH(k) BUCKETS 5 properties("replication_num" = "1");

insert into t values (1, "a"),(2, "b"),(3, 'c'),(4,'d');
analyze table t with sync;
"""
explain {
sql "physical plan select * from t where k > 6"
contains("stats=0 ")
contains("stats=4,")
// PhysicalResultSink[120] ( outputExprs=[k#0, v#1] )
// +--PhysicalDistribute[117]@@1 ( distributionSpec=DistributionSpecGather, stats=0 )
// +--PhysicalFilter[114]@1 ( predicates=(k#0 > 6), stats=0 )
// +--PhysicalOlapScan[111]@0 ( qualified=internal.default_cluster:regression_test_nereids_p0_delta_row.t, stats=4, fr=Optional[1] )
}

sql "set global enable_auto_analyze=false;"

sql "insert into t values (10, 'c');"
explain {
sql "physical plan select * from t where k > 6"
contains("stats=0.5 ")
contains("stats=5(1),")
notContains("stats=0 ")
notContains("stats=4,")
// PhysicalResultSink[120] ( outputExprs=[k#0, v#1] )
// +--PhysicalDistribute[117]@@1 ( distributionSpec=DistributionSpecGather, stats=0.5 )
// +--PhysicalFilter[114]@1 ( predicates=(k#0 > 6), stats=0.5 )
// +--PhysicalOlapScan[111]@0 ( qualified=internal.default_cluster:regression_test_nereids_p0_delta_row.t, stats=5(1), fr=Optional[1] )
}
}