diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 33f6318808ba3b..0ce10ec0c3c61d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -104,6 +104,8 @@ public Statistics visitCompoundPredicate(CompoundPredicate predicate, Estimation
         Expression leftExpr = predicate.child(0);
         Expression rightExpr = predicate.child(1);
         Statistics leftStats = leftExpr.accept(this, context);
+        // Rescale the left child's column stats to its reduced row count before the right child uses them.
+        leftStats = leftStats.normalizeByRatio(context.statistics.getRowCount());
         Statistics andStats = rightExpr.accept(this,
                 new EstimationContext(leftStats));
         if (predicate instanceof And) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index 1f35d4fb0331a3..da6bf93759308e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -191,4 +191,35 @@ public String detail(String prefix) {
         }
         return builder.toString();
     }
+
+    /**
+     * Rescales column statistics after the row count has dropped below {@code originRowCount}.
+     *
+     * <p>For every column that has nulls, or whose ndv exceeds the current row count, numNulls
+     * is multiplied by {@code rowCount / originRowCount} and ndv is capped at the row count
+     * minus the scaled numNulls. All other columns are left as they are.
+     *
+     * @param originRowCount the earlier, larger row count that {@code rowCount} is compared with
+     * @return {@code this} when {@code rowCount >= originRowCount} or {@code rowCount <= 0},
+     *         otherwise the statistics built from the adjusted columns
+     */
+    public Statistics normalizeByRatio(double originRowCount) {
+        // Nothing to scale when the row count did not shrink or is already non-positive.
+        if (rowCount >= originRowCount || rowCount <= 0) {
+            return this;
+        }
+        StatisticsBuilder builder = new StatisticsBuilder(this);
+        double ratio = rowCount / originRowCount;
+        for (Entry<Expression, ColumnStatistic> entry : expressionToColumnStats.entrySet()) {
+            ColumnStatistic colStats = entry.getValue();
+            if (colStats.numNulls != 0 || colStats.ndv > rowCount) {
+                ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(colStats);
+                colStatsBuilder.setNumNulls(colStats.numNulls * ratio);
+                // ndv is capped at the rows left after removing the scaled nulls.
+                colStatsBuilder.setNdv(Math.min(rowCount - colStatsBuilder.getNumNulls(), colStats.ndv));
+                builder.putColumnStatistics(entry.getKey(), colStatsBuilder.build());
+            }
+        }
+        return builder.build();
+    }
 }
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index da8159ef6b9898..de5e4bba877623 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -206,7 +206,7 @@ public void test1() {
         Statistics stat = new Statistics(1000, slotToColumnStat);
         FilterEstimation filterEstimation = new FilterEstimation();
         Statistics expected = filterEstimation.estimate(or, stat);
-        Assertions.assertEquals(51.9, expected.getRowCount(), 0.1);
+        Assertions.assertEquals(51, expected.getRowCount(), 1);
     }
 
     // a > 500 and b < 100 or a > c
@@ -1059,6 +1059,42 @@ public void testNumNullsAnd() {
         Assertions.assertEquals(result.getRowCount(), 2.0, 0.01);
     }
 
+    /**
+     * a = 1 and b is not null
+     *
+     * <p>b is null in 8 of the 10 input rows. The expected row count presumably depends on
+     * b's null count being scaled down once a = 1 has reduced the row count.
+     */
+    @Test
+    public void testNumNullsAndTwoCol() {
+        SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+        ColumnStatisticBuilder builderA = new ColumnStatisticBuilder()
+                .setNdv(2)
+                .setAvgSizeByte(4)
+                .setNumNulls(0)
+                .setMaxValue(2)
+                .setMinValue(1)
+                .setCount(10);
+        IntegerLiteral int1 = new IntegerLiteral(1);
+        EqualTo equalTo = new EqualTo(a, int1);
+        SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+        ColumnStatisticBuilder builderB = new ColumnStatisticBuilder()
+                .setNdv(2)
+                .setAvgSizeByte(4)
+                .setNumNulls(8)
+                .setMaxValue(2)
+                .setMinValue(1)
+                .setCount(10);
+        Not isNotNull = new Not(new IsNull(b));
+        And and = new And(equalTo, isNotNull);
+        Statistics stats = new Statistics(10, new HashMap<>());
+        stats.addColumnStats(a, builderA.build());
+        stats.addColumnStats(b, builderB.build());
+        FilterEstimation filterEstimation = new FilterEstimation();
+        Statistics result = filterEstimation.estimate(and, stats);
+        Assertions.assertEquals(1.0, result.getRowCount(), 0.01);
+    }
+
     /**
      * a >= 1 or a <= 2
      */