diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index d44e6198170b1a..5569580d29bf0f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -331,14 +331,28 @@ private Statistics estimateEqualTo(ComparisonPredicate cp, ColumnStatistic stats ColumnStatistic statsForRight, EstimationContext context) { double selectivity; - double ndv = statsForLeft.ndv; - double val = statsForRight.maxValue; - if (val > statsForLeft.maxValue || val < statsForLeft.minValue) { - selectivity = 0.0; + if (statsForLeft.isUnKnown) { + selectivity = DEFAULT_INEQUALITY_COEFFICIENT; } else { - selectivity = StatsMathUtil.minNonNaN(1.0, 1.0 / ndv); + double ndv = statsForLeft.ndv; + if (statsForRight.isUnKnown) { + if (ndv >= 1.0) { + selectivity = 1.0 / ndv; + } else { + selectivity = DEFAULT_INEQUALITY_COEFFICIENT; + } + } else { + double val = statsForRight.maxValue; + if (val > statsForLeft.maxValue || val < statsForLeft.minValue) { + selectivity = 0.0; + } else if (ndv >= 1.0) { + selectivity = StatsMathUtil.minNonNaN(1.0, 1.0 / ndv); + } else { + selectivity = DEFAULT_INEQUALITY_COEFFICIENT; + } + selectivity = getNotNullSelectivity(statsForLeft, selectivity); + } } - selectivity = getNotNullSelectivity(statsForLeft, selectivity); Statistics equalStats = context.statistics.withSel(selectivity); Expression left = cp.left(); equalStats.addColumnStats(left, statsForRight); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java index dd5a38a4a62caa..d7c44e082cf52e 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java @@ -33,6 +33,7 @@ import org.apache.doris.nereids.trees.expressions.NullSafeEqual; import org.apache.doris.nereids.trees.expressions.Or; import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.functions.scalar.Left; import org.apache.doris.nereids.trees.expressions.literal.DateLiteral; import org.apache.doris.nereids.trees.expressions.literal.DoubleLiteral; import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral; @@ -177,6 +178,80 @@ public void testRelatedAnd() { Assertions.assertEquals(10, aStatsEst.ndv); } + @Test + public void knownEqualToUnknown() { + SlotReference ym = new SlotReference("a", new VarcharType(7)); + double rowCount = 404962.0; + double ndv = 14.0; + ColumnStatistic ymStats = new ColumnStatisticBuilder() + .setCount(rowCount) + .setNdv(ndv) + .setMinExpr(new StringLiteral("2023-07")) + .setMinValue(14126741000630328.000000) + .setMaxExpr(new StringLiteral("2024-08")) + .setMaxValue(14126741017407544.000000) + .setAvgSizeByte(7) + .build(); + Statistics stats = new StatisticsBuilder() + .setRowCount(404962).putColumnStatistics(ym, ymStats) + .build(); + + EqualTo predicate = new EqualTo(ym, + new Left(new org.apache.doris.nereids.trees.expressions.literal.StringLiteral("2024-08-14"), + new IntegerLiteral(7)) + ); + FilterEstimation filterEstimation = new FilterEstimation(); + Statistics outStats = filterEstimation.estimate(predicate, stats); + Assertions.assertEquals(rowCount / ndv, outStats.getRowCount()); + } + + @Test + public void knownEqualToUnknownWithLittleNdv() { + SlotReference ym = new SlotReference("a", new VarcharType(7)); + double rowCount = 404962.0; + double ndv = 0.5; + ColumnStatistic ymStats = new ColumnStatisticBuilder() + .setCount(rowCount) + .setNdv(ndv) + .setMinExpr(new StringLiteral("2023-07")) + .setMinValue(14126741000630328.000000) + .setMaxExpr(new StringLiteral("2024-08")) + .setMaxValue(14126741017407544.000000) + .setAvgSizeByte(7) + .build(); + Statistics stats = new StatisticsBuilder() + .setRowCount(404962).putColumnStatistics(ym, ymStats) + .build(); + + EqualTo predicate = new EqualTo(ym, + new Left(new org.apache.doris.nereids.trees.expressions.literal.StringLiteral("2024-08-14"), + new IntegerLiteral(7)) + ); + FilterEstimation filterEstimation = new FilterEstimation(); + Statistics outStats = filterEstimation.estimate(predicate, stats); + Assertions.assertEquals(rowCount * FilterEstimation.DEFAULT_INEQUALITY_COEFFICIENT, + outStats.getRowCount()); + } + + @Test + public void unknownEqualToUnknown() { + SlotReference ym = new SlotReference("a", new VarcharType(7)); + ColumnStatistic ymStats = ColumnStatistic.UNKNOWN; + double rowCount = 404962.0; + Statistics stats = new StatisticsBuilder() + .setRowCount(rowCount).putColumnStatistics(ym, ymStats) + .build(); + + EqualTo predicate = new EqualTo(ym, + new Left(new org.apache.doris.nereids.trees.expressions.literal.StringLiteral("2024-08-14"), + new IntegerLiteral(7)) + ); + FilterEstimation filterEstimation = new FilterEstimation(); + Statistics outStats = filterEstimation.estimate(predicate, stats); + Assertions.assertEquals(rowCount * FilterEstimation.DEFAULT_INEQUALITY_COEFFICIENT, + outStats.getRowCount()); + } + // a > 500 and b < 100 or a = c @Test public void test1() {