From 7547fe3319e599cc18372d5e9ac4a27eae7f04b1 Mon Sep 17 00:00:00 2001 From: minghong Date: Wed, 26 Mar 2025 09:38:16 +0800 Subject: [PATCH 1/4] catch any exception in stats derive --- .../nereids/stats/ExpressionEstimation.java | 22 ++++++++++++++---- .../doris/nereids/stats/StatsCalculator.java | 23 +++++++++++++++++-- .../doris/nereids/trees/plans/GroupPlan.java | 2 +- .../org/apache/doris/qe/SessionVariable.java | 6 +++++ .../doris/statistics/ColumnStatistic.java | 16 +++++++++++++ 5 files changed, 62 insertions(+), 7 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java index 86c1c08463e885..5408333623a0fb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java @@ -95,6 +95,8 @@ import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.types.coercion.CharacterType; +import org.apache.doris.qe.ConnectContext; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.ColumnStatisticBuilder; import org.apache.doris.statistics.Statistics; @@ -121,11 +123,23 @@ public class ExpressionEstimation extends ExpressionVisitor columnStatisticMap = new HashMap<>(); + for (Slot slot : plan.getOutput()) { + columnStatisticMap.put(slot, ColumnStatistic.createUnknownByDataType(slot.getDataType())); + } + newStats = new Statistics(1, 1, columnStatisticMap); + } else { + newStats = ((GroupPlan) plan.child(0)).getStats(); + } + } newStats.normalizeColumnStatistics(); // We ensure that the rowCount remains unchanged in order to make the cost of each plan comparable. + final Statistics tmpStats = newStats; if (groupExpression.getOwnerGroup().getStatistics() == null) { boolean isReliable = groupExpression.getPlan().getExpressions().stream() - .noneMatch(e -> newStats.isInputSlotsUnknown(e.getInputSlots())); + .noneMatch(e -> tmpStats.isInputSlotsUnknown(e.getInputSlots())); groupExpression.getOwnerGroup().setStatsReliable(isReliable); groupExpression.getOwnerGroup().setStatistics(newStats); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/GroupPlan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/GroupPlan.java index 49056add395018..b45610da53ef6a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/GroupPlan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/GroupPlan.java @@ -62,7 +62,7 @@ public List getExpressions() { @Override public Statistics getStats() { - throw new IllegalStateException("GroupPlan can not invoke getStats()"); + return group.getStatistics(); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 1a5021512780d3..f3ac6f1cc5b0f9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -2271,6 +2271,11 @@ public void setDetailShapePlanNodes(String detailShapePlanNodes) { "use other health replica when the use_fix_replica meet error" }) public boolean fallbackOtherReplicaWhenFixedCorrupt = false; + @VariableMgr.VarAttr(name = "FE_DEBUG", needForward = true, fuzzy = true, + description = {"when set true, FE will throw exceptions instead swallow them. This is used for test", + "when set true, FE will throw exceptions instead swallow them. This is used for test"}) + public boolean feDebug = false; + @VariableMgr.VarAttr(name = SHOW_ALL_FE_CONNECTION, description = {"when it's true show processlist statement list all fe's connection", "当变量为true时,show processlist命令展示所有fe的连接"}) @@ -2505,6 +2510,7 @@ public boolean isEnableESParallelScroll() { @SuppressWarnings("checkstyle:Indentation") public void initFuzzyModeVariables() { Random random = new SecureRandom(); + this.feDebug = true; this.parallelPipelineTaskNum = random.nextInt(8); this.parallelPrepareThreshold = random.nextInt(32) + 1; this.enableCommonExprPushdown = random.nextBoolean(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index 43b1a994f722ad..f96a52f78a3c71 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -22,6 +22,8 @@ import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.datasource.InternalCatalog; +import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.types.coercion.CharacterType; import org.apache.doris.persist.gson.GsonUtils; import org.apache.doris.statistics.util.StatisticsUtil; @@ -369,4 +371,18 @@ public boolean isUnKnown() { public ColumnStatistic withAvgSizeByte(double avgSizeByte) { return new ColumnStatisticBuilder(this).setAvgSizeByte(avgSizeByte).build(); } + + public static ColumnStatistic createUnknownByDataType(DataType dataType) { + if (dataType instanceof CharacterType) { + return new ColumnStatisticBuilder(1).setAvgSizeByte(CharacterType.DEFAULT_PRECISION).setNdv(1) + .setNumNulls(1).setMaxValue(Double.POSITIVE_INFINITY).setMinValue(Double.NEGATIVE_INFINITY) + .setIsUnknown(true).setUpdatedTime("") + .build(); + } else { + return new ColumnStatisticBuilder(1).setAvgSizeByte(dataType.width()).setNdv(1) + .setNumNulls(1).setMaxValue(Double.POSITIVE_INFINITY).setMinValue(Double.NEGATIVE_INFINITY) + .setIsUnknown(true).setUpdatedTime("") + .build(); + } + } } From 7bdce7b1863fd85a774dfa2784ade0cee3b84eeb Mon Sep 17 00:00:00 2001 From: minghong Date: Thu, 27 Mar 2025 11:48:48 +0800 Subject: [PATCH 2/4] update --- .../java/org/apache/doris/nereids/stats/StatsCalculator.java | 1 + .../src/main/java/org/apache/doris/qe/SessionVariable.java | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 21a6c29f5d7f16..aa00f7a61dee36 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -309,6 +309,7 @@ private void estimate() { if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().feDebug) { throw e; } + LOG.warn("stats calculation failed, plan " + plan.treeString(), e); // use unknown stats or the first child's stats if (plan.children().isEmpty() || !(plan.child(0) instanceof GroupPlan)) { Map columnStatisticMap = new HashMap<>(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index f3ac6f1cc5b0f9..bc8b41df9d32ac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -2271,7 +2271,8 @@ public void setDetailShapePlanNodes(String detailShapePlanNodes) { "use other health replica when the use_fix_replica meet error" }) public boolean fallbackOtherReplicaWhenFixedCorrupt = false; - @VariableMgr.VarAttr(name = "FE_DEBUG", needForward = true, fuzzy = true, + public static final String FE_DEBUG = "fe_debug"; + @VariableMgr.VarAttr(name = FE_DEBUG, needForward = true, fuzzy = true, description = {"when set true, FE will throw exceptions instead swallow them. This is used for test", "when set true, FE will throw exceptions instead swallow them. This is used for test"}) public boolean feDebug = false; From cd008d148e22b7205d3481ab24c2d231cb6db2f5 Mon Sep 17 00:00:00 2001 From: minghong Date: Wed, 2 Apr 2025 18:31:22 +0800 Subject: [PATCH 3/4] update --- .../nereids/stats/ExpressionEstimation.java | 14 +++++------- .../doris/statistics/ColumnStatistic.java | 22 ++++++++++++++----- .../stats/ExpressionEstimationTest.java | 9 ++++++++ 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java index 5408333623a0fb..e07799707847f8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java @@ -95,7 +95,6 @@ import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DataType; -import org.apache.doris.nereids.types.coercion.CharacterType; import org.apache.doris.qe.ConnectContext; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.ColumnStatisticBuilder; @@ -103,6 +102,8 @@ import com.google.common.base.Preconditions; import org.apache.commons.collections.CollectionUtils; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; import java.time.Instant; import java.time.LocalDate; @@ -114,7 +115,7 @@ * Used to estimate for expressions that not producing boolean value. */ public class ExpressionEstimation extends ExpressionVisitor { - + public static final Logger LOG = LogManager.getLogger(ExpressionEstimation.class); public static final long DAYS_FROM_0_TO_1970 = 719528; public static final long DAYS_FROM_0_TO_9999 = 3652424; private static final ExpressionEstimation INSTANCE = new ExpressionEstimation(); @@ -126,7 +127,7 @@ public static ColumnStatistic estimate(Expression expression, Statistics stats) try { ColumnStatistic columnStatistic = expression.accept(INSTANCE, stats); if (columnStatistic == null) { - return ColumnStatistic.UNKNOWN; + return ColumnStatistic.createUnknownByDataType(expression.getDataType()); } return columnStatistic; } catch (Exception e) { @@ -134,11 +135,8 @@ public static ColumnStatistic estimate(Expression expression, Statistics stats) if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().feDebug) { throw e; } - int sizeByte = expression.getDataType().width(); - if (expression.getDataType().isStringType()) { - sizeByte = Math.max(1, Math.min(sizeByte, CharacterType.DEFAULT_WIDTH)); - } - return ColumnStatistic.UNKNOWN.withAvgSizeByte(sizeByte); + LOG.warn("ExpressionEstimation failed : " + expression, e); + return ColumnStatistic.createUnknownByDataType(expression.getDataType()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index f96a52f78a3c71..e0e99fcdfad83f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -374,14 +374,24 @@ public ColumnStatistic withAvgSizeByte(double avgSizeByte) { public static ColumnStatistic createUnknownByDataType(DataType dataType) { if (dataType instanceof CharacterType) { - return new ColumnStatisticBuilder(1).setAvgSizeByte(CharacterType.DEFAULT_PRECISION).setNdv(1) - .setNumNulls(1).setMaxValue(Double.POSITIVE_INFINITY).setMinValue(Double.NEGATIVE_INFINITY) - .setIsUnknown(true).setUpdatedTime("") + return new ColumnStatisticBuilder(1) + .setAvgSizeByte(Math.max(1, Math.min(dataType.width(), CharacterType.DEFAULT_WIDTH))) + .setNdv(1) + .setNumNulls(1) + .setMaxValue(Double.POSITIVE_INFINITY) + .setMinValue(Double.NEGATIVE_INFINITY) + .setIsUnknown(true) + .setUpdatedTime("") .build(); } else { - return new ColumnStatisticBuilder(1).setAvgSizeByte(dataType.width()).setNdv(1) - .setNumNulls(1).setMaxValue(Double.POSITIVE_INFINITY).setMinValue(Double.NEGATIVE_INFINITY) - .setIsUnknown(true).setUpdatedTime("") + return new ColumnStatisticBuilder(1) + .setAvgSizeByte(dataType.width()) + .setNdv(1) + .setNumNulls(1) + .setMaxValue(Double.POSITIVE_INFINITY) + .setMinValue(Double.NEGATIVE_INFINITY) + .setIsUnknown(true) + .setUpdatedTime("") .build(); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/ExpressionEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/ExpressionEstimationTest.java index 91da5192b48c02..39ff8a10d4c437 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/ExpressionEstimationTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/ExpressionEstimationTest.java @@ -447,4 +447,13 @@ public void testLiteral() { Assertions.assertEquals(est.avgSizeByte, 1); Assertions.assertEquals(est.numNulls, 1); } + + @Test + public void testThrowException() { + SlotReference a = new SlotReference("a", StringType.INSTANCE); + Cast cast = new Cast(a, DateType.INSTANCE); + // do not throw any exception + ColumnStatistic est = ExpressionEstimation.estimate(cast, null); + Assertions.assertTrue(est.isUnKnown()); + } } From db7e505667fd0a02672b7000885cb57365a0b4cc Mon Sep 17 00:00:00 2001 From: englefly Date: Wed, 2 Apr 2025 20:10:11 +0800 Subject: [PATCH 4/4] update --- .../java/org/apache/doris/nereids/stats/StatsCalculator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index aa00f7a61dee36..6640e968c9e9cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -309,7 +309,7 @@ private void estimate() { if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().feDebug) { throw e; } - LOG.warn("stats calculation failed, plan " + plan.treeString(), e); + LOG.warn("stats calculation failed, plan " + plan.toString(), e); // use unknown stats or the first child's stats if (plan.children().isEmpty() || !(plan.child(0) instanceof GroupPlan)) { Map columnStatisticMap = new HashMap<>();