diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java index 382d058d3dc1d6..fb3d746fb1076a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java @@ -586,15 +586,24 @@ public long getCountFromSnapshot() throws UserException { return 0; } - // `TOTAL_POSITION_DELETES` is need to 0, - // because prevent 'dangling delete' problem after `rewrite_data_files` - // ref: https://iceberg.apache.org/docs/nightly/spark-procedures/#rewrite_position_delete_files Map summary = snapshot.summary(); - if (!summary.get(IcebergUtils.TOTAL_EQUALITY_DELETES).equals("0") - || !summary.get(IcebergUtils.TOTAL_POSITION_DELETES).equals("0")) { + if (!summary.get(IcebergUtils.TOTAL_EQUALITY_DELETES).equals("0")) { + // has equality delete files, can not push down count + return -1; + } + + long deleteCount = Long.parseLong(summary.get(IcebergUtils.TOTAL_POSITION_DELETES)); + if (deleteCount == 0) { + // no delete files, can push down count directly + return Long.parseLong(summary.get(IcebergUtils.TOTAL_RECORDS)); + } + if (sessionVariable.ignoreIcebergDanglingDelete) { + // has position delete files, if we ignore dangling delete, can push down count + return Long.parseLong(summary.get(IcebergUtils.TOTAL_RECORDS)) - deleteCount; + } else { + // otherwise, can not push down count return -1; } - return Long.parseLong(summary.get(IcebergUtils.TOTAL_RECORDS)); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index ec7a4f4a25b610..848eb619299b99 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -3121,6 +3121,19 @@ public boolean isEnableESParallelScroll() { ) public boolean useV3StorageFormat = false; + public static final String IGNORE_ICEBERG_DANGLING_DELETE = "ignore_iceberg_dangling_delete"; + @VariableMgr.VarAttr(name = IGNORE_ICEBERG_DANGLING_DELETE, + description = {"是否忽略 Iceberg 表中 dangling delete 文件对 COUNT(*) 统计信息的影响。" + + "默认为 true,COUNT(*) 会直接从元信息中获取行数,性能更好,但是如果有 dangling delete,结果可能是不准确的。" + + "设置为 false 时,COUNT(*) 会扫描数据文件以排除 dangling delete 文件的影响。", + " Whether to ignore the impact of dangling delete files in Iceberg tables on COUNT(*) statistics. " + + "The default is true, COUNT(*) will directly obtain the number of rows from metadata, " + + "which has better performance, but if there are dangling deletes, " + + "the result may be inaccurate. " + + "When set to false, COUNT(*) will scan data files " + + "to exclude the impact of dangling delete files."}) + public boolean ignoreIcebergDanglingDelete = false; + // If this fe is in fuzzy mode, then will use initFuzzyModeVariables to generate some variables, // not the default value set in the code. @SuppressWarnings("checkstyle:Indentation") diff --git a/regression-test/data/external_table_p0/iceberg/test_iceberg_optimize_count.out b/regression-test/data/external_table_p0/iceberg/test_iceberg_optimize_count.out index 20d03ad9c06bec..a64d8480129d8b 100644 --- a/regression-test/data/external_table_p0/iceberg/test_iceberg_optimize_count.out +++ b/regression-test/data/external_table_p0/iceberg/test_iceberg_optimize_count.out @@ -38,3 +38,9 @@ -- !q09 -- 2 +-- !sql_count1 -- +2 + +-- !sql_count1 -- +1 + diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy index 0f1f5535c058e9..4815de33584b5a 100644 --- a/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy +++ b/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy @@ -162,10 +162,19 @@ suite("test_iceberg_optimize_count", "p0,external,doris,external_docker,external qt_q09 """${sqlstr5}""" + sql """ set ignore_iceberg_dangling_delete=false""" explain { sql("""${sqlstr5}""") contains """pushdown agg=COUNT (-1)""" } + qt_sql_count1 """select count(*) from ${catalog_name}.test_db.dangling_delete_after_write;""" + + sql """ set ignore_iceberg_dangling_delete=true""" + explain { + sql("""${sqlstr5}""") + contains """pushdown agg=COUNT (1)""" + } + qt_sql_count1 """select count(*) from ${catalog_name}.test_db.dangling_delete_after_write;""" } finally { sql """ set enable_count_push_down_for_external_table=true; """