Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -586,15 +586,24 @@ public long getCountFromSnapshot() throws UserException {
return 0;
}

// `TOTAL_POSITION_DELETES` is need to 0,
// because prevent 'dangling delete' problem after `rewrite_data_files`
// ref: https://iceberg.apache.org/docs/nightly/spark-procedures/#rewrite_position_delete_files
Map<String, String> summary = snapshot.summary();
if (!summary.get(IcebergUtils.TOTAL_EQUALITY_DELETES).equals("0")
|| !summary.get(IcebergUtils.TOTAL_POSITION_DELETES).equals("0")) {
if (!summary.get(IcebergUtils.TOTAL_EQUALITY_DELETES).equals("0")) {
// has equality delete files, can not push down count
return -1;
}

long deleteCount = Long.parseLong(summary.get(IcebergUtils.TOTAL_POSITION_DELETES));
if (deleteCount == 0) {
// no delete files, can push down count directly
return Long.parseLong(summary.get(IcebergUtils.TOTAL_RECORDS));
}
if (sessionVariable.ignoreIcebergDanglingDelete) {
// has position delete files, if we ignore dangling delete, can push down count
return Long.parseLong(summary.get(IcebergUtils.TOTAL_RECORDS)) - deleteCount;
} else {
// otherwise, can not push down count
return -1;
}
return Long.parseLong(summary.get(IcebergUtils.TOTAL_RECORDS));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3121,6 +3121,19 @@ public boolean isEnableESParallelScroll() {
)
public boolean useV3StorageFormat = false;

public static final String IGNORE_ICEBERG_DANGLING_DELETE = "ignore_iceberg_dangling_delete";
@VariableMgr.VarAttr(name = IGNORE_ICEBERG_DANGLING_DELETE,
description = {"是否忽略 Iceberg 表中 dangling delete 文件对 COUNT(*) 统计信息的影响。"
+ "默认为 true,COUNT(*) 会直接从元信息中获取行数,性能更好,但是如果有 dangling delete,结果可能是不准确的。"
+ "设置为 false 时,COUNT(*) 会扫描数据文件以排除 dangling delete 文件的影响。",
" Whether to ignore the impact of dangling delete files in Iceberg tables on COUNT(*) statistics. "
+ "The default is true, COUNT(*) will directly obtain the number of rows from metadata, "
+ "which has better performance, but if there are dangling deletes, "
+ "the result may be inaccurate. "
+ "When set to false, COUNT(*) will scan data files "
+ "to exclude the impact of dangling delete files."})
public boolean ignoreIcebergDanglingDelete = false;

// If this fe is in fuzzy mode, then will use initFuzzyModeVariables to generate some variables,
// not the default value set in the code.
@SuppressWarnings("checkstyle:Indentation")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,9 @@
-- !q09 --
2

-- !sql_count1 --
2

-- !sql_count1 --
1

Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,19 @@ suite("test_iceberg_optimize_count", "p0,external,doris,external_docker,external

qt_q09 """${sqlstr5}"""

sql """ set ignore_iceberg_dangling_delete=false"""
explain {
sql("""${sqlstr5}""")
contains """pushdown agg=COUNT (-1)"""
}
qt_sql_count1 """select count(*) from ${catalog_name}.test_db.dangling_delete_after_write;"""

sql """ set ignore_iceberg_dangling_delete=true"""
explain {
sql("""${sqlstr5}""")
contains """pushdown agg=COUNT (1)"""
}
qt_sql_count1 """select count(*) from ${catalog_name}.test_db.dangling_delete_after_write;"""

} finally {
sql """ set enable_count_push_down_for_external_table=true; """
Expand Down
Loading